<?php
/**
 * Blacklist-based sanitiser for user-submitted text and HTML fragments.
 *
 * simple() reduces input to plain text (converting between line breaks and
 * <br> in either direction); process() keeps HTML but strips black-listed
 * tags and attributes, white-lists iframe hosts and rebuilds links.
 *
 * NOTE(review): this is a regex blacklist, not an HTML parser. Treat it as
 * defence in depth rather than as a complete XSS barrier.
 */
class ParseFilter {
    /** Hosts (compared without a leading "www.") that may be embedded through an iframe. */
    private $allow_iframe = ['vkontakte.ru', 'vk.com', 'youtube.com', 'player.vimeo.com', 'dailymotion.com', 'video.yandex.ru', 'money.yandex.ru', 'player.rutv.ru', 'promodj.com', 'rutube.ru', 'embed.beatport.com', 'w.soundcloud.com', 'bandcamp.com', 'google.com'];

    /** Tag names whose opening/closing tags are removed; the text between them is kept. */
    private $tags_bad = ['applet', 'body', 'bgsound', 'base', 'basefont', 'frame', 'frameset', 'head', 'html', 'id', 'ilayer', 'layer', 'link', 'meta', 'name', 'script', 'style', 'title', 'xml', 'form'];

    /** Attribute names that are always dropped (srcdoc can carry entity-encoded markup into an allowed iframe). */
    private $attr_bad = ['action', 'background', 'codebase', 'dynsrc', 'lowsrc', 'lang', 'srcdoc'];

    /** Whether 4-byte UTF-8 sequences (emoji and other astral characters) are removed. */
    private $strip_4byte = true;

    /**
     * @param bool $strip_4byte Remove 4-byte UTF-8 sequences from all input (the historical
     *                          behaviour). Pass false to let emoji through.
     */
    public function __construct($strip_4byte = true) {
        $this->strip_4byte = (bool) $strip_4byte;
    }

    /**
     * Reduce input to plain text.
     *
     * @param string $data   Raw input.
     * @param bool   $revert false: strip tags, then turn line breaks into <br>;
     *                       true:  turn <br> variants into "\n", then strip tags.
     * @return string
     */
    public function simple($data, $revert = false) {
        $data = $this->stripFourByte( trim( (string) $data ) );
        if ( $revert ) {
            $data = str_replace( ["<br>", "<br/>", "<br />"], "\n", $data );
            $data = strip_tags( $data );
        } else {
            $data = strip_tags( $data );
            $data = str_replace( ["\r\n", "\r", "\n"], "<br>", $data );
        }
        return $this->neutralise( $data );
    }

    /** Plain text with line breaks converted to <br>. */
    public function process_nr2br($data) {
        return $this->simple( $data );
    }

    /** Plain text with <br> converted back to line breaks. */
    public function process_br2nr($data) {
        return $this->simple( $data, true );
    }

    /**
     * Sanitise an HTML fragment: strip black-listed tags/attributes, validate
     * iframes and links, escape PHP tag markers and collapse whitespace.
     *
     * @param string $data Raw HTML.
     * @return string
     */
    public function process($data) {
        $data = $this->stripFourByte( trim( (string) $data ) );

        // Removing one construct can splice the text around it into a new tag
        // (e.g. a rejected iframe sitting inside "<scr...ipt>"), so the whole
        // pipeline is repeated until the markup no longer changes.
        $pass = 0;
        do {
            $before = $data;
            $data = $this->filterTags( $data );
            // "\s[^>]*?" keeps the match inside a single opening tag; the old
            // "(.+?)" let "<a" start at <abbr>/<article>/... and run across tags.
            $data = (string) preg_replace_callback( "#<iframe(\s[^>]*?)src=['\"](.+?)['\"](.*?)>(.*?)</iframe>#is", [$this, 'check_iframe'], $data );
            $data = (string) preg_replace_callback( "#<a(\s[^>]*?)href=['\"](.+?)['\"](.*?)>(.*?)</a>#is", [$this, 'check_link'], $data );
        } while ( $data !== $before && ++$pass < 10 );
        if ( $data !== $before ) {
            // Still changing after the pass limit: treat as hostile and keep text only.
            $data = strip_tags( $data );
        }

        $data = str_replace( ["<br/>", "<br />"], "<br>", $data );
        $data = $this->neutralise( $data );
        $data = preg_replace( "!\s+!", " ", $data );
        return (string) $data;
    }

    /**
     * Remove 4-byte UTF-8 sequences when the instance is configured to do so.
     * Only continuation bytes are consumed after the lead byte, so a stray or
     * truncated lead byte cannot swallow the ASCII text that follows it.
     */
    private function stripFourByte($data) {
        if ( ! $this->strip_4byte ) return $data;
        $clean = preg_replace( '/[\xF0-\xF7][\x80-\xBF]{0,3}/', '', $data );
        return $clean === null ? $data : $clean;
    }

    /**
     * Escape PHP open/close tag markers and turn tabs and non-breaking spaces
     * (U+00A0) into plain spaces.
     */
    private function neutralise($data) {
        $data = str_replace( ["<?", "?>"], ["&lt;?", "?&gt;"], $data );
        return str_replace( ["\t", "\xC2\xA0"], " ", $data );
    }

    /**
     * Strip black-listed tags, drop empty formatting elements and rebuild every
     * opening tag from its filtered attributes.
     */
    private function filterTags($data) {
        $bad = '';
        if ( is_array($this->tags_bad) && count($this->tags_bad) ) {
            $bad = implode( '|', $this->tags_bad );
        }

        // Repeat until stable so that nested fragments cannot reassemble a
        // black-listed tag once the inner piece has been removed.
        do {
            $before = $data;
            if ( $bad !== '' ) {
                // \b stops "head" from matching <header>, "form" from matching <format>, etc.
                // Byte mode on purpose: the patterns are pure ASCII, and malformed UTF-8
                // must not make preg_replace() return null and wipe the text.
                $data = (string) preg_replace( '#<(' . $bad . ')\b[^>]*>(.*?)</\1>#is', '$2', $data );
                $data = (string) preg_replace( '#</?(' . $bad . ')\b[^>]*>#is', '', $data );
            }
            // Alternation, not a character class: only these elements are dropped when empty.
            $data = (string) preg_replace( '#<(div|p|span|h[1-4]|b|em|strong)\b[^>]*>\s*</\1>#is', '', $data );
        } while ( $data !== $before );

        if ( is_array($this->attr_bad) && count($this->attr_bad) ) {
            // The tag name ends at whitespace, "/" or ">", so "<img/onerror=...>" is
            // split into name + attributes instead of being kept verbatim.
            if ( preg_match_all( '/<([a-z][^\s\/>]*)([^>]*)>/i', $data, $tags, PREG_SET_ORDER ) ) {
                foreach ( $tags as $tag ) {
                    $attrs = $this->filterAttr( $tag[1], $tag[2] );
                    $data = str_replace( $tag[0], '<' . $tag[1] . ( $attrs !== '' ? ' ' . $attrs : '' ) . '>', $data );
                }
            }
        }
        return $data;
    }

    /**
     * Keep only double-quoted name="value" attributes that are not black-listed,
     * not empty, free of angle brackets and not flagged by filterXSS().
     *
     * @param string $tag  Tag name (currently unused).
     * @param string $attr Raw attribute text of one opening tag.
     * @return string Space-separated attribute list, or '' when nothing survives.
     */
    private function filterAttr($tag, $attr) {
        $kept = [];
        if ( preg_match_all( '/([\w\-]+)="([^"]*)"/i', $attr, $found, PREG_SET_ORDER ) ) {
            foreach ( $found as $pair ) {
                $name  = strtolower( $pair[1] );
                $value = $pair[2];
                if ( in_array($name, $this->attr_bad, true) ) continue;
                if ( trim($value) === '' || strpbrk($value, '><') !== false ) continue;
                if ( $this->filterXSS($name, $value) ) continue;
                // The attribute is re-emitted with its original spelling.
                $kept[] = $pair[1] . '="' . $value . '"';
            }
        }
        return implode( ' ', $kept );
    }

    /**
     * Tell whether an attribute looks like a script vector.
     *
     * @return bool true for event handlers / xmlns attributes and for values that
     *              contain a black-listed scheme or keyword once decoded.
     */
    private function filterXSS($attr, $value) {
        if ( preg_match('/^on|xmlns/i', $attr) ) return true;

        $xss = ['xss:', 'expression', 'javascript:', 'behaviour:', 'vbscript:', 'mocha:', 'data:', 'livescript:', 'document.cookie'];
        // ENT_HTML5 also decodes named forms such as &colon; and &Tab;.
        $value = html_entity_decode( $value, ENT_QUOTES | ENT_HTML5, 'utf-8' );
        // Browsers ignore tabs/newlines inside a URL scheme ("java\tscript:").
        $value = (string) preg_replace( '/[\x00-\x1F\x7F]+/', '', $value );
        foreach ( $xss as $needle ) {
            if ( stripos($value, $needle) !== false ) return true;
        }
        return false;
    }

    /**
     * preg_replace_callback handler for iframes: the element is kept only when
     * the host of its src is on the allow list.
     *
     * @param array $matches [1] attributes before src, [2] src value, [3] attributes after src.
     * @return string Rebuilt iframe, or '' when it is rejected.
     */
    private function check_iframe( $matches = [] ) {
        if ( ! isset($matches[2], $matches[3]) || $matches[2] === '' || strpos($matches[3], "src=") !== false ) return '';

        // Validate the URL the browser will actually see (entities decoded).
        $url = trim( html_entity_decode( $matches[2], ENT_QUOTES | ENT_HTML5, 'utf-8' ) );
        // Whitespace, control characters and backslashes are parsed differently by
        // browsers and by parse_url(), so such URLs are rejected outright.
        if ( $url === '' || preg_match( '/[\x00-\x20\x7F\\\\]/', $url ) ) return '';

        if ( strpos($url, '//') === 0 ) {
            $url = 'http:' . $url;
        } elseif ( ! preg_match( '#^[a-z][a-z0-9+.\-]*://#i', $url ) ) {
            // Scheme-less sources ("youtube.com/embed/...") were accepted before; keep that.
            $url = 'http://' . $url;
        }

        $parts = parse_url( $url );
        if ( ! is_array($parts) || empty($parts['host']) || empty($parts['scheme']) ) return '';
        if ( ! in_array( strtolower($parts['scheme']), ['http', 'https'], true ) ) return '';

        // Exact host comparison: a prefix test would also accept
        // "youtube.com.evil.example" or "youtube.com@evil.example".
        $host = strtolower( $parts['host'] );
        if ( strpos($host, 'www.') === 0 ) $host = substr( $host, 4 );
        if ( ! in_array($host, $this->allow_iframe, true) ) return '';

        return '<iframe src="' . $matches[2] . '"' . $matches[1] . $matches[3] . '></iframe>';
    }

    /**
     * preg_replace_callback handler for links: rebuilds the anchor with a
     * cleaned href and only the rel/target/style attributes.
     *
     * @param array $matches [0] whole anchor, [2] href value, [4] link text.
     * @return string Rebuilt anchor; '' when href or text is missing; the bare
     *                link text when the cleaned URL looks like a script vector.
     */
    private function check_link( $matches = [] ) {
        // Strict comparison: a link whose text is "0" is a valid link.
        if ( ! isset($matches[2], $matches[4]) || $matches[2] === '' || $matches[4] === '' ) return '';

        $url = strip_tags( trim( stripslashes( $matches[2] ) ) );
        $url = str_replace( '\"', '"', $url );
        $url = str_replace( ["'", '"'], '', $url );
        $url = htmlspecialchars( $url, ENT_QUOTES, 'utf-8' );
        // An ampersand that was already written as an entity must not be encoded twice.
        $url = str_replace( '&amp;amp;', '&amp;', $url );
        $url = str_replace( ' ', '%20', $url );

        // stripslashes() above can turn "java\script:" into "javascript:" after the
        // attribute filter has already run, so the final URL is checked again.
        if ( $this->filterXSS('href', $url) ) return $matches[4];

        $internal = ( defined('SITE_URL') && strpos($url, SITE_URL) !== false ) || strpos($url, '://') === false;
        $nofollow = $internal ? '' : ' rel="nofollow"';
        $blank = preg_match( "/target=['|\"]_blank['|\"]/i", $matches[0] ) ? ' target="_blank"' : '';
        $style = preg_match( "/style=['|\"](.+?)['|\"]/i", $matches[0], $css ) ? ' style="' . $css[1] . '"' : '';

        return '<a' . $nofollow . ' href="' . $url . '"' . $blank . $style . '>' . $matches[4] . '</a>';
    }
}
// Demo: print an emoji string before and after it goes through ParseFilter::process().
$parse = new ParseFilter();
$test = stripslashes(" 🔥🔥🔥 🎄 🎁❄️ 💌 ");
$test = htmlspecialchars($test);
// Before filtering (original note: the result is 🔥🔥🔥 🎄 🎁❄️ 💌).
echo $test . '<br >icons:';
// After filtering (original note: the result is "icons: ❄️"). process() removes
// 4-byte UTF-8 sequences here, so only the 3-byte snowflake is left.
echo $parse->process($test);