// Entry guard: stop execution unless the JPATH_PLATFORM constant is defined.
10 defined(
'JPATH_PLATFORM') or die;
// Static cache of filter instances; getInstance() keys it by an md5 signature
// of its arguments.
30 protected static $instances = array();
// Tag names consulted by _cleanTags() when $xssAuto is truthy.
// The array contents are not visible in this excerpt.
78 public $tagBlacklist = array(
// Attribute names consulted by _cleanAttributes() when $xssAuto is truthy.
// The array contents are not visible in this excerpt.
109 public $attrBlacklist = array(
// Constructor: stores the filter configuration on the instance.
//
// $tagsArray  - tag names; cast to array and lower-cased element by element.
// $attrArray  - attribute names; cast to array and lower-cased element by element.
// $tagsMethod - stored unchanged; read by _cleanTags() together with the
//               result of looking the tag up in $tagsArray.
// $attrMethod - stored unchanged; read by _cleanAttributes() together with
//               the result of looking the attribute up in $attrArray.
// $xssAuto    - stored unchanged; when truthy the built-in tag/attribute
//               blacklists are applied.
128 public function __construct($tagsArray = array(), $attrArray = array(), $tagsMethod = 0, $attrMethod = 0, $xssAuto = 1)
// Normalise the lists to lower case so later in_array() lookups, which
// lower-case the candidate name, compare like with like.
131 $tagsArray = array_map(
'strtolower', (array) $tagsArray);
132 $attrArray = array_map(
'strtolower', (array) $attrArray);
135 $this->tagsArray = $tagsArray;
136 $this->attrArray = $attrArray;
137 $this->tagsMethod = $tagsMethod;
138 $this->attrMethod = $attrMethod;
139 $this->xssAuto = $xssAuto;
// Returns (by reference) a cached JFilterInput for the given configuration,
// creating it on first use. Parameters are forwarded unchanged to the
// constructor.
155 public static function &getInstance($tagsArray = array(), $attrArray = array(), $tagsMethod = 0, $attrMethod = 0, $xssAuto = 1)
// Cache key: md5 of the serialized argument list, so equal argument sets
// share one instance.
157 $sig = md5(serialize(array($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto)));
159 if (empty(self::$instances[$sig]))
161 self::$instances[$sig] =
new JFilterInput($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto);
164 return self::$instances[$sig];
// Sanitises $source according to $type, which is compared upper-cased.
// The individual case labels, the break statements and the final return are
// not visible in this excerpt, so the notes below describe each visible
// assignment on its own rather than naming the type it belongs to.
194 public function clean($source, $type =
'string')
197 switch (strtoupper($type))
// First optionally-signed run of digits, cast to int. $matches[0] is read
// even when nothing matched; the @ operator silences the resulting notice.
202 preg_match(
'/-?[0-9]+/', (
string) $source, $matches);
203 $result = @ (int) $matches[0];
// Same extraction, followed by abs() so the result is non-negative.
208 preg_match(
'/-?[0-9]+/', (
string) $source, $matches);
209 $result = @ abs((
int) $matches[0]);
// Optionally-signed decimal number with an optional fractional part, cast to float.
215 preg_match(
'/-?[0-9]+(\.[0-9]+)?/', (
string) $source, $matches);
216 $result = @ (float) $matches[0];
// Plain boolean cast.
221 $result = (bool) $source;
// Remove everything except letters and underscore (case-insensitive).
225 $result = (string) preg_replace(
'/[^A-Z_]/i',
'', $source);
// Remove everything except letters and digits.
229 $result = (string) preg_replace(
'/[^A-Z0-9]/i',
'', $source);
// Remove everything except letters, digits, underscore, dot and hyphen,
// then drop any leading dots.
233 $result = (string) preg_replace(
'/[^A-Z0-9_\.-]/i',
'', $source);
234 $result = ltrim($result,
'.');
// Remove everything except letters, digits, '/', '+' and '='.
238 $result = (string) preg_replace(
'/[^A-Z0-9\/+=]/i',
'', $source);
// Decode entities first, then run the tag/attribute filter.
242 $result = (string) $this->_remove($this->_decode((
string) $source));
// Run the tag/attribute filter without decoding first.
246 $result = (string) $this->_remove((
string) $source);
// Plain array cast.
250 $result = (array) $source;
// Accept the value only when the whole string matches the pattern: segments
// separated by '/' or a backslash, each starting with a letter, digit,
// underscore or hyphen. When it does not match, $matches[0] is unset and the
// suppressed read is cast to a string.
254 $pattern =
'/^[A-Za-z0-9_-]+[A-Za-z0-9_\.-]*([\\\\\/][A-Za-z0-9_-]+[A-Za-z0-9_\.-]*)*$/';
255 preg_match($pattern, (
string) $source, $matches);
256 $result = @ (string) $matches[0];
// Remove control characters (0x00-0x1F, 0x7F) and the characters < > " ' % &.
260 $result = (string) preg_replace(
'/[\x00-\x1F\x7F<>"\'%&]/',
'', $source);
// Array input: each string element is decoded and filtered in place;
// non-string elements are not touched by the visible lines.
269 if (is_array($source))
271 foreach ($source as $key => $value)
274 if (is_string($value))
276 $source[$key] = $this->_remove($this->_decode($value));
// Non-empty string input: decode and filter the whole string.
284 if (is_string($source) && !empty($source))
287 $result = $this->_remove($this->_decode($source));
// Reports whether an attribute looks like a script-injection vector.
//
// $attrSubSet - array whose element 0 is the attribute name and element 1 the
//               attribute value; both are lower-cased before testing.
// Returns true when the value contains 'expression' and the name is 'style',
// or when the value contains any of 'javascript:', 'behaviour:', 'vbscript:',
// 'mocha:' or 'livescript:'; false otherwise.
// NOTE(review): both indices are read unconditionally - confirm callers
// always supply a value at index 1.
310 public static function checkAttribute($attrSubSet)
312 $attrSubSet[0] = strtolower($attrSubSet[0]);
313 $attrSubSet[1] = strtolower($attrSubSet[1]);
315 return (((strpos($attrSubSet[1],
'expression') !==
false) && ($attrSubSet[0]) ==
'style') || (strpos($attrSubSet[1],
'javascript:') !==
false) ||
316 (strpos($attrSubSet[1],
'behaviour:') !==
false) || (strpos($attrSubSet[1],
'vbscript:') !==
false) ||
317 (strpos($attrSubSet[1],
'mocha:') !==
false) || (strpos($attrSubSet[1],
'livescript:') !==
false));
// Applies _cleanTags() repeatedly until a pass leaves $source unchanged, so
// markup that only becomes a tag after an earlier removal is caught as well.
// The return statement is not visible in this excerpt.
// NOTE(review): _cleanTags() runs twice per iteration (once in the loop
// condition, once in the body); the comparison uses loose `!=`.
329 protected function _remove($source)
334 while ($source != $this->_cleanTags($source))
336 $source = $this->_cleanTags($source);
// Walks $source tag by tag, copying accepted output into $preTag and
// consuming input from $postTag. Braces, else branches, the initialisation
// of $preTag / $postTag / $attrSet and the return statement are not visible
// in this excerpt, so the notes below describe the visible statements only.
352 protected function _cleanTags($source)
// Neutralise characters inside quoted attribute values before scanning.
355 $source = $this->_escapeAttributeValues($source);
360 $currentSpace =
false;
366 $tagOpen_start = strpos($source,
'<');
// Main loop: one iteration per '<' still present in the unprocessed input.
368 while ($tagOpen_start !==
false)
// Move the text preceding '<' to the output; $postTag now starts at '<' and
// $fromTagOpen is everything after that '<'.
371 $preTag .= substr($postTag, 0, $tagOpen_start);
372 $postTag = substr($postTag, $tagOpen_start);
373 $fromTagOpen = substr($postTag, 1);
374 $tagOpen_end = strpos($fromTagOpen,
'>');
// Look for a further '<' after offset $tagOpen_start + 1.
// NOTE(review): $nextOpenTag is an offset into $postTag while $tagOpen_end
// is an offset into $fromTagOpen (which starts one character later) -
// confirm the comparison below is intended.
377 $nextOpenTag = (strlen($postTag) > $tagOpen_start) ? strpos($postTag,
'<', $tagOpen_start + 1) :
false;
// Another '<' appears before the closing '>': remove the single character at
// $tagOpen_start and search again.
379 if (($nextOpenTag !==
false) && ($nextOpenTag < $tagOpen_end))
382 $postTag = substr($postTag, 0, $tagOpen_start) . substr($postTag, $tagOpen_start + 1);
383 $tagOpen_start = strpos($postTag,
'<');
// No closing '>' at all: discard input through $tagOpen_start and search again.
388 if ($tagOpen_end ===
false)
390 $postTag = substr($postTag, $tagOpen_start + 1);
391 $tagOpen_start = strpos($postTag,
'<');
// A nested '<' precedes the '>': copy the input up to and including it to the
// output and restart from there.
396 $tagOpen_nested = strpos($fromTagOpen,
'<');
398 if (($tagOpen_nested !==
false) && ($tagOpen_nested < $tagOpen_end))
400 $preTag .= substr($postTag, 0, ($tagOpen_nested + 1));
401 $postTag = substr($postTag, ($tagOpen_nested + 1));
402 $tagOpen_start = strpos($postTag,
'<');
// $currentTag is the text between '<' and '>'; $tagLeft is the part of it
// still to be parsed for attributes.
407 $tagOpen_nested = (strpos($fromTagOpen,
'<') + $tagOpen_start + 1);
408 $currentTag = substr($fromTagOpen, 0, $tagOpen_end);
409 $tagLength = strlen($currentTag);
410 $tagLeft = $currentTag;
412 $currentSpace = strpos($tagLeft,
' ');
// A leading '/' marks a closing tag. Two assignments of $tagName are visible:
// the first space-delimited token with its first character removed (the
// closing-tag form), and the first token as-is. The else that presumably
// separates them is not visible in this excerpt.
415 if (substr($currentTag, 0, 1) ==
'/')
419 list ($tagName) = explode(
' ', $currentTag);
420 $tagName = substr($tagName, 1);
426 list ($tagName) = explode(
' ', $currentTag);
// Drop the tag when its name is not a letter followed by letters/digits, is
// empty, or is in $tagBlacklist while $xssAuto is truthy. $tagLength + 2
// skips the tag text plus its '<' and '>'.
434 if ((!preg_match(
"/^[a-z][a-z0-9]*$/i", $tagName)) || (!$tagName) || ((in_array(strtolower($tagName), $this->tagBlacklist)) && ($this->xssAuto)))
436 $postTag = substr($postTag, ($tagLength + 2));
437 $tagOpen_start = strpos($postTag,
'<');
// Attribute loop: one iteration per space-separated chunk left in $tagLeft.
447 while ($currentSpace !==
false)
// Positions of the next '=', next space and the first pair of double quotes,
// all relative to $fromSpace.
450 $fromSpace = substr($tagLeft, ($currentSpace + 1));
451 $nextEqual = strpos($fromSpace,
'=');
452 $nextSpace = strpos($fromSpace,
' ');
453 $openQuotes = strpos($fromSpace,
'"');
454 $closeQuotes = strpos(substr($fromSpace, ($openQuotes + 1)),
'"') + $openQuotes + 1;
457 $startAttPosition = 0;
// When an '=' followed by '"' (optional whitespace around '=') is present,
// recompute the positions from that match so spaces around '=' are tolerated.
460 if (preg_match(
'#\s*=\s*\"#', $fromSpace, $matches, PREG_OFFSET_CAPTURE))
462 $startAtt = $matches[0][0];
463 $startAttPosition = $matches[0][1];
464 $closeQuotes = strpos(substr($fromSpace, ($startAttPosition + strlen($startAtt))),
'"') + $startAttPosition + strlen($startAtt);
465 $nextEqual = $startAttPosition + strpos($startAtt,
'=');
466 $openQuotes = $startAttPosition + strpos($startAtt,
'"');
467 $nextSpace = strpos(substr($fromSpace, $closeQuotes),
' ') + $closeQuotes;
// Chunk is not a lone '/' and either has no '=' or a space comes before the
// '=': the visible lines set $attribEnd to just before the first '/' or to
// just before the next space (the condition choosing between the two is not
// visible), then cut $fromSpace after $attribEnd.
471 if ($fromSpace !=
'/' && (($nextEqual && $nextSpace && $nextSpace < $nextEqual) || !$nextEqual))
475 $attribEnd = strpos($fromSpace,
'/') - 1;
479 $attribEnd = $nextSpace - 1;
484 $fromSpace = substr($fromSpace, $attribEnd + 1);
// Extract the attribute text: with an '=' present, take through the closing
// quote when a quoted value exists, otherwise up to the next space.
487 if (strpos($fromSpace,
'=') !==
false)
491 if (($openQuotes !==
false) && (strpos(substr($fromSpace, ($openQuotes + 1)),
'"') !==
false))
493 $attr = substr($fromSpace, 0, ($closeQuotes + 1));
497 $attr = substr($fromSpace, 0, $nextSpace);
503 if ($fromSpace !=
'/')
505 $attr = substr($fromSpace, 0, $nextSpace);
// The body of this check is not visible in this excerpt.
510 if (!$attr && $fromSpace !=
'/')
// Continue with whatever follows the attribute just extracted.
519 $tagLeft = substr($fromSpace, strlen($attr));
520 $currentSpace = strpos($tagLeft,
' ');
// The condition is true when the tag is listed in $tagsArray and $tagsMethod
// is falsy, or is not listed and $tagsMethod is truthy. Presumably the
// rebuild below is guarded by it - braces are not visible, confirm.
524 $tagFound = in_array(strtolower($tagName), $this->tagsArray);
527 if ((!$tagFound && $this->tagsMethod) || ($tagFound && !$this->tagsMethod))
// Re-emit the opening tag with only the attributes that survive
// _cleanAttributes().
533 $attrSet = $this->_cleanAttributes($attrSet);
534 $preTag .=
'<' . $tagName;
536 for ($i = 0, $count = count($attrSet); $i < $count; $i++)
538 $preTag .=
' ' . $attrSet[$i];
// NOTE(review): strpos() is used as a truthiness test here, so a match at
// offset 0 would count as "not found"; the search is also case-sensitive.
542 if (strpos($fromTagOpen,
'</' . $tagName))
554 $preTag .=
'</' . $tagName .
'>';
// Advance past the processed tag ('<' + tag text + '>') and find the next one.
559 $postTag = substr($postTag, ($tagLength + 2));
560 $tagOpen_start = strpos($postTag,
'<');
// Filters a list of raw attribute strings and rebuilds the accepted ones as
// name="value" entries in $newSet. Braces, continue statements, the
// initialisation of $newSet and the return are not visible in this excerpt.
581 protected function _cleanAttributes($attrSet)
585 $count = count($attrSet);
588 for ($i = 0; $i < $count; $i++)
// Split into name and value at the first '=' only.
597 $attrSubSet = explode(
'=', trim($attrSet[$i]), 2);
// Keep only the last space-separated token of the name.
// NOTE(review): array_pop() takes its argument by reference; passing the
// result of explode() directly triggers PHP's "Only variables should be
// passed by reference" notice.
600 $attrSubSet[0] = array_pop(explode(
' ', trim($attrSubSet[0])));
// Reject test for the attribute name; what happens when it is true is not
// visible in this excerpt.
// NOTE(review): '/[a-z]*$/i' has no start anchor and allows an empty match,
// so preg_match() succeeds for every string and the first operand is never
// true. The 'on' prefix test is case-sensitive, unlike the blacklist lookup
// which lower-cases the name - confirm both are intended.
605 if ((!preg_match(
'/[a-z]*$/i', $attrSubSet[0]))
606 || (($this->xssAuto) && ((in_array(strtolower($attrSubSet[0]), $this->attrBlacklist))
607 || (substr($attrSubSet[0], 0, 2) ==
'on'))))
// Value clean-up: trim, remove '&#' sequences, remove CR/LF, remove double
// quotes, strip one pair of surrounding single quotes, then stripslashes().
613 if (isset($attrSubSet[1]))
616 $attrSubSet[1] = trim($attrSubSet[1]);
619 $attrSubSet[1] = str_replace(
'&#',
'', $attrSubSet[1]);
622 $attrSubSet[1] = preg_replace(
'/[\n\r]/',
'', $attrSubSet[1]);
625 $attrSubSet[1] = str_replace(
'"',
'', $attrSubSet[1]);
628 if ((substr($attrSubSet[1], 0, 1) ==
"'") && (substr($attrSubSet[1], (strlen($attrSubSet[1]) - 1), 1) ==
"'"))
630 $attrSubSet[1] = substr($attrSubSet[1], 1, (strlen($attrSubSet[1]) - 2));
633 $attrSubSet[1] = stripslashes($attrSubSet[1]);
// Script-vector check on the name/value pair; the branch body is not visible.
641 if (self::checkAttribute($attrSubSet))
// The condition is true when the attribute is listed in $attrArray and
// $attrMethod is falsy, or is not listed and $attrMethod is truthy.
647 $attrFound = in_array(strtolower($attrSubSet[0]), $this->attrArray);
650 if ((!$attrFound && $this->attrMethod) || ($attrFound && !$this->attrMethod))
// Rebuild the attribute. empty() treats the string "0" as empty, hence the
// dedicated "0" branch; any other empty value is emitted as name="".
653 if (empty($attrSubSet[1]) ===
false)
655 $newSet[] = $attrSubSet[0] .
'="' . $attrSubSet[1] .
'"';
657 elseif ($attrSubSet[1] ===
"0")
661 $newSet[] = $attrSubSet[0] .
'="0"';
666 $newSet[] = $attrSubSet[0] .
'=""';
// Converts HTML entities in $source to characters: named entities through a
// translation table, then decimal and hexadecimal numeric entities through
// callbacks. The initialisation of $ttr, the closing of the callbacks and
// the return statement are not visible in this excerpt.
683 protected function _decode($source)
// From PHP 5.3.4 on, the table is requested explicitly for ISO-8859-1.
690 if (version_compare(PHP_VERSION,
'5.3.4',
'>='))
692 $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT,
'ISO-8859-1');
696 $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT);
// Invert the table: entity => character, with the character passed through
// utf8_encode().
699 foreach ($trans_tbl as $k => $v)
701 $ttr[$v] = utf8_encode($k);
705 $source = strtr($source, $ttr);
// Decimal numeric entities (&#NNN;).
// NOTE(review): chr() reduces its argument modulo 256, so code points above
// 255 are not decoded to the intended character.
708 $source = preg_replace_callback(
'/&#(\d+);/m',
function($m)
710 return utf8_encode(chr($m[1]));
// Hexadecimal numeric entities (&#xHH;).
// NOTE(review): this passes a string such as '0x41' to chr() and relies on
// implicit hex-string conversion, which PHP 7+ no longer performs - confirm
// on the target PHP version (hexdec() would make the conversion explicit).
715 $source = preg_replace_callback(
'/&#x([a-f0-9]+);/mi',
function($m)
717 return utf8_encode(chr(
'0x' . $m[1]));
// Scans $source for quoted attribute values inside tags, rewrites each value
// (character replacement plus _stripCSSExpressions()) and returns the
// reassembled string. Braces and the else keyword are not visible in this
// excerpt.
733 protected function _escapeAttributeValues($source)
735 $alreadyFiltered =
'';
736 $remainder = $source;
// NOTE(review): as shown here $badChars and $escapedChars are identical,
// which would make the str_replace() below a no-op - confirm whether the
// replacement strings (e.g. HTML entities) were lost when this listing was
// produced.
737 $badChars = array(
'<',
'"',
'>');
738 $escapedChars = array(
'<',
'"',
'>');
// Each iteration finds the next '<' followed, without an intervening '>', by
// an '=' and an opening single or double quote.
742 while (preg_match(
'#<[^>]*?=\s*?(\"|\')#s', $remainder, $matches, PREG_OFFSET_CAPTURE))
// $quotePosition is the offset where the match starts; $nextBefore is the
// offset just after the opening quote, i.e. where the value begins.
745 $quotePosition = $matches[0][1];
746 $nextBefore = $quotePosition + strlen($matches[0][0]);
// Closing-quote pattern for the same quote character: the quote followed by
// '/>', '>', whitespace, or the end of the string.
750 $quote = substr($matches[0][0], -1);
751 $pregMatch = ($quote ==
'"') ?
'#(\"\s*/\s*>|\"\s*>|\"\s+|\"$)#' :
"#(\'\s*/\s*>|\'\s*>|\'\s+|\'$)#";
// $nextAfter is the offset of the closing quote, or the end of $remainder
// when no closing quote is found.
754 if (preg_match($pregMatch, substr($remainder, $nextBefore), $matches, PREG_OFFSET_CAPTURE))
757 $nextAfter = $nextBefore + $matches[0][1];
762 $nextAfter = strlen($remainder);
766 $attributeValue = substr($remainder, $nextBefore, $nextAfter - $nextBefore);
// Rewrite the value, append everything up to and including it plus a closing
// quote to the output, and continue after the original closing quote.
769 $attributeValue = str_replace($badChars, $escapedChars, $attributeValue);
770 $attributeValue = $this->_stripCSSExpressions($attributeValue);
771 $alreadyFiltered .= substr($remainder, 0, $nextBefore) . $attributeValue . $quote;
772 $remainder = substr($remainder, $nextAfter + 1);
776 return $alreadyFiltered . $remainder;
// Looks for CSS ':expression(' constructs in $source after removing
// /* ... */ comments, and strips ':expression' when one is found. The branch
// bodies and return statements are not visible in this excerpt.
788 protected function _stripCSSExpressions($source)
// Work on a copy with CSS comments removed (ungreedy match), so a comment
// cannot be used to split the keyword.
791 $test = preg_replace(
'#\/\*.*\*\/#U',
'', $source);
// NOTE(review): stripos() returns 0 for a match at the very start of $test,
// and !0 is true, so a value beginning with ':expression' is treated the
// same as "not found" - confirm whether `=== false` was intended.
794 if (!stripos($test,
':expression'))
// NOTE(review): this pattern has no `i` modifier while the stripos() check
// above and the str_ireplace() below are case-insensitive, so an upper- or
// mixed-case ':EXPRESSION(' does not reach the replacement - confirm.
803 if (preg_match_all(
'#:expression\s*\(#', $test, $matches))
806 $test = str_ireplace(
':expression',
'', $test);