// All rights reserved. When finished, I'll find a suitable OS license.
// Haven't decided which one yet.
//
// Patches welcome with the understanding that sending them to me gives me
// permission to use them.

// KNOWN BUGS - does not properly deal with namespace attributes.
//              IE - become
//              (the before/after example for this note is missing from this
//              copy of the file)
//              RESPONSIBLE FUNCTION: filterAttributes()
//            - Need to check the scope of the base tag

/**
 * Filters a DOMDocument so that its markup complies with a Content Security
 * Policy described by the public $csp array.
 *
 * Typical use:
 *
 *     $filter = new cspfilter();
 *     $filter->httphost = 'www.example.org';   // a value you know is clean
 *     $filter->csp['allow'] = 'self';
 *     $filter->inputDom($domDocument);
 *     $filter->processData();                  // mutates $domDocument in place
 *     $filter->makeCSP();                      // optional: header or meta tag
 */
class cspfilter
{
    public $version = "0.4";

    // parameters for Content Security Policy
    public $csp = array(
        'allow'      => 'none',
        'img-src'    => '',
        'script-src' => '',
        'object-src' => '',
        'frame-src'  => '',
        'report-uri' => '',
        'policy-uri' => '',
    );

    // if you are going to use makeCSP() - do you want to send a header?
    // if left alone, it just adds a meta tag to document. Set to true
    // to actually send a header.
    public $cspHeader = false;

    // the host used for filtering when a csp setting is set to self
    public $httphost;

    // array of event attributes you don't want eaten by the class.
    // Note that if script is not allowed, the whitelist is ignored.
    public $eventWhitelist = array();

    // the DOMDocument being filtered (set through inputDom()). Declared
    // public because it used to be created as a dynamic, publicly readable
    // property.
    public $dom;

    // effective (post-fallback) source lists, one per filtered element family
    private $pv_allow;
    private $pv_imgsrc;
    private $pv_scriptsrc;
    private $pv_objectsrc;
    private $pv_framesrc;

    // last policy string built by makeCSP()
    private $cspContent;

    // lower-cased event attribute names that may stay in the document
    private $permittedEvents = array();

    // href of the document's <base> element, "" when there is none
    private $base = "";

    private $occurOnce = array('html', 'head', 'title', 'body');

    // isindex appears to be head only in html 3.2 - 4.0 doesn't specify head only
    private $headOnly = array('base', 'isindex', 'link', 'meta', 'style');

    private $noChildren = array(
        'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input',
        'isindex', 'link', 'meta', 'param', 'script',
    );

    // public functions

    /**
     * Seeds $httphost from the request's Host header.
     *
     * The Host header is client supplied: overwrite $httphost with a value
     * you know is clean before calling processData(), and never print it
     * without escaping. When no Host header is available (for example on the
     * command line) $httphost starts out as an empty string.
     */
    public function __construct()
    {
        $this->httphost = isset($_SERVER["HTTP_HOST"]) ? $_SERVER["HTTP_HOST"] : '';
    }

    /**
     * Legacy PHP 4-style constructor name.
     *
     * Kept as an ordinary method so code that calls it explicitly keeps
     * working; it simply re-runs the constructor logic.
     */
    public function cspfilter()
    {
        $this->__construct();
    }

    /**
     * Registers the document to be filtered.
     *
     * @param DOMDocument $input document that processData() will modify in place
     */
    public function inputDom(DOMDocument $input)
    {
        $this->dom = $input;
    }

    /**
     * Runs every filtering pass over the document given to inputDom().
     */
    public function processData()
    {
        $this->getFilterOptions();
        $this->makeBlackList();
        $this->walkTheDog();
        $this->deleteExtraOnce();
        $this->deleteNotInHead();
        // Read <base> only after misplaced <base> elements are gone, so a
        // <base> that is about to be deleted cannot influence source checks.
        $this->getBase();
        $this->deleteIllegitimateChildren();
        $this->forbiddenTags();
    }

    /**
     * Builds the policy string from $csp and either sends it as an
     * X-Content-Security-Policy header ($cspHeader == true) or appends it as
     * a <meta http-equiv> element to the document's first <head>.
     *
     * Not called within the class; call it from your script. When the
     * document has no <head>, no meta element is added.
     */
    public function makeCSP()
    {
        if (strlen($this->csp['allow']) == 0) {
            $policy = "allow none";
        } else {
            $policy = "allow " . $this->csp['allow'];
        }

        // Each directive is emitted under its own name (policy-uri used to be
        // written out as a second report-uri).
        $directives = array(
            'img-src', 'script-src', 'object-src', 'frame-src',
            'report-uri', 'policy-uri',
        );
        foreach ($directives as $directive) {
            if (strlen($this->csp[$directive]) > 0) {
                $policy .= "; " . $directive . " " . $this->csp[$directive];
            }
        }
        $this->cspContent = $policy;

        if ($this->cspHeader == true) {
            header("X-Content-Security-Policy: " . $policy);
            return;
        }

        $head = $this->dom->getElementsByTagName("head")->item(0);
        if ($head === null) {
            return;
        }
        $meta = $this->dom->createElement("meta");
        $meta->setAttribute("http-equiv", "X-Content-Security-Policy");
        $meta->setAttribute("content", $policy);
        $head->appendChild($meta);
    }

    // private functions

    /**
     * Normalises $csp (trim, lower-case, single spaces) and derives the
     * effective source list for each element family: the directive's own
     * value when set, otherwise the "allow" value.
     */
    private function getFilterOptions()
    {
        $sourceDirectives = array('allow', 'img-src', 'script-src', 'object-src', 'frame-src');
        foreach ($sourceDirectives as $directive) {
            $value = isset($this->csp[$directive]) ? (string) $this->csp[$directive] : '';
            $this->csp[$directive] = preg_replace('/\s+/', ' ', strtolower(trim($value)));
        }
        // URIs keep their case; only surrounding whitespace is removed
        foreach (array('report-uri', 'policy-uri') as $directive) {
            $value = isset($this->csp[$directive]) ? (string) $this->csp[$directive] : '';
            $this->csp[$directive] = trim($value);
        }

        if ($this->csp['allow'] === '') {
            $this->csp['allow'] = 'none';
        }

        // "self" is left in the lists and resolved to $httphost by
        // checkSource(), so it also works inside a multi-host list.
        $this->pv_allow     = $this->csp['allow'];
        $this->pv_imgsrc    = $this->effectivePolicy('img-src');
        $this->pv_scriptsrc = $this->effectivePolicy('script-src');
        $this->pv_objectsrc = $this->effectivePolicy('object-src');
        $this->pv_framesrc  = $this->effectivePolicy('frame-src');
    }

    /**
     * Returns the directive's own source list, or the "allow" list when the
     * directive is empty.
     */
    private function effectivePolicy($directive)
    {
        return ($this->csp[$directive] !== '') ? $this->csp[$directive] : $this->pv_allow;
    }

    /**
     * Works out which event-handler attributes are permitted.
     *
     * Every attribute whose name starts with "on" is treated as an event
     * handler and removed by filterAttributes() unless it is listed here.
     * Nothing is permitted when script-src resolves to "none".
     */
    private function makeBlackList()
    {
        $this->permittedEvents = array();
        if ($this->pv_scriptsrc === 'none') {
            return;
        }
        foreach ($this->eventWhitelist as $name) {
            $this->permittedEvents[] = strtolower(trim($name));
        }
    }

    /**
     * Neutralises common filter-dodging tricks in an attribute value.
     *
     * based on http://kallahar.com/smallprojects/php_xss_filter_function.php
     * which is public domain. So as far as I'm concerned, this function
     * is public domain.
     *
     * @param string $input attribute value
     * @param int    $src   non-zero when the value belongs to a src attribute
     * @return string cleaned value
     */
    private function obfus($input, $src = 0)
    {
        // Drop control characters other than tab, LF and CR. (The character
        // class used to contain literal commas, which stripped every comma
        // from attribute values.)
        $return = preg_replace('/[\x00-\x08\x0b\x0c\x0e-\x1f]/', '', $input);

        // Decode numeric character references for printable ASCII so that
        // encoded scheme names are visible to the check below.
        $search  = 'abcdefghijklmnopqrstuvwxyz';
        $search .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
        $search .= '1234567890!@#$%^&*()';
        $search .= '~`";:?+/={}[]-_|\'\\';
        $length = strlen($search);
        for ($i = 0; $i < $length; $i++) {
            $code = ord($search[$i]);
            $return = preg_replace('/(&#[xX]0{0,8}' . dechex($code) . ';?)/i', $search[$i], $return);
            $return = preg_replace('/(&#0{0,8}' . $code . ';?)/', $search[$i], $return);
        }

        // Remove script schemes. Tab/CR/LF between the letters are tolerated
        // in the match so they cannot be used to split the scheme name, and
        // the removal repeats until stable so "javajavascript:script:" does
        // not collapse into a working "javascript:".
        $alternatives = array();
        foreach (array('javascript', 'vbscript', 'mocha') as $scheme) {
            $alternatives[] = implode('[\t\r\n]*', str_split($scheme));
        }
        $pattern = '/(' . implode('|', $alternatives) . ')[\t\r\n]*:/i';
        do {
            $before = $return;
            $return = preg_replace($pattern, '', $return);
        } while ($return !== $before);

        if ($src != 0) {
            // make sure relative src attributes start with only one slash;
            // leading backslashes are collapsed together with the slashes
            $return = preg_replace('/^\s*[\/\\\\]+/', '/', $return);
        }
        return $return;
    }

    /**
     * Rewrites every attribute of $node with a sanitised, lower-case name and
     * a value passed through obfus(); non-permitted event handlers and
     * attributes whose sanitised name is unusable are dropped.
     *
     * @param DOMElement $node element to clean
     */
    private function filterAttributes($node)
    {
        // Collect first: the attribute map is modified below.
        $oldNames = array();
        $cleaned  = array();
        foreach ($node->attributes as $attribute) {
            // allow colon as it is used in namespace attributes -
            // needs to be tested though, may require different handling??
            $name = strtolower(preg_replace('/[^a-z0-9:-]+/i', '', $attribute->name));
            $isSrc = ($name == "src") ? 1 : 0;
            $oldNames[] = $attribute->name;
            $cleaned[]  = array($name, $this->obfus($attribute->value, $isSrc));
        }

        foreach ($oldNames as $oldName) {
            $node->removeAttribute($oldName);
        }

        foreach ($cleaned as $pair) {
            $name  = $pair[0];
            $value = $pair[1];
            // an empty name or one not starting with a letter cannot be set
            if (preg_match('/^[a-z]/', $name) !== 1) {
                continue;
            }
            $isHandler = (preg_match('/^on[a-z]/', $name) === 1);
            if ($isHandler && !in_array($name, $this->permittedEvents, true)) {
                continue;
            }
            try {
                $node->setAttribute($name, $value);
            } catch (DOMException $e) {
                // name rejected by the DOM implementation: drop the attribute
                continue;
            }
        }
    }

    /**
     * Replaces $node with a new element named $element that carries the same
     * attributes and deep copies of the same children.
     */
    private function changeNodeElement($node, $element)
    {
        if ($node->parentNode === null) {
            return;
        }
        $newNode = $this->dom->createElement($element);

        foreach ($node->attributes as $attribute) {
            $newNode->setAttribute($attribute->name, $attribute->value);
        }
        foreach ($node->childNodes as $child) {
            $newNode->appendChild($child->cloneNode(true));
        }
        $node->parentNode->replaceChild($newNode, $node);
    }

    /**
     * Makes sure the element's tag is lower case and made only of a-z / 0-9.
     * An element whose cleaned tag is empty or does not start with a letter
     * is removed, because no element can be created under such a name.
     */
    private function checkNodeTag($node)
    {
        $tag = $node->tagName;
        $newTag = strtolower(preg_replace('/[^a-z0-9]+/i', '', $tag));
        if ($tag == $newTag) {
            return;
        }
        if (preg_match('/^[a-z]/', $newTag) !== 1) {
            if ($node->parentNode !== null) {
                $node->parentNode->removeChild($node);
            }
            return;
        }
        $this->changeNodeElement($node, $newTag);
    }

    /**
     * Makes sure all element tags and attribute names are lower case;
     * also triggers filter dodging checks and event attribute filtering.
     */
    private function walkTheDog()
    {
        // pass one - clean up the attributes
        $elements = $this->dom->getElementsByTagName("*");
        foreach ($elements as $element) {
            $this->filterAttributes($element);
        }

        // pass two - fix element tags; walk backwards because replacing a
        // node changes the live list
        $elements = $this->dom->getElementsByTagName("*");
        for ($j = $elements->length; --$j >= 0; ) {
            $this->checkNodeTag($elements->item($j));
        }
    }

    /**
     * Records the href of the first <base> element that has one.
     */
    private function getBase()
    {
        $this->base = "";
        $elements = $this->dom->getElementsByTagName("base");
        foreach ($elements as $element) {
            if ($element->hasAttribute('href')) {
                $this->base = $element->getAttribute('href');
                break;
            }
        }
    }

    /**
     * Compares a host against one source expression.
     *
     * @param string $needle   host taken from the URL being checked
     * @param string $haystack source expression: a host name, "*" (any host)
     *                         or "*suffix" (host must end with suffix)
     * @return int 1 on match, 0 otherwise
     */
    private function mycompare($needle, $haystack)
    {
        if ($haystack === '*') {
            return 1;
        }
        if (substr($haystack, 0, 1) === '*') {
            $suffix = substr($haystack, 1);
            $suffixLength = strlen($suffix);
            if (strlen($needle) < $suffixLength) {
                return 0;
            }
            return (substr($needle, -$suffixLength) === $suffix) ? 1 : 0;
        }
        return ($needle === $haystack) ? 1 : 0;
    }

    /**
     * Decides whether a URL may be loaded under a source list.
     *
     * @param string $src URL taken from a src attribute
     * @param string $pv  space separated source list, or "none"
     * @return bool true if source is valid
     */
    private function checkSource($src, $pv)
    {
        $pv = trim((string) $pv);
        if ($pv === '' || $pv === 'none') {
            return false;
        }
        $allowed = preg_split('/\s+/', $pv);

        $src = strtolower(trim($src));
        if ($src === '') {
            return false;
        }

        $httpScheme = '/^https?:\/\//';
        if (preg_match($httpScheme, $src) === 1) {
            $location = preg_replace($httpScheme, '', $src);
        } elseif (preg_match('/^([a-z][a-z0-9+.-]*:)/', $src, $scheme) === 1) {
            // data:, ftp: and friends are not same-host URLs; accept them
            // only when the list names that scheme explicitly.
            return in_array($scheme[1], $allowed, true);
        } else {
            // relative URL: it resolves against an absolute <base href>,
            // otherwise against the host serving the document
            $base = strtolower(trim($this->base));
            if (preg_match($httpScheme, $base) === 1) {
                $location = preg_replace($httpScheme, '', $base);
            } else {
                $location = strtolower((string) $this->httphost);
            }
        }

        // The host ends at the first "/", "?", "#" or "\"; any userinfo
        // ("user@") in front of it is not part of the host.
        $pieces = preg_split('/[\/?#\\\\]/', $location);
        $host = $pieces[0];
        $at = strrpos($host, '@');
        if ($at !== false) {
            $host = substr($host, $at + 1);
        }
        if ($host === '') {
            return false;
        }

        foreach ($allowed as $source) {
            if ($source === 'self' || $source === "'self'") {
                $source = strtolower((string) $this->httphost);
            }
            if ($this->mycompare($host, $source) == 1) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns every element that carries the given attribute.
     * not currently used - but keeping here anyway in case I decide to at some point
     *
     * @return array list of DOMElement, empty when nothing matches
     */
    private function getElementsByAttribute($attribute)
    {
        $found = array();
        foreach ($this->dom->getElementsByTagName("*") as $tag) {
            if ($tag->hasAttribute($attribute)) {
                $found[] = $tag;
            }
        }
        return $found;
    }

    /**
     * nukes extra elements that are only supposed to occur once - IE body tag.
     * The first occurrence of each is kept.
     */
    private function deleteExtraOnce()
    {
        foreach ($this->occurOnce as $tag) {
            $elements = $this->dom->getElementsByTagName($tag);
            for ($j = $elements->length; --$j > 0; ) {
                $element = $elements->item($j);
                $element->parentNode->removeChild($element);
            }
        }
    }

    /**
     * nukes tags that are forbidden by the CSP
     */
    private function forbiddenTags()
    {
        $this->filterImg();
        $this->filterScript();
        $this->filterObject();
        $this->filterFrame();
    }

    /**
     * Checks every <img src> against img-src. A rejected image that has an
     * alt attribute only loses its src; one without alt is removed.
     */
    private function filterImg()
    {
        $elements = $this->dom->getElementsByTagName("img");
        for ($j = $elements->length; --$j >= 0; ) {
            $element = $elements->item($j);
            if (!$element->hasAttribute("src")) {
                continue;
            }
            $src = trim($element->getAttribute("src"));
            if ($this->checkSource($src, $this->pv_imgsrc)) {
                continue;
            }
            if ($element->hasAttribute("alt")) {
                $element->removeAttribute("src");
            } else {
                $element->parentNode->removeChild($element);
            }
        }
    }

    /**
     * Removes <script> elements that have no src or whose src is not
     * permitted by script-src.
     */
    private function filterScript()
    {
        $this->filterBySource(array('script'), $this->pv_scriptsrc);
    }

    /**
     * Removes <object>, <embed> and <applet> elements that have no src or
     * whose src is not permitted by object-src.
     */
    private function filterObject()
    {
        $this->filterBySource(array('object', 'embed', 'applet'), $this->pv_objectsrc);
    }

    /**
     * Removes <frame> and <iframe> elements that have no src or whose src is
     * not permitted by frame-src.
     */
    private function filterFrame()
    {
        $this->filterBySource(array('frame', 'iframe'), $this->pv_framesrc);
    }

    /**
     * Removes every element of the given tags unless it has a src attribute
     * accepted by checkSource() for the source list $pv.
     */
    private function filterBySource($tags, $pv)
    {
        foreach ($tags as $tag) {
            $elements = $this->dom->getElementsByTagName($tag);
            // backwards: removing nodes shrinks the live list
            for ($j = $elements->length; --$j >= 0; ) {
                $element = $elements->item($j);
                $keep = $element->hasAttribute("src")
                    && $this->checkSource(trim($element->getAttribute("src")), $pv);
                if (!$keep) {
                    $element->parentNode->removeChild($element);
                }
            }
        }
    }

    /**
     * nukes elements that are supposed to be in head but are not
     */
    private function deleteNotInHead()
    {
        foreach ($this->headOnly as $tag) {
            $elements = $this->dom->getElementsByTagName($tag);
            for ($j = $elements->length; --$j >= 0; ) {
                $element = $elements->item($j);
                // nodeName exists on every node type, including the document
                if ($element->parentNode->nodeName != "head") {
                    $element->parentNode->removeChild($element);
                }
            }
        }
    }

    /**
     * nukes children of elements that ain't supposed to have children
     */
    private function deleteIllegitimateChildren()
    {
        foreach ($this->noChildren as $tag) {
            $elements = $this->dom->getElementsByTagName($tag);
            // backwards on both levels: both node lists are live
            for ($i = $elements->length; --$i >= 0; ) {
                $element = $elements->item($i);
                $children = $element->childNodes;
                for ($j = $children->length; --$j >= 0; ) {
                    $element->removeChild($children->item($j));
                }
            }
        }
    }
} // end of class