// All rights reserved. When finished, I'll find a suitable OS license.
// Haven't decided which one yet.
//
// Patches welcome with the understanding that sending them to me gives me
// permission to use them.
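// KNOWN BUGS
// - Does not properly deal with namespaced attributes.
//   (The original example on this line was lost; presumably a prefixed
//   attribute such as prefix:name is not re-created in its namespace.)
//   RESPONSIBLE FUNCTION: filterAttributes()
// - Need to check the scope of the base tag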
/**
 * Filters a DOMDocument so that its markup agrees with a (draft,
 * "X-Content-Security-Policy" style) Content Security Policy.
 *
 * Typical use:
 *   $filter = new cspfilter();
 *   $filter->httphost = 'www.example.com';   // a value you know is clean
 *   $filter->csp['img-src'] = '*.example.com';
 *   $filter->inputDom($dom);                 // $dom is a DOMDocument
 *   $filter->processData();                  // modifies $dom in place
 *   $filter->makeCSP();                      // optional: meta tag or header
 *
 * The code deliberately sticks to PHP 5 era syntax (no scalar types, no
 * short arrays) so it keeps running wherever the original did; it also runs
 * on PHP 8.
 */
class cspfilter {
    public $version = "0.4";

    // Parameters for the Content Security Policy. Each *-src value is a
    // space separated list of hosts; a host may start with "*" (suffix
    // match), and the keywords "self" and "none" are understood.
    public $csp = Array('allow' => 'none',
        'img-src' => '',
        'script-src' => '',
        'object-src' => '',
        'frame-src' => '',
        'report-uri' => '',
        'policy-uri' => '');

    // If you are going to use makeCSP(): set to true to send a real HTTP
    // header; when false a meta tag is added to the document instead.
    public $cspHeader = false;

    // The host used for filtering when a csp setting is "self".
    public $httphost;

    // Array of event attributes you don't want eaten by the class.
    // Note that if script is not allowed ("none"), the whitelist is ignored.
    public $eventWhitelist = Array();

    // The DOMDocument being filtered (set through inputDom()). Declared
    // explicitly because dynamic properties are deprecated in PHP 8.2; kept
    // public because it was publicly reachable as a dynamic property before.
    public $dom;

    // private variables
    private $pv_allow;
    private $pv_imgsrc;
    private $pv_scriptsrc;
    private $pv_objectsrc;
    private $pv_framesrc;
    private $cspContent;
    // Set of attribute names to drop: array(name => true).
    private $blacklist = Array();
    private $base = "";

    // NOTE(review): this is a fixed list, so event attributes that are not
    // listed here (newer "on..." handlers) are NOT removed. Consider
    // blocking every attribute that starts with "on".
    private $eventAttributes = Array('onabort','onactivate',
        'onafterprint','onafterupdate','onbeforeactivate',
        'onbeforecopy','onbeforecut','onbeforedeactivate',
        'onbeforeeditfocus','onbeforepaste','onbeforeprint',
        'onbeforeunload','onbeforeupdate','onblur',
        'onbounce','oncellchange','onchange',
        'onclick','oncontextmenu','oncontrolselect',
        'oncopy','oncut','ondataavailable',
        'ondatasetchanged','ondatasetcomplete','ondblclick',
        'ondeactivate','ondrag','ondragend',
        'ondragenter','ondragleave','ondragover',
        'ondragstart','ondrop','onerror',
        'onerrorupdate','onfilterchange','onfinish',
        'onfocus','onfocusin','onfocusout',
        'onhelp','onkeydown','onkeypress',
        'onkeyup','onlayoutcomplete','onload',
        'onlosecapture','onmousedown','onmouseenter',
        'onmouseleave','onmousemove','onmouseout',
        'onmouseover','onmouseup','onmousewheel',
        'onmove','onmoveend','onmovestart',
        'onpaste','onpropertychange','onreadystatechange',
        'onreset','onresize','onresizeend',
        'onresizestart','onrowenter','onrowexit',
        'onrowsdelete','onrowsinserted','onscroll',
        'onselect','onselectionchange','onselectstart',
        'onstart','onstop','onsubmit',
        'onunload');
    private $occurOnce = Array('html','head','title','body');
    // isindex appears to be head only in html 3.2 - 4.0 doesn't specify head only
    private $headOnly = Array('base','isindex','link','meta','style');
    private $noChildren = Array('base','basefont','br','col','frame',
        'hr','img','input','isindex','link','meta','param','script');

    // public functions

    /**
     * Seeds $httphost from the request's Host header (empty string when the
     * header is absent, e.g. on the command line).
     *
     * The Host header is client supplied: overwrite $httphost with a value
     * you know is clean and never print it without escaping.
     */
    public function __construct() {
        $this->httphost = isset($_SERVER["HTTP_HOST"]) ? $_SERVER["HTTP_HOST"] : '';
    }

    /**
     * Former PHP 4 style constructor. PHP 8 no longer treats a method named
     * after the class as a constructor, so the work lives in __construct();
     * this method is kept only so explicit calls keep working.
     */
    public function cspfilter() {
        $this->__construct();
    }

    /**
     * Registers the document to filter.
     *
     * @param DOMDocument $input document that processData() will modify in place
     * @throws InvalidArgumentException when $input is not a DOMDocument
     */
    public function inputDom($input) {
        if (!($input instanceof DOMDocument)) {
            throw new InvalidArgumentException('cspfilter::inputDom() must be fed a DOMDocument object');
        }
        $this->dom = $input;
    }

    /**
     * Chews the data - call after inputDom().
     *
     * Normalises the policy, sanitises attributes and tag names, then
     * removes structurally invalid elements and everything the policy
     * forbids.
     *
     * @throws RuntimeException when no document has been supplied
     */
    public function processData() {
        $this->requireDom('processData');
        $this->getFilterOptions();
        $this->makeBlackList();
        $this->walkTheDog();
        $this->getBase();
        $this->deleteExtraOnce();
        $this->deleteNotInHead();
        $this->deleteIllegitimateChildren();
        $this->forbiddenTags();
    }

    /**
     * Creates the policy meta tag or sends the policy header.
     * Not called within the class, call it from your script.
     *
     * With $cspHeader true an "X-Content-Security-Policy" header is sent;
     * otherwise a meta element is appended to the document's head. A
     * warning is raised (and nothing is added) when the document has no
     * head element.
     *
     * @throws RuntimeException in meta mode when no document has been supplied
     */
    public function makeCSP() {
        $allow = $this->cspValue('allow');
        $this->cspContent = "allow " . ((strlen($allow) == 0) ? "none" : $allow);
        // Every optional directive is emitted under its own name
        // (policy-uri used to be written out as a second report-uri).
        $optional = Array('img-src','script-src','object-src','frame-src','report-uri','policy-uri');
        foreach ($optional as $directive) {
            $value = $this->cspValue($directive);
            if (strlen($value) > 0) {
                $this->cspContent .= "; " . $directive . " " . $value;
            }
        }
        if ($this->cspHeader == true) {
            header("X-Content-Security-Policy: " . $this->cspContent);
            return;
        }
        $this->requireDom('makeCSP');
        $heads = $this->dom->getElementsByTagName("head");
        if ($heads->length == 0) {
            trigger_error('cspfilter::makeCSP(): document has no head element, policy meta tag not added', E_USER_WARNING);
            return;
        }
        $meta = $this->dom->createElement("meta");
        $meta->setAttribute("http-equiv","X-Content-Security-Policy");
        $meta->setAttribute("content",$this->cspContent);
        // the last head is used, as before (after processData() only one is left)
        $heads->item($heads->length - 1)->appendChild($meta);
    }

    // private helpers

    /** Throws unless inputDom() has supplied a document. */
    private function requireDom($caller) {
        if (!($this->dom instanceof DOMDocument)) {
            throw new RuntimeException('cspfilter::' . $caller . '() called before inputDom()');
        }
    }

    /** Returns a csp setting as a string ('' when the key is missing). */
    private function cspValue($key) {
        return (is_array($this->csp) && isset($this->csp[$key])) ? (string) $this->csp[$key] : '';
    }

    /** Lower-cases a source list and collapses whitespace runs to one space. */
    private function normaliseSourceList($value) {
        return preg_replace('/\s+/',' ',strtolower(trim($value)));
    }

    /**
     * Turns a normalised directive into the host list used for matching:
     * an empty directive inherits $fallback and every "self" token is
     * replaced by the (lower-cased) $httphost.
     */
    private function resolveSources($directive,$fallback) {
        if (strlen($directive) == 0) {
            return $fallback;
        }
        $self = strtolower((string) $this->httphost);
        $tokens = explode(' ',$directive);
        foreach ($tokens as $index => $token) {
            if ($token === 'self') {
                $tokens[$index] = $self;
            }
        }
        return implode(' ',$tokens);
    }

    /**
     * Normalises the public $csp settings in place and derives the private
     * per-type host lists (pv_*) that the filters match against.
     */
    private function getFilterOptions() {
        $lists = Array('allow','img-src','script-src','object-src','frame-src');
        foreach ($lists as $directive) {
            $this->csp[$directive] = $this->normaliseSourceList($this->cspValue($directive));
        }
        $this->csp['report-uri'] = trim($this->cspValue('report-uri'));
        $this->csp['policy-uri'] = trim($this->cspValue('policy-uri'));
        if (strlen($this->csp['allow']) == 0) {
            $this->csp['allow'] = 'none';
        }
        $this->pv_allow = $this->resolveSources($this->csp['allow'],'none');
        $this->pv_imgsrc = $this->resolveSources($this->csp['img-src'],$this->pv_allow);
        $this->pv_scriptsrc = $this->resolveSources($this->csp['script-src'],$this->pv_allow);
        $this->pv_objectsrc = $this->resolveSources($this->csp['object-src'],$this->pv_allow);
        $this->pv_framesrc = $this->resolveSources($this->csp['frame-src'],$this->pv_allow);
    }

    /**
     * Creates the black list of attributes we don't allow, taking the
     * whitelist into account. The whitelist is ignored entirely when script
     * is set to "none".
     */
    private function makeBlackList() {
        $scriptAllowed = (strcmp((string) $this->pv_scriptsrc,'none') != 0);
        $whitelist = (array) $this->eventWhitelist;
        $this->blacklist = Array();
        foreach ($this->eventAttributes as $event) {
            if ($scriptAllowed && in_array($event,$whitelist,true)) {
                continue;
            }
            $this->blacklist[$event] = true;
        }
    }

    /**
     * Builds the regex that finds script pseudo-schemes. Tab, CR and LF are
     * tolerated between the letters because browsers ignore them inside a
     * URL, which would otherwise allow "java<TAB>script:" to slip through.
     */
    private function schemePattern() {
        $alternatives = Array();
        foreach (Array('javascript','vbscript','mocha') as $scheme) {
            $alternatives[] = implode('[\t\r\n]*',str_split($scheme));
        }
        return '/(?:' . implode('|',$alternatives) . ')[\t\r\n]*:/i';
    }

    /**
     * Cleans an attribute value: strips control characters, decodes numeric
     * character references for printable ASCII and removes script schemes.
     * Only intended to operate on attribute values.
     *
     * Based on http://kallahar.com/smallprojects/php_xss_filter_function.php
     * which is public domain. So as far as I'm concerned, this function
     * is public domain.
     *
     * @param string $input raw attribute value
     * @param int    $src   non-zero for src attributes: leading slashes are
     *                      collapsed to one so "//host" cannot pose as a
     *                      relative path
     * @return string cleaned value
     */
    private function obfus($input,$src=0) {
        // Control characters only. The upstream class also contained literal
        // commas, which silently deleted commas from every attribute value.
        $return = preg_replace('/[\x00-\x08\x0b\x0c\x0e-\x1f]/', '', (string) $input);
        $search = 'abcdefghijklmnopqrstuvwxyz';
        $search .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
        $search .= '1234567890!@#$%^&*()';
        $search .= '~`";:?+/={}[]-_|\'\\';
        $length = strlen($search);
        for ($i = 0; $i < $length; $i++) {
            $char = $search[$i];
            // hexadecimal reference: &#x61; (leading zeros / missing ; allowed)
            $return = preg_replace('/(&#[xX]0{0,8}'.dechex(ord($char)).';?)/i', $char, $return);
            // decimal reference: &#97;
            $return = preg_replace('/(&#0{0,8}'.ord($char).';?)/', $char, $return);
        }
        // Repeat until stable so that removing one scheme cannot splice a
        // new one together ("javajavascript:script:").
        $pattern = $this->schemePattern();
        do {
            $before = $return;
            $return = preg_replace($pattern,'',$before);
        } while ($return !== $before);
        if ($src != 0) {
            // make sure relative src attributes start with only one slash
            $return = preg_replace('/^\s*[\/]+/','/',$return);
        }
        return $return;
    }

    /**
     * Filters the attribute names and content of one element: every
     * attribute is removed and re-added under a sanitised, lower-cased name
     * with a cleaned value, unless the name is black-listed, empty or not a
     * legal attribute name.
     */
    private function filterAttributes($node) {
        $attributes = $node->attributes;
        if ($attributes === null) {
            return;
        }
        $oldNames = Array();
        $sanitised = Array();
        foreach ($attributes as $attribute) {
            // allow colon as it is used in namespace attributes -
            // needs to be tested though, may require different handling??
            $clean = strtolower(preg_replace('/[^a-z0-9:-]+/i','',$attribute->name));
            $isSrc = ($clean == "src") ? 1 : 0;
            $oldNames[] = $attribute->name;
            $sanitised[] = Array($clean,$this->obfus($attribute->value,$isSrc));
        }
        // remove only after the walk: the attribute map is live
        foreach ($oldNames as $oldName) {
            $node->removeAttribute($oldName);
        }
        foreach ($sanitised as $pair) {
            $name = $pair[0];
            if ($name === '' || isset($this->blacklist[$name])) {
                continue;
            }
            try {
                $node->setAttribute($name,$pair[1]);
            } catch (DOMException $e) {
                // the sanitised name is not a legal attribute name - drop it
            }
        }
    }

    /**
     * Replaces $node with a new element called $element that carries the
     * same attributes and (cloned) children.
     */
    private function changeNodeElement($node,$element) {
        // ugh - why doesn't php let you change the element tag easily ??
        $newNode = $this->dom->createElement($element);
        foreach ($node->attributes as $attribute) {
            $newNode->setAttribute($attribute->name,$attribute->value);
        }
        foreach ($node->childNodes as $child) {
            $newNode->appendChild($child->cloneNode(true));
        }
        $node->parentNode->replaceChild($newNode,$node);
    }

    /**
     * Makes sure an element's tag consists of lower case letters and digits
     * only. An element whose cleaned tag would not be a usable name (empty
     * or not starting with a letter) is removed instead of renamed.
     */
    private function checkNodeTag($node) {
        $tag = $node->tagName;
        $newTag = strtolower(preg_replace('/[^a-z0-9]+/i','',$tag));
        if ($tag == $newTag) {
            return;
        }
        if (!preg_match('/^[a-z]/',$newTag)) {
            $node->parentNode->removeChild($node);
            return;
        }
        $this->changeNodeElement($node,$newTag);
    }

    /**
     * Makes sure all element tags and attribute names are lower case.
     * Also triggers filter dodging checks and event attribute filtering.
     */
    private function walkTheDog() {
        // pass one - clean up the attributes
        foreach ($this->dom->getElementsByTagName("*") as $element) {
            $this->filterAttributes($element);
        }
        // pass two - fix any element tags that are not lower case
        // (backwards, because renaming changes the live list)
        $elements = $this->dom->getElementsByTagName("*");
        for ($j = $elements->length; --$j >= 0; ) {
            $this->checkNodeTag($elements->item($j));
        }
    }

    /** Remembers the href of the (last) base element, if any. */
    private function getBase() {
        foreach ($this->dom->getElementsByTagName("base") as $element) {
            if ($element->hasAttribute('href')) {
                $this->base = $element->getAttribute('href');
            }
        }
    }

    /**
     * Compares a host against one allowed entry.
     *
     * @param string $needle   host taken from the document
     * @param string $haystack allowed entry; a leading "*" turns it into a
     *                         suffix match and a bare "*" matches any host
     * @return int 1 on match, 0 otherwise
     */
    private function mycompare($needle,$haystack) {
        $needle = (string) $needle;
        $haystack = (string) $haystack;
        if (substr($haystack,0,1) === "*") {
            $suffix = (string) substr($haystack,1);
            if ($suffix === '') {
                return 1;
            }
            if (strlen($needle) < strlen($suffix)) {
                return 0;
            }
            $needle = substr($needle,0 - strlen($suffix));
            $haystack = $suffix;
        }
        return (strcmp($needle,$haystack) == 0) ? 1 : 0;
    }

    /**
     * Returns true if a source is valid under a host list.
     *
     * http(s) URLs are judged by their host, relative URLs by the host of
     * the document's base href (or $httphost when there is none). URLs with
     * any other scheme and network-path references ("//host", "/\host") are
     * always rejected.
     *
     * @param string $src value of the src attribute
     * @param string $pv  space separated host list, or "none"
     * @return bool
     */
    private function checkSource($src,$pv) {
        $pv = (string) $pv;
        if ($pv == "none") {
            return false;
        }
        $src = strtolower(trim((string) $src));
        if ($src === '') {
            return false;
        }
        if (preg_match('/^https?:\/\/(.*)$/s',$src,$parts)) {
            $location = $parts[1];
        } elseif (preg_match('/^[a-z][a-z0-9+.\-]*:/',$src) || preg_match('/^[\/\\\\]{2}/',$src)) {
            // data:, ftp:, ... or a network-path reference: never our host
            return false;
        } elseif (strlen($this->base) > 0) {
            $location = preg_replace('/^https?:\/\//','',strtolower(trim($this->base)));
        } else {
            $location = strtolower((string) $this->httphost);
        }
        // The authority ends at the first / ? # or \ - cutting only at "/"
        // would let "evil.com?.good.com" pass a "*.good.com" suffix match.
        $host = (string) substr($location,0,strcspn($location,'/?#\\'));
        foreach (explode(" ",$pv) as $allowed) {
            if ($allowed !== '' && $this->mycompare($host,$allowed) == 1) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns every element that carries the given attribute.
     * Not currently used - kept in case it becomes useful.
     */
    private function getElementsByAttribute($attribute) {
        $return = Array();
        foreach ($this->dom->getElementsByTagName("*") as $tag) {
            if ($tag->hasAttribute($attribute)) {
                $return[] = $tag;
            }
        }
        return $return;
    }

    /**
     * Nukes extra elements that are only supposed to occur once (e.g. a
     * second body tag); the first occurrence is kept.
     */
    private function deleteExtraOnce() {
        foreach ($this->occurOnce as $tagName) {
            $elements = $this->dom->getElementsByTagName($tagName);
            // stops before index 0 so the first element survives
            for ($j = $elements->length; --$j > 0; ) {
                $element = $elements->item($j);
                $element->parentNode->removeChild($element);
            }
        }
    }

    /** Nukes tags that are forbidden by the CSP. */
    private function forbiddenTags() {
        $this->filterImg();
        $this->filterScript();
        $this->filterObject();
        $this->filterFrame();
    }

    /**
     * Images with a forbidden src lose the src when they have an alt text
     * and are removed otherwise. Images without src are left alone.
     */
    private function filterImg() {
        $elements = $this->dom->getElementsByTagName("img");
        for ($j = $elements->length; --$j >= 0; ) {
            $element = $elements->item($j);
            if (!$element->hasAttribute("src")) {
                continue;
            }
            $src = trim($element->getAttribute("src"));
            if ($this->checkSource($src,$this->pv_imgsrc)) {
                continue;
            }
            if ($element->hasAttribute("alt")) {
                $element->removeAttribute("src");
            } else {
                $element->parentNode->removeChild($element);
            }
        }
    }

    /**
     * Removes every element with one of the given tag names unless it has a
     * src attribute that $policy allows (no src - yank it).
     */
    private function filterBySource($tagNames,$policy) {
        foreach ($tagNames as $tagName) {
            $elements = $this->dom->getElementsByTagName($tagName);
            for ($j = $elements->length; --$j >= 0; ) {
                $element = $elements->item($j);
                $allowed = $element->hasAttribute("src")
                    && $this->checkSource(trim($element->getAttribute("src")),$policy);
                if (!$allowed) {
                    $element->parentNode->removeChild($element);
                }
            }
        }
    }

    /** Removes inline scripts and scripts from hosts script-src forbids. */
    private function filterScript() {
        $this->filterBySource(Array('script'),$this->pv_scriptsrc);
    }

    /** Removes object/embed/applet elements that object-src forbids. */
    private function filterObject() {
        $this->filterBySource(Array('object','embed','applet'),$this->pv_objectsrc);
    }

    /** Removes frame/iframe elements that frame-src forbids. */
    private function filterFrame() {
        $this->filterBySource(Array('frame','iframe'),$this->pv_framesrc);
    }

    /** Nukes elements that are supposed to be in head but are not. */
    private function deleteNotInHead() {
        foreach ($this->headOnly as $tagName) {
            $elements = $this->dom->getElementsByTagName($tagName);
            for ($j = $elements->length; --$j >= 0; ) {
                $element = $elements->item($j);
                // nodeName also exists on a DOMDocument parent
                if ($element->parentNode->nodeName != "head") {
                    $element->parentNode->removeChild($element);
                }
            }
        }
    }

    /** Nukes children of elements that ain't supposed to have children. */
    private function deleteIllegitimateChildren() {
        foreach ($this->noChildren as $tagName) {
            $elements = $this->dom->getElementsByTagName($tagName);
            // backwards: removing children can shrink the live list
            for ($i = $elements->length; --$i >= 0; ) {
                $element = $elements->item($i);
                while ($element->firstChild !== null) {
                    $element->removeChild($element->firstChild);
                }
            }
        }
    }
} // end of class
?>