| 1 | <?php |
| 2 | |
/**
 * Whitelist-based validator for user-supplied HTML fragments.
 *
 * The validator does not rewrite the input; it only decides whether the
 * fragment is acceptable and, if it is not, records why.
 */
class htmlparser {

    /**
     * Tags that may be left unclosed. The keys are compared against the
     * names of opened tags only.
     */
    private static $unpaired = array(
        'br'  => 1,
        'br/' => 1, // "<br/>" written without a space is tokenised as the tag name "br/"
        'li'  => 1,
        'hr'  => 1,
        '/tr' => 1, // kept from the original table; closing tags are never pushed, so this key is never matched
        'img' => 1,
        'p'   => 1,
    );

    /**
     * Tags that are permitted at all (opening or closing form).
     */
    private static $allowed = array(
        'b' => 1, 'i' => 1, 'u' => 1, 'a' => 1, 'img' => 1,
        'sup' => 1, 'sub' => 1,
        'table' => 1, 'tr' => 1, 'td' => 1, 'th' => 1,
        'caption' => 1, 'thead' => 1, 'tfoot' => 1, 'col' => 1, 'colgroup' => 1,
        'font' => 1, 'ul' => 1, 'ol' => 1, 'li' => 1,
        'tt' => 1, 'address' => 1, 'code' => 1, 'small' => 1, 'big' => 1,
        'br' => 1,
        'br/' => 1, // see the note on the same key in $unpaired
        'hr' => 1, 'em' => 1, 'center' => 1, 'pre' => 1, 'xmp' => 1,
        's' => 1, 'strong' => 1, 'legend' => 1,
        'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1,
        'p' => 1, 'blockquote' => 1, 'div' => 1, 'span' => 1, 'fieldset' => 1,
    );

    /**
     * Checks an HTML fragment against the tag whitelist and a set of
     * attribute rules, and verifies that tags are properly nested/closed.
     *
     * The input is lower-cased for the purpose of the checks; it is not
     * modified or returned.
     *
     * NOTE(review): URL schemes inside attributes (e.g. "javascript:") are
     * not inspected by any rule below; callers that render the accepted
     * markup should be aware of that.
     *
     * @param mixed $data HTML fragment to validate (converted to string).
     *
     * @return int 1 when the fragment is accepted, 0 when it is rejected.
     *             On rejection a human-readable reason is stored in the
     *             global variable $htmlparse. On success that variable is
     *             left untouched.
     */
    public static function htmlparse($data)
    {
        // The leading space guarantees that the first token is always plain
        // text, even when the input starts with "<".
        $data = strtolower(' ' . $data);

        // Split on runs of "<" and drop empty pieces: this mirrors strtok()
        // semantics without relying on its hidden, non-re-entrant state.
        $tokens = preg_split('/<+/', $data, -1, PREG_SPLIT_NO_EMPTY);
        if ($tokens === false) {
            return self::reject('Chyba HTML syntaxe!');
        }
        array_shift($tokens); // discard the text that precedes the first tag

        // Pass 1: every "<" must be followed by a ">" before the next "<".
        foreach ($tokens as $token) {
            if (strpos($token, '>') === false) {
                return self::reject('Chyba HTML syntaxe!');
            }
        }

        // Pass 2: whitelist, attribute rules and nesting.
        $stack = array();   // names of currently open tags, innermost last
        $openTables = 0;    // nesting depth of <table>

        foreach ($tokens as $token) {
            $parts  = explode('>', $token, 2);
            $inner  = $parts[0];                              // everything between "<" and ">"
            $pieces = preg_split('/[[:space:]]/', $inner, 2);
            $name   = $pieces[0];                             // tag name, with "/" prefix if closing
            $attrs  = isset($pieces[1]) ? $pieces[1] : '';    // raw attribute text

            $isClosing = ($name !== '' && $name[0] === '/');
            // Only a genuine leading "/" is stripped; stripping an arbitrary
            // first character would let e.g. "<xb>" through as "<b>".
            $base = $isClosing ? (string) substr($name, 1) : $name;

            if (!isset(self::$allowed[$base])) {
                return self::reject('Zakazany tag <' . $name . '>!');
            }

            // A closing tag may implicitly close any number of unpaired tags
            // sitting on top of the stack, hence the loop.
            while (true) {
                $top = $stack ? end($stack) : null;

                if ($top !== null && '/' . $top === $name) {
                    // Closes the innermost open tag.
                    if ($top === 'table' && $openTables > 0) {
                        $openTables--;
                    }
                    array_pop($stack);
                    break;
                }

                if ($top === 'xmp') {
                    // Inside <xmp> everything up to </xmp> is ignored.
                    break;
                }

                if ($isClosing) {
                    if ($top !== null && isset(self::$unpaired[$top])) {
                        // Drop the optional-close tag and retry against the
                        // next one on the stack.
                        array_pop($stack);
                        continue;
                    }
                    return self::reject('Chyba u tagu <' . $inner . '>! Zavirate tag, ktery jste neotevrel!');
                }

                $padded = ' ' . $attrs;
                // Blank out quoted values so that an attribute glued directly
                // to a closing quote (src="x"onerror=...) becomes visible.
                $unquoted = (string) preg_replace('/"[^"]*"|\'[^\']*\'/', ' ', $padded);

                if (preg_match('/[[:space:]]on/', $padded) || preg_match('/[[:space:]]on/', $unquoted)) {
                    // Anything that looks like an on* event handler.
                    return self::reject('JavaScript je na hovno!');
                }
                if (strpos($padded, '/on') !== false) {
                    return self::reject('z bezpecnostnych dovodov nieje povolene vkladat do tagov retazec "/on"');
                }
                if (strpos($padded, '://') !== false && $name !== 'img' && $name !== 'a') {
                    // Addresses are only tolerated in <a> and <img>.
                    return self::reject('Neco se mi tam nelibi! To je hlaska HTML validace - nejedna se o nejakou cenzuru ;)');
                }
                if (substr_count($attrs, '"') % 2 > 0) {
                    // Unbalanced double quotes inside the tag.
                    return self::reject('Neuzavrel jste uvozovky uvnitr tagu <' . $name . '>!');
                }
                if ($name === 'img' && strpos($attrs, '?') !== false) {
                    // No query strings in image addresses.
                    return self::reject('Chyba u tagu <img> - nejsou povoleny parametry v adrese!');
                }
                if (($name === 'td' || $name === 'tr') && $openTables === 0) {
                    // Table rows/cells are only valid inside a <table>.
                    return self::reject('Strkej si ty tagy do vlastni tabulky, jo?');
                }
                if ($name === 'table') {
                    $openTables++;
                }

                $stack[] = $name;
                break;
            }
        }

        // Whatever is still open must be a tag that does not need closing.
        while ($stack) {
            $top = array_pop($stack);
            if (!isset(self::$unpaired[$top])) {
                return self::reject('Neuzavrel jste tag <' . $top . '>!');
            }
        }

        return 1;
    }

    /**
     * Stores the rejection reason in the global $htmlparse and returns the
     * failure code used by htmlparse().
     *
     * @param string $reason Message describing why the input was rejected.
     *
     * @return int Always 0.
     */
    private static function reject($reason)
    {
        global $htmlparse;

        $htmlparse = $reason;

        return 0;
    }

}