Commit | Line | Data |
---|---|---|
bc13d5d6 | 1 | <?php |
e586807d H |
2 | /* This program is free software. It comes without any warranty, to |
3 | * the extent permitted by applicable law. You can redistribute it | |
4 | * and/or modify it under the terms of the Do What The Fuck You Want | |
5 | * To Public License, Version 2, as published by Sam Hocevar. See | |
6 | * http://sam.zoy.org/wtfpl/COPYING for more details. */ | |
7 | ||
bc13d5d6 H |
8 | |
class htmlparse {

    /**
     * Validate a user-supplied HTML fragment against a tag whitelist.
     *
     * The input is lower-cased, cut into pieces at every '<', and checked in
     * two passes: first that every piece following a '<' contains a '>',
     * then tag by tag for whitelist membership, nesting, and a handful of
     * attribute restrictions. On failure a human-readable reason is stored
     * in the global variable $htmlparse.
     *
     * NOTE(review): because the method shares the class name, PHP versions
     * before 8.0 also treat it as the constructor, whose return value is
     * discarded. Callers have to invoke it as a method to see the verdict -
     * confirm how the call sites use it.
     *
     * NOTE(review): the attribute checks are substring blacklists, not an
     * attribute parser; for instance nothing here inspects the URL scheme
     * of href/src values.
     *
     * @param string $data HTML fragment to validate.
     * @return int 1 when the fragment is accepted, 0 when it is rejected.
     */
    function htmlparse($data)
    {
        global $htmlparse;

        // Leading space guarantees the first strtok() piece is the text in
        // front of the first '<', never a tag.
        $data = strtolower(' ' . $data);

        // Tags that may be left open.
        $unpaired = array(
            'br'  => 1,
            'br/' => 1, // "<br/>" written without a space
            'li'  => 1,
            'hr'  => 1,
            '/tr' => 1, // kept from the original; only opening names are ever stacked, so it never matches
            'img' => 1,
            'p'   => 1,
        );

        // Tags users are allowed to write.
        $allowed = array(
            'b' => 1, 'i' => 1, 'u' => 1, 'a' => 1, 'img' => 1,
            'sup' => 1, 'sub' => 1,
            'table' => 1, 'tr' => 1, 'td' => 1, 'th' => 1,
            'caption' => 1, 'thead' => 1, 'tfoot' => 1,
            'col' => 1, 'colgroup' => 1,
            'font' => 1, 'ul' => 1, 'ol' => 1, 'li' => 1,
            'tt' => 1, 'address' => 1, 'code' => 1,
            'small' => 1, 'big' => 1,
            'br' => 1, 'br/' => 1, 'hr' => 1,
            'em' => 1, 'center' => 1, 'pre' => 1, 'xmp' => 1,
            's' => 1, 'strong' => 1, 'legend' => 1,
            'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1,
            'p' => 1, 'blockquote' => 1, 'div' => 1, 'span' => 1,
            'fieldset' => 1,
        );

        // Collect every piece that follows a '<'. strtok() skips empty
        // pieces, so "<<b>" yields the single piece "b>".
        $pieces = array();
        strtok($data, '<'); // discard the text before the first '<'
        for ($piece = strtok('<'); $piece !== false; $piece = strtok('<')) {
            $pieces[] = $piece;
        }

        // Pass 1: every '<' must be followed by a '>' before the next '<'.
        foreach ($pieces as $piece) {
            if (strpos($piece, '>') === false) {
                $htmlparse = 'Chyba HTML syntaxe!'; // "Wrong HTML syntax!"
                return 0;
            }
        }

        // Pass 2: whitelist, nesting and attribute checks.
        $stack = array();   // names of currently open tags, innermost last
        $opened_tables = 0; // number of <table> elements currently open
        $total = count($pieces);
        $idx = 0;

        while ($idx < $total) {
            $tag = explode('>', $pieces[$idx], 2);               // [tag text, trailing text]
            $attrib = preg_split('/[[:space:]>]/', $tag[0], 2);  // [name, attribute text]
            $name = $attrib[0];
            $attrs = isset($attrib[1]) ? $attrib[1] : '';
            $closing = ($name !== '' && $name[0] === '/');

            // Only a leading '/' is stripped before the whitelist lookup;
            // stripping an arbitrary first character would let "<xb>" pass
            // as "b".
            $bare = $closing ? (string) substr($name, 1) : $name;
            if (!isset($allowed[$bare])) {
                $htmlparse = 'Zakazany tag <'.$name.'>!'; // "Forbidden tag <...>!"
                return 0;
            }

            $depth = count($stack);
            $top = $depth > 0 ? $stack[$depth - 1] : null;

            if ($top !== null && '/'.$top === $name) {
                // Closing tag for the innermost open tag.
                if ($top === 'table' && $opened_tables > 0) {
                    $opened_tables--;
                }
                array_pop($stack);
            } elseif ($top === 'xmp') {
                // Inside <xmp>: ignore every tag except its own closing tag.
            } elseif ($closing) {
                if ($top !== null && isset($unpaired[$top])) {
                    // Innermost tag may stay open: drop it and examine the
                    // same closing tag again against the next one out.
                    array_pop($stack);
                    continue;
                }
                // "Error near tag <...>! Closing a tag that wasn't opened!"
                $htmlparse = 'Chyba u tagu <'.$tag[0].'>! Zavirate tag, ktery jste neotevrel!';
                return 0;
            } else {
                $padded = ' '.$attrs;

                if (preg_match('/[[:space:]]on/', $padded)) {
                    // Event-handler attributes. Any whitespace character
                    // counts, so a tab or newline before "on..." is caught
                    // as well as a plain space.
                    $htmlparse = 'JavaScript je na hovno!'; // "JavaScript sux!"
                    return 0;
                } elseif (strpos($padded, '/on') !== false) {
                    // "for security reasons the string '/on' is not allowed inside tags"
                    $htmlparse = 'z bezpecnostnych dovodov nieje povolene vkladat do tagov retazec "/on"';
                    return 0;
                } elseif (strpos($padded, '://') !== false && $name !== 'img' && $name !== 'a') {
                    // Addresses in attributes are allowed only on A and IMG.
                    // "Forbidden usage of addresses in tags!"
                    $htmlparse = 'Neco se mi tam nelibi! To je hlaska HTML validace - nejedna se o nejakou cenzuru ;)';
                    return 0;
                } elseif ((substr_count($attrs, '"') % 2) > 0) {
                    // Odd number of double quotes: a quote was left open.
                    // "Close quotes in tag <...>!"
                    $htmlparse = 'Neuzavrel jste uvozovky uvnitr tagu <'.$name.'>!';
                    return 0;
                } elseif ($name === 'img' && strpos($attrs, '?') !== false) {
                    // "Error in tag <img> - parameters in image addresses are forbidden!"
                    $htmlparse = 'Chyba u tagu <img> - nejsou povoleny parametry v adrese!';
                    return 0;
                } elseif (($name === 'td' || $name === 'tr') && $opened_tables == 0) {
                    // Table cells/rows outside of any <table>.
                    $htmlparse = 'Strkej si ty tagy do vlastni tabulky, jo?';
                    return 0;
                } elseif ($name === 'table') {
                    $opened_tables++;
                }
                // "style" attributes are not rejected: that check is
                // disabled in the original source.

                $stack[] = $name;
            }

            $idx++;
        }

        // Whatever is still open must be a tag that may stay open.
        for ($k = count($stack) - 1; $k >= 0; $k--) {
            if (!isset($unpaired[$stack[$k]])) {
                $htmlparse = 'Neuzavrel jste tag <'.$stack[$k].'>!'; // "Tag <...> wasn't closed correctly!"
                return 0;
            }
        }

        return 1;
    }

}