51ff3226 |
1 | <?php |
2 | |
54bc3775 |
/**
 * Whitelist-based validator for user supplied HTML fragments.
 *
 * The only entry point is htmlparser::htmlparse(). It does not modify the
 * markup; it only decides whether the fragment is acceptable.
 */
class htmlparser {

    /**
     * Tags that do not have to be closed explicitly. They are dropped from
     * the open-tag stack whenever an enclosing tag is closed or the input ends.
     */
    private static $unpaired = array(
        'br'  => 1,
        'br/' => 1, // "<br/>" written without a space is parsed as a tag named "br/"
        'li'  => 1,
        'hr'  => 1,
        '/tr' => 1,
        'img' => 1,
        'p'   => 1,
    );

    /**
     * Tag names that may appear in the fragment.
     */
    private static $allowed = array(
        'b' => 1, 'i' => 1, 'u' => 1, 'a' => 1, 'img' => 1,
        'sup' => 1, 'sub' => 1,
        'table' => 1, 'tr' => 1, 'td' => 1, 'th' => 1,
        'caption' => 1, 'thead' => 1, 'tfoot' => 1, 'col' => 1, 'colgroup' => 1,
        'font' => 1, 'ul' => 1, 'ol' => 1, 'li' => 1,
        'tt' => 1, 'address' => 1, 'code' => 1, 'small' => 1, 'big' => 1,
        'br' => 1,
        'br/' => 1, // "<br/>" written without a space is parsed as a tag named "br/"
        'hr' => 1, 'em' => 1, 'center' => 1, 'pre' => 1, 'xmp' => 1,
        's' => 1, 'strong' => 1, 'legend' => 1,
        'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1,
        'p' => 1, 'blockquote' => 1, 'div' => 1, 'span' => 1, 'fieldset' => 1,
    );

    /**
     * Checks that an HTML fragment only uses whitelisted tags, that its tags
     * are balanced and that its attributes do not contain constructs this
     * validator considers unsafe.
     *
     * The input is lower-cased (byte-wise strtolower) before checking, so tag
     * and attribute matching is case-insensitive.
     *
     * Side effect: on rejection a human readable reason is stored in the
     * global variable $htmlparse. The variable is left untouched on success.
     *
     * @param string $data HTML fragment to validate.
     *
     * @return int 1 when the fragment is accepted, 0 when it is rejected.
     */
    public static function htmlparse($data)
    {
        // A leading space guarantees that the first strtok() token is always
        // the text in front of the first '<', even when $data starts with a tag.
        $data = strtolower(' ' . $data);

        /*
         * Pass 1: every '<' must be followed by a '>' before the next '<'.
         * (strtok() skips empty tokens, so "<<" and a trailing "<" are tolerated.)
         */
        strtok($data, '<'); // text before the first tag - nothing to check
        for ($tok = strtok('<'); $tok !== false; $tok = strtok('<')) {
            if (strpos($tok, '>') === false) {
                // 'Wrong HTML syntax!'
                return self::fail('Chyba HTML syntaxe!');
            }
        }

        /*
         * Pass 2: check the whitelist, tag nesting and attribute contents.
         */
        $stack = array();   // names of the currently open tags, innermost last
        $openedTables = 0;  // number of <table> elements currently open

        strtok($data, '<'); // text before the first tag
        for ($tok = strtok('<'); $tok !== false; $tok = strtok('<')) {
            // $tag[0] is everything between '<' and '>'; the rest is plain text.
            $tag = explode('>', $tok, 2);
            // Split "name attributes" on the first whitespace character.
            $attrib = preg_split('/[[:space:]>]/', $tag[0], 2);
            $name = $attrib[0];
            $attrs = isset($attrib[1]) ? $attrib[1] : '';
            $isClosing = ($name !== '' && $name[0] === '/');

            // Whitelist check. Only a leading '/' is stripped to obtain the
            // bare name of a closing tag; any other tag must match as written.
            $bare = $isClosing ? (string) substr($name, 1) : $name;
            if (!isset(self::$allowed[$name]) && !isset(self::$allowed[$bare])) {
                // 'Forbidden tag <...>!'
                return self::fail('Zakazany tag <' . $name . '>!');
            }

            // The loop repeats only while unpaired tags are being popped so
            // that the same closing tag can be matched against the next
            // enclosing element.
            while (true) {
                $depth = count($stack);
                $top = $depth > 0 ? $stack[$depth - 1] : '';

                if ('/' . $top === $name) {
                    // Closing tag for the innermost open tag.
                    if ($top === 'table' && $openedTables > 0) {
                        $openedTables--;
                    }
                    array_pop($stack);
                    break;
                }

                if ($top === 'xmp') {
                    // Inside <xmp>: nesting and attributes of other tags are ignored.
                    break;
                }

                if ($isClosing) {
                    if (isset(self::$unpaired[$top])) {
                        // The innermost tag needs no closing tag: drop it and retry.
                        array_pop($stack);
                        continue;
                    }
                    // 'Error near tag <...>! Closing tag, that wasn't opened!'
                    return self::fail('Chyba u tagu <' . $tag[0] . '>! Zavirate tag, ktery jste neotevrel!');
                }

                // Opening tag: inspect its attributes. The prepended space lets
                // an attribute that directly follows the tag name match too.
                $padded = ' ' . $attrs;

                if (preg_match('/[[:space:]]on/', $padded)) {
                    // Event handler attributes (onclick, onerror, ...), introduced
                    // by any whitespace character, not just a space.
                    // 'JavaScript sux!'
                    return self::fail('JavaScript je na hovno!');
                }
                if (strpos($padded, '/on') !== false) {
                    // 'For security reasons the string "/on" is not allowed inside tags'
                    return self::fail('z bezpecnostnych dovodov nieje povolene vkladat do tagov retazec "/on"');
                }
                // NOTE: "style" attributes are not rejected; that check was
                // disabled in the original implementation.
                if (strpos($padded, '://') !== false && $name !== 'img' && $name !== 'a') {
                    // Addresses in attributes are only allowed for A and IMG.
                    // 'Forbidden usage of adresses in tags!'
                    return self::fail('Neco se mi tam nelibi! To je hlaska HTML validace - nejedna se o nejakou cenzuru ;)');
                }
                if (substr_count($attrs, '"') % 2 > 0) {
                    // An unbalanced double quote would swallow following markup.
                    // 'Close quotes in tag <...>!'
                    return self::fail('Neuzavrel jste uvozovky uvnitr tagu <' . $name . '>!');
                }
                if ($name === 'img' && strpos($attrs, '?') !== false) {
                    // 'Error in tag <img> - parameters in image adresses are forbidden!'
                    return self::fail('Chyba u tagu <img> - nejsou povoleny parametry v adrese!');
                }
                if (($name === 'td' || $name === 'tr') && $openedTables === 0) {
                    // Table rows/cells are only accepted inside a <table>.
                    return self::fail('Strkej si ty tagy do vlastni tabulky, jo?');
                }
                if ($name === 'table') {
                    $openedTables++;
                }

                $stack[] = $name;
                break;
            }
        }

        /*
         * Everything still open at the end must be a tag that needs no closing.
         */
        while (count($stack) > 0) {
            $top = array_pop($stack);
            if (!isset(self::$unpaired[$top])) {
                // 'Tag <...> wasn't closed correctly!'
                return self::fail('Neuzavrel jste tag <' . $top . '>!');
            }
        }

        return 1;
    }

    /**
     * Stores the rejection reason in the global $htmlparse and returns the
     * failure value of htmlparse().
     *
     * @param string $message Reason shown to the user.
     *
     * @return int Always 0.
     */
    private static function fail($message)
    {
        global $htmlparse;

        $htmlparse = $message;

        return 0;
    }

}