51ff3226 |
1 | <?php |
2 | |
/**
 * Heuristic validator for user-supplied HTML fragments.
 *
 * The validator works on a lower-cased copy of the input and never modifies
 * or returns the markup itself; it only answers "acceptable or not".
 *
 * NOTE(review): the method shares its name with the class. PHP versions
 * before 8.0 treat such a method as an old-style constructor, so this class
 * is only usable as written on PHP 8+. The name is kept because callers
 * depend on it.
 *
 * SECURITY NOTE(review): this is a blacklist-style heuristic, not a complete
 * XSS defence. For example "javascript:" URLs in <a>/<img> and event handlers
 * glued directly to a closing quote are not detected here. Output should still
 * be treated as untrusted by whatever renders it.
 */
class htmlparse {

    /**
     * Tags that do not have to be closed explicitly.
     *
     * 'br/' covers the compact self-closing spelling "<br/>", which the
     * tokenizer sees as a tag literally named "br/".
     * NOTE(review): '/tr' can never match because closing tags are never
     * pushed onto the open-tag stack; it is kept unchanged from the original.
     */
    private static $unpaired = [
        'br'  => 1,
        'br/' => 1,
        'li'  => 1,
        'hr'  => 1,
        '/tr' => 1,
        'img' => 1,
        'p'   => 1,
    ];

    /** Whitelist of tag names (lower case) that may appear in the input. */
    private static $allowed = [
        'b' => 1, 'i' => 1, 'u' => 1, 'a' => 1, 'img' => 1,
        'sup' => 1, 'sub' => 1,
        'table' => 1, 'tr' => 1, 'td' => 1, 'th' => 1,
        'caption' => 1, 'thead' => 1, 'tfoot' => 1, 'col' => 1, 'colgroup' => 1,
        'font' => 1, 'ul' => 1, 'ol' => 1, 'li' => 1,
        'tt' => 1, 'address' => 1, 'code' => 1, 'small' => 1, 'big' => 1,
        'br' => 1, 'br/' => 1, 'hr' => 1,
        'em' => 1, 'center' => 1, 'pre' => 1, 'xmp' => 1, 's' => 1, 'strong' => 1,
        'legend' => 1,
        'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1,
        'p' => 1, 'blockquote' => 1, 'div' => 1, 'span' => 1, 'fieldset' => 1,
    ];

    /**
     * Check whether an HTML fragment uses only allowed, well-nested tags.
     *
     * On failure the human-readable reason is stored in the global variable
     * $htmlparse (messages are intentionally left in their original language,
     * since callers may display or compare them).
     *
     * @param mixed $data HTML fragment; it is concatenated into a string.
     * @return int 1 when the fragment is accepted, 0 when it is rejected.
     */
    public static function htmlparse($data)
    {
        // The leading space guarantees that the first chunk is always plain
        // text, even when the input starts directly with '<'.
        $data = strtolower(' ' . $data);

        // Split on '<' and drop empty chunks, mirroring what strtok() did,
        // but without touching strtok()'s process-wide tokenizer state.
        $chunks = preg_split('/</', $data, -1, PREG_SPLIT_NO_EMPTY);
        array_shift($chunks); // text in front of the first tag

        // Pass 1: every chunk that follows a '<' must contain the closing '>'.
        foreach ($chunks as $chunk) {
            if (strpos($chunk, '>') === false) {
                // "HTML syntax error!"
                return self::reject('Chyba HTML syntaxe!');
            }
        }

        // Pass 2: validate each tag and keep a stack of currently open tags.
        $stack = [];
        $openTables = 0;

        foreach ($chunks as $chunk) {
            $halves = explode('>', $chunk, 2);
            $tagBody = $halves[0]; // everything between '<' and '>'

            $pieces = preg_split('/[[:space:]>]/', $tagBody, 2);
            $name = $pieces[0];
            $attributes = isset($pieces[1]) ? $pieces[1] : '';

            $isClosing = ($name !== '' && $name[0] === '/');
            // Only a leading '/' is stripped before the whitelist lookup;
            // stripping an arbitrary first character let e.g. "<wbr>" pass
            // because "br" is whitelisted.
            $bareName = $isClosing ? substr($name, 1) : $name;

            if (!isset(self::$allowed[$bareName])) {
                // "Forbidden tag <...>!"
                return self::reject('Zakazany tag <' . $name . '>!');
            }

            $top = $stack ? $stack[count($stack) - 1] : null;

            if ($isClosing) {
                // Implicitly close any unpaired tags sitting above the tag
                // that is being closed (e.g. <li> items before </ul>).
                while ($top !== null && '/' . $top !== $name && isset(self::$unpaired[$top])) {
                    array_pop($stack);
                    $top = $stack ? $stack[count($stack) - 1] : null;
                }

                if ($top !== null && '/' . $top === $name) {
                    if ($top === 'table' && $openTables > 0) {
                        $openTables--;
                    }
                    array_pop($stack);
                    continue;
                }

                if ($top === 'xmp') {
                    // Inside <xmp> every tag except </xmp> is ignored.
                    continue;
                }

                // "Error near tag <...>! You are closing a tag you did not open!"
                return self::reject(
                    'Chyba u tagu <' . $tagBody . '>! Zavirate tag, ktery jste neotevrel!'
                );
            }

            if ($top === 'xmp') {
                // Inside <xmp> every tag except </xmp> is ignored.
                continue;
            }

            // The padding space lets an attribute list that starts with
            // "on..." match the whitespace-prefixed pattern below.
            $padded = ' ' . $attributes;

            // Crude event-handler filter: "on" preceded by ANY whitespace
            // (attributes are split on any whitespace, so a tab or newline
            // before "onerror=" must be caught as well as a plain space).
            if (preg_match('/[[:space:]]on/', $padded)) {
                // "JavaScript sucks!"
                return self::reject('JavaScript je na hovno!');
            }
            if (strpos($padded, '/on') !== false) {
                // "For security reasons the string "/on" is not allowed inside tags"
                return self::reject(
                    'z bezpecnostnych dovodov nieje povolene vkladat do tagov retazec "/on"'
                );
            }
            // A check rejecting "style" attributes existed here but was
            // disabled in the original; style attributes are accepted.

            // Addresses in attributes are only tolerated on <a> and <img>.
            if (strpos($padded, '://') !== false && $name !== 'img' && $name !== 'a') {
                // "Forbidden usage of addresses in tags!"
                return self::reject(
                    'Neco se mi tam nelibi! To je hlaska HTML validace - nejedna se o nejakou cenzuru ;)'
                );
            }
            // An odd number of double quotes means an unterminated value.
            if (substr_count($attributes, '"') % 2 > 0) {
                // "You did not close the quotes inside tag <...>!"
                return self::reject('Neuzavrel jste uvozovky uvnitr tagu <' . $name . '>!');
            }
            // Query strings are not allowed in image addresses.
            if ($name === 'img' && strpos($attributes, '?') !== false) {
                // "Error in tag <img> - parameters in the address are not allowed!"
                return self::reject('Chyba u tagu <img> - nejsou povoleny parametry v adrese!');
            }
            // Table rows/cells are only valid while a <table> is open.
            if (($name === 'td' || $name === 'tr') && $openTables === 0) {
                return self::reject('Strkej si ty tagy do vlastni tabulky, jo?');
            }
            if ($name === 'table') {
                $openTables++;
            }

            $stack[] = $name;
        }

        // Anything still open must be a tag that is allowed to stay unclosed.
        while ($stack) {
            $top = array_pop($stack);
            if (!isset(self::$unpaired[$top])) {
                // "You did not close tag <...>!"
                return self::reject('Neuzavrel jste tag <' . $top . '>!');
            }
        }

        return 1;
    }

    /**
     * Record the rejection reason in the global $htmlparse and signal failure.
     *
     * @param string $reason Message describing why the input was rejected.
     * @return int Always 0.
     */
    private static function reject($reason)
    {
        global $htmlparse;

        $htmlparse = $reason;

        return 0;
    }

}