Commit | Line | Data |
---|---|---|
b42b2bf9 H |
1 | <?php |
2 | ||
/**
 * Whitelist-based checker for user-submitted HTML fragments.
 *
 * The class only wraps a single validation routine; it keeps no state of its own.
 */
class htmlparse {

    /**
     * Check that $data uses only whitelisted tags, carries no dangerous
     * attributes and that every paired tag is closed in the right order.
     *
     * On failure a human-readable reason (in Czech) is stored in the global
     * variable $htmlparse; on success that variable is left untouched.
     *
     * NOTE: under PHP < 8 a method named after its (non-namespaced) class is
     * also the legacy constructor, so the signature is deliberately kept as-is
     * (no "static", no visibility keyword).
     *
     * NOTE(review): this is a blunt filter, not a full HTML parser.  In
     * particular, URL schemes hidden behind character entities
     * (e.g. "&#106;avascript:") are not detected here.
     *
     * @param string $data HTML fragment to validate (compared case-insensitively)
     * @return int 1 when the fragment is acceptable, 0 otherwise
     */
    function htmlparse($data)
    {
        global $htmlparse;

        // The leading space guarantees that the first piece produced by the
        // split below is always "text before the first tag", even when the
        // input itself starts with '<'.
        $data = strtolower(' '.$data);

        // tags that do not need to be closed
        $unpaired = array(
            'br'  => 1,
            'br/' => 1, // fix later ;)
            'li'  => 1,
            'hr'  => 1,
            'img' => 1,
            'p'   => 1,
        );

        // allowed tags
        $allowed = array(
            'b' => 1, 'i' => 1, 'u' => 1, 'a' => 1, 'img' => 1,
            'sup' => 1, 'sub' => 1,
            'table' => 1, 'tr' => 1, 'td' => 1, 'th' => 1,
            'caption' => 1, 'thead' => 1, 'tfoot' => 1, 'col' => 1, 'colgroup' => 1,
            'font' => 1, 'ul' => 1, 'ol' => 1, 'li' => 1,
            'tt' => 1, 'address' => 1, 'code' => 1, 'small' => 1, 'big' => 1,
            'br' => 1,
            'br/' => 1, // fix later
            'hr' => 1, 'em' => 1, 'center' => 1, 'pre' => 1, 'xmp' => 1,
            's' => 1, 'strong' => 1, 'legend' => 1,
            'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1,
            'p' => 1, 'blockquote' => 1, 'div' => 1, 'span' => 1, 'fieldset' => 1,
        );

        // Split on '<'.  Dropping the first piece (plain text) and skipping
        // empty pieces reproduces what strtok($data, '<') yields, without
        // relying on strtok's hidden global tokenizer state.
        $pieces = explode('<', $data);
        array_shift($pieces);
        $segments = array();
        foreach ($pieces as $piece) {
            if ($piece !== '') {
                $segments[] = $piece;
            }
        }

        /*
            first pass: every '<' must be followed by a '>' somewhere before
            the next '<'
        */
        foreach ($segments as $segment) {
            if (strpos($segment, '>') === false) {
                $htmlparse = 'Chyba HTML syntaxe!';
                // English: 'Wrong HTML syntax!'
                return 0;
            }
        }

        /*
            main part of the function - checks allowed tags, some attributes
            and the nesting of tags
        */
        $stack = array();   // currently open tags, innermost last
        $openedTables = 0;  // nesting depth of <table>

        foreach ($segments as $segment) {
            // $tag[0] is everything between '<' and the first '>'
            $tag = explode('>', $segment, 2);
            // $attrib[0] is the tag name, $attrib[1] (if any) the raw attributes
            $attrib = preg_split('/[[:space:]>]/', $tag[0], 2);
            $name = $attrib[0];
            $attrs = isset($attrib[1]) ? $attrib[1] : '';

            $isClosing = (substr($name, 0, 1) === '/');
            // Only a leading '/' is stripped before the whitelist lookup, so
            // that e.g. <wbr> is not accepted just because "br" is allowed.
            $bareName = $isClosing ? (string) substr($name, 1) : $name;

            if (!isset($allowed[$bareName])) { // tag is not in the allowed array
                $htmlparse = 'Zakazany tag <'.$name.'>!';
                // English: 'Forbidden tag <...>!'
                return 0;
            }

            if ($isClosing) {
                // Unpaired tags (<li>, <p>, ...) sitting on top of the stack
                // are closed implicitly by a closing tag of an outer element.
                while ($stack && end($stack) !== $bareName && isset($unpaired[end($stack)])) {
                    array_pop($stack);
                }
                $top = $stack ? end($stack) : null;

                if ($top === $bareName) { // closing tag for the last opened tag
                    if ($top === 'table' && $openedTables > 0) {
                        $openedTables--;
                    }
                    array_pop($stack);
                } elseif ($top !== 'xmp') { // inside <xmp> stray closing tags are ignored
                    $htmlparse = 'Chyba u tagu <'.$tag[0].'>! Zavirate tag, ktery jste neotevrel!';
                    // English: 'Error near tag <...>! Closing tag, that wasn't opened!'
                    return 0;
                }
                continue;
            }

            if ($stack && end($stack) === 'xmp') {
                // XMP tag... ignore any other tags inside it
                continue;
            }

            // Attribute string with every whitespace character turned into a
            // plain space, so that TAB/newline separated attributes are seen
            // by the ' on' / ' style' checks below.
            $flat = ' '.preg_replace('/[[:space:]]/', ' ', $attrs);
            // Same string with quoted values (and slashes glued to them)
            // blanked out, to catch attributes written right after a closing
            // quote, e.g. src="x"onerror=...
            $bare = preg_replace('/(?:"[^"]*"|\'[^\']*\')\/*/', ' ', $flat);
            // Whitespace and control characters are ignored by browsers inside
            // a URL scheme, so drop them before looking for "...script:".
            $compact = preg_replace('/[[:space:][:cntrl:]]/', '', $attrs);

            if (strpos($flat, ' on') !== false || strpos($bare, ' on') !== false) {
                // event handlers - temporary solution for a permanent problem
                $htmlparse = 'JavaScript je na hovno!';
                // English: 'JavaScript sux!'
                return 0;
            } elseif (strpos($compact, 'script:') !== false) {
                // javascript:/vbscript: URLs are just another way to run script
                $htmlparse = 'JavaScript je na hovno!';
                return 0;
            } elseif (strpos($flat, ' style') !== false || strpos($bare, ' style') !== false) {
                // styles are forbidden
                $htmlparse = 'Ten "style" se mi tam nezda!';
                // English: '"styles" are forbidden!'
                return 0;
            } elseif (strpos($attrs, '://') !== false && $name !== 'img' && $name !== 'a') {
                // addresses in attributes (except A and IMG tags) are forbidden
                $htmlparse = 'Neco se mi tam nelibi! To je hlaska HTML validace - nejedna se o nejakou cenzuru ;)';
                // English: 'Forbidden usage of addresses in tags!'
                return 0;
            } elseif ((substr_count($attrs, '"') % 2) > 0) {
                // are quotes closed? can make a mess if they aren't
                $htmlparse = 'Neuzavrel jste uvozovky uvnitr tagu <'.$name.'>!';
                // English: 'Close quotes in tag <...>!'
                return 0;
            } elseif ($name === 'img' && strpos($attrs, '?') !== false) {
                // don't allow query parameters in IMG addresses
                $htmlparse = 'Chyba u tagu <img> - nejsou povoleny parametry v adrese!';
                // English: 'Error in tag <img> - parameters in image addresses are forbidden!'
                return 0;
            } elseif (($name === 'td' || $name === 'tr') && $openedTables === 0) {
                // table rows/cells are only accepted inside a <table>
                $htmlparse = 'Strkej si ty tagy do vlastni tabulky, jo?';
                return 0;
            } elseif ($name === 'table') {
                $openedTables++;
            }

            $stack[] = $name;
        }

        /*
            finally check that everything still open may stay open
        */
        while ($stack) {
            $top = array_pop($stack);
            if (!isset($unpaired[$top])) {
                $htmlparse = 'Neuzavrel jste tag <'.$top.'>!';
                // English: 'Tag <...> wasn't closed correctly!'
                return 0;
            }
        }

        return 1;
    }

}