Kyberia v2.0
[mirrors/Kyberia-bloodline.git] / inc / htmlparse.inc
1 <?php
2 /* This program is free software. It comes without any warranty, to
3 * the extent permitted by applicable law. You can redistribute it
4 * and/or modify it under the terms of the Do What The Fuck You Want
5 * To Public License, Version 2, as published by Sam Hocevar. See
6 * http://sam.zoy.org/wtfpl/COPYING for more details. */
7
8
class htmlparse {

    /**
     * Validate a fragment of user-supplied HTML against a tag allow-list.
     *
     * The input is lower-cased and split on '<'. Every tag must be on the
     * allow-list, paired tags must be closed in order, and a handful of
     * attribute patterns (event handlers, URLs outside <a>/<img>, unbalanced
     * double quotes, query strings in <img>) are rejected.
     *
     * On failure the reason is stored in the global variable $htmlparse.
     * It is left untouched on success.
     *
     * NOTE(review): this is a heuristic filter, not a complete XSS sanitiser.
     * For example "javascript:" URLs and "style" attributes are not inspected
     * here. Output should still be treated with care.
     *
     * NOTE(review): the method shares its name with the class, so PHP < 8
     * also treats it as a constructor. The signature is deliberately left
     * unchanged so existing call sites keep working.
     *
     * @param string $data HTML fragment to validate
     * @return int 1 when the fragment is acceptable, 0 otherwise
     */
    function htmlparse($data)
    {
        global $htmlparse;

        // The leading space guarantees that the first token is always the
        // text in front of the first '<', never a tag.
        $data = strtolower(' ' . $data);

        // Tags that do not have to be closed explicitly.
        $unpaired = array(
            'br'  => 1,
            'br/' => 1, // fix later ;)
            'li'  => 1,
            'hr'  => 1,
            '/tr' => 1,
            'img' => 1,
            'p'   => 1
        );

        // Allowed tags.
        $allowed = array(
            'b' => 1, 'i' => 1, 'u' => 1, 'a' => 1, 'img' => 1,
            'sup' => 1, 'sub' => 1,
            'table' => 1, 'tr' => 1, 'td' => 1, 'th' => 1,
            'caption' => 1, 'thead' => 1, 'tfoot' => 1,
            'col' => 1, 'colgroup' => 1,
            'font' => 1, 'ul' => 1, 'ol' => 1, 'li' => 1,
            'tt' => 1, 'address' => 1, 'code' => 1,
            'small' => 1, 'big' => 1,
            'br' => 1,
            'br/' => 1, // fix later
            'hr' => 1, 'em' => 1, 'center' => 1, 'pre' => 1, 'xmp' => 1,
            's' => 1, 'strong' => 1, 'legend' => 1,
            'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1,
            'p' => 1, 'blockquote' => 1, 'div' => 1, 'span' => 1,
            'fieldset' => 1
        );

        // Split the input on '<'. strtok() skips empty tokens, so runs of
        // '<' collapse exactly as they did in the original implementation.
        $tokens = array();
        $tok = strtok($data, '<');
        while ($tok !== false) {
            $tokens[] = $tok;
            $tok = strtok('<');
        }
        // Drop the text preceding the first tag.
        array_shift($tokens);

        // First pass: every '<' must be followed by a '>' before the next '<'.
        foreach ($tokens as $tok) {
            if (strpos($tok, '>') === false) {
                $htmlparse = 'Chyba HTML syntaxe!';
                //$htmlparse = 'Wrong HTML syntax!';
                return 0;
            }
        }

        // Second pass: check allowed tags, attributes and nesting.
        $stack = array();   // currently open tags, innermost last
        $opened_tables = 0; // number of <table> elements currently open

        foreach ($tokens as $tok) {
            // ereg's split() was removed in PHP 7; explode/preg_split are
            // drop-in equivalents for these two patterns.
            $tag = explode('>', $tok, 2);
            $attrib = preg_split('/[[:space:]>]/', $tag[0], 2);
            $name = $attrib[0];
            $attr = isset($attrib[1]) ? $attrib[1] : '';

            // Only a leading '/' marks a closing tag; any other first
            // character is part of the tag name and must not be stripped.
            $closing = (substr($name, 0, 1) === '/');
            $bare = $closing ? (string) substr($name, 1) : $name;

            if (!isset($allowed[$bare])) { // tag is not on the allow-list
                $htmlparse = 'Zakazany tag &lt;'.$name.'&gt;!';
                //$htmlparse = 'Forbidden tag &lt;'.$name.'&gt;!';
                return 0;
            }

            // A closing tag implicitly closes any unpaired tags that sit
            // above its own opening tag (e.g. <li> before </ul>).
            while ($closing && count($stack) > 0) {
                $top = $stack[count($stack) - 1];
                if ('/'.$top === $name || !isset($unpaired[$top])) {
                    break;
                }
                array_pop($stack);
            }

            $top = count($stack) > 0 ? $stack[count($stack) - 1] : null;

            if ($top !== null && '/'.$top === $name) { // closes the innermost open tag
                if ($top === 'table' && $opened_tables > 0) {
                    $opened_tables--;
                }
                array_pop($stack);
                continue;
            }

            if ($top === 'xmp') { // inside <xmp>: ignore every other tag
                continue;
            }

            if ($closing) { // closing something that was never opened
                $htmlparse = 'Chyba u tagu &lt;'.$tag[0].'&gt;! Zavirate tag, ktery jste neotevrel!';
                //$htmlparse = 'Error near tag &lt;'.$tag[0].'&gt;! Closing tag that was not opened!';
                return 0;
            }

            // Event handlers: "on..." preceded by any whitespace (not only a
            // plain space), or glued directly to a closing quote, which
            // browsers still parse as a new attribute.
            if (preg_match('/[[:space:]]on/', ' '.$attr)
                || preg_match('/["\']on[a-z]+[[:space:]]*=/', $attr)) {
                $htmlparse = 'JavaScript je na hovno!';
                //$htmlparse = 'JavaScript sux!';
                return 0;
            }

            if (strpos(' '.$attr, '/on') !== false) {
                $htmlparse = 'z bezpecnostnych dovodov nieje povolene vkladat do tagov retazec "/on"';
                return 0;
            }

            // Addresses in attributes are forbidden except in A and IMG tags.
            if (strpos($attr, '://') !== false && $name !== 'img' && $name !== 'a') {
                $htmlparse = 'Neco se mi tam nelibi! To je hlaska HTML validace - nejedna se o nejakou cenzuru ;)';
                //$htmlparse = 'Forbidden usage of adresses in tags!';
                return 0;
            }

            // Double quotes must be balanced inside the tag.
            if ((substr_count($attr, '"') % 2) > 0) {
                $htmlparse = 'Neuzavrel jste uvozovky uvnitr tagu &lt;'.$name.'&gt;!';
                //$htmlparse = 'Close quotes in tag &lt;'.$name.'&gt;!';
                return 0;
            }

            // No query-string parameters in IMG tags.
            if ($name === 'img' && strpos($attr, '?') !== false) {
                $htmlparse = 'Chyba u tagu &lt;img&gt; - nejsou povoleny parametry v adrese!';
                //$htmlparse = 'Error in tag &lt;img&gt; - parameters in image adresses are forbidden!';
                return 0;
            }

            // Table rows and cells are only valid inside an open table.
            if (($name === 'td' || $name === 'tr') && $opened_tables == 0) {
                $htmlparse = 'Strkej si ty tagy do vlastni tabulky, jo?';
                return 0;
            }

            if ($name === 'table') {
                $opened_tables++;
            }

            $stack[] = $name;
        }

        // Whatever is still open must be a tag that needs no closing.
        while (count($stack) > 0) {
            $top = array_pop($stack);
            if (!isset($unpaired[$top])) {
                $htmlparse = 'Neuzavrel jste tag &lt;'.$top.'&gt;!';
                //$htmlparse = 'Tag &lt;'.$top.'&gt; was not closed correctly!';
                return 0;
            }
        }

        return 1;
    }

}
This page took 0.305487 seconds and 4 git commands to generate.