00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "weTagScanner.h"
00022
00023 #ifndef __DOXYGEN__
00024
00025
/// Compares the first @p length characters of @p s and @p s1.
///
/// Up to eight characters are compared position by position, so a NUL inside
/// the range is treated like any other character. Longer ranges are handed to
/// strncmp(), which stops at the first NUL.
///
/// @param s       first character sequence
/// @param s1      second character sequence
/// @param length  number of characters to compare
/// @return true when the compared ranges match
static inline bool equal(const char* s, const char* s1, size_t length)
{
    // Long inputs are delegated to the C library.
    if (length > 8)
        return strncmp(s, s1, length) == 0;

    // Short inputs: walk from the last character down to the first.
    for (size_t remaining = length; remaining > 0; --remaining)
    {
        const size_t idx = remaining - 1;
        if (s1[idx] != s[idx])
            return false;
    }
    return true;
}
00042 #endif //__DOXYGEN__
00043
00050 const char* WeTagScanner::GetValue(void)
00051 {
00052 value.push_back(0);
00053 return &value[0];
00054 }
00055
00062 const char* WeTagScanner::GetAttrName(void)
00063 {
00064 attr_name.push_back(0);
00065 return &attr_name[0];
00066 }
00067
00074 const char* WeTagScanner::GetTagName(void)
00075 {
00076 tag_name.push_back(0);
00077 return &tag_name[0];
00078 }
00079
00086 WeScannerToken WeTagScanner::ScanBody(void)
00087 {
00088 char c = GetChar();
00089
00090 value.clear();
00091
00092 bool ws = false;
00093
00094 if(c == 0) return wstEof;
00095 else if(c == '<') return ScanTag();
00096 else if(c == '&')
00097 c = ScanEntity();
00098 else
00099 ws = IsWhitespace(c);
00100
00101 while(true)
00102 {
00103 AppendValue(c);
00104 c = input.GetChar();
00105 if(c == 0) { break; }
00106 if(c == '<') { PushBack(c); break; }
00107 if(c == '&') { PushBack(c); break; }
00108
00109 if(IsWhitespace(c) != ws)
00110 {
00111 PushBack(c);
00112 break;
00113 }
00114
00115 }
00116 return ws? wstSpace:wstWord;
00117 }
00118
00125 WeScannerToken WeTagScanner::ScanHead(void)
00126 {
00127 char c = SkipWhitespace();
00128
00129 if(c == '>') { c_scan = &WeTagScanner::ScanBody; return ScanBody(); }
00130 if(c == '/')
00131 {
00132 char t = GetChar();
00133 if(t == '>') { c_scan = &WeTagScanner::ScanBody; return wstTagEnd; }
00134 else { PushBack(t); return wstError; }
00135 }
00136
00137 attr_name.clear();
00138 value.clear();
00139
00140
00141 while(c != '=')
00142 {
00143 if( c == 0) return wstEof;
00144 if( c == '>' ) { PushBack(c); return wstAttr; }
00145 if( IsWhitespace(c) )
00146 {
00147 c = SkipWhitespace();
00148 if(c != '=') { PushBack(c); return wstAttr; }
00149 else break;
00150 }
00151 if( c == '<') return wstError;
00152 AppendAttrName(c);
00153 c = GetChar();
00154 }
00155
00156 c = SkipWhitespace();
00157
00158
00159 if(c == '\"')
00160 while(c = GetChar())
00161 {
00162 if(c == '\"') return wstAttr;
00163 if(c == '&') c = ScanEntity();
00164 AppendValue(c);
00165 }
00166 else if(c == '\'')
00167 while(c = GetChar())
00168 {
00169 if(c == '\'') return wstAttr;
00170 if(c == '&') c = ScanEntity();
00171 AppendValue(c);
00172 }
00173 else
00174 do
00175 {
00176 if( IsWhitespace(c) ) return wstAttr;
00177
00178
00179
00180 if( c == '>' ) { PushBack(c); return wstAttr; }
00181 AppendValue(c);
00182 } while(c = GetChar());
00183
00184 return wstError;
00185 }
00186
00193 WeScannerToken WeTagScanner::ScanComment(void)
00194 {
00195 if(got_tail)
00196 {
00197 c_scan = &WeTagScanner::ScanBody;
00198 got_tail = false;
00199 return wstCommentEnd;
00200 }
00201 value.clear();
00202 for(int value_length = 0; ; value_length++)
00203 {
00204 char c = GetChar();
00205 if( c == 0) return wstEof;
00206 value.push_back(c);
00207
00208 if(value_length >= 2
00209 && value[value_length] == '>'
00210 && value[value_length - 1] == '-'
00211 && value[value_length - 2] == '-')
00212 {
00213 got_tail = true;
00214 value.pop_back();
00215 value.pop_back();
00216 value.pop_back();
00217 break;
00218 }
00219 }
00220 return wstData;
00221 }
00222
00229 WeScannerToken WeTagScanner::ScanCdata()
00230 {
00231 if(got_tail)
00232 {
00233 c_scan = &WeTagScanner::ScanBody;
00234 got_tail = false;
00235 return wstCDataEnd;
00236 }
00237 value.clear();
00238 for(int value_length = 0; ; value_length++)
00239 {
00240 char c = GetChar();
00241 if( c == 0) return wstEof;
00242 value.push_back(c);
00243
00244 if(value_length >= 2
00245 && value[value_length] == '>'
00246 && value[value_length - 1] == ']'
00247 && value[value_length - 2] == ']')
00248 {
00249 got_tail = true;
00250 value_length -= 2;
00251 value.pop_back();
00252 value.pop_back();
00253 value.pop_back();
00254 break;
00255 }
00256 }
00257 return wstData;
00258 }
00259
00266 WeScannerToken WeTagScanner::ScanPi()
00267 {
00268 if(got_tail)
00269 {
00270 c_scan = &WeTagScanner::ScanBody;
00271 got_tail = false;
00272 return wstPiEnd;
00273 }
00274 value.clear();
00275 for(int value_length = 0; ; value_length++)
00276 {
00277 char c = GetChar();
00278 if( c == 0) return wstEof;
00279 value.push_back(c);
00280
00281 if(value_length >= 1
00282 && value[value_length] == '>'
00283 && value[value_length - 1] == '?')
00284 {
00285 got_tail = true;
00286 value_length -= 1;
00287 value.pop_back();
00288 value.pop_back();
00289 break;
00290 }
00291 }
00292 return wstData;
00293 }
00294
00301 WeScannerToken WeTagScanner::ScanTag()
00302 {
00303 tag_name.clear();
00304
00305 char c = GetChar();
00306
00307 bool is_tail = c == '/';
00308 if(is_tail) c = GetChar();
00309
00310 while(c)
00311 {
00312 if(IsWhitespace(c)) { c = SkipWhitespace(); break; }
00313 if(c == '/' || c == '>') break;
00314 AppendTagName(c);
00315
00316 switch(tag_name.size())
00317 {
00318 case 3:
00319 if(equal(&tag_name[0],"!--",3)) { c_scan = &WeTagScanner::ScanComment; return wstCommentStart; }
00320 break;
00321 case 8:
00322 if( equal(&tag_name[0],"![CDATA[",8) ) { c_scan = &WeTagScanner::ScanCdata; return wstCDataStart; }
00323 break;
00324 case 7:
00325 if( equal(&tag_name[0],"!ENTITY",8) ) { c_scan = &WeTagScanner::ScanEntityDecl; return wstEntityStart; }
00326 break;
00327 }
00328
00329 c = GetChar();
00330 }
00331
00332 if(c == 0) return wstError;
00333
00334 if(is_tail)
00335 {
00336 if(c == '>') return wstTagEnd;
00337 return wstError;
00338 }
00339 else
00340 PushBack(c);
00341
00342 c_scan = &WeTagScanner::ScanHead;
00343 return wstTagStart;
00344 }
00345
00352 char WeTagScanner::ScanEntity()
00353 {
00354 char buf[32];
00355 int i = 0;
00356 char t;
00357 char ret;
00358 for(; i < 31 ; ++i )
00359 {
00360 t = GetChar();
00361 if(t == 0) return wstEof;
00362 if( !isalnum(t) )
00363 {
00364 PushBack(t);
00365 break;
00366
00367 }
00368 buf[i] = char(t);
00369 if(t == ';')
00370 break;
00371 }
00372 buf[i] = 0;
00373 ret = 0;
00374 if(i == 2)
00375 {
00376 if(equal(buf,"gt",2)) ret = '>';
00377 if(equal(buf,"lt",2)) ret = '<';
00378 if (ret) {
00379 GetChar();
00380 return ret;
00381 }
00382 }
00383 else if(i == 3 && equal(buf,"amp",3)) {
00384 GetChar();
00385 return '&';
00386 }
00387 else if(i == 4)
00388 {
00389 if(equal(buf,"apos",4)) ret = '\'';
00390 if(equal(buf,"quot",4)) ret = '\"';
00391 if (ret) {
00392 GetChar();
00393 return ret;
00394 }
00395 }
00396 t = ResolveEntity(buf,i);
00397 if(t) {
00398 GetChar();
00399 return t;
00400 }
00401
00402 AppendValue('&');
00403 for(int n = 0; n < i; ++n)
00404 AppendValue(buf[n]);
00405 return GetChar();
00406 }
00407
00414 WeScannerToken WeTagScanner::ScanEntityDecl()
00415 {
00416 if(got_tail)
00417 {
00418 c_scan = &WeTagScanner::ScanBody;
00419 got_tail = false;
00420 return wstEntityEnd;
00421 }
00422 char t;
00423 unsigned int tc = 0;
00424 for(;;)
00425 {
00426 t = GetChar();
00427 if( t == 0 ) return wstEof;
00428 value.push_back(t);
00429 if(t == '\"') tc++;
00430 else if( t == '>' && (tc & 1) == 0 )
00431 {
00432 got_tail = true;
00433 break;
00434 }
00435 }
00436 return wstData;
00437 }
00438
00445 char WeTagScanner::SkipWhitespace()
00446 {
00447 while(char c = GetChar())
00448 {
00449 if(!IsWhitespace(c)) return c;
00450 }
00451 return 0;
00452 }
00453
00460 void WeTagScanner::PushBack(char c)
00461 {
00462 input.PushBack(c);
00463 }
00464
00471 char WeTagScanner::GetChar()
00472 {
00473 return input.GetChar();
00474 }
00475
00483 bool WeTagScanner::IsWhitespace(char c)
00484 {
00485 return c <= ' '
00486 && (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f');
00487 }
00488
00495 void WeTagScanner::AppendValue(char c)
00496 {
00497 value.push_back(c);
00498
00499
00500 }
00501
00508 void WeTagScanner::AppendAttrName(char c)
00509 {
00510 attr_name.push_back((char)c);
00511 }
00512
00519 void WeTagScanner::AppendTagName(char c)
00520 {
00521 tag_name.push_back((char)c);
00522 }
00523