00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <time.h>
00022 #include <boost/algorithm/string/predicate.hpp>
00023 #include <boost/algorithm/string/compare.hpp>
00024 #include <boost/algorithm/string/case_conv.hpp>
00025 #include <boost/regex.hpp>
00026 #include "weHtmlEntity.h"
00027 #include "weHelper.h"
00028
00029 using namespace boost;
00030 using namespace boost::algorithm;
00031
00032 #ifndef __DOXYGEN__
00033
00034 #endif //__DOXYGEN__
00035
00050 static bool WeParseNeedToBreakTag(const string& tagName, const string& nextTag)
00051 {
00052 bool retval = false;
00053
00055 if (iequals(tagName, "br")) {
00056 return true;
00057 }
00058 if (iequals(tagName, "img")) {
00059 return true;
00060 }
00061 if (iequals(tagName, "base")) {
00062 return true;
00063 }
00064 if (iequals(tagName, "link")) {
00065 return true;
00066 }
00067 if (iequals(tagName, "input")) {
00068 return true;
00069 }
00070 if (iequals(tagName, "li")) {
00071 if (iequals(nextTag, "li")) {
00072 return true;
00073 }
00074 }
00075 if (iequals(tagName, "tr")) {
00076 if (iequals(nextTag, "tr")) {
00077 return true;
00078 }
00079
00080
00081
00082 }
00083 if (iequals(tagName, "td")) {
00084 if (iequals(nextTag, "td")) {
00085 return true;
00086 }
00087 if (iequals(nextTag, "tr")) {
00088 return true;
00089 }
00090 }
00091
00092 return retval;
00093 }
00094
00100 WeHtmlEntity::WeHtmlEntity(iweEntity* prnt )
00101
00102 {
00103 chldList.resize(0);
00104 attributes.clear();
00105 parent = prnt;
00106 entityName = "";
00107 GenerateId();
00108 }
00109
00117 WeHtmlEntity::WeHtmlEntity(WeHtmlEntity &entity)
00118 {
00119 chldList = entity.chldList;
00120 attributes = entity.attributes;
00121 parent = entity.parent;
00122 entityName = entity.entityName;
00123 GenerateId();
00124 }
00125
00131 WeHtmlEntity::~WeHtmlEntity(void)
00132 {
00133 ClearAttr();
00134 ClearChildren();
00135 }
00136
00145 const string& WeHtmlEntity::InnerText(void)
00146 {
00147 string *retval = new string;
00148 WeEntityList::iterator chld;
00149
00150 *retval = "";
00151 for (chld = chldList.begin(); chld != chldList.end(); chld++) {
00152 if (iequals((*chld)->Name(), string("#text"))) {
00153 *retval += (*chld)->Attr("");
00154 }
00155 }
00156
00157 return(*retval);
00158 }
00159
00170 const string& WeHtmlEntity::OuterText(void)
00171 {
00172 string *retval = new string;
00173 WeEntityList::iterator chld;
00174 WeAttrMap::iterator attr;
00175 char quote = '"';
00176
00177 *retval = "<";
00178 *retval += entityName;
00179 for (attr = attributes.begin(); attr != attributes.end(); attr++) {
00180 *retval += " ";
00181 *retval += attributes.key(attr);
00182 *retval += "=";
00183 if (attributes.val(attr) != "") {
00184 if (attributes.val(attr).find("\\\"") != string::npos) {
00185
00186 quote = '"';
00187 }
00188 else {
00189 quote = '\'';
00190 }
00191 *retval += quote;
00192 *retval += attributes.val(attr);
00193 *retval += quote;
00194 }
00195 }
00196 *retval += ">";
00197 if (chldList.size() > 0 || !WeParseNeedToBreakTag(entityName, "")){
00198 for (chld = chldList.begin(); chld != chldList.end(); chld++) {
00199 *retval += (*chld)->OuterText();
00200 }
00201 *retval += "</";
00202 *retval += entityName;
00203 *retval += ">";
00204 }
00205
00206 return(*retval);
00207 }
00208
00220 weCmpState WeHtmlEntity::Compare(iweEntity& entity, weCmpMode mode)
00221 {
00222 if (mode == weCmpDefault) {
00223 mode = compareMode;
00224 }
00226 throw runtime_error("Not implemented");
00227 return(weCmpLess);
00228 }
00229
00244 WeScannerToken WeHtmlEntity::Parse( string tagName, WeTagScanner& scanner, iweTransport* processor )
00245 {
00246 WeScannerToken state;
00247 WeHtmlEntity *chld;
00248 WeInnerText *txt;
00249 string txtAttr;
00250
00251 if (! tagName.empty()) {
00252
00253
00254 entityName = tagName;
00255 }
00256
00257 if (processor != NULL) {
00258
00259 }
00260 else {
00261
00262
00263 }
00264 startPos = scanner.GetPos() - tagName.length();
00265 if (tagName.length() > 0) {
00266 startPos--;
00267 }
00268 attributes.clear();
00269 ClearChildren();
00270 txt = NULL;
00271 chld = NULL;
00272 txtAttr = "";
00273
00274 while (true)
00275 {
00276 state = scanner.GetToken();
00277 parseRestart:
00278 if (state == wstEof || state == wstError) {
00279 if (txt != NULL) {
00280 txt->Attr("", txtAttr);
00281 chldList.push_back(txt);
00282
00283 txtAttr.clear();
00284 txt = NULL;
00285 }
00286 break;
00287 }
00288 switch(state)
00289 {
00290 case wstTagEnd:
00291 if (txt != NULL) {
00292 txt->Attr("", txtAttr);
00293 chldList.push_back(txt);
00294
00295 txtAttr.clear();
00296 txt = NULL;
00297 }
00298 if (iequals(string("form"), string(scanner.GetTagName()))) {
00300
00301
00302 break;
00303 }
00304 if (!iequals(entityName, string(scanner.GetTagName()))) {
00305
00306
00307
00308 if (!IsParentTag(string(scanner.GetTagName()))) {
00309
00310 break;
00311 }
00312 }
00313 else {
00314
00315 }
00316 endPos = scanner.GetPos();
00317 return state;
00318 case wstTagStart:
00319 if (txt != NULL) {
00320 txt->Attr("", txtAttr);
00321 chldList.push_back(txt);
00322
00323 txtAttr.clear();
00324 txt = NULL;
00325 }
00326 if (iequals(string("form"), string(scanner.GetTagName()))) {
00328
00329 }
00330 if (WeParseNeedToBreakTag(entityName, scanner.GetTagName())) {
00331
00332
00333
00334 return state;
00335 }
00336 if (iequals(string("form"), string(scanner.GetTagName()))) {
00338
00339 break;
00340 }
00341
00342 chld = weHtmlFactory.CreateEntity(scanner.GetTagName(), this);
00343 if (chld != NULL) {
00344 WeScannerToken chldState;
00345 chldList.push_back(chld);
00346 chldState = chld->Parse(scanner.GetTagName(), scanner, processor);
00347 if (iequals(string("form"), string(scanner.GetTagName()))) {
00349
00350
00351 break;
00352 }
00353 if (!iequals(chld->Name(), scanner.GetTagName())) {
00354
00355
00356 state = chldState;
00357 goto parseRestart;
00358 }
00359 chld = NULL;
00360 }
00361 break;
00362 case wstAttr:
00363 if (txt != NULL) {
00364 txt->Attr("", txtAttr);
00365 chldList.push_back(txt);
00366
00367 txtAttr.clear();
00368 txt = NULL;
00369 }
00370
00371 attributes[scanner.GetAttrName()] = scanner.GetValue();
00372 break;
00373 case wstWord:
00374 case wstSpace:
00375 if (txt == NULL) {
00376 txt = new WeInnerText(this);
00377 txtAttr = "";
00378 }
00379 if (state == wstSpace && processor != NULL && processor->IsSet(weoCollapseSpaces)) {
00380 txtAttr += " ";
00381 }
00382 else {
00383 txtAttr += scanner.GetValue();
00384 }
00385 break;
00386 case wstCommentStart:
00387 if (txt != NULL) {
00388 txt->Attr("", txtAttr);
00389 chldList.push_back(txt);
00390
00391 txtAttr.clear();
00392 txt = NULL;
00393 }
00394 chld = new WeHtmlComment(this);
00395 break;
00396 case wstCDataStart:
00397 if (txt != NULL) {
00398 txt->Attr("", txtAttr);
00399 chldList.push_back(txt);
00400
00401 txtAttr.clear();
00402 txt = NULL;
00403 }
00404 chld = new WeCData(this);
00405 break;
00406 case wstPiStart:
00407 if (txt != NULL) {
00408 txt->Attr("", txtAttr);
00409 chldList.push_back(txt);
00410
00411 txtAttr.clear();
00412 txt = NULL;
00413 }
00414 chld = new WePhpInclude(this);
00415 break;
00416 case wstData:
00417
00418
00419 chld->Attr("#text", scanner.GetValue());
00420 break;
00421 case wstCommentEnd:
00422 case wstCDataEnd:
00423 case wstPiEnd:
00424 if (chld != NULL) {
00425 chldList.push_back(chld);
00426 chld = NULL;
00427 }
00428 break;
00429 default:
00430 break;
00431 }
00432 }
00433
00434 endPos = scanner.GetPos();
00435 return state;
00436 }
00437
00438 WeCmpResults* WeHtmlEntity::Diff( iweEntity& cmp, weCmpMode mode )
00439 {
00441 return NULL;
00442 }
00443
00449 WeInnerText::WeInnerText(iweEntity* prnt ) :
00450 WeHtmlEntity(prnt)
00451 {
00452 entityName = "#text";
00453 attributes["#text"] = "";
00454 }
00455
00463 WeInnerText::WeInnerText(WeInnerText& entity) :
00464 WeHtmlEntity()
00465 {
00466 entityName = "#text";
00467 attributes["#text"] = entity.attributes["#text"];
00468 }
00469
00475 WeInnerText::~WeInnerText()
00476 {
00477
00478 }
00479
00490 const string WeInnerText::Attr(string name)
00491 {
00492 WeAttrMap::iterator it;
00493
00494 it = attributes.find("#text");
00495 if (it != attributes.end())
00496 {
00497 return attributes.val(it);
00498 }
00499 return(*(new string("")));
00500 }
00501
00510 void WeInnerText::Attr(string name, string value)
00511 {
00512 attributes["#text"] = value;
00513 }
00514
00521 const string& WeInnerText::InnerText(void)
00522 {
00523 WeAttrMap::iterator it;
00524
00525 it = attributes.find(string("#text"));
00526 if (it != attributes.end())
00527 {
00528 return attributes.val(it);
00529 }
00530 return(*(new string("")));
00531 }
00532
00542 const string& WeInnerText::OuterText(void)
00543 {
00544 WeAttrMap::iterator it;
00545
00546 it = attributes.find(string("#text"));
00547 if (it != attributes.end())
00548 {
00549 return attributes.val(it);
00550 }
00551 return(*(new string("")));
00552 }
00553
00565 WeCmpResults* WeInnerText::Diff(iweEntity& cmp, weCmpMode mode)
00566 {
00567 WeInnerText* ptr;
00568 WeCmpResults* retval = NULL;
00569 string s1, s2;
00570 regex repl("\\s+");
00571
00572 if (mode == weCmpDefault) {
00573 mode = compareMode;
00574 }
00575 try {
00576 ptr = reinterpret_cast<WeInnerText*>(&cmp);
00577 s1 = attributes["#text"];
00578 s2 = ptr->attributes["#text"];
00579 retval = WeTextDiff(s1, s2, mode);
00580 } catch (...) {
00581 if (retval != NULL) {
00582 delete retval;
00583 }
00584 retval = NULL;
00585 }
00586
00587 return retval;
00588 }
00589
00603 weCmpState WeInnerText::Compare(iweEntity& cmp, weCmpMode mode)
00604 {
00605 WeInnerText* ptr;
00606 string s1, s2;
00607 weCmpState retval = weCmpNonComparable;
00608 regex repl("\\s+");
00609
00610 if (mode == weCmpDefault) {
00611 mode = compareMode;
00612 }
00613 try {
00614 ptr = reinterpret_cast<WeInnerText*>(&cmp);
00615 s1 = attributes["#text"];
00616 s2 = ptr->attributes["#text"];
00617 if (mode & weCmpCollapseSpace) {
00618 s1 = regex_replace(s1, repl, " ");
00619 s2 = regex_replace(s2, repl, " ");
00620 }
00621 if (mode & weCmpCaseInsens) {
00622 is_iless cp_less;
00623 if (iequals(s1, s2)) {
00624 retval = weCmpEqual;
00625 }
00626 else if(cp_less(s1.c_str(), s2.c_str())) {
00627 retval = weCmpLess;
00628 } else {
00629 retval = weCmpGreather;
00630 }
00631 }
00632 else {
00633 is_less cp_less;
00634 if (equals(s1, s2)) {
00635 retval = weCmpEqual;
00636 }
00637 else if(cp_less(s1, s2)) {
00638 retval = weCmpLess;
00639 } else {
00640 retval = weCmpGreather;
00641 }
00642 }
00643 } catch (...) {
00644 retval = weCmpNonComparable;
00645 }
00646
00647 return retval;
00648 }
00649
00655 WeHtmlDocument::WeHtmlDocument(iweEntity* prnt )
00656
00657 {
00658 entityName = "#document";
00659
00660
00661
00662 }
00663
00671 WeHtmlDocument::WeHtmlDocument(WeHtmlDocument& entity)
00672
00673 {
00674 entityName = "#document";
00675 response = NULL;
00676 }
00677
00683 WeHtmlDocument::~WeHtmlDocument(void)
00684 {
00685
00686 }
00687
00694 const string& WeHtmlDocument::InnerText(void)
00695 {
00696 return OuterText();
00697 }
00698
00708 const string& WeHtmlDocument::OuterText(void)
00709 {
00710 string *retval = new string;
00711 WeEntityList::iterator chld;
00712
00713 *retval = "";
00714 for (chld = chldList.begin(); chld != chldList.end(); chld++) {
00715 *retval += (*chld)->OuterText();
00716 }
00717
00718 return(*retval);
00719 }
00720
00730 bool WeHtmlDocument::ParseData(iweResponse* resp, iweTransport* processor )
00731 {
00732 bool retval = false;
00733
00734 try
00735 {
00736 response = dynamic_cast<WeHttpResponse*>(resp);
00737 WeInStream* stream = response->Data().stream();
00738 if (stream) {
00739 WeTagScanner scanner(*stream);
00740 retval = (Parse("", scanner, processor) != wstError);
00741 }
00742 return retval;
00743
00744 }
00745 catch (...) { };
00746 return retval;
00747 }
00748
00755 WeBlob& WeHtmlDocument::Data( void )
00756 {
00757 if (response != NULL) {
00758 return response->Data();
00759 }
00760 throw WeError("WeRefrenceObject::Data - no data linked!");
00761 }
00762
00763 WeCmpResults* WeHtmlDocument::Diff( iweEntity& cmp, weCmpMode mode )
00764 {
00766 return NULL;
00767 }
00768
00769 weCmpState WeHtmlDocument::Compare( iweEntity& cmp, weCmpMode mode )
00770 {
00772 return weCmpNonComparable;
00773 }
00779 WeRefrenceObject::WeRefrenceObject(iweEntity* prnt ) :
00780 WeHtmlDocument(prnt)
00781 {
00783 }
00784
00792 WeRefrenceObject::WeRefrenceObject( WeRefrenceObject& entity )
00793 {
00795 throw runtime_error("Not implemented");
00796 }
00797
00803 WeRefrenceObject::~WeRefrenceObject()
00804 {
00806 }
00807
00815 WeScript::WeScript( iweEntity* prnt ) :
00816 WeRefrenceObject(prnt)
00817 {
00819 }
00820
00828 WeScript::WeScript( WeScript& entity )
00829 {
00831 }
00832
00838 WeScript::~WeScript()
00839 {
00841 }
00842
00856 WeScannerToken WeScript::Parse( string tagName, WeTagScanner& scanner, WeHTTP* processor )
00857 {
00858 WeScannerToken state;
00859 string txtAttr;
00860 bool inOurTag;
00861 bool inOtherTag;
00862 bool inProcess;
00863
00864 if (! tagName.empty()) {
00865
00866
00867 entityName = tagName;
00868 }
00869
00870 if (processor != NULL) {
00871
00872 }
00873 else {
00874
00875
00876 }
00877 attributes.clear();
00878 ClearChildren();
00879 txtAttr = "";
00880 inOurTag = true;
00881 inOtherTag = false;
00882 inProcess = true;
00883
00884 while (inProcess)
00885 {
00886 state = scanner.GetToken();
00887 if (state == wstEof || state == wstError) {
00888 inProcess = false;
00889
00890 break;
00891 }
00892 switch(state)
00893 {
00894 case wstTagEnd:
00895 inOurTag = false;
00896 inOtherTag = false;
00898
00899 if (iequals(scanner.GetTagName(), "script")) {
00900
00901 inProcess = false;
00902
00903 }
00904 break;
00905 case wstTagStart:
00906
00907 inOurTag = false;
00908 inOtherTag = true;
00909 txtAttr += "<";
00910 txtAttr += scanner.GetTagName();
00911 break;
00912 case wstAttr:
00913
00914 if (inOurTag) {
00915 attributes[scanner.GetAttrName()] = scanner.GetValue();
00916 }
00917 else {
00918 string aval = scanner.GetValue();
00919 char quote = '"';
00920 if (aval.find("\\\"") != string::npos) {
00921
00922 quote = '"';
00923 }
00924 else {
00925 quote = '\'';
00926 }
00927 txtAttr += " ";
00928 txtAttr += scanner.GetAttrName();
00929
00930 txtAttr += "=";
00931 txtAttr += quote;
00932 txtAttr += aval;
00933 txtAttr += quote;
00934 }
00935 break;
00936
00937
00938 case wstWord:
00939 case wstSpace:
00940 case wstCommentStart:
00941 case wstCDataStart:
00942 case wstPiStart:
00943 case wstData:
00944 case wstCommentEnd:
00945 case wstCDataEnd:
00946 case wstPiEnd:
00947 default:
00948 inOurTag = false;
00949 if (inOtherTag)
00950 {
00952 txtAttr += ">";
00953 }
00954 inOtherTag = false;
00955
00956 if (state == wstSpace && processor->IsSet(weoCollapseSpaces)) {
00957 txtAttr += " ";
00958 }
00959 else {
00960 txtAttr += scanner.GetValue();
00961 }
00962 break;
00963 }
00964 }
00965
00967 return state;
00968 }
00969
00982 bool WeScript::SetEngine( void *engine )
00983 {
00985 return false;
00986 }
00987
00995 void* WeScript::Execute()
00996 {
00998 return NULL;
00999 }