31 #if defined( DEBUG_PARSER )
32 # if defined( DEBUG ) && defined( _MSC_VER )
34 # define TIXML_LOG OutputDebugString
36 # define TIXML_LOG printf
43 TiXmlBase::Entity TiXmlBase::entity[ TiXmlBase::NUM_ENTITY ] =
48 {
""", 6,
'\"' },
69 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
70 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
71 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
72 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
73 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
74 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
77 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
79 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
81 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
82 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
83 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
84 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
90 const unsigned long BYTE_MASK = 0xBF;
91 const unsigned long BYTE_MARK = 0x80;
92 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
96 else if ( input < 0x800 )
98 else if ( input < 0x10000 )
100 else if ( input < 0x200000 )
103 { *length = 0;
return; }
113 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
118 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
123 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
128 *output = (char)(input | FIRST_BYTE_MARK[*length]);
143 return isalpha( anyByte );
164 return isalnum( anyByte );
211 int row = cursor.
row;
212 int col = cursor.
col;
213 const char* p = stamp;
219 const unsigned char* pU = (
const unsigned char*)p;
262 col = (col / tabsize + 1) * tabsize;
268 if ( *(p+1) && *(p+2) )
274 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
276 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
311 assert( cursor.
row >= -1 );
312 assert( cursor.
col >= -1 );
328 const unsigned char* pU = (
const unsigned char*)p;
373 if ( !in->good() )
return false;
380 *tag += (char) in->get();
390 if ( c == character )
421 && (
IsAlpha( (
unsigned char) *p, encoding ) || *p ==
'_' ) )
423 const char* start = p;
425 && (
IsAlphaNum( (
unsigned char ) *p, encoding )
435 name->assign( start, p-start );
449 if ( *(p+1) && *(p+1) ==
'#' && *(p+2) )
451 unsigned long ucs = 0;
458 if ( !*(p+3) )
return 0;
461 q = strchr( q,
';' );
463 if ( !q || !*q )
return 0;
470 if ( *q >=
'0' && *q <=
'9' )
471 ucs += mult * (*q -
'0');
472 else if ( *q >=
'a' && *q <=
'f' )
473 ucs += mult * (*q -
'a' + 10);
474 else if ( *q >=
'A' && *q <=
'F' )
475 ucs += mult * (*q -
'A' + 10 );
485 if ( !*(p+2) )
return 0;
488 q = strchr( q,
';' );
490 if ( !q || !*q )
return 0;
497 if ( *q >=
'0' && *q <=
'9' )
498 ucs += mult * (*q -
'0');
515 return p + delta + 1;
519 for( i=0; i<NUM_ENTITY; ++i )
521 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
523 assert( strlen( entity[i].str ) == entity[i].strLength );
524 *
value = entity[i].chr;
526 return ( p + entity[i].strLength );
555 while ( *q && *tag &&
ToLower( *q, encoding ) ==
ToLower( *tag, encoding ) )
566 while ( *q && *tag && *q == *tag )
582 bool caseInsensitive,
587 || !condenseWhiteSpace )
591 && !
StringEqual( p, endTag, caseInsensitive, encoding )
595 char cArr[4] = { 0, 0, 0, 0 };
596 p =
GetChar( p, cArr, &len, encoding );
597 text->append( cArr, len );
602 bool whitespace =
false;
607 && !
StringEqual( p, endTag, caseInsensitive, encoding ) )
609 if ( *p ==
'\r' || *p ==
'\n' )
629 char cArr[4] = { 0, 0, 0, 0 };
630 p =
GetChar( p, cArr, &len, encoding );
634 text->append( cArr, len );
639 p += strlen( endTag );
640 return ( p && *p ) ? p : 0;
662 int tagIndex = (int) tag->length();
663 while ( in->good() && in->peek() !=
'>' )
684 bool isElement = node->ToElement() != 0;
741 const unsigned char* pU = (
const unsigned char*)p;
747 useMicrosoftBOM =
true;
763 p = node->
Parse( p, &data, encoding );
773 && node->ToDeclaration() )
813 errorLocation.
Clear();
814 if ( pError && data )
816 data->
Stamp( pError, encoding );
817 errorLocation = data->
Cursor();
827 if( !p || !*p || *p !=
'<' )
846 const char* xmlHeader = {
"<?xml" };
847 const char* commentHeader = {
"<!--" };
848 const char* dtdHeader = {
"<!" };
849 const char* cdataHeader = {
"<![CDATA[" };
854 TIXML_LOG(
"XML parsing Declaration\n" );
858 else if (
StringEqual( p, commentHeader,
false, encoding ) )
861 TIXML_LOG(
"XML parsing Comment\n" );
865 else if (
StringEqual( p, cdataHeader,
false, encoding ) )
868 TIXML_LOG(
"XML parsing CDATA\n" );
874 else if (
StringEqual( p, dtdHeader,
false, encoding ) )
877 TIXML_LOG(
"XML parsing Unknown(1)\n" );
881 else if (
IsAlpha( *(p+1), encoding )
885 TIXML_LOG(
"XML parsing Element\n" );
892 TIXML_LOG(
"XML parsing Unknown(2)\n" );
900 returnNode->
parent =
this;
927 if ( tag->length() < 3 )
return;
932 if ( tag->at( tag->length() - 1 ) ==
'>'
933 && tag->at( tag->length() - 2 ) ==
'/' )
938 else if ( tag->at( tag->length() - 1 ) ==
'>' )
950 if ( in->good() && in->peek() !=
'<' )
963 if ( !in->good() )
return;
964 assert( in->peek() ==
'<' );
965 int tagIndex = (int) tag->length();
967 bool closingTag =
false;
968 bool firstCharFound =
false;
991 if ( c ==
'[' && tag->size() >= 9 )
993 size_t len = tag->size();
994 const char* start = tag->c_str() + len - 9;
995 if ( strcmp( start,
"<![CDATA[" ) == 0 ) {
996 assert( !closingTag );
1001 if ( !firstCharFound && c !=
'<' && !
IsWhiteSpace( c ) )
1003 firstCharFound =
true;
1032 const char* tagloc = tag->c_str() + tagIndex;
1060 data->
Stamp( p, encoding );
1073 const char* pErr = p;
1107 else if ( *p ==
'>' )
1126 if (
StringEqual( p, endTag.c_str(),
false, encoding ) )
1128 p += endTag.length();
1130 if ( p && *p && *p ==
'>' ) {
1154 p = attrib->
Parse( p, data, encoding );
1164 #ifdef TIXML_USE_STL
1176 attributeSet.
Add( attrib );
1188 const char* pWithWhiteSpace = p;
1205 p = textNode->
Parse( p, data, encoding );
1211 p = textNode->
Parse( pWithWhiteSpace, data, encoding );
1214 if ( !textNode->
Blank() )
1233 p = node->
Parse( p, data, encoding );
1242 pWithWhiteSpace = p;
1254 #ifdef TIXML_USE_STL
1257 while ( in->good() )
1286 data->
Stamp( p, encoding );
1289 if ( !p || !*p || *p !=
'<' )
1297 while ( p && *p && *p !=
'>' )
1308 if ( p && *p ==
'>' )
1313 #ifdef TIXML_USE_STL
1316 while ( in->good() )
1330 && tag->at( tag->length() - 2 ) ==
'-'
1331 && tag->at( tag->length() - 3 ) ==
'-' )
1350 data->
Stamp( p, encoding );
1353 const char* startTag =
"<!--";
1354 const char* endTag =
"-->";
1356 if ( !
StringEqual( p, startTag,
false, encoding ) )
1362 p += strlen( startTag );
1384 while ( p && *p && !
StringEqual( p, endTag,
false, encoding ) )
1386 value.append( p, 1 );
1390 p += strlen( endTag );
1399 if ( !p || !*p )
return 0;
1403 data->
Stamp( p, encoding );
1407 const char* pErr = p;
1408 p =
ReadName( p, &name, encoding );
1415 if ( !p || !*p || *p !=
'=' )
1430 const char SINGLE_QUOTE =
'\'';
1431 const char DOUBLE_QUOTE =
'\"';
1433 if ( *p == SINGLE_QUOTE )
1439 else if ( *p == DOUBLE_QUOTE )
1453 && *p !=
'/' && *p !=
'>' )
1455 if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
1469 #ifdef TIXML_USE_STL
1472 while ( in->good() )
1475 if ( !cdata && (c ==
'<' ) )
1490 if ( cdata && c ==
'>' && tag->size() >= 3 ) {
1491 size_t len = tag->size();
1492 if ( (*tag)[len-2] ==
']' && (*tag)[len-3] ==
']' ) {
1508 data->
Stamp( p, encoding );
1512 const char*
const startTag =
"<![CDATA[";
1513 const char*
const endTag =
"]]>";
1515 if ( cdata ||
StringEqual( p, startTag,
false, encoding ) )
1519 if ( !
StringEqual( p, startTag,
false, encoding ) )
1525 p += strlen( startTag );
1537 p =
ReadText( p, &dummy,
false, endTag,
false, encoding );
1542 bool ignoreWhite =
true;
1544 const char* end =
"<";
1545 p =
ReadText( p, &
value, ignoreWhite, end,
false, encoding );
1552 #ifdef TIXML_USE_STL
1555 while ( in->good() )
1582 if ( !p || !*p || !
StringEqual( p,
"<?xml",
true, _encoding ) )
1589 data->
Stamp( p, _encoding );
1607 if (
StringEqual( p,
"version",
true, _encoding ) )
1610 p = attrib.
Parse( p, data, _encoding );
1611 version = attrib.Value();
1613 else if (
StringEqual( p,
"encoding",
true, _encoding ) )
1616 p = attrib.
Parse( p, data, _encoding );
1617 encoding = attrib.Value();
1619 else if (
StringEqual( p,
"standalone",
true, _encoding ) )
1622 p = attrib.
Parse( p, data, _encoding );
1623 standalone = attrib.Value();
1637 for (
unsigned i=0; i<
value.length(); i++ )