1 : /**********************************************************************
2 : * $Id: cpl_minixml.cpp 17930 2009-10-30 22:58:03Z rouault $
3 : *
4 : * Project: CPL - Common Portability Library
5 : * Purpose: Implementation of MiniXML Parser and handling.
6 : * Author: Frank Warmerdam, warmerdam@pobox.com
7 : *
8 : **********************************************************************
9 : * Copyright (c) 2001, Frank Warmerdam
10 : *
11 : * Permission is hereby granted, free of charge, to any person obtaining a
12 : * copy of this software and associated documentation files (the "Software"),
13 : * to deal in the Software without restriction, including without limitation
14 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 : * and/or sell copies of the Software, and to permit persons to whom the
16 : * Software is furnished to do so, subject to the following conditions:
17 : *
18 : * The above copyright notice and this permission notice shall be included
19 : * in all copies or substantial portions of the Software.
20 : *
21 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 : * DEALINGS IN THE SOFTWARE.
28 : **********************************************************************
29 : *
30 : * Independent Security Audit 2003/04/05 Andrey Kiselev:
31 : * Completed audit of this module. Any documents may be parsed without
32 : * buffer overflows and stack corruptions.
33 : *
34 : * Security Audit 2003/03/28 warmerda:
35 : * Completed security audit. I believe that this module may be safely used
36 : * to parse, and serialize arbitrary documents provided by a potentially
37 : * hostile source.
38 : *
39 : */
40 :
41 : #include "cpl_minixml.h"
42 : #include "cpl_error.h"
43 : #include "cpl_conv.h"
44 : #include "cpl_string.h"
45 : #include <ctype.h>
46 :
47 : CPL_CVSID("$Id: cpl_minixml.cpp 17930 2009-10-30 22:58:03Z rouault $");
48 :
/* Kinds of token that ReadToken() reports back to the parser loop. */
typedef enum
{
    TNone,           /* end of input, or a quoted string that hit EOF      */
    TString,         /* quoted value, character data or CDATA contents     */
    TOpen,           /* '<' seen while outside of a tag                    */
    TClose,          /* '>' seen while inside of a tag                     */
    TEqual,          /* '=' seen while inside of a tag                     */
    TToken,          /* bare name collected while inside of a tag          */
    TSlashClose,     /* "/>" seen while inside of a tag                    */
    TQuestionClose,  /* "?>" seen while inside of a tag                    */
    TComment,        /* text found between "<!--" and "-->"                */
    TLiteral         /* a whole "<!DOCTYPE ...>" declaration               */
} XMLTokenType;
61 :
62 : typedef struct
63 : {
64 : CPLXMLNode *psFirstNode;
65 : CPLXMLNode *psLastChild;
66 : } StackContext;
67 :
68 : typedef struct {
69 : const char *pszInput;
70 : int nInputOffset;
71 : int nInputLine;
72 : int bInElement;
73 : XMLTokenType eTokenType;
74 : char *pszToken;
75 : size_t nTokenMaxSize;
76 : size_t nTokenSize;
77 :
78 : int nStackMaxSize;
79 : int nStackSize;
80 : StackContext *papsStack;
81 :
82 : CPLXMLNode *psFirstNode;
83 : CPLXMLNode *psLastNode;
84 : } ParseContext;
85 :
86 :
87 : /************************************************************************/
88 : /* ReadChar() */
89 : /************************************************************************/
90 :
91 1209314 : static CPL_INLINE char ReadChar( ParseContext *psContext )
92 :
93 : {
94 : char chReturn;
95 :
96 1209314 : chReturn = psContext->pszInput[psContext->nInputOffset++];
97 :
98 1209314 : if( chReturn == '\0' )
99 1082 : psContext->nInputOffset--;
100 1208232 : else if( chReturn == 10 )
101 12964 : psContext->nInputLine++;
102 :
103 1209314 : return chReturn;
104 : }
105 :
106 : /************************************************************************/
107 : /* UnreadChar() */
108 : /************************************************************************/
109 :
110 62918 : static CPL_INLINE void UnreadChar( ParseContext *psContext, char chToUnread )
111 :
112 : {
113 62918 : if( chToUnread == '\0' )
114 : {
115 : /* do nothing */
116 : }
117 : else
118 : {
119 : CPLAssert( chToUnread
120 : == psContext->pszInput[psContext->nInputOffset-1] );
121 :
122 62917 : psContext->nInputOffset--;
123 :
124 62917 : if( chToUnread == 10 )
125 0 : psContext->nInputLine--;
126 : }
127 62918 : }
128 :
129 : /************************************************************************/
130 : /* AddToToken() */
131 : /************************************************************************/
132 :
133 875570 : static CPL_INLINE void AddToToken( ParseContext *psContext, char chNewChar )
134 :
135 : {
136 875570 : if( psContext->pszToken == NULL )
137 : {
138 1082 : psContext->nTokenMaxSize = 10;
139 1082 : psContext->pszToken = (char *) CPLMalloc(psContext->nTokenMaxSize);
140 : }
141 874488 : else if( psContext->nTokenSize >= psContext->nTokenMaxSize - 2 )
142 : {
143 2990 : psContext->nTokenMaxSize *= 2;
144 : psContext->pszToken = (char *)
145 2990 : CPLRealloc(psContext->pszToken,psContext->nTokenMaxSize);
146 : }
147 :
148 875570 : psContext->pszToken[psContext->nTokenSize++] = chNewChar;
149 875570 : psContext->pszToken[psContext->nTokenSize] = '\0';
150 875570 : }
151 :
152 : /************************************************************************/
153 : /* ReadToken() */
154 : /************************************************************************/
155 :
156 171205 : static XMLTokenType ReadToken( ParseContext *psContext )
157 :
158 : {
159 : char chNext;
160 :
161 171205 : psContext->nTokenSize = 0;
162 171205 : psContext->pszToken[0] = '\0';
163 :
164 171205 : chNext = ReadChar( psContext );
165 481027 : while( isspace((unsigned char)chNext) )
166 138617 : chNext = ReadChar( psContext );
167 :
168 : /* -------------------------------------------------------------------- */
169 : /* Handle comments. */
170 : /* -------------------------------------------------------------------- */
171 171214 : if( chNext == '<'
172 : && EQUALN(psContext->pszInput+psContext->nInputOffset,"!--",3) )
173 : {
174 9 : psContext->eTokenType = TComment;
175 :
176 : // Skip "!--" characters
177 9 : ReadChar(psContext);
178 9 : ReadChar(psContext);
179 9 : ReadChar(psContext);
180 :
181 893 : while( !EQUALN(psContext->pszInput+psContext->nInputOffset,"-->",3)
182 : && (chNext = ReadChar(psContext)) != '\0' )
183 875 : AddToToken( psContext, chNext );
184 :
185 : // Skip "-->" characters
186 9 : ReadChar(psContext);
187 9 : ReadChar(psContext);
188 9 : ReadChar(psContext);
189 : }
190 : /* -------------------------------------------------------------------- */
191 : /* Handle DOCTYPE. */
192 : /* -------------------------------------------------------------------- */
193 171197 : else if( chNext == '<'
194 : && EQUALN(psContext->pszInput+psContext->nInputOffset,"!DOCTYPE",8) )
195 : {
196 1 : int bInQuotes = FALSE;
197 1 : psContext->eTokenType = TLiteral;
198 :
199 1 : AddToToken( psContext, '<' );
200 17 : do {
201 18 : chNext = ReadChar(psContext);
202 18 : if( chNext == '\0' )
203 : {
204 : CPLError( CE_Failure, CPLE_AppDefined,
205 : "Parse error in DOCTYPE on or before line %d, "
206 : "reached end of file without '>'.",
207 0 : psContext->nInputLine );
208 :
209 0 : break;
210 : }
211 :
212 : /* The markup declaration block within a DOCTYPE tag consists of:
213 : * - a left square bracket [
214 : * - a list of declarations
215 : * - a right square bracket ]
216 : * Example:
217 : * <!DOCTYPE RootElement [ ...declarations... ]>
218 : */
219 18 : if( chNext == '[' )
220 : {
221 1 : AddToToken( psContext, chNext );
222 :
223 99 : do
224 : {
225 99 : chNext = ReadChar( psContext );
226 99 : AddToToken( psContext, chNext );
227 : }
228 : while( chNext != ']' && chNext != '\0'
229 : && !EQUALN(psContext->pszInput+psContext->nInputOffset,"]>", 2) );
230 :
231 1 : if (chNext == '\0')
232 : {
233 : CPLError( CE_Failure, CPLE_AppDefined,
234 : "Parse error in DOCTYPE on or before line %d, "
235 : "reached end of file without ']'.",
236 0 : psContext->nInputLine );
237 0 : break;
238 : }
239 :
240 1 : chNext = ReadChar( psContext );
241 1 : AddToToken( psContext, chNext );
242 :
243 : // Skip ">" character, will be consumed below
244 1 : chNext = ReadChar( psContext );
245 : }
246 :
247 :
248 18 : if( chNext == '\"' )
249 0 : bInQuotes = !bInQuotes;
250 :
251 18 : if( chNext == '>' && !bInQuotes )
252 : {
253 1 : AddToToken( psContext, '>' );
254 1 : break;
255 : }
256 :
257 17 : AddToToken( psContext, chNext );
258 : } while( TRUE );
259 : }
260 : /* -------------------------------------------------------------------- */
261 : /* Handle CDATA. */
262 : /* -------------------------------------------------------------------- */
263 171195 : else if( chNext == '<'
264 : && EQUALN(psContext->pszInput+psContext->nInputOffset,"![CDATA[",8) )
265 : {
266 0 : psContext->eTokenType = TString;
267 :
268 : // Skip !CDATA[
269 0 : ReadChar( psContext );
270 0 : ReadChar( psContext );
271 0 : ReadChar( psContext );
272 0 : ReadChar( psContext );
273 0 : ReadChar( psContext );
274 0 : ReadChar( psContext );
275 0 : ReadChar( psContext );
276 0 : ReadChar( psContext );
277 :
278 0 : while( !EQUALN(psContext->pszInput+psContext->nInputOffset,"]]>",3)
279 : && (chNext = ReadChar(psContext)) != '\0' )
280 0 : AddToToken( psContext, chNext );
281 :
282 : // Skip "]]>" characters
283 0 : ReadChar(psContext);
284 0 : ReadChar(psContext);
285 0 : ReadChar(psContext);
286 : }
287 : /* -------------------------------------------------------------------- */
288 : /* Simple single tokens of interest. */
289 : /* -------------------------------------------------------------------- */
290 204002 : else if( chNext == '<' && !psContext->bInElement )
291 : {
292 32807 : psContext->eTokenType = TOpen;
293 32807 : psContext->bInElement = TRUE;
294 : }
295 167034 : else if( chNext == '>' && psContext->bInElement )
296 : {
297 28646 : psContext->eTokenType = TClose;
298 28646 : psContext->bInElement = FALSE;
299 : }
300 130534 : else if( chNext == '=' && psContext->bInElement )
301 : {
302 20792 : psContext->eTokenType = TEqual;
303 : }
304 88950 : else if( chNext == '\0' )
305 : {
306 1081 : psContext->eTokenType = TNone;
307 : }
308 : /* -------------------------------------------------------------------- */
309 : /* Handle the /> token terminator. */
310 : /* -------------------------------------------------------------------- */
311 110460 : else if( chNext == '/' && psContext->bInElement
312 18457 : && psContext->pszInput[psContext->nInputOffset] == '>' )
313 : {
314 4134 : chNext = ReadChar( psContext );
315 : CPLAssert( chNext == '>' );
316 :
317 4134 : psContext->eTokenType = TSlashClose;
318 4134 : psContext->bInElement = FALSE;
319 : }
320 : /* -------------------------------------------------------------------- */
321 : /* Handle the ?> token terminator. */
322 : /* -------------------------------------------------------------------- */
323 83813 : else if( chNext == '?' && psContext->bInElement
324 52 : && psContext->pszInput[psContext->nInputOffset] == '>' )
325 : {
326 26 : chNext = ReadChar( psContext );
327 :
328 : CPLAssert( chNext == '>' );
329 :
330 26 : psContext->eTokenType = TQuestionClose;
331 26 : psContext->bInElement = FALSE;
332 : }
333 :
334 : /* -------------------------------------------------------------------- */
335 : /* Collect a quoted string. */
336 : /* -------------------------------------------------------------------- */
337 92983 : else if( psContext->bInElement && chNext == '"' )
338 : {
339 9274 : psContext->eTokenType = TString;
340 :
341 84362 : while( (chNext = ReadChar(psContext)) != '"'
342 : && chNext != '\0' )
343 65814 : AddToToken( psContext, chNext );
344 :
345 9274 : if( chNext != '"' )
346 : {
347 0 : psContext->eTokenType = TNone;
348 : CPLError( CE_Failure, CPLE_AppDefined,
349 : "Parse error on line %d, reached EOF before closing quote.",
350 0 : psContext->nInputLine );
351 : }
352 :
353 : /* Do we need to unescape it? */
354 9274 : if( strchr(psContext->pszToken,'&') != NULL )
355 : {
356 : int nLength;
357 : char *pszUnescaped = CPLUnescapeString( psContext->pszToken,
358 3 : &nLength, CPLES_XML );
359 3 : strcpy( psContext->pszToken, pszUnescaped );
360 3 : CPLFree( pszUnescaped );
361 3 : psContext->nTokenSize = strlen(psContext->pszToken );
362 : }
363 : }
364 :
365 85952 : else if( psContext->bInElement && chNext == '\'' )
366 : {
367 11517 : psContext->eTokenType = TString;
368 :
369 154547 : while( (chNext = ReadChar(psContext)) != '\''
370 : && chNext != '\0' )
371 131513 : AddToToken( psContext, chNext );
372 :
373 11517 : if( chNext != '\'' )
374 : {
375 0 : psContext->eTokenType = TNone;
376 : CPLError( CE_Failure, CPLE_AppDefined,
377 : "Parse error on line %d, reached EOF before closing quote.",
378 0 : psContext->nInputLine );
379 : }
380 :
381 : /* Do we need to unescape it? */
382 11517 : if( strchr(psContext->pszToken,'&') != NULL )
383 : {
384 : int nLength;
385 : char *pszUnescaped = CPLUnescapeString( psContext->pszToken,
386 0 : &nLength, CPLES_XML );
387 0 : strcpy( psContext->pszToken, pszUnescaped );
388 0 : CPLFree( pszUnescaped );
389 0 : psContext->nTokenSize = strlen(psContext->pszToken );
390 : }
391 : }
392 :
393 : /* -------------------------------------------------------------------- */
394 : /* Collect an unquoted string, terminated by a open angle */
395 : /* bracket. */
396 : /* -------------------------------------------------------------------- */
397 62918 : else if( !psContext->bInElement )
398 : {
399 9318 : psContext->eTokenType = TString;
400 :
401 9318 : AddToToken( psContext, chNext );
402 318027 : while( (chNext = ReadChar(psContext)) != '<'
403 : && chNext != '\0' )
404 299391 : AddToToken( psContext, chNext );
405 9318 : UnreadChar( psContext, chNext );
406 :
407 : /* Do we need to unescape it? */
408 9318 : if( strchr(psContext->pszToken,'&') != NULL )
409 : {
410 : int nLength;
411 : char *pszUnescaped = CPLUnescapeString( psContext->pszToken,
412 258 : &nLength, CPLES_XML );
413 258 : strcpy( psContext->pszToken, pszUnescaped );
414 258 : CPLFree( pszUnescaped );
415 258 : psContext->nTokenSize = strlen(psContext->pszToken );
416 : }
417 : }
418 :
419 : /* -------------------------------------------------------------------- */
420 : /* Collect a regular token terminated by white space, or */
421 : /* special character(s) like an equal sign. */
422 : /* -------------------------------------------------------------------- */
423 : else
424 : {
425 53600 : psContext->eTokenType = TToken;
426 :
427 : /* add the first character to the token regardless of what it is */
428 53600 : AddToToken( psContext, chNext );
429 :
430 367457 : for( chNext = ReadChar(psContext);
431 : (chNext >= 'A' && chNext <= 'Z')
432 : || (chNext >= 'a' && chNext <= 'z')
433 : || chNext == '-'
434 : || chNext == '_'
435 : || chNext == '.'
436 : || chNext == ':'
437 : || (chNext >= '0' && chNext <= '9');
438 : chNext = ReadChar(psContext) )
439 : {
440 313857 : AddToToken( psContext, chNext );
441 : }
442 :
443 53600 : UnreadChar(psContext, chNext);
444 : }
445 :
446 171205 : return psContext->eTokenType;
447 : }
448 :
449 : /************************************************************************/
450 : /* PushNode() */
451 : /************************************************************************/
452 :
453 18484 : static void PushNode( ParseContext *psContext, CPLXMLNode *psNode )
454 :
455 : {
456 18484 : if( psContext->nStackMaxSize <= psContext->nStackSize )
457 : {
458 1082 : psContext->nStackMaxSize += 10;
459 : psContext->papsStack = (StackContext *)
460 : CPLRealloc(psContext->papsStack,
461 1082 : sizeof(StackContext) * psContext->nStackMaxSize);
462 : }
463 :
464 18484 : psContext->papsStack[psContext->nStackSize].psFirstNode = psNode;
465 18484 : psContext->papsStack[psContext->nStackSize].psLastChild = NULL;
466 18484 : psContext->nStackSize ++;
467 18484 : }
468 :
469 : /************************************************************************/
470 : /* AttachNode() */
471 : /* */
472 : /* Attach the passed node as a child of the current node. */
473 : /* Special handling exists for adding siblings to psFirst if */
474 : /* there is nothing on the stack. */
475 : /************************************************************************/
476 :
477 48604 : static void AttachNode( ParseContext *psContext, CPLXMLNode *psNode )
478 :
479 : {
480 48604 : if( psContext->psFirstNode == NULL )
481 : {
482 1079 : psContext->psFirstNode = psNode;
483 1079 : psContext->psLastNode = psNode;
484 : }
485 47525 : else if( psContext->nStackSize == 0 )
486 : {
487 30 : psContext->psLastNode->psNext = psNode;
488 30 : psContext->psLastNode = psNode;
489 : }
490 47495 : else if( psContext->papsStack[psContext->nStackSize-1].psFirstNode->psChild == NULL )
491 : {
492 18420 : psContext->papsStack[psContext->nStackSize-1].psFirstNode->psChild = psNode;
493 18420 : psContext->papsStack[psContext->nStackSize-1].psLastChild = psNode;
494 : }
495 : else
496 : {
497 29075 : psContext->papsStack[psContext->nStackSize-1].psLastChild->psNext = psNode;
498 29075 : psContext->papsStack[psContext->nStackSize-1].psLastChild = psNode;
499 : }
500 48604 : }
501 :
502 : /************************************************************************/
503 : /* CPLParseXMLString() */
504 : /************************************************************************/
505 :
506 : /**
507 : * \brief Parse an XML string into tree form.
508 : *
509 : * The passed document is parsed into a CPLXMLNode tree representation.
510 : * If the document is not well formed XML then NULL is returned, and errors
511 : * are reported via CPLError(). No validation beyond wellformedness is
512 : * done. The CPLParseXMLFile() convenience function can be used to parse
513 : * from a file.
514 : *
515 : * The returned document tree is is owned by the caller and should be freed
516 : * with CPLDestroyXMLNode() when no longer needed.
517 : *
518 : * If the document has more than one "root level" element then those after the
519 : * first will be attached to the first as siblings (via the psNext pointers)
520 : * even though there is no common parent. A document with no XML structure
521 : * (no angle brackets for instance) would be considered well formed, and
522 : * returned as a single CXT_Text node.
523 : *
524 : * @param pszString the document to parse.
525 : *
526 : * @return parsed tree or NULL on error.
527 : */
528 :
529 1082 : CPLXMLNode *CPLParseXMLString( const char *pszString )
530 :
531 : {
532 : ParseContext sContext;
533 :
534 1082 : CPLErrorReset();
535 :
536 1082 : if( pszString == NULL )
537 : {
538 : CPLError( CE_Failure, CPLE_AppDefined,
539 0 : "CPLParseXMLString() called with NULL pointer." );
540 0 : return NULL;
541 : }
542 :
543 : /* -------------------------------------------------------------------- */
544 : /* Initialize parse context. */
545 : /* -------------------------------------------------------------------- */
546 1082 : sContext.pszInput = pszString;
547 1082 : sContext.nInputOffset = 0;
548 1082 : sContext.nInputLine = 0;
549 1082 : sContext.bInElement = FALSE;
550 1082 : sContext.pszToken = NULL;
551 1082 : sContext.nTokenMaxSize = 0;
552 1082 : sContext.nTokenSize = 0;
553 1082 : sContext.eTokenType = TNone;
554 1082 : sContext.nStackMaxSize = 0;
555 1082 : sContext.nStackSize = 0;
556 1082 : sContext.papsStack = NULL;
557 1082 : sContext.psFirstNode = NULL;
558 1082 : sContext.psLastNode = NULL;
559 :
560 : /* ensure token is initialized */
561 1082 : AddToToken( &sContext, ' ' );
562 :
563 : /* ==================================================================== */
564 : /* Loop reading tokens. */
565 : /* ==================================================================== */
566 1082 : while( ReadToken( &sContext ) != TNone )
567 : {
568 : /* -------------------------------------------------------------------- */
569 : /* Create a new element. */
570 : /* -------------------------------------------------------------------- */
571 81411 : if( sContext.eTokenType == TOpen )
572 : {
573 : CPLXMLNode *psElement;
574 :
575 32807 : if( ReadToken(&sContext) != TToken )
576 : {
577 : CPLError( CE_Failure, CPLE_AppDefined,
578 : "Line %d: Didn't find element token after open angle bracket.",
579 0 : sContext.nInputLine );
580 0 : break;
581 : }
582 :
583 32807 : if( sContext.pszToken[0] != '/' )
584 : {
585 : psElement = CPLCreateXMLNode( NULL, CXT_Element,
586 18484 : sContext.pszToken );
587 18484 : AttachNode( &sContext, psElement );
588 18484 : PushNode( &sContext, psElement );
589 : }
590 : else
591 : {
592 28645 : if( sContext.nStackSize == 0
593 14322 : || !EQUAL(sContext.pszToken+1,
594 : sContext.papsStack[sContext.nStackSize-1].psFirstNode->pszValue) )
595 : {
596 : CPLError( CE_Failure, CPLE_AppDefined,
597 : "Line %d: <%.500s> doesn't have matching <%.500s>.",
598 : sContext.nInputLine,
599 1 : sContext.pszToken, sContext.pszToken+1 );
600 1 : break;
601 : }
602 : else
603 : {
604 14322 : if( ReadToken(&sContext) != TClose )
605 : {
606 : CPLError( CE_Failure, CPLE_AppDefined,
607 : "Line %d: Missing close angle bracket after <%.500s.",
608 : sContext.nInputLine,
609 0 : sContext.pszToken );
610 0 : break;
611 : }
612 :
613 : /* pop element off stack */
614 14322 : sContext.nStackSize--;
615 : }
616 : }
617 : }
618 :
619 : /* -------------------------------------------------------------------- */
620 : /* Add an attribute to a token. */
621 : /* -------------------------------------------------------------------- */
622 48604 : else if( sContext.eTokenType == TToken )
623 : {
624 : CPLXMLNode *psAttr;
625 :
626 20792 : psAttr = CPLCreateXMLNode(NULL, CXT_Attribute, sContext.pszToken);
627 20792 : AttachNode( &sContext, psAttr );
628 :
629 20792 : if( ReadToken(&sContext) != TEqual )
630 : {
631 : CPLError( CE_Failure, CPLE_AppDefined,
632 : "Line %d: Didn't find expected '=' for value of attribute '%.500s'.",
633 0 : sContext.nInputLine, psAttr->pszValue );
634 0 : break;
635 : }
636 :
637 20792 : if( ReadToken(&sContext) != TString
638 : && sContext.eTokenType != TToken )
639 : {
640 : CPLError( CE_Failure, CPLE_AppDefined,
641 : "Line %d: Didn't find expected attribute value.",
642 0 : sContext.nInputLine );
643 0 : break;
644 : }
645 :
646 20792 : CPLCreateXMLNode( psAttr, CXT_Text, sContext.pszToken );
647 : }
648 :
649 : /* -------------------------------------------------------------------- */
650 : /* Close the start section of an element. */
651 : /* -------------------------------------------------------------------- */
652 27812 : else if( sContext.eTokenType == TClose )
653 : {
654 14324 : if( sContext.nStackSize == 0 )
655 : {
656 : CPLError( CE_Failure, CPLE_AppDefined,
657 : "Line %d: Found unbalanced '>'.",
658 0 : sContext.nInputLine );
659 0 : break;
660 : }
661 : }
662 :
663 : /* -------------------------------------------------------------------- */
664 : /* Close the start section of an element, and pop it */
665 : /* immediately. */
666 : /* -------------------------------------------------------------------- */
667 13488 : else if( sContext.eTokenType == TSlashClose )
668 : {
669 4134 : if( sContext.nStackSize == 0 )
670 : {
671 : CPLError( CE_Failure, CPLE_AppDefined,
672 : "Line %d: Found unbalanced '/>'.",
673 0 : sContext.nInputLine );
674 0 : break;
675 : }
676 :
677 4134 : sContext.nStackSize--;
678 : }
679 :
680 : /* -------------------------------------------------------------------- */
681 : /* Close the start section of a <?...?> element, and pop it */
682 : /* immediately. */
683 : /* -------------------------------------------------------------------- */
684 9354 : else if( sContext.eTokenType == TQuestionClose )
685 : {
686 26 : if( sContext.nStackSize == 0 )
687 : {
688 : CPLError( CE_Failure, CPLE_AppDefined,
689 : "Line %d: Found unbalanced '?>'.",
690 0 : sContext.nInputLine );
691 0 : break;
692 : }
693 26 : else if( sContext.papsStack[sContext.nStackSize-1].psFirstNode->pszValue[0] != '?' )
694 : {
695 : CPLError( CE_Failure, CPLE_AppDefined,
696 : "Line %d: Found '?>' without matching '<?'.",
697 0 : sContext.nInputLine );
698 0 : break;
699 : }
700 :
701 26 : sContext.nStackSize--;
702 : }
703 :
704 : /* -------------------------------------------------------------------- */
705 : /* Handle comments. They are returned as a whole token with the */
706 : /* prefix and postfix omitted. No processing of white space */
707 : /* will be done. */
708 : /* -------------------------------------------------------------------- */
709 9328 : else if( sContext.eTokenType == TComment )
710 : {
711 : CPLXMLNode *psValue;
712 :
713 9 : psValue = CPLCreateXMLNode(NULL, CXT_Comment, sContext.pszToken);
714 9 : AttachNode( &sContext, psValue );
715 : }
716 :
717 : /* -------------------------------------------------------------------- */
718 : /* Handle literals. They are returned without processing. */
719 : /* -------------------------------------------------------------------- */
720 9319 : else if( sContext.eTokenType == TLiteral )
721 : {
722 : CPLXMLNode *psValue;
723 :
724 1 : psValue = CPLCreateXMLNode(NULL, CXT_Literal, sContext.pszToken);
725 1 : AttachNode( &sContext, psValue );
726 : }
727 :
728 : /* -------------------------------------------------------------------- */
729 : /* Add a text value node as a child of the current element. */
730 : /* -------------------------------------------------------------------- */
731 18636 : else if( sContext.eTokenType == TString && !sContext.bInElement )
732 : {
733 : CPLXMLNode *psValue;
734 :
735 9318 : psValue = CPLCreateXMLNode(NULL, CXT_Text, sContext.pszToken);
736 9318 : AttachNode( &sContext, psValue );
737 : }
738 : /* -------------------------------------------------------------------- */
739 : /* Anything else is an error. */
740 : /* -------------------------------------------------------------------- */
741 : else
742 : {
743 : CPLError( CE_Failure, CPLE_AppDefined,
744 : "Parse error at line %d, unexpected token:%.500s\n",
745 0 : sContext.nInputLine, sContext.pszToken );
746 0 : break;
747 : }
748 : }
749 :
750 : /* -------------------------------------------------------------------- */
751 : /* Did we pop all the way out of our stack? */
752 : /* -------------------------------------------------------------------- */
753 1082 : if( CPLGetLastErrorType() == CE_None && sContext.nStackSize != 0 )
754 : {
755 : CPLError( CE_Failure, CPLE_AppDefined,
756 : "Parse error at EOF, not all elements have been closed,\n"
757 : "starting with %.500s\n",
758 1 : sContext.papsStack[sContext.nStackSize-1].psFirstNode->pszValue );
759 : }
760 :
761 : /* -------------------------------------------------------------------- */
762 : /* Cleanup */
763 : /* -------------------------------------------------------------------- */
764 1082 : CPLFree( sContext.pszToken );
765 1082 : if( sContext.papsStack != NULL )
766 1078 : CPLFree( sContext.papsStack );
767 :
768 1082 : if( CPLGetLastErrorType() != CE_None )
769 : {
770 2 : CPLDestroyXMLNode( sContext.psFirstNode );
771 2 : sContext.psFirstNode = NULL;
772 2 : sContext.psLastNode = NULL;
773 : }
774 :
775 1082 : return sContext.psFirstNode;
776 : }
777 :
778 : /************************************************************************/
779 : /* _GrowBuffer() */
780 : /************************************************************************/
781 :
782 38801 : static void _GrowBuffer( size_t nNeeded,
783 : char **ppszText, unsigned int *pnMaxLength )
784 :
785 : {
786 38801 : if( nNeeded+1 >= *pnMaxLength )
787 : {
788 1401 : *pnMaxLength = MAX(*pnMaxLength * 2,nNeeded+1);
789 1401 : *ppszText = (char *) CPLRealloc(*ppszText, *pnMaxLength);
790 : }
791 38801 : }
792 :
793 : /************************************************************************/
794 : /* CPLSerializeXMLNode() */
795 : /************************************************************************/
796 :
797 : static void
798 22574 : CPLSerializeXMLNode( CPLXMLNode *psNode, int nIndent,
799 : char **ppszText, unsigned int *pnLength,
800 : unsigned int *pnMaxLength )
801 :
802 : {
803 22574 : if( psNode == NULL )
804 0 : return;
805 :
806 : /* -------------------------------------------------------------------- */
807 : /* Ensure the buffer is plenty large to hold this additional */
808 : /* string. */
809 : /* -------------------------------------------------------------------- */
810 22574 : *pnLength += strlen(*ppszText + *pnLength);
811 : _GrowBuffer( strlen(psNode->pszValue) + *pnLength + 40 + nIndent,
812 22574 : ppszText, pnMaxLength );
813 :
814 : /* -------------------------------------------------------------------- */
815 : /* Text is just directly emitted. */
816 : /* -------------------------------------------------------------------- */
817 22574 : if( psNode->eType == CXT_Text )
818 : {
819 9688 : char *pszEscaped = CPLEscapeString( psNode->pszValue, -1, CPLES_XML );
820 :
821 : CPLAssert( psNode->psChild == NULL );
822 :
823 : /* Escaped text might be bigger than expected. */
824 : _GrowBuffer( strlen(pszEscaped) + *pnLength,
825 9688 : ppszText, pnMaxLength );
826 9688 : strcat( *ppszText + *pnLength, pszEscaped );
827 :
828 9688 : CPLFree( pszEscaped );
829 : }
830 :
831 : /* -------------------------------------------------------------------- */
832 : /* Attributes require a little formatting. */
833 : /* -------------------------------------------------------------------- */
834 12886 : else if( psNode->eType == CXT_Attribute )
835 : {
836 : CPLAssert( psNode->psChild != NULL
837 : && psNode->psChild->eType == CXT_Text );
838 :
839 5766 : sprintf( *ppszText + *pnLength, " %s=\"", psNode->pszValue );
840 : CPLSerializeXMLNode( psNode->psChild, 0, ppszText,
841 5766 : pnLength, pnMaxLength );
842 5766 : strcat( *ppszText + *pnLength, "\"" );
843 : }
844 :
845 : /* -------------------------------------------------------------------- */
846 : /* Handle comment output. */
847 : /* -------------------------------------------------------------------- */
848 7120 : else if( psNode->eType == CXT_Comment )
849 : {
850 : int i;
851 :
852 : CPLAssert( psNode->psChild == NULL );
853 :
854 0 : for( i = 0; i < nIndent; i++ )
855 0 : (*ppszText)[(*pnLength)++] = ' ';
856 :
857 : sprintf( *ppszText + *pnLength, "<!--%s-->\n",
858 0 : psNode->pszValue );
859 : }
860 :
861 : /* -------------------------------------------------------------------- */
862 : /* Handle literal output (like <!DOCTYPE...>) */
863 : /* -------------------------------------------------------------------- */
864 7120 : else if( psNode->eType == CXT_Literal )
865 : {
866 : int i;
867 :
868 : CPLAssert( psNode->psChild == NULL );
869 :
870 0 : for( i = 0; i < nIndent; i++ )
871 0 : (*ppszText)[(*pnLength)++] = ' ';
872 :
873 0 : strcpy( *ppszText + *pnLength, psNode->pszValue );
874 0 : strcat( *ppszText + *pnLength, "\n" );
875 : }
876 :
877 : /* -------------------------------------------------------------------- */
878 : /* Elements actually have to deal with general children, and */
879 : /* various formatting issues. */
880 : /* -------------------------------------------------------------------- */
881 7120 : else if( psNode->eType == CXT_Element )
882 : {
883 7120 : int bHasNonAttributeChildren = FALSE;
884 : CPLXMLNode *psChild;
885 :
886 7120 : memset( *ppszText + *pnLength, ' ', nIndent );
887 7120 : *pnLength += nIndent;
888 7120 : (*ppszText)[*pnLength] = '\0';
889 :
890 7120 : sprintf( *ppszText + *pnLength, "<%s", psNode->pszValue );
891 :
892 : /* Serialize *all* the attribute children, regardless of order */
893 23316 : for( psChild = psNode->psChild;
894 : psChild != NULL;
895 : psChild = psChild->psNext )
896 : {
897 16196 : if( psChild->eType == CXT_Attribute )
898 : CPLSerializeXMLNode( psChild, 0, ppszText, pnLength,
899 5766 : pnMaxLength );
900 : else
901 10430 : bHasNonAttributeChildren = TRUE;
902 : }
903 :
904 7120 : if( !bHasNonAttributeChildren )
905 : {
906 581 : if( psNode->pszValue[0] == '?' )
907 4 : strcat( *ppszText + *pnLength, "?>\n" );
908 : else
909 577 : strcat( *ppszText + *pnLength, "/>\n" );
910 : }
911 : else
912 : {
913 6539 : int bJustText = TRUE;
914 :
915 6539 : strcat( *ppszText + *pnLength, ">" );
916 :
917 20575 : for( psChild = psNode->psChild;
918 : psChild != NULL;
919 : psChild = psChild->psNext )
920 : {
921 14036 : if( psChild->eType == CXT_Attribute )
922 3606 : continue;
923 :
924 10430 : if( psChild->eType != CXT_Text && bJustText )
925 : {
926 2617 : bJustText = FALSE;
927 2617 : strcat( *ppszText + *pnLength, "\n" );
928 : }
929 :
930 : CPLSerializeXMLNode( psChild, nIndent + 2, ppszText, pnLength,
931 10430 : pnMaxLength );
932 : }
933 :
934 6539 : *pnLength += strlen(*ppszText + *pnLength);
935 : _GrowBuffer( strlen(psNode->pszValue) + *pnLength + 40 + nIndent,
936 6539 : ppszText, pnMaxLength );
937 :
938 6539 : if( !bJustText )
939 : {
940 2617 : memset( *ppszText + *pnLength, ' ', nIndent );
941 2617 : *pnLength += nIndent;
942 2617 : (*ppszText)[*pnLength] = '\0';
943 : }
944 :
945 6539 : *pnLength += strlen(*ppszText + *pnLength);
946 6539 : sprintf( *ppszText + *pnLength, "</%s>\n", psNode->pszValue );
947 : }
948 : }
949 : }
950 :
951 : /************************************************************************/
952 : /* CPLSerializeXMLTree() */
953 : /************************************************************************/
954 :
955 : /**
956 : * \brief Convert tree into string document.
957 : *
958 : * This function converts a CPLXMLNode tree representation of a document
959 : * into a flat string representation. White space indentation is used
960 : * to visually preserve the tree structure of the document. The returned
961 : * document becomes owned by the caller and should be freed with CPLFree()
962 : * when no longer needed.
963 : *
964 : * @param psNode
965 : *
966 : * @return the document on success or NULL on failure.
967 : */
968 :
969 609 : char *CPLSerializeXMLTree( CPLXMLNode *psNode )
970 :
971 : {
972 609 : unsigned int nMaxLength = 100, nLength = 0;
973 609 : char *pszText = NULL;
974 : CPLXMLNode *psThis;
975 :
976 609 : pszText = (char *) CPLMalloc(nMaxLength);
977 609 : pszText[0] = '\0';
978 :
979 1221 : for( psThis = psNode; psThis != NULL; psThis = psThis->psNext )
980 612 : CPLSerializeXMLNode( psThis, 0, &pszText, &nLength, &nMaxLength );
981 :
982 609 : return pszText;
983 : }
984 :
985 : /************************************************************************/
986 : /* CPLCreateXMLNode() */
987 : /************************************************************************/
988 :
989 : /**
990 : * \brief Create a document tree item.
991 : *
992 : * Create a single CPLXMLNode object with the desired value and type, and
993 : * attach it as a child of the indicated parent.
994 : *
995 : * @param poParent the parent to which this node should be attached as a
996 : * child. May be NULL to keep as free standing.
997 : * @param eType the type of the newly created node
998 : * @param pszText the value of the newly created node
999 : *
1000 : * @return the newly created node, now owned by the caller (or parent node).
1001 : */
1002 :
1003 93769 : CPLXMLNode *CPLCreateXMLNode( CPLXMLNode *poParent, CPLXMLNodeType eType,
1004 : const char *pszText )
1005 :
1006 : {
1007 : CPLXMLNode *psNode;
1008 :
1009 : /* -------------------------------------------------------------------- */
1010 : /* Create new node. */
1011 : /* -------------------------------------------------------------------- */
1012 93769 : psNode = (CPLXMLNode *) CPLCalloc(sizeof(CPLXMLNode),1);
1013 :
1014 93769 : psNode->eType = eType;
1015 93769 : psNode->pszValue = CPLStrdup( pszText );
1016 :
1017 : /* -------------------------------------------------------------------- */
1018 : /* Attach to parent, if provided. */
1019 : /* -------------------------------------------------------------------- */
1020 93769 : if( poParent != NULL )
1021 : {
1022 41251 : if( poParent->psChild == NULL )
1023 33816 : poParent->psChild = psNode;
1024 : else
1025 : {
1026 7435 : CPLXMLNode *psLink = poParent->psChild;
1027 :
1028 69700 : while( psLink->psNext != NULL )
1029 54830 : psLink = psLink->psNext;
1030 :
1031 7435 : psLink->psNext = psNode;
1032 : }
1033 : }
1034 :
1035 93769 : return psNode;
1036 : }
1037 :
1038 : /************************************************************************/
1039 : /* CPLDestroyXMLNode() */
1040 : /************************************************************************/
1041 :
1042 : /**
1043 : * \brief Destroy a tree.
1044 : *
1045 : * This function frees resources associated with a CPLXMLNode and all its
1046 : * children nodes.
1047 : *
1048 : * @param psNode the tree to free.
1049 : */
1050 :
1051 93769 : void CPLDestroyXMLNode( CPLXMLNode *psNode )
1052 :
1053 : {
1054 93769 : if( psNode == NULL )
1055 0 : return;
1056 :
1057 93769 : if( psNode->psChild != NULL )
1058 53138 : CPLDestroyXMLNode( psNode->psChild );
1059 :
1060 93769 : if( psNode->psNext != NULL )
1061 38586 : CPLDestroyXMLNode( psNode->psNext );
1062 :
1063 93769 : CPLFree( psNode->pszValue );
1064 93769 : CPLFree( psNode );
1065 : }
1066 :
1067 : /************************************************************************/
1068 : /* CPLSearchXMLNode() */
1069 : /************************************************************************/
1070 :
1071 : /**
1072 : * \brief Search for a node in document.
1073 : *
1074 : * Searches the children (and potentially siblings) of the document
1075 : * passed in for the named element or attribute. To search following
1076 : * siblings as well as children, prefix the pszElement name with an equal
1077 : * sign. This function does a depth-first, pre-order traversal of the
1078 : * document tree. So it will first match against the current node, then its
1079 : * first child, that child's first child, and so on.
1080 : *
1081 : * Use CPLGetXMLNode() to find a specific child, or along a specific
1082 : * node path.
1083 : *
1084 : * @param psRoot the subtree to search. This should be a node of type
1085 : * CXT_Element. NULL is safe.
1086 : *
1087 : * @param pszElement the name of the element or attribute to search for.
1088 : *
1089 : * @return The matching node or NULL on failure.
1090 : */
1091 :
1092 917 : CPLXMLNode *CPLSearchXMLNode( CPLXMLNode *psRoot, const char *pszElement )
1093 :
1094 : {
1095 917 : int bSideSearch = FALSE;
1096 : CPLXMLNode *psChild, *psResult;
1097 :
1098 917 : if( psRoot == NULL || pszElement == NULL )
1099 0 : return NULL;
1100 :
1101 917 : if( *pszElement == '=' )
1102 : {
1103 4 : bSideSearch = TRUE;
1104 4 : pszElement++;
1105 : }
1106 :
1107 : /* -------------------------------------------------------------------- */
1108 : /* Does this node match? */
1109 : /* -------------------------------------------------------------------- */
1110 917 : if( (psRoot->eType == CXT_Element
1111 : || psRoot->eType == CXT_Attribute)
1112 : && EQUAL(pszElement,psRoot->pszValue) )
1113 0 : return psRoot;
1114 :
1115 : /* -------------------------------------------------------------------- */
1116 : /* Search children. */
1117 : /* -------------------------------------------------------------------- */
1118 2272 : for( psChild = psRoot->psChild; psChild != NULL; psChild = psChild->psNext)
1119 : {
1120 1370 : if( (psChild->eType == CXT_Element
1121 : || psChild->eType == CXT_Attribute)
1122 : && EQUAL(pszElement,psChild->pszValue) )
1123 4 : return psChild;
1124 :
1125 1366 : if( psChild->psChild != NULL )
1126 : {
1127 907 : psResult = CPLSearchXMLNode( psChild, pszElement );
1128 907 : if( psResult != NULL )
1129 11 : return psResult;
1130 : }
1131 : }
1132 :
1133 : /* -------------------------------------------------------------------- */
1134 : /* Search siblings if we are in side search mode. */
1135 : /* -------------------------------------------------------------------- */
1136 902 : if( bSideSearch )
1137 : {
1138 6 : for( psRoot = psRoot->psNext; psRoot != NULL; psRoot = psRoot->psNext )
1139 : {
1140 6 : psResult = CPLSearchXMLNode( psRoot, pszElement );
1141 6 : if( psResult != NULL )
1142 4 : return psResult;
1143 : }
1144 : }
1145 :
1146 898 : return NULL;
1147 : }
1148 :
1149 : /************************************************************************/
1150 : /* CPLGetXMLNode() */
1151 : /************************************************************************/
1152 :
1153 : /**
1154 : * \brief Find node by path.
1155 : *
1156 : * Searches the document or subdocument indicated by psRoot for an element
1157 : * (or attribute) with the given path. The path should consist of a set of
1158 : * element names separated by dots, not including the name of the root
1159 : * element (psRoot). If the requested element is not found NULL is returned.
1160 : *
1161 : * Attribute names may only appear as the last item in the path.
1162 : *
1163 : * The search is done from the root node's children, but all intermediate
1164 : * nodes in the path must be specified. Searching for "name" would only find
1165 : * a name element or attribute if it is a direct child of the root, not at any
1166 : * level in the subdocument.
1167 : *
1168 : * If the pszPath is prefixed by "=" then the search will begin with the
1169 : * root node, and its siblings, instead of the root node's children. This
1170 : * is particularly useful when searching within a whole document which is
1171 : * often prefixed by one or more "junk" nodes like the <?xml> declaration.
1172 : *
1173 : * @param psRoot the subtree in which to search. This should be a node of
1174 : * type CXT_Element. NULL is safe.
1175 : *
1176 : * @param pszPath the list of element names in the path (dot separated).
1177 : *
1178 : * @return the requested element node, or NULL if not found.
1179 : */
1180 :
1181 26021 : CPLXMLNode *CPLGetXMLNode( CPLXMLNode *psRoot, const char *pszPath )
1182 :
1183 : {
1184 : char *apszTokens[2];
1185 : char **papszTokens;
1186 26021 : int iToken = 0;
1187 26021 : int bSideSearch = FALSE;
1188 :
1189 26021 : if( psRoot == NULL || pszPath == NULL )
1190 0 : return NULL;
1191 :
1192 26021 : if( *pszPath == '=' )
1193 : {
1194 21 : bSideSearch = TRUE;
1195 21 : pszPath++;
1196 : }
1197 :
1198 : /* Slight optimization : avoid using CSLTokenizeStringComplex that */
1199 : /* does memory allocations when it is not really necessary */
1200 26021 : if (strchr(pszPath, '.'))
1201 318 : papszTokens = CSLTokenizeStringComplex( pszPath, ".", FALSE, FALSE );
1202 : else
1203 : {
1204 25703 : apszTokens[0] = (char*) pszPath;
1205 25703 : apszTokens[1] = NULL;
1206 25703 : papszTokens = apszTokens;
1207 : }
1208 :
1209 66381 : while( papszTokens[iToken] != NULL && psRoot != NULL )
1210 : {
1211 : CPLXMLNode *psChild;
1212 :
1213 26383 : if( bSideSearch )
1214 : {
1215 21 : psChild = psRoot;
1216 21 : bSideSearch = FALSE;
1217 : }
1218 : else
1219 26362 : psChild = psRoot->psChild;
1220 :
1221 83972 : for( ; psChild != NULL; psChild = psChild->psNext )
1222 : {
1223 142390 : if( psChild->eType != CXT_Text
1224 70462 : && EQUAL(papszTokens[iToken],psChild->pszValue) )
1225 14339 : break;
1226 : }
1227 :
1228 26383 : if( psChild == NULL )
1229 : {
1230 12044 : psRoot = NULL;
1231 12044 : break;
1232 : }
1233 :
1234 14339 : psRoot = psChild;
1235 14339 : iToken++;
1236 : }
1237 :
1238 26021 : if (papszTokens != apszTokens)
1239 318 : CSLDestroy( papszTokens );
1240 26021 : return psRoot;
1241 : }
1242 :
1243 : /************************************************************************/
1244 : /* CPLGetXMLValue() */
1245 : /************************************************************************/
1246 :
1247 : /**
1248 : * \brief Fetch element/attribute value.
1249 : *
1250 : * Searches the document for the element/attribute value associated with
1251 : * the path. The corresponding node is internally found with CPLGetXMLNode()
1252 : * (see there for details on path handling). Once found, the value is
1253 : * considered to be the first CXT_Text child of the node.
1254 : *
1255 : * If the attribute/element search fails, or if the found node has no
1256 : * value then the passed default value is returned.
1257 : *
1258 : * The returned value points to memory within the document tree, and should
1259 : * not be altered or freed.
1260 : *
1261 : * @param psRoot the subtree in which to search. This should be a node of
1262 : * type CXT_Element. NULL is safe.
1263 : *
1264 : * @param pszPath the list of element names in the path (dot separated). An
1265 : * empty path means get the value of the psRoot node.
1266 : *
1267 : * @param pszDefault the value to return if a corresponding value is not
1268 : * found, may be NULL.
1269 : *
1270 : * @return the requested value or pszDefault if not found.
1271 : */
1272 :
1273 22130 : const char *CPLGetXMLValue( CPLXMLNode *psRoot, const char *pszPath,
1274 : const char *pszDefault )
1275 :
1276 : {
1277 : CPLXMLNode *psTarget;
1278 :
1279 22888 : if( pszPath == NULL || *pszPath == '\0' )
1280 758 : psTarget = psRoot;
1281 : else
1282 21372 : psTarget = CPLGetXMLNode( psRoot, pszPath );
1283 :
1284 22130 : if( psTarget == NULL )
1285 8860 : return pszDefault;
1286 :
1287 13270 : if( psTarget->eType == CXT_Attribute )
1288 : {
1289 : CPLAssert( psTarget->psChild != NULL
1290 : && psTarget->psChild->eType == CXT_Text );
1291 :
1292 9816 : return psTarget->psChild->pszValue;
1293 : }
1294 :
1295 3454 : if( psTarget->eType == CXT_Element )
1296 : {
1297 : // Find first non-attribute child, and verify it is a single text
1298 : // with no siblings
1299 :
1300 3454 : psTarget = psTarget->psChild;
1301 :
1302 7877 : while( psTarget != NULL && psTarget->eType == CXT_Attribute )
1303 969 : psTarget = psTarget->psNext;
1304 :
1305 3454 : if( psTarget != NULL
1306 : && psTarget->eType == CXT_Text
1307 : && psTarget->psNext == NULL )
1308 3431 : return psTarget->pszValue;
1309 : }
1310 :
1311 23 : return pszDefault;
1312 : }
1313 :
1314 : /************************************************************************/
1315 : /* CPLAddXMLChild() */
1316 : /************************************************************************/
1317 :
1318 : /**
1319 : * \brief Add child node to parent.
1320 : *
1321 : * The passed child is added to the list of children of the indicated
1322 : * parent. Normally the child is added at the end of the parent's child
1323 : * list, but attributes (CXT_Attribute) will be inserted after any other
1324 : * attributes but before any other element type. Ownership of the child
1325 : * node is effectively assumed by the parent node. The child should not have
1326 : * siblings (its psNext should be NULL): when an attribute is inserted ahead
1327 : * of other nodes its psNext is overwritten, otherwise any siblings are
1327 : * carried along with it. If the child has children they are carried with it.
1328 : *
1329 : * @param psParent the node to attach the child to. May not be NULL.
1330 : *
1331 : * @param psChild the child to add to the parent. May not be NULL. Should
1332 : * not be a child of any other parent.
1333 : */
1334 :
1335 1393 : void CPLAddXMLChild( CPLXMLNode *psParent, CPLXMLNode *psChild )
1336 :
1337 : {
1338 : CPLXMLNode *psSib;
1339 :
1340 1393 : if( psParent->psChild == NULL )
1341 : {
1342 410 : psParent->psChild = psChild;
1343 410 : return;
1344 : }
1345 :
1346 : // Insert at head of list if first child is not attribute.
1347 983 : if( psChild->eType == CXT_Attribute
1348 : && psParent->psChild->eType != CXT_Attribute )
1349 : {
1350 0 : psChild->psNext = psParent->psChild;
1351 0 : psParent->psChild = psChild;
1352 0 : return;
1353 : }
1354 :
1355 : // Search for end of list.
1356 2169 : for( psSib = psParent->psChild;
1357 : psSib->psNext != NULL;
1358 : psSib = psSib->psNext )
1359 : {
1360 : // Insert attributes if the next node is not an attribute.
1361 1186 : if( psChild->eType == CXT_Attribute
1362 : && psSib->psNext != NULL
1363 : && psSib->psNext->eType != CXT_Attribute )
1364 : {
1365 0 : psChild->psNext = psSib->psNext;
1366 0 : psSib->psNext = psChild;
1367 0 : return;
1368 : }
1369 : }
1370 :
1371 983 : psSib->psNext = psChild;
1372 : }
1373 :
1374 : /************************************************************************/
1375 : /* CPLRemoveXMLChild() */
1376 : /************************************************************************/
1377 :
1378 : /**
1379 : * \brief Remove child node from parent.
1380 : *
1381 : * The passed child is removed from the child list of the passed parent,
1382 : * but the child is not destroyed. The child retains ownership of its
1383 : * own children, but is cleanly removed from the child list of the parent.
1384 : *
1385 : * @param psParent the node the child is attached to.
1386 : *
1387 : * @param psChild the child to remove.
1388 : *
1389 : * @return TRUE on success or FALSE if the child was not found.
1390 : */
1391 :
1392 0 : int CPLRemoveXMLChild( CPLXMLNode *psParent, CPLXMLNode *psChild )
1393 :
1394 : {
1395 0 : CPLXMLNode *psLast = NULL, *psThis;
1396 :
1397 0 : if( psParent == NULL )
1398 0 : return FALSE;
1399 :
1400 0 : for( psThis = psParent->psChild;
1401 : psThis != NULL;
1402 : psLast = psThis, psThis = psThis->psNext )
1403 : {
1404 0 : if( psThis == psChild )
1405 : {
1406 0 : if( psLast == NULL )
1407 0 : psParent->psChild = psThis->psNext;
1408 : else
1409 0 : psLast->psNext = psThis->psNext;
1410 :
1411 0 : psThis->psNext = NULL;
1412 0 : return TRUE;
1413 : }
1414 : }
1415 :
1416 0 : return FALSE;
1417 : }
1418 :
1419 : /************************************************************************/
1420 : /* CPLAddXMLSibling() */
1421 : /************************************************************************/
1422 :
1423 : /**
1424 : * \brief Add new sibling.
1425 : *
1426 : * The passed psNewSibling is added to the end of siblings of the
1427 : * psOlderSibling node. That is, it is added to the end of the psNext
1428 : * chain. There is no special handling if psNewSibling is an attribute.
1429 : * If this is required, use CPLAddXMLChild().
1430 : *
1431 : * @param psOlderSibling the node to attach the sibling after.
1432 : *
1433 : * @param psNewSibling the node to add at the end of psOlderSiblings psNext
1434 : * chain.
1435 : */
1436 :
1437 132 : void CPLAddXMLSibling( CPLXMLNode *psOlderSibling, CPLXMLNode *psNewSibling )
1438 :
1439 : {
1440 132 : if( psOlderSibling == NULL )
1441 0 : return;
1442 :
1443 266 : while( psOlderSibling->psNext != NULL )
1444 2 : psOlderSibling = psOlderSibling->psNext;
1445 :
1446 132 : psOlderSibling->psNext = psNewSibling;
1447 : }
1448 :
1449 : /************************************************************************/
1450 : /* CPLCreateXMLElementAndValue() */
1451 : /************************************************************************/
1452 :
1453 : /**
1454 : * \brief Create an element and text value.
1455 : *
1456 : * This function is a convenient short form for:
1457 : *
1458 : * \code
1459 : * CPLXMLNode *psTextNode;
1460 : * CPLXMLNode *psElementNode;
1461 : *
1462 : * psElementNode = CPLCreateXMLNode( psParent, CXT_Element, pszName );
1463 : * psTextNode = CPLCreateXMLNode( psElementNode, CXT_Text, pszValue );
1464 : *
1465 : * return psElementNode;
1466 : * \endcode
1467 : *
1468 : * It creates a CXT_Element node, with a CXT_Text child, and
1469 : * attaches the element to the passed parent.
1470 : *
1471 : * @param psParent the parent node to which the resulting node should
1472 : * be attached. May be NULL to keep as freestanding.
1473 : *
1474 : * @param pszName the element name to create.
1475 : * @param pszValue the text to attach to the element. Must not be NULL.
1476 : *
1477 : * @return the pointer to the new element node.
1478 : */
1479 :
1480 944 : CPLXMLNode *CPLCreateXMLElementAndValue( CPLXMLNode *psParent,
1481 : const char *pszName,
1482 : const char *pszValue )
1483 :
1484 : {
1485 : CPLXMLNode *psElementNode;
1486 :
1487 944 : psElementNode = CPLCreateXMLNode( psParent, CXT_Element, pszName );
1488 944 : CPLCreateXMLNode( psElementNode, CXT_Text, pszValue );
1489 :
1490 944 : return psElementNode;
1491 : }
1492 :
1493 : /************************************************************************/
1494 : /* CPLCloneXMLTree() */
1495 : /************************************************************************/
1496 :
1497 : /**
1498 : * \brief Copy tree.
1499 : *
1500 : * Creates a deep copy of a CPLXMLNode tree.
1501 : *
1502 : * @param psTree the tree to duplicate.
1503 : *
1504 : * @return a copy of the whole tree.
1505 : */
1506 :
1507 503 : CPLXMLNode *CPLCloneXMLTree( CPLXMLNode *psTree )
1508 :
1509 : {
1510 503 : CPLXMLNode *psPrevious = NULL;
1511 503 : CPLXMLNode *psReturn = NULL;
1512 :
1513 1853 : while( psTree != NULL )
1514 : {
1515 : CPLXMLNode *psCopy;
1516 :
1517 847 : psCopy = CPLCreateXMLNode( NULL, psTree->eType, psTree->pszValue );
1518 847 : if( psReturn == NULL )
1519 503 : psReturn = psCopy;
1520 847 : if( psPrevious != NULL )
1521 344 : psPrevious->psNext = psCopy;
1522 :
1523 847 : if( psTree->psChild != NULL )
1524 491 : psCopy->psChild = CPLCloneXMLTree( psTree->psChild );
1525 :
1526 847 : psPrevious = psCopy;
1527 847 : psTree = psTree->psNext;
1528 : }
1529 :
1530 503 : return psReturn;
1531 : }
1532 :
1533 : /************************************************************************/
1534 : /* CPLSetXMLValue() */
1535 : /************************************************************************/
1536 :
1537 : /**
1538 : * \brief Set element value by path.
1539 : *
1540 : * Find (or create) the target element or attribute specified in the
1541 : * path, and assign it the indicated value.
1542 : *
1543 : * Any path elements that do not already exist will be created. The target
1544 : * nodes value (the first CXT_Text child) will be replaced with the provided
1545 : * value.
1546 : *
1547 : * If the target node is an attribute instead of an element, the name
1548 : * should be prefixed with a #.
1549 : *
1550 : * Example:
1551 : * CPLSetXMLValue( "Citation.Id.Description", "DOQ dataset" );
1552 : * CPLSetXMLValue( "Citation.Id.Description.#name", "doq" );
1553 : *
1554 : * @param psRoot the subdocument to be updated.
1555 : *
1556 : * @param pszPath the dot separated path to the target element/attribute.
1557 : *
1558 : * @param pszValue the text value to assign.
1559 : *
1560 : * @return TRUE on success.
1561 : */
1562 :
1563 5964 : int CPLSetXMLValue( CPLXMLNode *psRoot, const char *pszPath,
1564 : const char *pszValue )
1565 :
1566 : {
1567 : char **papszTokens;
1568 5964 : int iToken = 0;
1569 :
1570 5964 : papszTokens = CSLTokenizeStringComplex( pszPath, ".", FALSE, FALSE );
1571 :
1572 18881 : while( papszTokens[iToken] != NULL && psRoot != NULL )
1573 : {
1574 : CPLXMLNode *psChild;
1575 6953 : int bIsAttribute = FALSE;
1576 6953 : const char *pszName = papszTokens[iToken];
1577 :
1578 6953 : if( pszName[0] == '#' )
1579 : {
1580 5050 : bIsAttribute = TRUE;
1581 5050 : pszName++;
1582 : }
1583 :
1584 6953 : if( psRoot->eType != CXT_Element )
1585 0 : return FALSE;
1586 :
1587 14367 : for( psChild = psRoot->psChild; psChild != NULL;
1588 : psChild = psChild->psNext )
1589 : {
1590 8209 : if( psChild->eType != CXT_Text
1591 : && EQUAL(pszName,psChild->pszValue) )
1592 795 : break;
1593 : }
1594 :
1595 6953 : if( psChild == NULL )
1596 : {
1597 6158 : if( bIsAttribute )
1598 5050 : psChild = CPLCreateXMLNode( psRoot, CXT_Attribute, pszName );
1599 : else
1600 1108 : psChild = CPLCreateXMLNode( psRoot, CXT_Element, pszName );
1601 : }
1602 :
1603 6953 : psRoot = psChild;
1604 6953 : iToken++;
1605 : }
1606 :
1607 5964 : CSLDestroy( papszTokens );
1608 :
1609 : /* -------------------------------------------------------------------- */
1610 : /* Find the "text" child if there is one. */
1611 : /* -------------------------------------------------------------------- */
1612 5964 : CPLXMLNode *psTextChild = psRoot->psChild;
1613 :
1614 11962 : while( psTextChild != NULL && psTextChild->eType != CXT_Text )
1615 34 : psTextChild = psTextChild->psNext;
1616 :
1617 : /* -------------------------------------------------------------------- */
1618 : /* Now set a value node under this node. */
1619 : /* -------------------------------------------------------------------- */
1620 :
1621 5964 : if( psTextChild == NULL )
1622 5930 : CPLCreateXMLNode( psRoot, CXT_Text, pszValue );
1623 : else
1624 : {
1625 34 : CPLFree( psTextChild->pszValue );
1626 34 : psTextChild->pszValue = CPLStrdup( pszValue );
1627 : }
1628 :
1629 5964 : return TRUE;
1630 : }
1631 :
1632 : /************************************************************************/
1633 : /* CPLStripXMLNamespace() */
1634 : /************************************************************************/
1635 :
1636 : /**
1637 : * \brief Strip indicated namespaces.
1638 : *
1639 : * The subdocument (psRoot) is recursively examined, and any elements
1640 : * with the indicated namespace prefix will have the namespace prefix
1641 : * stripped from the element names. If the passed namespace is NULL, then
1642 : * all namespace prefixes will be stripped.
1643 : *
1644 : * Nodes other than elements should remain unaffected. The changes are
1645 : * made "in place", and should not alter any node locations, only the
1646 : * pszValue field of affected nodes.
1647 : *
1648 : * @param psRoot the document to operate on.
1649 : * @param pszNamespace the name space prefix (not including colon), or NULL.
1650 : * @param bRecurse TRUE to recurse over whole document, or FALSE to only
1651 : * operate on the passed node.
1652 : */
1653 :
1654 2214 : void CPLStripXMLNamespace( CPLXMLNode *psRoot,
1655 : const char *pszNamespace,
1656 : int bRecurse )
1657 :
1658 : {
1659 2214 : if( psRoot == NULL )
1660 0 : return;
1661 :
1662 2214 : if( psRoot->eType == CXT_Element || psRoot->eType == CXT_Attribute )
1663 : {
1664 1446 : if( pszNamespace != NULL )
1665 : {
1666 246 : if( EQUALN(pszNamespace,psRoot->pszValue,strlen(pszNamespace))
1667 120 : && psRoot->pszValue[strlen(pszNamespace)] == ':' )
1668 : {
1669 : char *pszNewValue =
1670 120 : CPLStrdup(psRoot->pszValue+strlen(pszNamespace)+1);
1671 :
1672 120 : CPLFree( psRoot->pszValue );
1673 120 : psRoot->pszValue = pszNewValue;
1674 : }
1675 : }
1676 : else
1677 : {
1678 : const char *pszCheck;
1679 :
1680 9628 : for( pszCheck = psRoot->pszValue; *pszCheck != '\0'; pszCheck++ )
1681 : {
1682 9069 : if( *pszCheck == ':' )
1683 : {
1684 761 : char *pszNewValue = CPLStrdup( pszCheck+1 );
1685 :
1686 761 : CPLFree( psRoot->pszValue );
1687 761 : psRoot->pszValue = pszNewValue;
1688 761 : break;
1689 : }
1690 : }
1691 : }
1692 : }
1693 :
1694 2214 : if( bRecurse )
1695 : {
1696 2214 : if( psRoot->psChild != NULL )
1697 1430 : CPLStripXMLNamespace( psRoot->psChild, pszNamespace, 1 );
1698 2214 : if( psRoot->psNext != NULL )
1699 776 : CPLStripXMLNamespace( psRoot->psNext, pszNamespace, 1 );
1700 : }
1701 : }
1702 :
1703 : /************************************************************************/
1704 : /* CPLParseXMLFile() */
1705 : /************************************************************************/
1706 :
1707 : /**
1708 : * \brief Parse XML file into tree.
1709 : *
1710 : * The named file is opened, loaded into memory as a big string, and
1711 : * parsed with CPLParseXMLString(). Errors in reading the file or parsing
1712 : * the XML will be reported by CPLError().
1713 : *
1714 : * The "large file" API is used, so XML files can come from virtualized
1715 : * files.
1716 : *
1717 : * @param pszFilename the file to open.
1718 : *
1719 : * @return NULL on failure, or the document tree on success.
1720 : */
1721 :
1722 414 : CPLXMLNode *CPLParseXMLFile( const char *pszFilename )
1723 :
1724 : {
1725 : FILE *fp;
1726 : vsi_l_offset nLen;
1727 : char *pszDoc;
1728 : CPLXMLNode *psTree;
1729 :
1730 : /* -------------------------------------------------------------------- */
1731 : /* Read the file. */
1732 : /* -------------------------------------------------------------------- */
1733 414 : fp = VSIFOpenL( pszFilename, "rb" );
1734 414 : if( fp == NULL )
1735 : {
1736 : CPLError( CE_Failure, CPLE_OpenFailed,
1737 4 : "Failed to open %.500s to read.", pszFilename );
1738 4 : return NULL;
1739 : }
1740 :
1741 410 : VSIFSeekL( fp, 0, SEEK_END );
1742 410 : nLen = VSIFTellL( fp );
1743 410 : VSIFSeekL( fp, 0, SEEK_SET );
1744 :
1745 410 : pszDoc = (char *) VSIMalloc((size_t)nLen + 1);
1746 410 : if( pszDoc == NULL )
1747 : {
1748 : CPLError( CE_Failure, CPLE_OutOfMemory,
1749 : "Out of memory allocating space for %d byte buffer in\n"
1750 : "CPLParseXMLFile(%.500s).",
1751 0 : (int)nLen+1, pszFilename );
1752 0 : VSIFCloseL( fp );
1753 0 : return NULL;
1754 : }
1755 410 : if( VSIFReadL( pszDoc, 1, (size_t)nLen, fp ) < nLen )
1756 : {
1757 : CPLError( CE_Failure, CPLE_FileIO,
1758 : "VSIFRead() result short of expected %d bytes from %.500s.",
1759 2 : (int)nLen, pszFilename );
1760 2 : pszDoc[0] = '\0';
1761 : }
1762 410 : VSIFCloseL( fp );
1763 :
1764 410 : pszDoc[nLen] = '\0';
1765 :
1766 : /* -------------------------------------------------------------------- */
1767 : /* Parse it. */
1768 : /* -------------------------------------------------------------------- */
1769 410 : psTree = CPLParseXMLString( pszDoc );
1770 410 : CPLFree( pszDoc );
1771 :
1772 410 : return psTree;
1773 : }
1774 :
1775 : /************************************************************************/
1776 : /* CPLSerializeXMLTreeToFile() */
1777 : /************************************************************************/
1778 :
1779 : /**
1780 : * \brief Write document tree to a file.
1781 : *
1782 : * The passed document tree is converted into one big string (with
1783 : * CPLSerializeXMLTree()) and then written to the named file. Errors writing
1784 : * the file will be reported by CPLError(). The source document tree is
1785 : * not altered. If the output file already exists it will be overwritten.
1786 : *
1787 : * @param psTree the document tree to write.
1788 : * @param pszFilename the name of the file to write to.
1789 : * @return TRUE on success, FALSE otherwise.
1790 : */
1791 :
1792 502 : int CPLSerializeXMLTreeToFile( CPLXMLNode *psTree, const char *pszFilename )
1793 :
1794 : {
1795 : char *pszDoc;
1796 : FILE *fp;
1797 : vsi_l_offset nLength;
1798 :
1799 : /* -------------------------------------------------------------------- */
1800 : /* Serialize document. */
1801 : /* -------------------------------------------------------------------- */
1802 502 : pszDoc = CPLSerializeXMLTree( psTree );
1803 502 : if( pszDoc == NULL )
1804 0 : return FALSE;
1805 :
1806 502 : nLength = strlen(pszDoc);
1807 :
1808 : /* -------------------------------------------------------------------- */
1809 : /* Create file. */
1810 : /* -------------------------------------------------------------------- */
1811 502 : fp = VSIFOpenL( pszFilename, "wt" );
1812 502 : if( fp == NULL )
1813 : {
1814 : CPLError( CE_Failure, CPLE_OpenFailed,
1815 0 : "Failed to open %.500s to write.", pszFilename );
1816 0 : CPLFree( pszDoc );
1817 0 : return FALSE;
1818 : }
1819 :
1820 : /* -------------------------------------------------------------------- */
1821 : /* Write file. */
1822 : /* -------------------------------------------------------------------- */
1823 502 : if( VSIFWriteL( pszDoc, 1, (size_t)nLength, fp ) != nLength )
1824 : {
1825 : CPLError( CE_Failure, CPLE_FileIO,
1826 : "Failed to write whole XML document (%.500s).",
1827 0 : pszFilename );
1828 0 : VSIFCloseL( fp );
1829 0 : CPLFree( pszDoc );
1830 0 : return FALSE;
1831 : }
1832 :
1833 : /* -------------------------------------------------------------------- */
1834 : /* Cleanup */
1835 : /* -------------------------------------------------------------------- */
1836 502 : VSIFCloseL( fp );
1837 502 : CPLFree( pszDoc );
1838 :
1839 502 : return TRUE;
1840 : }
1841 :
1842 : /************************************************************************/
1843 : /* CPLCleanXMLElementName() */
1844 : /************************************************************************/
1845 :
1846 : /**
1847 : * \brief Make string into safe XML token.
1848 : *
1849 : * Modifies a string in place to try and make it into a legal
1850 : * XML token that can be used as an element name. This is accomplished
1851 : * by changing any characters not legal in a token into an underscore.
1852 : *
1853 : * NOTE: This function should implement the rules in section 2.3 of
1854 : * http://www.w3.org/TR/xml11/ but it doesn't yet do that properly. We
1855 : * only do a rough approximation of that.
1856 : *
1857 : * @param pszTarget the string to be adjusted. It is altered in place.
1858 : */
1859 :
void CPLCleanXMLElementName( char *pszTarget )
{
    char *pszCh;

    if( pszTarget == NULL )
        return;

    for( pszCh = pszTarget; *pszCh != '\0'; pszCh++ )
    {
        const unsigned char chValue = (unsigned char) *pszCh;

        /* Bytes with the high bit set are left untouched. */
        if( chValue & 0x80 )
            continue;

        /* Letters, digits, underscore and dot are accepted as is. */
        if( isalnum( chValue ) )
            continue;

        if( chValue == '_' || chValue == '.' )
            continue;

        /* Anything else is replaced by an underscore. */
        *pszCh = '_';
    }
}
|