1 : /******************************************************************************
2 : * $Id: resolvexlinks.cpp 20030 2010-07-11 18:50:33Z rouault $
3 : *
4 : * Project: GML Reader
5 : * Purpose: Implementation of GMLReader::ResolveXlinks() method.
6 : * Author: Chaitanya kumar CH, chaitanya@osgeo.in
7 : *
8 : ******************************************************************************
9 : * Copyright (c) 2010, Chaitanya kumar CH
10 : *
11 : * Permission is hereby granted, free of charge, to any person obtaining a
12 : * copy of this software and associated documentation files (the "Software"),
13 : * to deal in the Software without restriction, including without limitation
14 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 : * and/or sell copies of the Software, and to permit persons to whom the
16 : * Software is furnished to do so, subject to the following conditions:
17 : *
18 : * The above copyright notice and this permission notice shall be included
19 : * in all copies or substantial portions of the Software.
20 : *
21 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 : * DEALINGS IN THE SOFTWARE.
28 : ****************************************************************************/
29 :
30 : #include "gmlreader.h"
31 : #include "cpl_error.h"
32 :
33 : CPL_CVSID("$Id: resolvexlinks.cpp 20030 2010-07-11 18:50:33Z rouault $");
34 :
35 : #if HAVE_XERCES != 0 || defined(HAVE_EXPAT)
36 :
37 : #include "gmlreaderp.h"
38 : #include "cpl_conv.h"
39 : #include "cpl_string.h"
40 : #include "cpl_http.h"
41 :
42 : #include <stack>
43 :
44 : /************************************************************************/
45 : /* GetID() */
46 : /* */
47 : /* Returns the reference to the gml:id of psNode. NULL if not */
48 : /* found. */
49 : /************************************************************************/
50 :
51 0 : static const char* GetID( CPLXMLNode * psNode )
52 :
53 : {
54 0 : if( psNode == NULL )
55 0 : return NULL;
56 :
57 : CPLXMLNode *psChild;
58 0 : for( psChild = psNode->psChild; psChild != NULL; psChild = psChild->psNext )
59 : {
60 0 : if( psChild->eType == CXT_Attribute
61 : && EQUAL(psChild->pszValue, "gml:id") )
62 : {
63 0 : return psChild->psChild->pszValue;
64 : }
65 : }
66 0 : return NULL;
67 : }
68 :
69 : /************************************************************************/
70 : /* CompareNodeIDs() */
71 : /* */
72 : /* Compares two nodes by their IDs */
73 : /************************************************************************/
74 :
75 0 : static int CompareNodeIDs( CPLXMLNode * psNode1, CPLXMLNode * psNode2 )
76 :
77 : {
78 0 : if( psNode2 == NULL )
79 0 : return TRUE;
80 :
81 0 : if( psNode1 == NULL )
82 0 : return FALSE;
83 :
84 0 : return ( strcmp( GetID(psNode2), GetID(psNode1) ) > 0 );
85 : }
86 :
87 : /************************************************************************/
88 : /* BuildIDIndex() */
89 : /* */
90 : /* Returns an array of nodes sorted by their gml:id strings */
91 : /* XXX: This method can be used to build an array of pointers to */
92 : /* nodes sorted by their id values. */
93 : /************************************************************************/
94 : /*
95 : static std::vector<CPLXMLNode*> BuildIDIndex( CPLXMLNode* psNode,
96 : std::vector<CPLXMLNode*> &apsNode )
97 :
98 : {
99 : CPLXMLNode *psSibling;
100 : for( psSibling = psNode; psSibling != NULL; psSibling = psSibling->psNext )
101 : {
102 : if( GetID( psSibling ) != NULL )
103 : apsNode.push_back( psSibling );
104 : BuildIDIndex( psNode->psChild, apsNode );
105 : }
106 : return NULL;
107 : }*/
108 :
109 : /************************************************************************/
110 : /* FindElementByID() */
111 : /* */
112 : /* Find a node with the indicated "gml:id" in the node tree and */
113 : /* its siblings. */
114 : /************************************************************************/
115 :
116 : static CPLXMLNode *FindElementByID( CPLXMLNode * psRoot,
117 0 : const char *pszID )
118 :
119 : {
120 0 : if( psRoot == NULL )
121 0 : return NULL;
122 :
123 0 : CPLXMLNode *psSibling, *psReturn = NULL;
124 :
125 : // check for id attribute
126 0 : for( psSibling = psRoot; psSibling != NULL; psSibling = psSibling->psNext)
127 : {
128 0 : if( psSibling->eType == CXT_Element )
129 : {
130 : // check that sibling for id value
131 0 : const char* pszIDOfSibling = GetID( psSibling );
132 0 : if( pszIDOfSibling != NULL && EQUAL( pszIDOfSibling, pszID) )
133 0 : return psSibling;
134 : }
135 : }
136 :
137 : // search the child elements of all the psRoot's siblings
138 0 : for( psSibling = psRoot; psSibling != NULL; psSibling = psSibling->psNext)
139 : {
140 0 : if( psSibling->eType == CXT_Element )
141 : {
142 0 : psReturn = FindElementByID( psSibling->psChild, pszID );
143 0 : if( psReturn != NULL )
144 0 : return psReturn;
145 : }
146 : }
147 0 : return NULL;
148 : }
149 :
150 : /************************************************************************/
151 : /* RemoveIDs() */
152 : /* */
153 : /* Remove all the gml:id nodes. Doesn't check psRoot's siblings */
154 : /************************************************************************/
155 :
156 0 : static void RemoveIDs( CPLXMLNode * psRoot )
157 :
158 : {
159 0 : if( psRoot == NULL )
160 0 : return;
161 :
162 0 : CPLXMLNode *psChild = psRoot->psChild;
163 :
164 : // check for id attribute
165 0 : while( psChild != NULL && !( psChild->eType == CXT_Attribute && EQUAL(psChild->pszValue, "gml:id")))
166 0 : psChild = psChild->psNext;
167 0 : CPLRemoveXMLChild( psRoot, psChild );
168 0 : CPLDestroyXMLNode( psChild );
169 :
170 : // search the child elements of psRoot
171 0 : for( psChild = psRoot->psChild; psChild != NULL; psChild = psChild->psNext)
172 0 : if( psChild->eType == CXT_Element )
173 0 : RemoveIDs( psChild );
174 : }
175 :
176 : /************************************************************************/
177 : /* TrimTree() */
178 : /* */
179 : /* Remove all nodes without a gml:id node in the descendants. */
180 : /* Returns TRUE if there is a gml:id node in the descendants. */
181 : /************************************************************************/
182 :
183 0 : static int TrimTree( CPLXMLNode * psRoot )
184 :
185 : {
186 0 : if( psRoot == NULL )
187 0 : return FALSE;
188 :
189 0 : CPLXMLNode *psChild = psRoot->psChild;
190 :
191 : // check for id attribute
192 0 : while( psChild != NULL && !( psChild->eType == CXT_Attribute && EQUAL(psChild->pszValue, "gml:id")))
193 0 : psChild = psChild->psNext;
194 :
195 0 : if( psChild != NULL )
196 0 : return TRUE;
197 :
198 : // search the child elements of psRoot
199 0 : int bReturn = FALSE, bRemove;
200 0 : for( psChild = psRoot->psChild; psChild != NULL;)
201 : {
202 0 : CPLXMLNode* psNextChild = psChild->psNext;
203 0 : if( psChild->eType == CXT_Element )
204 : {
205 0 : bRemove = TrimTree( psChild );
206 0 : if( bRemove )
207 : {
208 0 : bReturn = bRemove;
209 : }
210 : else
211 : {
212 : //remove this child
213 0 : CPLRemoveXMLChild( psRoot, psChild );
214 0 : CPLDestroyXMLNode( psChild );
215 : }
216 : }
217 :
218 0 : psChild = psNextChild;
219 : }
220 0 : return bReturn;
221 : }
222 :
223 : /************************************************************************/
224 : /* CorrectURLs() */
225 : /* */
226 : /* Replaces all empty URLs in URL#id pairs with pszURL in the */
227 : /* node and its children recursively */
228 : /************************************************************************/
229 :
230 0 : static void CorrectURLs( CPLXMLNode * psRoot, const char *pszURL )
231 :
232 : {
233 0 : if( psRoot == NULL )
234 0 : return;
235 :
236 0 : CPLXMLNode *psChild = psRoot->psChild;
237 :
238 : // check for xlink:href attribute
239 0 : while( psChild != NULL && !( ( psChild->eType == CXT_Attribute ) &&
240 : ( EQUAL(psChild->pszValue, "xlink:href") ) &&
241 : ( psChild->psChild->pszValue[0] == '#' ) ) )
242 0 : psChild = psChild->psNext;
243 :
244 0 : if( psChild != NULL )
245 : {
246 : size_t nLen = CPLStrnlen( pszURL, 512 ) +
247 0 : CPLStrnlen( psChild->psChild->pszValue, 512 ) + 1;
248 : char *pszNew;
249 0 : pszNew = (char *)CPLMalloc( nLen * sizeof(char));
250 0 : CPLStrlcpy( pszNew, pszURL, nLen );
251 0 : CPLStrlcat( pszNew, psChild->psChild->pszValue, nLen );
252 0 : CPLSetXMLValue( psRoot, "#xlink:href", pszNew );
253 0 : CPLFree( pszNew );
254 : }
255 :
256 : // search the child elements of psRoot
257 0 : for( psChild = psRoot->psChild; psChild != NULL; psChild = psChild->psNext)
258 0 : if( psChild->eType == CXT_Element )
259 0 : CorrectURLs( psChild, pszURL );
260 : }
261 :
262 : /************************************************************************/
263 : /* FindTreeByURL() */
264 : /* */
265 : /* Find a doc tree that is located at pszURL. */
266 : /************************************************************************/
267 :
268 : static CPLXMLNode *FindTreeByURL( CPLXMLNode *** ppapsRoot,
269 : char *** ppapszResourceHREF,
270 0 : const char *pszURL )
271 :
272 : {
273 0 : if( *ppapsRoot == NULL || ppapszResourceHREF == NULL )
274 0 : return NULL;
275 :
276 : //if found in ppapszResourceHREF
277 : int i, nItems;
278 : char *pszLocation;
279 0 : if( ( i = CSLFindString( *ppapszResourceHREF, pszURL )) >= 0 )
280 : {
281 : //return corresponding psRoot
282 0 : return (*ppapsRoot)[i];
283 : }
284 : else
285 : {
286 0 : CPLXMLNode *psSrcTree = NULL, *psSibling;
287 0 : pszLocation = CPLStrdup( pszURL );
288 : //if it is part of filesystem
289 0 : if( CPLCheckForFile( pszLocation, NULL) )
290 : {//filesystem
291 0 : psSrcTree = CPLParseXMLFile( pszURL );
292 : }
293 0 : else if( CPLHTTPEnabled() )
294 : {//web resource
295 0 : CPLErrorReset();
296 0 : CPLHTTPResult *psResult = CPLHTTPFetch( pszURL, NULL );
297 0 : if( psResult != NULL )
298 : {
299 0 : if( psResult->nDataLen > 0 && CPLGetLastErrorNo() == 0)
300 0 : psSrcTree = CPLParseXMLString( (const char*)psResult->pabyData );
301 0 : CPLHTTPDestroyResult( psResult );
302 : }
303 : }
304 0 : CPLFree( pszLocation );
305 :
306 :
307 : /************************************************************************/
308 : /* In the external GML resource we will only need elements */
309 : /* identified by a "gml:id". */
310 : /************************************************************************/
311 0 : psSibling = psSrcTree;
312 0 : while( psSibling != NULL )
313 : {
314 0 : TrimTree( psSibling );
315 0 : psSibling = psSibling->psNext;
316 : }
317 :
318 : //update to lists
319 0 : nItems = CSLCount(*ppapszResourceHREF);
320 0 : *ppapszResourceHREF = CSLAddString( *ppapszResourceHREF, pszURL );
321 : *ppapsRoot = (CPLXMLNode**)CPLRealloc(*ppapsRoot,
322 0 : (nItems+2)*sizeof(CPLXMLNode*));
323 0 : (*ppapsRoot)[nItems] = psSrcTree;
324 0 : (*ppapsRoot)[nItems+1] = NULL;
325 :
326 : //return the tree
327 0 : return (*ppapsRoot)[nItems];
328 : }
329 :
330 : return NULL;
331 : }
332 :
333 : /************************************************************************/
334 : /* Resolve() */
335 : /* Resolves the xlinks in a node and its siblings */
336 : /************************************************************************/
337 :
338 : static CPLErr Resolve( CPLXMLNode * psNode,
339 : CPLXMLNode *** ppapsRoot,
340 : char *** ppapszResourceHREF,
341 : char ** papszSkip,
342 563 : const int bStrict )
343 :
344 : {
345 : //for each sibling
346 563 : CPLXMLNode *psSibling = NULL;
347 563 : CPLXMLNode *psResource = NULL;
348 563 : CPLXMLNode *psTarget = NULL;
349 : CPLErr eReturn;
350 :
351 1545 : for( psSibling = psNode; psSibling != NULL; psSibling = psSibling->psNext )
352 : {
353 982 : if( psSibling->eType != CXT_Element )
354 439 : continue;
355 :
356 543 : if( CSLFindString( papszSkip, psSibling->pszValue ) >= 0 )
357 0 : continue;
358 :
359 543 : CPLXMLNode *psChild = psSibling->psChild;
360 2027 : while( psChild != NULL &&
361 : !( psChild->eType == CXT_Attribute &&
362 : EQUAL( psChild->pszValue, "xlink:href" ) ) )
363 941 : psChild = psChild->psNext;
364 :
365 : //if a child has a "xlink:href" attribute
366 543 : if( psChild != NULL )
367 : {
368 : char **papszTokens;
369 0 : if( strstr( psChild->psChild->pszValue, "#" ) == NULL )
370 : {
371 : CPLError( bStrict ? CE_Failure : CE_Warning,
372 : CPLE_NotSupported,
373 : "Couldn't find '#' while parsing the href %s. "
374 : "Can't possibly have an id.",
375 0 : psChild->psChild->pszValue );
376 0 : if( bStrict ) break;
377 0 : else continue;
378 : }
379 : papszTokens = CSLTokenizeString2( psChild->psChild->pszValue, "#",
380 : CSLT_ALLOWEMPTYTOKENS |
381 : CSLT_STRIPLEADSPACES |
382 0 : CSLT_STRIPENDSPACES );
383 0 : if( CSLCount( papszTokens ) != 2 || strlen(papszTokens[1]) <= 0 )
384 : {
385 : CPLError( bStrict ? CE_Failure : CE_Warning,
386 : CPLE_NotSupported,
387 : "Error parsing the href %s",
388 0 : psChild->psChild->pszValue );
389 0 : CSLDestroy( papszTokens );
390 0 : if( bStrict ) break;
391 0 : else continue;
392 : }
393 :
394 : //look for the resource with that URL
395 : psResource = FindTreeByURL( ppapsRoot,
396 : ppapszResourceHREF,
397 0 : papszTokens[0] );
398 0 : if( bStrict && psResource == NULL )
399 : {
400 0 : CSLDestroy( papszTokens );
401 0 : return CE_Failure;
402 : }
403 :
404 : //look for the element with the ID
405 0 : psTarget = FindElementByID( psResource, papszTokens[1] );
406 : static int i = 0;
407 0 : if( i-- == 0 )
408 : {
409 0 : i = 256;
410 : CPLDebug( "GML",
411 : "Resolving xlinks... (currently %s)",
412 0 : psChild->psChild->pszValue );
413 : }
414 0 : if( psTarget != NULL )
415 : {
416 : //remove the xlink:href attribute
417 0 : CPLRemoveXMLChild( psSibling, psChild );
418 0 : CPLDestroyXMLNode( psChild );
419 :
420 : //make a copy of psTarget
421 : CPLXMLNode *psCopy = CPLCreateXMLNode( NULL,
422 : CXT_Element,
423 0 : psTarget->pszValue );
424 0 : psCopy->psChild = CPLCloneXMLTree( psTarget->psChild );
425 0 : RemoveIDs( psCopy );
426 : //correct empty URLs in URL#id pairs
427 0 : if( CPLStrnlen( papszTokens[0], 1 ) > 0 )
428 : {
429 0 : CorrectURLs( psCopy, papszTokens[0] );
430 : }
431 0 : CPLAddXMLChild( psSibling, psCopy );
432 0 : CSLDestroy( papszTokens );
433 : }
434 : else
435 : {
436 : //nothing found
437 0 : CSLDestroy( papszTokens );
438 0 : if( bStrict )
439 : {
440 : CPLError( CE_Failure,
441 : CPLE_ObjectNull,
442 : "Couldn't find the element with id %s.",
443 0 : psChild->psChild->pszValue );
444 0 : return CE_Failure;
445 : }
446 : }
447 : }
448 :
449 : //Recurse with the first child
450 : eReturn = Resolve( psSibling->psChild,
451 : ppapsRoot,
452 : ppapszResourceHREF,
453 : papszSkip,
454 543 : bStrict );
455 543 : if( bStrict && eReturn != CE_None )
456 0 : return eReturn;
457 : }
458 563 : return CE_None;
459 : }
460 :
461 : /************************************************************************/
462 : /* ResolveXlinks() */
463 : /* Returns TRUE for success */
464 : /************************************************************************/
465 :
466 : int GMLReader::ResolveXlinks( const char *pszFile,
467 : int* pbOutIsTempFile,
468 : char **papszSkip,
469 20 : const int bStrict)
470 :
471 : {
472 20 : *pbOutIsTempFile = FALSE;
473 :
474 : // Check if the original source file is set.
475 20 : if( m_pszFilename == NULL )
476 : {
477 : CPLError( CE_Failure, CPLE_NotSupported,
478 : "GML source file needs to be set first with "
479 0 : "GMLReader::SetSourceFile()." );
480 0 : return FALSE;
481 : }
482 :
483 : /* -------------------------------------------------------------------- */
484 : /* Load the raw XML file into a XML Node tree. */
485 : /* -------------------------------------------------------------------- */
486 : CPLXMLNode **papsSrcTree;
487 20 : papsSrcTree = (CPLXMLNode **)CPLCalloc( 2, sizeof(CPLXMLNode *));
488 20 : papsSrcTree[0] = CPLParseXMLFile( m_pszFilename );
489 :
490 20 : if( papsSrcTree[0] == NULL )
491 : {
492 0 : CPLFree(papsSrcTree);
493 0 : return FALSE;
494 : }
495 :
496 : //setup resource data structure
497 20 : char **papszResourceHREF = NULL;
498 : // "" is the href of the original source file
499 20 : papszResourceHREF = CSLAddString( papszResourceHREF, "" );
500 :
501 : //call resolver
502 20 : Resolve( papsSrcTree[0], &papsSrcTree, &papszResourceHREF, papszSkip, bStrict );
503 :
504 20 : char *pszTmpName = NULL;
505 20 : int bTryWithTempFile = FALSE;
506 20 : int bReturn = TRUE;
507 21 : if( EQUALN(pszFile, "/vsitar/", strlen("/vsitar/")) ||
508 : EQUALN(pszFile, "/vsigzip/", strlen("/vsigzip/")) ||
509 : EQUALN(pszFile, "/vsizip/", strlen("/vsizip/")) )
510 : {
511 1 : bTryWithTempFile = TRUE;
512 : }
513 19 : else if( !CPLSerializeXMLTreeToFile( papsSrcTree[0], pszFile ) )
514 : {
515 : CPLError( CE_Failure, CPLE_FileIO,
516 : "Cannot serialize resolved file %s to %s.",
517 0 : m_pszFilename, pszFile );
518 0 : bTryWithTempFile = TRUE;
519 : }
520 :
521 20 : if (bTryWithTempFile)
522 : {
523 1 : pszTmpName = CPLStrdup( CPLGenerateTempFilename( "ResolvedGML" ) );
524 1 : if( !CPLSerializeXMLTreeToFile( papsSrcTree[0], pszTmpName ) )
525 : {
526 : CPLError( CE_Failure, CPLE_FileIO,
527 : "Cannot serialize resolved file %s to %s either.",
528 0 : m_pszFilename, pszTmpName );
529 0 : CPLFree( pszTmpName );
530 0 : bReturn = FALSE;
531 : }
532 : else
533 : {
534 : //set the source file to the resolved file
535 1 : CPLFree( m_pszFilename );
536 1 : m_pszFilename = pszTmpName;
537 1 : *pbOutIsTempFile = TRUE;
538 : }
539 : }
540 : else
541 : {
542 : //set the source file to the resolved file
543 19 : CPLFree( m_pszFilename );
544 19 : m_pszFilename = CPLStrdup( pszFile );
545 : }
546 :
547 20 : int nItems = CSLCount( papszResourceHREF );
548 20 : CSLDestroy( papszResourceHREF );
549 60 : while( nItems > 0 )
550 20 : CPLDestroyXMLNode( papsSrcTree[--nItems] );
551 20 : CPLFree( papsSrcTree );
552 :
553 20 : return bReturn;
554 : }
555 :
556 : #endif /* HAVE_XERCES == 1 || defined(HAVE_EXPAT) */
|