1 : /******************************************************************************
2 : * $Id: cpl_xml_validate.cpp 24302 2012-04-24 13:20:47Z rouault $
3 : *
4 : * Project: CPL - Common Portability Library
5 : * Purpose: Implement XML validation against XSD schema
6 : * Author: Even Rouault, even.rouault at mines-paris.org
7 : *
8 : ******************************************************************************
9 : * Copyright (c) 2012, Even Rouault
10 : *
11 : * Permission is hereby granted, free of charge, to any person obtaining a
12 : * copy of this software and associated documentation files (the "Software"),
13 : * to deal in the Software without restriction, including without limitation
14 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 : * and/or sell copies of the Software, and to permit persons to whom the
16 : * Software is furnished to do so, subject to the following conditions:
17 : *
18 : * The above copyright notice and this permission notice shall be included
19 : * in all copies or substantial portions of the Software.
20 : *
21 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 : * DEALINGS IN THE SOFTWARE.
28 : ****************************************************************************/
29 :
30 : #include "cpl_conv.h"
31 :
32 : CPL_CVSID("$Id: cpl_xml_validate.cpp 24302 2012-04-24 13:20:47Z rouault $");
33 :
34 : #ifdef HAVE_LIBXML2
35 : #include <libxml/xmlversion.h>
36 : #if defined(LIBXML_VERSION) && LIBXML_VERSION >= 20622
37 : /* We need at least 2.6.20 for xmlSchemaValidateDoc */
38 : /* and xmlParseDoc to accept a const xmlChar* */
39 : /* We could workaround it, but likely not worth the effort for now. */
40 : /* Actually, we need at least 2.6.22, at runtime, to be */
41 : /* able to parse the OGC GML schemas */
42 : #define HAVE_RECENT_LIBXML2
43 : #else
44 : #warning "Not recent enough libxml2 version"
45 : #endif
46 : #endif
47 :
48 : #ifdef HAVE_RECENT_LIBXML2
49 : #include <string.h>
50 : #include <libxml/xmlschemas.h>
51 : #include <libxml/parserInternals.h>
52 : #include <libxml/catalog.h>
53 :
54 : #include "cpl_string.h"
55 : #include "cpl_hash_set.h"
56 : #include "cpl_minixml.h"
57 :
58 : static xmlExternalEntityLoader pfnLibXMLOldExtranerEntityLoader = NULL;
59 :
60 : static int bHasLibXMLBug = -1;
61 :
62 : /************************************************************************/
63 : /* CPLFixPath() */
64 : /************************************************************************/
65 :
/* Normalize a path in place:                                            */
/*  - every '\' becomes '/' (to make libxml2 happy on Windows)           */
/*  - every "a/b/../c" pattern is collapsed into "a/c"                   */
/* Collapsing stops as soon as the component preceding "/../" starts at  */
/* the very first character of the string (e.g. "b/../c" is left as is). */
static void CPLFixPath(char* pszPath)
{
    for(char* pszCur = pszPath; *pszCur != '\0'; pszCur++)
    {
        if (*pszCur == '\\')
            *pszCur = '/';
    }

    for(char* pszDotDot = strstr(pszPath, "/../");
        pszDotDot != NULL && pszDotDot != pszPath;
        pszDotDot = strstr(pszPath, "/../"))
    {
        /* Walk back to the '/' that opens the component to be dropped */
        char* pszCompStart = pszDotDot - 1;
        while(pszCompStart > pszPath && *pszCompStart != '/')
            pszCompStart --;
        if (pszCompStart == pszPath)
            return;

        /* Shift the tail (terminating nul included) over "b/../" */
        const char* pszTail = pszDotDot + 4;
        memmove(pszCompStart + 1, pszTail, strlen(pszTail) + 1);
    }
}
90 :
91 : /************************************************************************/
92 : /* CPLHasLibXMLBugWarningCallback() */
93 : /************************************************************************/
94 :
/* libxml2 error/warning handler that deliberately discards every message. */
/* CPLHasLibXMLBug() installs it while parsing its probe schema, whose     */
/* failure is an expected outcome and must stay silent.                    */
static void CPLHasLibXMLBugWarningCallback (void * ctx, const char * msg, ...)
{
    (void)ctx;
    (void)msg;
}
98 :
99 : /************************************************************************/
100 : /* CPLHasLibXMLBug() */
101 : /************************************************************************/
102 :
103 32 : static int CPLHasLibXMLBug()
104 : {
105 32 : if (bHasLibXMLBug >= 0)
106 30 : return bHasLibXMLBug;
107 :
108 : static const char szLibXMLBugTester[] =
109 : "<schema targetNamespace=\"http://foo\" xmlns:foo=\"http://foo\" xmlns=\"http://www.w3.org/2001/XMLSchema\">"
110 : "<simpleType name=\"t1\">"
111 : "<list itemType=\"double\"/>"
112 : "</simpleType>"
113 : "<complexType name=\"t2\">"
114 : "<simpleContent>"
115 : "<extension base=\"foo:t1\"/>"
116 : "</simpleContent>"
117 : "</complexType>"
118 : "<complexType name=\"t3\">"
119 : "<simpleContent>"
120 : "<restriction base=\"foo:t2\">"
121 : "<length value=\"2\"/>"
122 : "</restriction>"
123 : "</simpleContent>"
124 : "</complexType>"
125 : "</schema>";
126 :
127 : xmlSchemaParserCtxtPtr pSchemaParserCtxt;
128 : xmlSchemaPtr pSchema;
129 :
130 2 : pSchemaParserCtxt = xmlSchemaNewMemParserCtxt(szLibXMLBugTester, strlen(szLibXMLBugTester));
131 :
132 : xmlSchemaSetParserErrors(pSchemaParserCtxt,
133 : CPLHasLibXMLBugWarningCallback,
134 : CPLHasLibXMLBugWarningCallback,
135 2 : NULL);
136 :
137 2 : pSchema = xmlSchemaParse(pSchemaParserCtxt);
138 2 : xmlSchemaFreeParserCtxt(pSchemaParserCtxt);
139 :
140 2 : bHasLibXMLBug = (pSchema == NULL);
141 :
142 2 : if (pSchema)
143 0 : xmlSchemaFree(pSchema);
144 :
145 2 : if (bHasLibXMLBug)
146 : {
147 : CPLDebug("CPL",
148 : "LibXML bug found (cf https://bugzilla.gnome.org/show_bug.cgi?id=630130). "
149 2 : "Will try to workaround for GML schemas.");
150 : }
151 :
152 2 : return bHasLibXMLBug;
153 : }
154 :
155 : /************************************************************************/
156 : /* CPLExtractSubSchema() */
157 : /************************************************************************/
158 :
159 152 : static CPLXMLNode* CPLExtractSubSchema(CPLXMLNode* psSubXML, CPLXMLNode* psMainSchema)
160 : {
161 152 : if (psSubXML->eType == CXT_Element && strcmp(psSubXML->pszValue, "?xml") == 0)
162 : {
163 152 : CPLXMLNode* psNext = psSubXML->psNext;
164 152 : psSubXML->psNext = NULL;
165 152 : CPLDestroyXMLNode(psSubXML);
166 152 : psSubXML = psNext;
167 : }
168 :
169 152 : if (psSubXML != NULL && psSubXML->eType == CXT_Comment)
170 : {
171 8 : CPLXMLNode* psNext = psSubXML->psNext;
172 8 : psSubXML->psNext = NULL;
173 8 : CPLDestroyXMLNode(psSubXML);
174 8 : psSubXML = psNext;
175 : }
176 :
177 152 : if (psSubXML != NULL && psSubXML->eType == CXT_Element &&
178 : (strcmp(psSubXML->pszValue, "schema") == 0 ||
179 : strcmp(psSubXML->pszValue, "xs:schema") == 0 ||
180 : strcmp(psSubXML->pszValue, "xsd:schema") == 0) &&
181 : psSubXML->psNext == NULL)
182 : {
183 152 : CPLXMLNode* psNext = psSubXML->psChild;
184 1258 : while(psNext != NULL && psNext->eType != CXT_Element &&
185 : psNext->psNext != NULL && psNext->psNext->eType != CXT_Element)
186 : {
187 : /* Add xmlns: from subschema to main schema if missing */
188 954 : if (psNext->eType == CXT_Attribute &&
189 : strncmp(psNext->pszValue, "xmlns:", 6) == 0 &&
190 : CPLGetXMLValue(psMainSchema, psNext->pszValue, NULL) == NULL)
191 : {
192 116 : CPLXMLNode* psAttr = CPLCreateXMLNode(NULL, CXT_Attribute, psNext->pszValue);
193 116 : CPLCreateXMLNode(psAttr, CXT_Text, psNext->psChild->pszValue);
194 :
195 116 : psAttr->psNext = psMainSchema->psChild;
196 116 : psMainSchema->psChild = psAttr;
197 : }
198 954 : psNext = psNext->psNext;
199 : }
200 :
201 152 : if (psNext != NULL && psNext->eType != CXT_Element &&
202 : psNext->psNext != NULL && psNext->psNext->eType == CXT_Element)
203 : {
204 152 : CPLXMLNode* psNext2 = psNext->psNext;
205 152 : psNext->psNext = NULL;
206 152 : CPLDestroyXMLNode(psSubXML);
207 152 : psSubXML = psNext2;
208 : }
209 : }
210 :
211 152 : return psSubXML;
212 : }
213 :
214 : /************************************************************************/
215 : /* CPLWorkaroundLibXMLBug() */
216 : /************************************************************************/
217 :
218 : /* Return TRUE if the current node must be destroyed */
219 9922 : static int CPLWorkaroundLibXMLBug(CPLXMLNode* psIter)
220 : {
221 9922 : if (psIter->eType == CXT_Element &&
222 : strcmp(psIter->pszValue, "element") == 0 &&
223 : strcmp(CPLGetXMLValue(psIter, "name", ""), "QuantityExtent") == 0 &&
224 : strcmp(CPLGetXMLValue(psIter, "type", ""), "gml:QuantityExtentType") == 0)
225 : {
226 4 : CPLXMLNode* psIter2 = psIter->psChild;
227 20 : while(psIter2)
228 : {
229 12 : if (psIter2->eType == CXT_Attribute && strcmp(psIter2->pszValue, "type") == 0)
230 : {
231 4 : CPLFree(psIter2->psChild->pszValue);
232 4 : if (strcmp(CPLGetXMLValue(psIter, "substitutionGroup", ""), "gml:AbstractValue") == 0)
233 2 : psIter2->psChild->pszValue = CPLStrdup("gml:MeasureOrNilReasonListType"); /* GML 3.2.1 */
234 : else
235 2 : psIter2->psChild->pszValue = CPLStrdup("gml:MeasureOrNullListType");
236 : }
237 12 : psIter2 = psIter2->psNext;
238 : }
239 : }
240 :
241 9918 : else if (psIter->eType == CXT_Element &&
242 : strcmp(psIter->pszValue, "element") == 0 &&
243 : strcmp(CPLGetXMLValue(psIter, "name", ""), "CategoryExtent") == 0 &&
244 : strcmp(CPLGetXMLValue(psIter, "type", ""), "gml:CategoryExtentType") == 0)
245 : {
246 4 : CPLXMLNode* psIter2 = psIter->psChild;
247 20 : while(psIter2)
248 : {
249 12 : if (psIter2->eType == CXT_Attribute && strcmp(psIter2->pszValue, "type") == 0)
250 : {
251 4 : CPLFree(psIter2->psChild->pszValue);
252 4 : if (strcmp(CPLGetXMLValue(psIter, "substitutionGroup", ""), "gml:AbstractValue") == 0)
253 2 : psIter2->psChild->pszValue = CPLStrdup("gml:CodeOrNilReasonListType"); /* GML 3.2.1 */
254 : else
255 2 : psIter2->psChild->pszValue = CPLStrdup("gml:CodeOrNullListType");
256 : }
257 12 : psIter2 = psIter2->psNext;
258 : }
259 : }
260 :
261 9914 : else if (bHasLibXMLBug && psIter->eType == CXT_Element &&
262 : strcmp(psIter->pszValue, "complexType") == 0 &&
263 : (strcmp(CPLGetXMLValue(psIter, "name", ""), "QuantityExtentType") == 0 ||
264 : strcmp(CPLGetXMLValue(psIter, "name", ""), "CategoryExtentType") == 0))
265 : {
266 : /* Destroy this element */
267 8 : return TRUE;
268 : }
269 :
270 : /* For GML 3.2.1 */
271 9906 : else if (psIter->eType == CXT_Element &&
272 : strcmp(psIter->pszValue, "complexType") == 0 &&
273 : strcmp(CPLGetXMLValue(psIter, "name", ""), "VectorType") == 0)
274 : {
275 4 : CPLXMLNode* psSimpleContent = CPLCreateXMLNode(NULL, CXT_Element, "simpleContent");
276 4 : CPLXMLNode* psExtension = CPLCreateXMLNode(psSimpleContent, CXT_Element, "extension");
277 4 : CPLXMLNode* psExtensionBase = CPLCreateXMLNode(psExtension, CXT_Attribute, "base");
278 4 : CPLCreateXMLNode(psExtensionBase, CXT_Text, "gml:doubleList");
279 4 : CPLXMLNode* psAttributeGroup = CPLCreateXMLNode(psExtension, CXT_Element, "attributeGroup");
280 4 : CPLXMLNode* psAttributeGroupRef = CPLCreateXMLNode(psAttributeGroup, CXT_Attribute, "ref");
281 4 : CPLCreateXMLNode(psAttributeGroupRef, CXT_Text, "gml:SRSReferenceGroup");
282 :
283 4 : CPLXMLNode* psName = CPLCreateXMLNode(NULL, CXT_Attribute, "name");
284 4 : CPLCreateXMLNode(psName, CXT_Text, "VectorType");
285 :
286 4 : CPLDestroyXMLNode(psIter->psChild);
287 4 : psIter->psChild = psName;
288 4 : psIter->psChild->psNext = psSimpleContent;
289 : }
290 :
291 9902 : else if (psIter->eType == CXT_Element &&
292 : strcmp(psIter->pszValue, "element") == 0 &&
293 : (strcmp(CPLGetXMLValue(psIter, "name", ""), "domainOfValidity") == 0 ||
294 : strcmp(CPLGetXMLValue(psIter, "name", ""), "coordinateOperationAccuracy") == 0 ||
295 : strcmp(CPLGetXMLValue(psIter, "name", ""), "formulaCitation") == 0))
296 : {
297 6 : CPLXMLNode* psComplexType = CPLCreateXMLNode(NULL, CXT_Element, "complexType");
298 6 : CPLXMLNode* psSequence = CPLCreateXMLNode(psComplexType, CXT_Element, "sequence");
299 6 : CPLXMLNode* psSequenceMinOccurs = CPLCreateXMLNode(psSequence, CXT_Attribute, "minOccurs");
300 6 : CPLCreateXMLNode(psSequenceMinOccurs, CXT_Text, "0");
301 6 : CPLXMLNode* psAny = CPLCreateXMLNode(psSequence, CXT_Element, "any");
302 6 : CPLXMLNode* psAnyMinOccurs = CPLCreateXMLNode(psAny, CXT_Attribute, "minOccurs");
303 6 : CPLCreateXMLNode(psAnyMinOccurs, CXT_Text, "0");
304 6 : CPLXMLNode* psAnyProcessContents = CPLCreateXMLNode(psAny, CXT_Attribute, " processContents");
305 6 : CPLCreateXMLNode(psAnyProcessContents, CXT_Text, "lax");
306 :
307 6 : CPLXMLNode* psName = CPLCreateXMLNode(NULL, CXT_Attribute, "name");
308 6 : CPLCreateXMLNode(psName, CXT_Text, CPLGetXMLValue(psIter, "name", ""));
309 :
310 6 : CPLDestroyXMLNode(psIter->psChild);
311 6 : psIter->psChild = psName;
312 6 : psIter->psChild->psNext = psComplexType;
313 : }
314 :
315 9914 : return FALSE;
316 : }
317 :
318 : /************************************************************************/
319 : /* CPLLoadSchemaStrInternal() */
320 : /************************************************************************/
321 :
322 : static
323 184 : CPLXMLNode* CPLLoadSchemaStrInternal(CPLHashSet* hSetSchemas,
324 : const char* pszFile)
325 : {
326 : CPLXMLNode* psXML;
327 : CPLXMLNode* psSchema;
328 : CPLXMLNode* psPrev;
329 : CPLXMLNode* psIter;
330 :
331 184 : if (CPLHashSetLookup(hSetSchemas, pszFile))
332 0 : return NULL;
333 :
334 184 : CPLHashSetInsert(hSetSchemas, CPLStrdup(pszFile));
335 :
336 184 : CPLDebug("CPL", "Parsing %s", pszFile);
337 :
338 184 : psXML = CPLParseXMLFile(pszFile);
339 184 : if (psXML == NULL)
340 : {
341 : CPLError(CE_Failure, CPLE_AppDefined,
342 0 : "Cannot open %s", pszFile);
343 0 : return NULL;
344 : }
345 :
346 184 : psSchema = CPLGetXMLNode(psXML, "=schema");
347 184 : if (psSchema == NULL)
348 56 : psSchema = CPLGetXMLNode(psXML, "=xs:schema");
349 184 : if (psSchema == NULL)
350 0 : psSchema = CPLGetXMLNode(psXML, "=xsd:schema");
351 184 : if (psSchema == NULL)
352 : {
353 : CPLError(CE_Failure, CPLE_AppDefined,
354 0 : "Cannot find schema node in %s", pszFile);
355 0 : CPLDestroyXMLNode(psXML);
356 0 : return NULL;
357 : }
358 :
359 184 : psPrev = NULL;
360 184 : psIter = psSchema->psChild;
361 10290 : while(psIter)
362 : {
363 9922 : int bDestroyCurrentNode = FALSE;
364 :
365 9922 : if (bHasLibXMLBug)
366 9922 : bDestroyCurrentNode = CPLWorkaroundLibXMLBug(psIter);
367 :
368 : /* Load the referenced schemas, and integrate them in the main schema */
369 10018 : if (psIter->eType == CXT_Element &&
370 : (strcmp(psIter->pszValue, "include") == 0 ||
371 : strcmp(psIter->pszValue, "xs:include") == 0||
372 : strcmp(psIter->pszValue, "xsd:include") == 0) &&
373 : psIter->psChild != NULL &&
374 : psIter->psChild->eType == CXT_Attribute &&
375 : strcmp(psIter->psChild->pszValue, "schemaLocation") == 0)
376 : {
377 248 : const char* pszIncludeSchema = psIter->psChild->psChild->pszValue;
378 : char* pszFullFilename = CPLStrdup(
379 248 : CPLFormFilename(CPLGetPath(pszFile), pszIncludeSchema, NULL));
380 :
381 248 : CPLFixPath(pszFullFilename);
382 :
383 248 : CPLXMLNode* psSubXML = NULL;
384 :
385 : /* If we haven't yet loaded that schema, do it now */
386 248 : if (!CPLHashSetLookup(hSetSchemas, pszFullFilename))
387 : {
388 152 : psSubXML = CPLLoadSchemaStrInternal(hSetSchemas, pszFullFilename);
389 152 : if (psSubXML == NULL)
390 : {
391 0 : CPLFree(pszFullFilename);
392 0 : CPLDestroyXMLNode(psXML);
393 0 : return NULL;
394 : }
395 : }
396 248 : CPLFree(pszFullFilename);
397 248 : pszFullFilename = NULL;
398 :
399 248 : if (psSubXML)
400 : {
401 152 : CPLXMLNode* psNext = psIter->psNext;
402 :
403 152 : psSubXML = CPLExtractSubSchema(psSubXML, psSchema);
404 152 : if (psSubXML == NULL)
405 : {
406 0 : CPLDestroyXMLNode(psXML);
407 0 : return NULL;
408 : }
409 :
410 : /* Replace <include/> node by the subXML */
411 152 : CPLXMLNode* psIter2 = psSubXML;
412 23610 : while(psIter2->psNext)
413 23306 : psIter2 = psIter2->psNext;
414 152 : psIter2->psNext = psNext;
415 :
416 152 : if (psPrev == NULL)
417 0 : psSchema->psChild = psSubXML;
418 : else
419 152 : psPrev->psNext = psSubXML;
420 :
421 152 : psIter->psNext = NULL;
422 152 : CPLDestroyXMLNode(psIter);
423 :
424 152 : psPrev = psIter2;
425 152 : psIter = psNext;
426 152 : continue;
427 : }
428 : else
429 : {
430 : /* We have already included that file, */
431 : /* so just remove the <include/> node */
432 96 : bDestroyCurrentNode = TRUE;
433 : }
434 : }
435 :
436 : /* Patch the schemaLocation of <import/> */
437 9674 : else if (psIter->eType == CXT_Element &&
438 : (strcmp(psIter->pszValue, "import") == 0 ||
439 : strcmp(psIter->pszValue, "xs:import") == 0||
440 : strcmp(psIter->pszValue, "xsd:import") == 0))
441 : {
442 86 : CPLXMLNode* psIter2 = psIter->psChild;
443 344 : while(psIter2)
444 : {
445 172 : if (psIter2->eType == CXT_Attribute &&
446 : strcmp(psIter2->pszValue, "schemaLocation") == 0 &&
447 : psIter2->psChild != NULL &&
448 : strncmp(psIter2->psChild->pszValue, "http://", 7) != 0 &&
449 : strncmp(psIter2->psChild->pszValue, "ftp://", 6) != 0)
450 : {
451 : char* pszFullFilename = CPLStrdup(CPLFormFilename(
452 6 : CPLGetPath(pszFile), psIter2->psChild->pszValue, NULL));
453 6 : CPLFixPath(pszFullFilename);
454 6 : CPLFree(psIter2->psChild->pszValue);
455 6 : psIter2->psChild->pszValue = pszFullFilename;
456 : }
457 172 : psIter2 = psIter2->psNext;
458 : }
459 : }
460 :
461 9770 : if (bDestroyCurrentNode)
462 : {
463 104 : CPLXMLNode* psNext = psIter->psNext;
464 104 : if (psPrev == NULL)
465 0 : psSchema->psChild = psNext;
466 : else
467 104 : psPrev->psNext = psNext;
468 :
469 104 : psIter->psNext = NULL;
470 104 : CPLDestroyXMLNode(psIter);
471 :
472 104 : psIter = psNext;
473 104 : continue;
474 : }
475 :
476 9666 : psPrev = psIter;
477 9666 : psIter = psIter->psNext;
478 : }
479 :
480 184 : return psXML;
481 : }
482 :
483 : /************************************************************************/
484 : /* CPLMoveImportAtBeginning() */
485 : /************************************************************************/
486 :
487 : static
488 32 : void CPLMoveImportAtBeginning(CPLXMLNode* psXML)
489 : {
490 : CPLXMLNode* psIter;
491 : CPLXMLNode* psPrev;
492 : CPLXMLNode* psSchema;
493 :
494 32 : psSchema = CPLGetXMLNode(psXML, "=schema");
495 32 : if (psSchema == NULL)
496 14 : psSchema = CPLGetXMLNode(psXML, "=xs:schema");
497 32 : if (psSchema == NULL)
498 0 : psSchema = CPLGetXMLNode(psXML, "=xsd:schema");
499 32 : if (psSchema == NULL)
500 0 : return;
501 :
502 32 : psPrev = NULL;
503 32 : psIter = psSchema->psChild;
504 8740 : while(psIter)
505 : {
506 8676 : if (psPrev != NULL && psIter->eType == CXT_Element &&
507 : (strcmp(psIter->pszValue, "import") == 0 ||
508 : strcmp(psIter->pszValue, "xs:import") == 0 ||
509 : strcmp(psIter->pszValue, "xsd:import") == 0))
510 : {
511 : /* Reorder at the beginning */
512 86 : CPLXMLNode* psNext = psIter->psNext;
513 :
514 86 : psPrev->psNext = psNext;
515 :
516 86 : CPLXMLNode* psFirstChild = psSchema->psChild;
517 86 : psSchema->psChild = psIter;
518 86 : psIter->psNext = psFirstChild;
519 :
520 86 : psIter = psNext;
521 86 : continue;
522 : }
523 :
524 8590 : psPrev = psIter;
525 8590 : psIter = psIter->psNext;
526 : }
527 : }
528 :
529 : /************************************************************************/
530 : /* CPLLoadSchemaStr() */
531 : /************************************************************************/
532 :
533 : static
534 32 : char* CPLLoadSchemaStr(const char* pszXSDFilename)
535 : {
536 32 : char* pszStr = NULL;
537 :
538 32 : CPLHasLibXMLBug();
539 :
540 : CPLHashSet* hSetSchemas =
541 32 : CPLHashSetNew(CPLHashSetHashStr, CPLHashSetEqualStr, CPLFree);
542 : CPLXMLNode* psSchema =
543 32 : CPLLoadSchemaStrInternal(hSetSchemas, pszXSDFilename);
544 32 : if (psSchema)
545 : {
546 32 : CPLMoveImportAtBeginning(psSchema);
547 32 : pszStr = CPLSerializeXMLTree(psSchema);
548 32 : CPLDestroyXMLNode(psSchema);
549 : }
550 32 : CPLHashSetDestroy(hSetSchemas);
551 32 : return pszStr;
552 : }
553 :
554 : /************************************************************************/
555 : /* CPLLibXMLInputStreamCPLFree() */
556 : /************************************************************************/
557 :
558 28 : static void CPLLibXMLInputStreamCPLFree(xmlChar* pszBuffer)
559 : {
560 28 : CPLFree(pszBuffer);
561 28 : }
562 :
563 : /************************************************************************/
564 : /* CPLExternalEntityLoader() */
565 : /************************************************************************/
566 :
567 : static
568 28 : xmlParserInputPtr CPLExternalEntityLoader (const char * URL,
569 : const char * ID,
570 : xmlParserCtxtPtr context)
571 : {
572 : //CPLDebug("CPL", "CPLExternalEntityLoader(%s)", URL);
573 28 : CPLString osURL;
574 :
575 : /* Use libxml2 catalog mechanism to resolve the URL to something else */
576 28 : xmlChar* pszResolved = xmlCatalogResolveSystem((const xmlChar*)URL);
577 28 : if (pszResolved == NULL)
578 28 : pszResolved = xmlCatalogResolveURI((const xmlChar*)URL);
579 28 : if (pszResolved)
580 : {
581 0 : CPLDebug("CPL", "Resolving %s in %s", URL, (const char*)pszResolved );
582 0 : osURL = (const char*)pszResolved;
583 0 : URL = osURL.c_str();
584 0 : xmlFree(pszResolved);
585 0 : pszResolved = NULL;
586 : }
587 :
588 28 : if (strncmp(URL, "http://", 7) == 0)
589 : {
590 : /* Make sure to use http://schemas.opengis.net/ */
591 : /* when gml/2 or gml/3 is detected */
592 24 : const char* pszGML = strstr(URL, "gml/2");
593 24 : if (pszGML == NULL)
594 24 : pszGML = strstr(URL, "gml/3");
595 24 : if (pszGML != NULL)
596 : {
597 8 : osURL = "http://schemas.opengis.net/";
598 8 : osURL += pszGML;
599 8 : URL = osURL.c_str();
600 : }
601 16 : else if (strncmp(URL, "http://schemas.opengis.net/",
602 : strlen("http://schemas.opengis.net/")) != 0)
603 : {
604 0 : return pfnLibXMLOldExtranerEntityLoader(URL, ID, context);
605 : }
606 : }
607 4 : else if (strncmp(URL, "ftp://", 6) == 0)
608 : {
609 0 : return pfnLibXMLOldExtranerEntityLoader(URL, ID, context);
610 : }
611 4 : else if (strncmp(URL, "file://", 7) == 0)
612 : {
613 : /* Parse file:// URI so as to be able to open them with VSI*L API */
614 0 : if (strncmp(URL, "file://localhost/", 17) == 0)
615 0 : URL += 16;
616 : else
617 0 : URL += 7;
618 0 : if (URL[0] == '/' && URL[1] != '\0' && URL[2] == ':' && URL[3] == '/') /* Windows */
619 0 : URL ++;
620 0 : else if (URL[0] == '/') /* Unix */
621 : ;
622 : else
623 0 : return pfnLibXMLOldExtranerEntityLoader(URL, ID, context);
624 : }
625 :
626 28 : CPLString osModURL;
627 28 : if (strncmp(URL, "/vsizip/vsicurl/http%3A//",
628 : strlen("/vsizip/vsicurl/http%3A//")) == 0)
629 : {
630 0 : osModURL = "/vsizip/vsicurl/http://";
631 0 : osModURL += URL + strlen("/vsizip/vsicurl/http%3A//");
632 : }
633 28 : else if (strncmp(URL, "/vsicurl/http%3A//",
634 : strlen("/vsicurl/http%3A//")) == 0)
635 : {
636 0 : osModURL = "vsicurl/http://";
637 0 : osModURL += URL + strlen("/vsicurl/http%3A//");
638 : }
639 28 : else if (strncmp(URL, "http://schemas.opengis.net/",
640 : strlen("http://schemas.opengis.net/")) == 0)
641 : {
642 : const char *pszAfterOpenGIS =
643 24 : URL + strlen("http://schemas.opengis.net/");
644 :
645 : const char *pszSchemasOpenGIS;
646 :
647 24 : pszSchemasOpenGIS = CPLGetConfigOption("GDAL_OPENGIS_SCHEMAS", NULL);
648 24 : if (pszSchemasOpenGIS != NULL)
649 : {
650 24 : int nLen = (int)strlen(pszSchemasOpenGIS);
651 24 : if (nLen > 0 && pszSchemasOpenGIS[nLen-1] == '/')
652 : {
653 0 : osModURL = pszSchemasOpenGIS;
654 0 : osModURL += pszAfterOpenGIS;
655 : }
656 : else
657 : {
658 24 : osModURL = pszSchemasOpenGIS;
659 24 : osModURL += "/";
660 24 : osModURL += pszAfterOpenGIS;
661 : }
662 : }
663 0 : else if ((pszSchemasOpenGIS = CPLFindFile( "gdal", "SCHEMAS_OPENGIS_NET" )) != NULL)
664 : {
665 0 : osModURL = pszSchemasOpenGIS;
666 0 : osModURL += "/";
667 0 : osModURL += pszAfterOpenGIS;
668 : }
669 0 : else if ((pszSchemasOpenGIS = CPLFindFile( "gdal", "SCHEMAS_OPENGIS_NET.zip" )) != NULL)
670 : {
671 0 : osModURL = "/vsizip/";
672 0 : osModURL += pszSchemasOpenGIS;
673 0 : osModURL += "/";
674 0 : osModURL += pszAfterOpenGIS;
675 : }
676 : else
677 : {
678 0 : osModURL = "/vsizip/vsicurl/http://schemas.opengis.net/SCHEMAS_OPENGIS_NET.zip/";
679 0 : osModURL += pszAfterOpenGIS;
680 : }
681 : }
682 : else
683 : {
684 4 : osModURL = URL;
685 : }
686 :
687 28 : xmlChar* pszBuffer = (xmlChar*)CPLLoadSchemaStr(osModURL);
688 28 : if (pszBuffer == NULL)
689 0 : return NULL;
690 :
691 28 : xmlParserInputPtr poInputStream = xmlNewStringInputStream(context, pszBuffer);
692 28 : if (poInputStream != NULL)
693 28 : poInputStream->free = CPLLibXMLInputStreamCPLFree;
694 28 : return poInputStream;
695 : }
696 :
697 : /************************************************************************/
698 : /* CPLLibXMLWarningErrorCallback() */
699 : /************************************************************************/
700 :
701 2 : static void CPLLibXMLWarningErrorCallback (void * ctx, const char * msg, ...)
702 : {
703 : va_list varg;
704 : char * pszStr;
705 :
706 2 : va_start(varg, msg);
707 2 : pszStr = (char *)va_arg( varg, char *);
708 :
709 2 : if (strstr(pszStr, "since this namespace was already imported") == NULL)
710 : {
711 0 : xmlErrorPtr pErrorPtr = xmlGetLastError();
712 0 : const char* pszFilename = (const char*)ctx;
713 0 : char* pszStrDup = CPLStrdup(pszStr);
714 0 : int nLen = (int)strlen(pszStrDup);
715 0 : if (nLen > 0 && pszStrDup[nLen-1] == '\n')
716 0 : pszStrDup[nLen-1] = '\0';
717 : CPLError(CE_Failure, CPLE_AppDefined, "libXML: %s:%d: %s",
718 0 : pszFilename, pErrorPtr ? pErrorPtr->line : 0, pszStrDup);
719 0 : CPLFree(pszStrDup);
720 : }
721 :
722 2 : va_end(varg);
723 2 : }
724 :
725 : /************************************************************************/
726 : /* CPLLoadContentFromFile() */
727 : /************************************************************************/
728 :
729 : static
730 0 : char* CPLLoadContentFromFile(const char* pszFilename)
731 : {
732 0 : VSILFILE* fp = VSIFOpenL(pszFilename, "rb");
733 0 : if (fp == NULL)
734 0 : return NULL;
735 : vsi_l_offset nSize;
736 0 : VSIFSeekL(fp, 0, SEEK_END);
737 0 : nSize = VSIFTellL(fp);
738 0 : VSIFSeekL(fp, 0, SEEK_SET);
739 0 : if ((vsi_l_offset)(int)nSize != nSize ||
740 : nSize > INT_MAX - 1 )
741 : {
742 0 : VSIFCloseL(fp);
743 0 : return NULL;
744 : }
745 0 : char* pszBuffer = (char*)VSIMalloc(nSize + 1);
746 0 : if (pszBuffer == NULL)
747 : {
748 0 : VSIFCloseL(fp);
749 0 : return NULL;
750 : }
751 0 : VSIFReadL(pszBuffer, 1, nSize, fp);
752 0 : pszBuffer[nSize] = '\0';
753 0 : VSIFCloseL(fp);
754 0 : return pszBuffer;
755 : }
756 :
757 : /************************************************************************/
758 : /* CPLLoadXMLSchema() */
759 : /************************************************************************/
760 :
761 : /**
762 : * \brief Load a XSD schema.
763 : *
764 : * The return value should be freed with CPLFreeXMLSchema().
765 : *
766 : * @param pszXSDFilename XSD schema to load.
767 : * @return a handle to the parsed XML schema, or NULL in case of failure.
768 : *
769 : * @since GDAL 2.0.0
770 : */
771 :
772 4 : CPLXMLSchemaPtr CPLLoadXMLSchema(const char* pszXSDFilename)
773 : {
774 4 : char* pszStr = CPLLoadSchemaStr(pszXSDFilename);
775 4 : if (pszStr == NULL)
776 0 : return NULL;
777 :
778 4 : xmlExternalEntityLoader pfnLibXMLOldExtranerEntityLoaderLocal = NULL;
779 4 : pfnLibXMLOldExtranerEntityLoaderLocal = xmlGetExternalEntityLoader();
780 4 : pfnLibXMLOldExtranerEntityLoader = pfnLibXMLOldExtranerEntityLoaderLocal;
781 4 : xmlSetExternalEntityLoader(CPLExternalEntityLoader);
782 :
783 : xmlSchemaParserCtxtPtr pSchemaParserCtxt =
784 4 : xmlSchemaNewMemParserCtxt(pszStr, strlen(pszStr));
785 :
786 : xmlSchemaSetParserErrors(pSchemaParserCtxt,
787 : CPLLibXMLWarningErrorCallback,
788 : CPLLibXMLWarningErrorCallback,
789 4 : NULL);
790 :
791 4 : xmlSchemaPtr pSchema = xmlSchemaParse(pSchemaParserCtxt);
792 4 : xmlSchemaFreeParserCtxt(pSchemaParserCtxt);
793 :
794 4 : xmlSetExternalEntityLoader(pfnLibXMLOldExtranerEntityLoaderLocal);
795 :
796 4 : CPLFree(pszStr);
797 :
798 4 : return (CPLXMLSchemaPtr) pSchema;
799 : }
800 :
801 : /************************************************************************/
802 : /* CPLFreeXMLSchema() */
803 : /************************************************************************/
804 :
805 : /**
806 : * \brief Free a XSD schema.
807 : *
808 : * @param pSchema a handle to the parsed XML schema.
809 : *
810 : * @since GDAL 2.0.0
811 : */
812 :
813 4 : void CPLFreeXMLSchema(CPLXMLSchemaPtr pSchema)
814 : {
815 4 : if (pSchema)
816 4 : xmlSchemaFree((xmlSchemaPtr)pSchema);
817 4 : }
818 :
819 : /************************************************************************/
820 : /* CPLValidateXML() */
821 : /************************************************************************/
822 :
823 : /**
824 : * \brief Validate a XML file against a XML schema.
825 : *
826 : * The return value should be freed with CPLFreeXMLSchema().
827 : *
828 : * @param pszXMLFilename the XML filename to validate.
829 : * @param pSchema a handle returned by CPLLoadXMLSchema().
830 : * @param papszOptions unused for now.
831 : * @return TRUE if the XML file validates against the XML schema.
832 : *
833 : * @since GDAL 2.0.0
834 : */
835 :
836 4 : int CPLValidateXML(const char* pszXMLFilename, CPLXMLSchemaPtr pSchema,
837 : char** papszOptions)
838 : {
839 4 : if (pSchema == NULL)
840 0 : return FALSE;
841 :
842 : xmlSchemaValidCtxtPtr pSchemaValidCtxt;
843 :
844 4 : pSchemaValidCtxt = xmlSchemaNewValidCtxt((xmlSchemaPtr)pSchema);
845 :
846 4 : if (pSchemaValidCtxt == NULL)
847 0 : return FALSE;
848 :
849 : xmlSchemaSetValidErrors(pSchemaValidCtxt,
850 : CPLLibXMLWarningErrorCallback,
851 : CPLLibXMLWarningErrorCallback,
852 4 : (void*) pszXMLFilename);
853 :
854 4 : int bValid = FALSE;
855 4 : if (strncmp(pszXMLFilename, "/vsi", 4) != 0)
856 : {
857 : bValid =
858 4 : xmlSchemaValidateFile(pSchemaValidCtxt, pszXMLFilename, 0) == 0;
859 : }
860 : else
861 : {
862 0 : char* pszXML = CPLLoadContentFromFile(pszXMLFilename);
863 0 : if (pszXML != NULL)
864 : {
865 0 : xmlDocPtr pDoc = xmlParseDoc((const xmlChar *)pszXML);
866 0 : if (pDoc != NULL)
867 : {
868 0 : bValid = xmlSchemaValidateDoc(pSchemaValidCtxt, pDoc) == 0;
869 : }
870 0 : xmlFreeDoc(pDoc);
871 : }
872 0 : CPLFree(pszXML);
873 : }
874 4 : xmlSchemaFreeValidCtxt(pSchemaValidCtxt);
875 :
876 4 : return bValid;
877 : }
878 :
879 : #else // HAVE_RECENT_LIBXML2
880 :
881 : /************************************************************************/
882 : /* CPLLoadXMLSchema() */
883 : /************************************************************************/
884 :
885 : CPLXMLSchemaPtr CPLLoadXMLSchema(const char* pszXSDFilename)
886 : {
887 : CPLError(CE_Failure, CPLE_NotSupported,
888 : "%s not implemented due to missing libxml2 support",
889 : "CPLLoadSchema()");
890 : return NULL;
891 : }
892 :
893 : /************************************************************************/
894 : /* CPLFreeXMLSchema() */
895 : /************************************************************************/
896 :
897 : void CPLFreeXMLSchema(CPLXMLSchemaPtr pSchema)
898 : {
899 : }
900 :
901 : /************************************************************************/
902 : /* CPLValidateXML() */
903 : /************************************************************************/
904 :
905 : int CPLValidateXML(const char* pszXMLFilename, CPLXMLSchemaPtr pSchema,
906 : char** papszOptions)
907 : {
908 : CPLError(CE_Failure, CPLE_NotSupported,
909 : "%s not implemented due to missing libxml2 support",
910 : "CPLValidateXML()");
911 : return FALSE;
912 : }
913 :
914 : #endif // HAVE_RECENT_LIBXML2
|