1 : /******************************************************************************
2 : * $Id: cpl_csv.cpp 17148 2009-05-29 20:45:45Z rouault $
3 : *
4 : * Project: CPL - Common Portability Library
5 : * Purpose: CSV (comma separated value) file access.
6 : * Author: Frank Warmerdam, warmerdam@pobox.com
7 : *
8 : ******************************************************************************
9 : * Copyright (c) 1999, Frank Warmerdam
10 : *
11 : * Permission is hereby granted, free of charge, to any person obtaining a
12 : * copy of this software and associated documentation files (the "Software"),
13 : * to deal in the Software without restriction, including without limitation
14 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 : * and/or sell copies of the Software, and to permit persons to whom the
16 : * Software is furnished to do so, subject to the following conditions:
17 : *
18 : * The above copyright notice and this permission notice shall be included
19 : * in all copies or substantial portions of the Software.
20 : *
21 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 : * DEALINGS IN THE SOFTWARE.
28 : ****************************************************************************/
29 :
30 : #include "cpl_csv.h"
31 : #include "cpl_conv.h"
32 : #include "cpl_multiproc.h"
33 :
34 : CPL_CVSID("$Id: cpl_csv.cpp 17148 2009-05-29 20:45:45Z rouault $");
35 :
36 : CPL_C_START
37 : const char * GDALDefaultCSVFilename( const char *pszBasename );
38 : CPL_C_END
39 :
40 : /* ==================================================================== */
41 : /* The CSVTable is a persistent set of info about an open CSV */
42 : /* table. While it doesn't currently maintain a record index, */
43 : /* or in-memory copy of the table, it could be changed to do so */
44 : /* in the future. */
45 : /* ==================================================================== */
/* Per-file state for one opened CSV table.  Tables are chained together
   through psNext into a singly linked list. */
typedef struct ctb {
    FILE        *fp;              /* open file handle; set to NULL once the file has been ingested */

    struct ctb  *psNext;          /* next table in the list, or NULL at the end */

    char        *pszFilename;     /* private copy of the name the table was opened with */

    char       **papszFieldNames; /* fields of the header record */

    char       **papszRecFields;  /* most recently fetched record; freed with the table */

    int          iLastLine;       /* index into papszLines of the last line handed out */

    int          bNonUniqueKey;   /* TRUE once keys are known (or assumed) not to be unique */

    /* Cache for whole file */
    int          nLineCount;      /* number of valid entries in papszLines */
    char       **papszLines;      /* start of each data line inside pszRawData */
    int         *panLineIndex;    /* atoi() of each line; NULL when values are not ascending */
    char        *pszRawData;      /* whole file contents, NUL terminated */
} CSVTable;
67 :
68 : /* It would likely be better to share this list between threads, but
69 : that will require some rework. */
70 :
71 : /************************************************************************/
72 : /* CSVAccess() */
73 : /* */
74 : /* This function will fetch a handle to the requested table. */
75 : /* If not found in the ``open table list'' the table will be */
76 : /* opened and added to the list. Eventually this function may */
77 : /* become public with an abstracted return type so that */
78 : /* applications can set options about the table. For now this */
79 : /* isn't done. */
80 : /************************************************************************/
81 :
82 21943 : static CSVTable *CSVAccess( const char * pszFilename )
83 :
84 : {
85 : CSVTable *psTable;
86 : FILE *fp;
87 :
88 : /* -------------------------------------------------------------------- */
89 : /* Fetch the table, and allocate the thread-local pointer to it */
90 : /* if there isn't already one. */
91 : /* -------------------------------------------------------------------- */
92 : CSVTable **ppsCSVTableList;
93 :
94 21943 : ppsCSVTableList = (CSVTable **) CPLGetTLS( CTLS_CSVTABLEPTR );
95 21943 : if( ppsCSVTableList == NULL )
96 : {
97 15 : ppsCSVTableList = (CSVTable **) CPLCalloc(1,sizeof(CSVTable*));
98 15 : CPLSetTLS( CTLS_CSVTABLEPTR, ppsCSVTableList, TRUE );
99 : }
100 :
101 : /* -------------------------------------------------------------------- */
102 : /* Is the table already in the list. */
103 : /* -------------------------------------------------------------------- */
104 145165 : for( psTable = *ppsCSVTableList;
105 : psTable != NULL;
106 : psTable = psTable->psNext )
107 : {
108 145075 : if( EQUAL(psTable->pszFilename,pszFilename) )
109 : {
110 : /*
111 : * Eventually we should consider promoting to the front of
112 : * the list to accelerate frequently accessed tables.
113 : */
114 :
115 21853 : return( psTable );
116 : }
117 : }
118 :
119 : /* -------------------------------------------------------------------- */
120 : /* If not, try to open it. */
121 : /* -------------------------------------------------------------------- */
122 90 : fp = VSIFOpen( pszFilename, "rb" );
123 90 : if( fp == NULL )
124 0 : return NULL;
125 :
126 : /* -------------------------------------------------------------------- */
127 : /* Create an information structure about this table, and add to */
128 : /* the front of the list. */
129 : /* -------------------------------------------------------------------- */
130 90 : psTable = (CSVTable *) CPLCalloc(sizeof(CSVTable),1);
131 :
132 90 : psTable->fp = fp;
133 90 : psTable->pszFilename = CPLStrdup( pszFilename );
134 90 : psTable->bNonUniqueKey = FALSE; /* as far as we know now */
135 90 : psTable->psNext = *ppsCSVTableList;
136 :
137 90 : *ppsCSVTableList = psTable;
138 :
139 : /* -------------------------------------------------------------------- */
140 : /* Read the table header record containing the field names. */
141 : /* -------------------------------------------------------------------- */
142 90 : psTable->papszFieldNames = CSVReadParseLine( fp );
143 :
144 90 : return( psTable );
145 : }
146 :
147 : /************************************************************************/
148 : /* CSVDeaccess() */
149 : /************************************************************************/
150 :
151 1346 : void CSVDeaccess( const char * pszFilename )
152 :
153 : {
154 : CSVTable *psLast, *psTable;
155 :
156 : /* -------------------------------------------------------------------- */
157 : /* Fetch the table, and allocate the thread-local pointer to it */
158 : /* if there isn't already one. */
159 : /* -------------------------------------------------------------------- */
160 : CSVTable **ppsCSVTableList;
161 :
162 1346 : ppsCSVTableList = (CSVTable **) CPLGetTLS( CTLS_CSVTABLEPTR );
163 1346 : if( ppsCSVTableList == NULL )
164 1231 : return;
165 :
166 : /* -------------------------------------------------------------------- */
167 : /* A NULL means deaccess all tables. */
168 : /* -------------------------------------------------------------------- */
169 115 : if( pszFilename == NULL )
170 : {
171 151 : while( *ppsCSVTableList != NULL )
172 79 : CSVDeaccess( (*ppsCSVTableList)->pszFilename );
173 :
174 36 : return;
175 : }
176 :
177 : /* -------------------------------------------------------------------- */
178 : /* Find this table. */
179 : /* -------------------------------------------------------------------- */
180 79 : psLast = NULL;
181 79 : for( psTable = *ppsCSVTableList;
182 : psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename);
183 : psTable = psTable->psNext )
184 : {
185 0 : psLast = psTable;
186 : }
187 :
188 79 : if( psTable == NULL )
189 : {
190 0 : CPLDebug( "CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename );
191 0 : return;
192 : }
193 :
194 : /* -------------------------------------------------------------------- */
195 : /* Remove the link from the list. */
196 : /* -------------------------------------------------------------------- */
197 79 : if( psLast != NULL )
198 0 : psLast->psNext = psTable->psNext;
199 : else
200 79 : *ppsCSVTableList = psTable->psNext;
201 :
202 : /* -------------------------------------------------------------------- */
203 : /* Free the table. */
204 : /* -------------------------------------------------------------------- */
205 79 : if( psTable->fp != NULL )
206 0 : VSIFClose( psTable->fp );
207 :
208 79 : CSLDestroy( psTable->papszFieldNames );
209 79 : CSLDestroy( psTable->papszRecFields );
210 79 : CPLFree( psTable->pszFilename );
211 79 : CPLFree( psTable->panLineIndex );
212 79 : CPLFree( psTable->pszRawData );
213 79 : CPLFree( psTable->papszLines );
214 :
215 79 : CPLFree( psTable );
216 :
217 79 : CPLReadLine( NULL );
218 : }
219 :
220 : /************************************************************************/
221 : /* CSVSplitLine() */
222 : /* */
223 : /* Tokenize a CSV line into fields in the form of a string */
224 : /* list. This is used instead of the CPLTokenizeString() */
225 : /* because it provides correct CSV escaping and quoting */
226 : /* semantics. */
227 : /************************************************************************/
228 :
229 45527 : static char **CSVSplitLine( const char *pszString, char chDelimiter )
230 :
231 : {
232 45527 : char **papszRetList = NULL;
233 : char *pszToken;
234 : int nTokenMax, nTokenLen;
235 :
236 45527 : pszToken = (char *) CPLCalloc(10,1);
237 45527 : nTokenMax = 10;
238 :
239 621173 : while( pszString != NULL && *pszString != '\0' )
240 : {
241 530119 : int bInString = FALSE;
242 :
243 530119 : nTokenLen = 0;
244 :
245 : /* Try to find the next delimeter, marking end of token */
246 9179256 : for( ; *pszString != '\0'; pszString++ )
247 : {
248 :
249 : /* End if this is a delimeter skip it and break. */
250 9144086 : if( !bInString && *pszString == chDelimiter )
251 : {
252 494949 : pszString++;
253 494949 : break;
254 : }
255 :
256 8649137 : if( *pszString == '"' )
257 : {
258 173134 : if( !bInString || pszString[1] != '"' )
259 : {
260 149774 : bInString = !bInString;
261 149774 : continue;
262 : }
263 : else /* doubled quotes in string resolve to one quote */
264 : {
265 23360 : pszString++;
266 : }
267 : }
268 :
269 8499363 : if( nTokenLen >= nTokenMax-2 )
270 : {
271 97836 : nTokenMax = nTokenMax * 2 + 10;
272 97836 : pszToken = (char *) CPLRealloc( pszToken, nTokenMax );
273 : }
274 :
275 8499363 : pszToken[nTokenLen] = *pszString;
276 8499363 : nTokenLen++;
277 : }
278 :
279 530119 : pszToken[nTokenLen] = '\0';
280 530119 : papszRetList = CSLAddString( papszRetList, pszToken );
281 :
282 : /* If the last token is an empty token, then we have to catch
283 : * it now, otherwise we won't reenter the loop and it will be lost.
284 : */
285 530119 : if ( *pszString == '\0' && *(pszString-1) == chDelimiter )
286 : {
287 10357 : papszRetList = CSLAddString( papszRetList, "" );
288 : }
289 : }
290 :
291 45527 : if( papszRetList == NULL )
292 0 : papszRetList = (char **) CPLCalloc(sizeof(char *),1);
293 :
294 45527 : CPLFree( pszToken );
295 :
296 45527 : return papszRetList;
297 : }
298 :
299 : /************************************************************************/
300 : /* CSVFindNextLine() */
301 : /* */
302 : /* Find the start of the next line, while at the same time zero */
303 : /* terminating this line. Take into account that there may be */
304 : /* newline indicators within quoted strings, and that quotes */
305 : /* can be escaped with a backslash. */
306 : /************************************************************************/
307 :
/**
 * Terminate the line starting at pszThisLine and locate the next one.
 *
 * The line ends at the first CR or LF that is not inside double quotes; a
 * quote preceded by a backslash does not change the quoting state.  That
 * line terminator and any CR/LF characters directly after it are
 * overwritten with NUL.
 *
 * @param pszThisLine start of the current line inside a writable,
 *                    NUL terminated buffer.
 * @return pointer to the first character after the terminators, or NULL if
 *         the end of the buffer was reached.
 */
static char *CSVFindNextLine( char *pszThisLine )
{
    const char chLF = 10;
    const char chCR = 13;
    char *pszScan = pszThisLine;
    int bInsideQuotes = 0;

    for( ; *pszScan != '\0'; pszScan++ )
    {
        /* An unescaped quote flips the quoting state. */
        if( *pszScan == '\"'
            && (pszScan == pszThisLine || pszScan[-1] != '\\') )
        {
            bInsideQuotes = !bInsideQuotes;
        }

        if( !bInsideQuotes && (*pszScan == chLF || *pszScan == chCR) )
            break;
    }

    /* Blank out the whole run of line terminators. */
    while( *pszScan == chLF || *pszScan == chCR )
    {
        *pszScan = '\0';
        pszScan++;
    }

    if( *pszScan != '\0' )
        return pszScan;

    return NULL;
}
332 :
333 : /************************************************************************/
334 : /* CSVIngest() */
335 : /* */
336 : /* Load entire file into memory and setup index if possible. */
337 : /************************************************************************/
338 :
339 4543 : static void CSVIngest( const char *pszFilename )
340 :
341 : {
342 4543 : CSVTable *psTable = CSVAccess( pszFilename );
343 4543 : int nFileLen, i, nMaxLineCount, iLine = 0;
344 : char *pszThisLine;
345 :
346 4543 : if( psTable->pszRawData != NULL )
347 4453 : return;
348 :
349 : /* -------------------------------------------------------------------- */
350 : /* Ingest whole file. */
351 : /* -------------------------------------------------------------------- */
352 90 : VSIFSeek( psTable->fp, 0, SEEK_END );
353 90 : nFileLen = VSIFTell( psTable->fp );
354 90 : VSIRewind( psTable->fp );
355 :
356 90 : psTable->pszRawData = (char *) CPLMalloc(nFileLen+1);
357 90 : if( (int) VSIFRead( psTable->pszRawData, 1, nFileLen, psTable->fp )
358 : != nFileLen )
359 : {
360 0 : CPLFree( psTable->pszRawData );
361 0 : psTable->pszRawData = NULL;
362 :
363 : CPLError( CE_Failure, CPLE_FileIO, "Read of file %s failed.",
364 0 : psTable->pszFilename );
365 0 : return;
366 : }
367 :
368 90 : psTable->pszRawData[nFileLen] = '\0';
369 :
370 : /* -------------------------------------------------------------------- */
371 : /* Get count of newlines so we can allocate line array. */
372 : /* -------------------------------------------------------------------- */
373 90 : nMaxLineCount = 0;
374 5912155 : for( i = 0; i < nFileLen; i++ )
375 : {
376 5912065 : if( psTable->pszRawData[i] == 10 )
377 41263 : nMaxLineCount++;
378 : }
379 :
380 90 : psTable->papszLines = (char **) CPLCalloc(sizeof(char*),nMaxLineCount);
381 :
382 : /* -------------------------------------------------------------------- */
383 : /* Build a list of record pointers into the raw data buffer */
384 : /* based on line terminators. Zero terminate the line */
385 : /* strings. */
386 : /* -------------------------------------------------------------------- */
387 : /* skip header line */
388 90 : pszThisLine = CSVFindNextLine( psTable->pszRawData );
389 :
390 41340 : while( pszThisLine != NULL && iLine < nMaxLineCount )
391 : {
392 41160 : psTable->papszLines[iLine++] = pszThisLine;
393 41160 : pszThisLine = CSVFindNextLine( pszThisLine );
394 : }
395 :
396 90 : psTable->nLineCount = iLine;
397 :
398 : /* -------------------------------------------------------------------- */
399 : /* Allocate and populate index array. Ensure they are in */
400 : /* ascending order so that binary searches can be done on the */
401 : /* array. */
402 : /* -------------------------------------------------------------------- */
403 90 : psTable->panLineIndex = (int *) CPLMalloc(sizeof(int)*psTable->nLineCount);
404 40899 : for( i = 0; i < psTable->nLineCount; i++ )
405 : {
406 40822 : psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);
407 :
408 40822 : if( i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i-1] )
409 : {
410 13 : CPLFree( psTable->panLineIndex );
411 13 : psTable->panLineIndex = NULL;
412 13 : break;
413 : }
414 : }
415 :
416 90 : psTable->iLastLine = -1;
417 :
418 : /* -------------------------------------------------------------------- */
419 : /* We should never need the file handle against, so close it. */
420 : /* -------------------------------------------------------------------- */
421 90 : VSIFClose( psTable->fp );
422 90 : psTable->fp = NULL;
423 : }
424 :
425 : /************************************************************************/
426 : /* CSVDetectSeperator() */
427 : /************************************************************************/
428 :
429 : /** Detect which field separator is used.
430 : *
431 : * Currently, it can detect comma, semicolon or tabulation. In case of
432 : * ambiguity or no separator found, comma will be considered as the separator.
433 : *
434 : * @return ',', ';' or '\t'
435 : */
436 43 : char CSVDetectSeperator (const char* pszLine)
437 : {
438 43 : int bInString = FALSE;
439 43 : char chDelimiter = '\0';
440 :
441 1841 : for( ; *pszLine != '\0'; pszLine++ )
442 : {
443 1951 : if( !bInString && (*pszLine == ',' || *pszLine == ';' || *pszLine == '\t'))
444 : {
445 153 : if (chDelimiter == '\0')
446 43 : chDelimiter = *pszLine;
447 110 : else if (chDelimiter != *pszLine)
448 : {
449 : /* The separator is not consistant on the line. */
450 : CPLDebug("CSV", "Inconsistant separator. '%c' and '%c' found. Using ',' as default",
451 0 : chDelimiter, *pszLine);
452 0 : chDelimiter = ',';
453 0 : break;
454 : }
455 : }
456 1645 : else if( *pszLine == '"' )
457 : {
458 104 : if( !bInString || pszLine[1] != '"' )
459 : {
460 104 : bInString = !bInString;
461 104 : continue;
462 : }
463 : else /* doubled quotes in string resolve to one quote */
464 : {
465 0 : pszLine++;
466 : }
467 : }
468 : }
469 :
470 43 : if (chDelimiter == '\0')
471 0 : chDelimiter = ',';
472 :
473 43 : return chDelimiter;
474 : }
475 :
476 : /************************************************************************/
477 : /* CSVReadParseLine() */
478 : /* */
479 : /* Read one line, and return split into fields. The return */
480 : /* result is a stringlist, in the sense of the CSL functions. */
481 : /************************************************************************/
482 :
483 29279 : char **CSVReadParseLine( FILE * fp )
484 : {
485 29279 : return CSVReadParseLine2(fp, ',');
486 : }
487 :
488 29664 : char **CSVReadParseLine2( FILE * fp, char chDelimiter )
489 :
490 : {
491 : const char *pszLine;
492 : char *pszWorkLine;
493 : char **papszReturn;
494 :
495 : CPLAssert( fp != NULL );
496 29664 : if( fp == NULL )
497 0 : return( NULL );
498 :
499 29664 : pszLine = CPLReadLine( fp );
500 29664 : if( pszLine == NULL )
501 102 : return( NULL );
502 :
503 : /* -------------------------------------------------------------------- */
504 : /* If there are no quotes, then this is the simple case. */
505 : /* Parse, and return tokens. */
506 : /* -------------------------------------------------------------------- */
507 29562 : if( strchr(pszLine,'\"') == NULL )
508 830 : return CSVSplitLine( pszLine, chDelimiter );
509 :
510 : /* -------------------------------------------------------------------- */
511 : /* We must now count the quotes in our working string, and as */
512 : /* long as it is odd, keep adding new lines. */
513 : /* -------------------------------------------------------------------- */
514 28732 : pszWorkLine = CPLStrdup( pszLine );
515 :
516 2 : while( TRUE )
517 : {
518 28734 : int i, nCount = 0;
519 :
520 8361110 : for( i = 0; pszWorkLine[i] != '\0'; i++ )
521 : {
522 8528527 : if( pszWorkLine[i] == '\"'
523 196151 : && (i == 0 || pszWorkLine[i-1] != '\\') )
524 196292 : nCount++;
525 : }
526 :
527 28734 : if( nCount % 2 == 0 )
528 28732 : break;
529 :
530 2 : pszLine = CPLReadLine( fp );
531 2 : if( pszLine == NULL )
532 0 : break;
533 :
534 : pszWorkLine = (char *)
535 : CPLRealloc(pszWorkLine,
536 2 : strlen(pszWorkLine) + strlen(pszLine) + 2);
537 2 : strcat( pszWorkLine, "\n" ); // This gets lost in CPLReadLine().
538 2 : strcat( pszWorkLine, pszLine );
539 : }
540 :
541 28732 : papszReturn = CSVSplitLine( pszWorkLine, chDelimiter );
542 :
543 28732 : CPLFree( pszWorkLine );
544 :
545 28732 : return papszReturn;
546 : }
547 :
548 : /************************************************************************/
549 : /* CSVCompare() */
550 : /* */
551 : /* Compare a field to a search value using a particular */
552 : /* criteria. */
553 : /************************************************************************/
554 :
555 19534 : static int CSVCompare( const char * pszFieldValue, const char * pszTarget,
556 : CSVCompareCriteria eCriteria )
557 :
558 : {
559 19534 : if( eCriteria == CC_ExactString )
560 : {
561 0 : return( strcmp( pszFieldValue, pszTarget ) == 0 );
562 : }
563 19534 : else if( eCriteria == CC_ApproxString )
564 : {
565 0 : return( EQUAL( pszFieldValue, pszTarget ) );
566 : }
567 19534 : else if( eCriteria == CC_Integer )
568 : {
569 19534 : return( atoi(pszFieldValue) == atoi(pszTarget) );
570 : }
571 :
572 0 : return FALSE;
573 : }
574 :
575 : /************************************************************************/
576 : /* CSVScanLines() */
577 : /* */
578 : /* Scan the file line by line for a record whose key field */
579 : /* equals the indicated value under the given comparison */
580 : /* criteria. Return the first matching line split into fields. */
581 : /************************************************************************/
582 :
583 0 : char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue,
584 : CSVCompareCriteria eCriteria )
585 :
586 : {
587 0 : char **papszFields = NULL;
588 0 : int bSelected = FALSE, nTestValue;
589 :
590 : CPLAssert( pszValue != NULL );
591 : CPLAssert( iKeyField >= 0 );
592 : CPLAssert( fp != NULL );
593 :
594 0 : nTestValue = atoi(pszValue);
595 :
596 0 : while( !bSelected ) {
597 0 : papszFields = CSVReadParseLine( fp );
598 0 : if( papszFields == NULL )
599 0 : return( NULL );
600 :
601 0 : if( CSLCount( papszFields ) < iKeyField+1 )
602 : {
603 : /* not selected */
604 : }
605 0 : else if( eCriteria == CC_Integer
606 0 : && atoi(papszFields[iKeyField]) == nTestValue )
607 : {
608 0 : bSelected = TRUE;
609 : }
610 : else
611 : {
612 0 : bSelected = CSVCompare( papszFields[iKeyField], pszValue,
613 0 : eCriteria );
614 : }
615 :
616 0 : if( !bSelected )
617 : {
618 0 : CSLDestroy( papszFields );
619 0 : papszFields = NULL;
620 : }
621 : }
622 :
623 0 : return( papszFields );
624 : }
625 :
626 : /************************************************************************/
627 : /* CSVScanLinesIndexed() */
628 : /* */
629 : /* Binary search the ingested table's integer line index for */
630 : /* the given key value. Return the first matching line split */
631 : /* into fields, or NULL if there is no match. */
632 : /************************************************************************/
633 :
634 : static char **
635 498 : CSVScanLinesIndexed( CSVTable *psTable, int nKeyValue )
636 :
637 : {
638 498 : int iTop, iBottom, iMiddle, iResult = -1;
639 :
640 : CPLAssert( psTable->panLineIndex != NULL );
641 :
642 : /* -------------------------------------------------------------------- */
643 : /* Find target record with binary search. */
644 : /* -------------------------------------------------------------------- */
645 498 : iTop = psTable->nLineCount-1;
646 498 : iBottom = 0;
647 :
648 4382 : while( iTop >= iBottom )
649 : {
650 3792 : iMiddle = (iTop + iBottom) / 2;
651 3792 : if( psTable->panLineIndex[iMiddle] > nKeyValue )
652 1248 : iTop = iMiddle - 1;
653 2544 : else if( psTable->panLineIndex[iMiddle] < nKeyValue )
654 2138 : iBottom = iMiddle + 1;
655 : else
656 : {
657 406 : iResult = iMiddle;
658 : // if a key is not unique, select the first instance of it.
659 1234 : while( iResult > 0
660 404 : && psTable->panLineIndex[iResult-1] == nKeyValue )
661 : {
662 18 : psTable->bNonUniqueKey = TRUE;
663 18 : iResult--;
664 : }
665 406 : break;
666 : }
667 : }
668 :
669 498 : if( iResult == -1 )
670 92 : return NULL;
671 :
672 : /* -------------------------------------------------------------------- */
673 : /* Parse target line, and update iLastLine indicator. */
674 : /* -------------------------------------------------------------------- */
675 406 : psTable->iLastLine = iResult;
676 :
677 406 : return CSVSplitLine( psTable->papszLines[iResult], ',' );
678 : }
679 :
680 : /************************************************************************/
681 : /* CSVScanLinesIngested() */
682 : /* */
683 : /* Scan the in-memory lines of an ingested table for a record */
684 : /* whose key field equals the indicated value under the given */
685 : /* comparison criteria. Return the first match split into fields. */
686 : /************************************************************************/
687 :
688 : static char **
689 887 : CSVScanLinesIngested( CSVTable *psTable, int iKeyField, const char * pszValue,
690 : CSVCompareCriteria eCriteria )
691 :
692 : {
693 887 : char **papszFields = NULL;
694 887 : int bSelected = FALSE, nTestValue;
695 :
696 : CPLAssert( pszValue != NULL );
697 : CPLAssert( iKeyField >= 0 );
698 :
699 887 : nTestValue = atoi(pszValue);
700 :
701 : /* -------------------------------------------------------------------- */
702 : /* Short cut for indexed files. */
703 : /* -------------------------------------------------------------------- */
704 887 : if( iKeyField == 0 && eCriteria == CC_Integer
705 : && psTable->panLineIndex != NULL )
706 498 : return CSVScanLinesIndexed( psTable, nTestValue );
707 :
708 : /* -------------------------------------------------------------------- */
709 : /* Scan from in-core lines. */
710 : /* -------------------------------------------------------------------- */
711 16316 : while( !bSelected && psTable->iLastLine+1 < psTable->nLineCount ) {
712 15538 : psTable->iLastLine++;
713 15538 : papszFields = CSVSplitLine( psTable->papszLines[psTable->iLastLine], ',' );
714 :
715 15538 : if( CSLCount( papszFields ) < iKeyField+1 )
716 : {
717 : /* not selected */
718 : }
719 31077 : else if( eCriteria == CC_Integer
720 15538 : && atoi(papszFields[iKeyField]) == nTestValue )
721 : {
722 1 : bSelected = TRUE;
723 : }
724 : else
725 : {
726 15537 : bSelected = CSVCompare( papszFields[iKeyField], pszValue,
727 31074 : eCriteria );
728 : }
729 :
730 15538 : if( !bSelected )
731 : {
732 15537 : CSLDestroy( papszFields );
733 15537 : papszFields = NULL;
734 : }
735 : }
736 :
737 389 : return( papszFields );
738 : }
739 :
740 : /************************************************************************/
741 : /* CSVGetNextLine() */
742 : /* */
743 : /* Fetch the next line of a CSV file based on a passed in */
744 : /* filename. Returns NULL at end of file, or if file is not */
745 : /* really established. */
746 : /************************************************************************/
747 :
748 21 : char **CSVGetNextLine( const char *pszFilename )
749 :
750 : {
751 : CSVTable *psTable;
752 :
753 : /* -------------------------------------------------------------------- */
754 : /* Get access to the table. */
755 : /* -------------------------------------------------------------------- */
756 : CPLAssert( pszFilename != NULL );
757 :
758 21 : psTable = CSVAccess( pszFilename );
759 21 : if( psTable == NULL )
760 0 : return NULL;
761 :
762 : /* -------------------------------------------------------------------- */
763 : /* If we use CSVGetNextLine() we can pretty much assume we have */
764 : /* a non-unique key. */
765 : /* -------------------------------------------------------------------- */
766 21 : psTable->bNonUniqueKey = TRUE;
767 :
768 : /* -------------------------------------------------------------------- */
769 : /* Do we have a next line available? This only works for */
770 : /* ingested tables I believe. */
771 : /* -------------------------------------------------------------------- */
772 21 : if( psTable->iLastLine+1 >= psTable->nLineCount )
773 0 : return NULL;
774 :
775 21 : psTable->iLastLine++;
776 21 : CSLDestroy( psTable->papszRecFields );
777 : psTable->papszRecFields =
778 21 : CSVSplitLine( psTable->papszLines[psTable->iLastLine], ',' );
779 :
780 21 : return psTable->papszRecFields;
781 : }
782 :
783 : /************************************************************************/
784 : /* CSVScanFile() */
785 : /* */
786 : /* Scan a whole file using criteria similar to above, but also */
787 : /* taking care of file opening and closing. */
788 : /************************************************************************/
789 :
790 4543 : char **CSVScanFile( const char * pszFilename, int iKeyField,
791 : const char * pszValue, CSVCompareCriteria eCriteria )
792 :
793 : {
794 : CSVTable *psTable;
795 :
796 : /* -------------------------------------------------------------------- */
797 : /* Get access to the table. */
798 : /* -------------------------------------------------------------------- */
799 : CPLAssert( pszFilename != NULL );
800 :
801 4543 : if( iKeyField < 0 )
802 0 : return NULL;
803 :
804 4543 : psTable = CSVAccess( pszFilename );
805 4543 : if( psTable == NULL )
806 0 : return NULL;
807 :
808 4543 : CSVIngest( pszFilename );
809 :
810 : /* -------------------------------------------------------------------- */
811 : /* Does the current record match the criteria? If so, just */
812 : /* return it again. */
813 : /* -------------------------------------------------------------------- */
814 8540 : if( iKeyField >= 0
815 : && iKeyField < CSLCount(psTable->papszRecFields)
816 3997 : && CSVCompare(pszValue,psTable->papszRecFields[iKeyField],eCriteria)
817 : && !psTable->bNonUniqueKey )
818 : {
819 3656 : return psTable->papszRecFields;
820 : }
821 :
822 : /* -------------------------------------------------------------------- */
823 : /* Scan the file from the beginning, replacing the ``current */
824 : /* record'' in our structure with the one that is found. */
825 : /* -------------------------------------------------------------------- */
826 887 : psTable->iLastLine = -1;
827 887 : CSLDestroy( psTable->papszRecFields );
828 :
829 887 : if( psTable->pszRawData != NULL )
830 : psTable->papszRecFields =
831 887 : CSVScanLinesIngested( psTable, iKeyField, pszValue, eCriteria );
832 : else
833 : {
834 0 : VSIRewind( psTable->fp );
835 0 : CPLReadLine( psTable->fp ); /* throw away the header line */
836 :
837 : psTable->papszRecFields =
838 0 : CSVScanLines( psTable->fp, iKeyField, pszValue, eCriteria );
839 : }
840 :
841 887 : return( psTable->papszRecFields );
842 : }
843 :
844 : /************************************************************************/
845 : /* CSVGetFieldId() */
846 : /* */
847 : /* Read the first record of a CSV file (rewinding to be sure), */
848 : /* and find the field with the indicated name. Returns -1 if */
849 : /* it fails to find the field name. Comparison is case */
850 : /* insensitive, but otherwise exact. After this function has */
851 : /* been called the file pointer will be positioned just after */
852 : /* the first record. */
853 : /************************************************************************/
854 :
855 0 : int CSVGetFieldId( FILE * fp, const char * pszFieldName )
856 :
857 : {
858 : char **papszFields;
859 : int i;
860 :
861 : CPLAssert( fp != NULL && pszFieldName != NULL );
862 :
863 0 : VSIRewind( fp );
864 :
865 0 : papszFields = CSVReadParseLine( fp );
866 0 : for( i = 0; papszFields != NULL && papszFields[i] != NULL; i++ )
867 : {
868 0 : if( EQUAL(papszFields[i],pszFieldName) )
869 : {
870 0 : CSLDestroy( papszFields );
871 0 : return i;
872 : }
873 : }
874 :
875 0 : CSLDestroy( papszFields );
876 :
877 0 : return -1;
878 : }
879 :
880 : /************************************************************************/
881 : /* CSVGetFileFieldId() */
882 : /* */
883 : /* Same as CSVGetFieldId(), except that we get the file based */
884 : /* on filename, rather than having an existing handle. */
885 : /************************************************************************/
886 :
887 8763 : int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName )
888 :
889 : {
890 : CSVTable *psTable;
891 : int i;
892 :
893 : /* -------------------------------------------------------------------- */
894 : /* Get access to the table. */
895 : /* -------------------------------------------------------------------- */
896 : CPLAssert( pszFilename != NULL );
897 :
898 8763 : psTable = CSVAccess( pszFilename );
899 8763 : if( psTable == NULL )
900 0 : return -1;
901 :
902 : /* -------------------------------------------------------------------- */
903 : /* Find the requested field. */
904 : /* -------------------------------------------------------------------- */
905 83112 : for( i = 0;
906 : psTable->papszFieldNames != NULL
907 41556 : && psTable->papszFieldNames[i] != NULL;
908 : i++ )
909 : {
910 41512 : if( EQUAL(psTable->papszFieldNames[i],pszFieldName) )
911 : {
912 8719 : return i;
913 : }
914 : }
915 :
916 44 : return -1;
917 : }
918 :
919 :
920 : /************************************************************************/
921 : /* CSVScanFileByName() */
922 : /* */
923 : /* Same as CSVScanFile(), but using a field name instead of a */
924 : /* field number. */
925 : /************************************************************************/
926 :
927 4543 : char **CSVScanFileByName( const char * pszFilename,
928 : const char * pszKeyFieldName,
929 : const char * pszValue, CSVCompareCriteria eCriteria )
930 :
931 : {
932 : int iKeyField;
933 :
934 4543 : iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName );
935 4543 : if( iKeyField == -1 )
936 0 : return NULL;
937 :
938 4543 : return( CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria ) );
939 : }
940 :
941 : /************************************************************************/
942 : /* CSVGetField() */
943 : /* */
944 : /* The all-in-one function to fetch a particular field value */
945 : /* from a CSV file. Note this function will return an empty */
946 : /* string, rather than NULL if it fails to find the desired */
947 : /* value for some reason. The caller can't establish that the */
948 : /* fetch failed. */
949 : /************************************************************************/
950 :
951 4073 : const char *CSVGetField( const char * pszFilename,
952 : const char * pszKeyFieldName,
953 : const char * pszKeyFieldValue,
954 : CSVCompareCriteria eCriteria,
955 : const char * pszTargetField )
956 :
957 : {
958 : CSVTable *psTable;
959 : char **papszRecord;
960 : int iTargetField;
961 :
962 : /* -------------------------------------------------------------------- */
963 : /* Find the table. */
964 : /* -------------------------------------------------------------------- */
965 4073 : psTable = CSVAccess( pszFilename );
966 4073 : if( psTable == NULL )
967 0 : return "";
968 :
969 : /* -------------------------------------------------------------------- */
970 : /* Find the correct record. */
971 : /* -------------------------------------------------------------------- */
972 : papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName,
973 4073 : pszKeyFieldValue, eCriteria );
974 :
975 4073 : if( papszRecord == NULL )
976 344 : return "";
977 :
978 : /* -------------------------------------------------------------------- */
979 : /* Figure out which field we want out of this. */
980 : /* -------------------------------------------------------------------- */
981 3729 : iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField );
982 3729 : if( iTargetField < 0 )
983 0 : return "";
984 :
985 3729 : if( iTargetField >= CSLCount( papszRecord ) )
986 0 : return "";
987 :
988 3729 : return( papszRecord[iTargetField] );
989 : }
990 :
991 : /************************************************************************/
992 : /* GDALDefaultCSVFilename() */
993 : /************************************************************************/
994 :
995 : typedef struct
996 : {
997 : char szPath[512];
998 : int bCSVFinderInitialized;
999 : } DefaultCSVFileNameTLS;
1000 :
1001 :
1002 2250 : const char * GDALDefaultCSVFilename( const char *pszBasename )
1003 :
1004 : {
1005 : /* -------------------------------------------------------------------- */
1006 : /* Do we already have this file accessed? If so, just return */
1007 : /* the existing path without any further probing. */
1008 : /* -------------------------------------------------------------------- */
1009 : CSVTable **ppsCSVTableList;
1010 :
1011 2250 : ppsCSVTableList = (CSVTable **) CPLGetTLS( CTLS_CSVTABLEPTR );
1012 2250 : if( ppsCSVTableList != NULL )
1013 : {
1014 : CSVTable *psTable;
1015 2186 : int nBasenameLen = strlen(pszBasename);
1016 :
1017 15257 : for( psTable = *ppsCSVTableList;
1018 : psTable != NULL;
1019 : psTable = psTable->psNext )
1020 : {
1021 15177 : int nFullLen = strlen(psTable->pszFilename);
1022 :
1023 17284 : if( nFullLen > nBasenameLen
1024 : && strcmp(psTable->pszFilename+nFullLen-nBasenameLen,
1025 : pszBasename) == 0
1026 2107 : && strchr("/\\",psTable->pszFilename[+nFullLen-nBasenameLen-1])
1027 : != NULL )
1028 : {
1029 2106 : return psTable->pszFilename;
1030 : }
1031 : }
1032 : }
1033 :
1034 : /* -------------------------------------------------------------------- */
1035 : /* Otherwise we need to look harder for it. */
1036 : /* -------------------------------------------------------------------- */
1037 : DefaultCSVFileNameTLS* pTLSData =
1038 144 : (DefaultCSVFileNameTLS *) CPLGetTLS( CTLS_CSVDEFAULTFILENAME );
1039 144 : if (pTLSData == NULL)
1040 : {
1041 58 : pTLSData = (DefaultCSVFileNameTLS*) CPLCalloc(1, sizeof(DefaultCSVFileNameTLS));
1042 58 : CPLSetTLS( CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE );
1043 : }
1044 :
1045 144 : FILE *fp = NULL;
1046 : const char *pszResult;
1047 :
1048 144 : pszResult = CPLFindFile( "epsg_csv", pszBasename );
1049 :
1050 144 : if( pszResult != NULL )
1051 140 : return pszResult;
1052 :
1053 4 : if( !pTLSData->bCSVFinderInitialized )
1054 : {
1055 1 : pTLSData->bCSVFinderInitialized = TRUE;
1056 :
1057 1 : if( CPLGetConfigOption("GEOTIFF_CSV",NULL) != NULL )
1058 0 : CPLPushFinderLocation( CPLGetConfigOption("GEOTIFF_CSV",NULL));
1059 :
1060 1 : if( CPLGetConfigOption("GDAL_DATA",NULL) != NULL )
1061 1 : CPLPushFinderLocation( CPLGetConfigOption("GDAL_DATA",NULL) );
1062 :
1063 1 : pszResult = CPLFindFile( "epsg_csv", pszBasename );
1064 :
1065 1 : if( pszResult != NULL )
1066 0 : return pszResult;
1067 : }
1068 :
1069 4 : if( (fp = fopen( "csv/horiz_cs.csv", "rt" )) != NULL )
1070 : {
1071 0 : strcpy( pTLSData->szPath, "csv/" );
1072 0 : CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
1073 : }
1074 : else
1075 : {
1076 : #ifdef GDAL_PREFIX
1077 : #ifdef MACOSX_FRAMEWORK
1078 : strcpy( pTLSData->szPath, GDAL_PREFIX "/Resources/epsg_csv/" );
1079 : CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
1080 : #else
1081 4 : strcpy( pTLSData->szPath, GDAL_PREFIX "/share/epsg_csv/" );
1082 4 : CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
1083 : #endif
1084 : #else
1085 : strcpy( pTLSData->szPath, "/usr/local/share/epsg_csv/" );
1086 : CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
1087 : #endif
1088 4 : if( (fp = fopen( pTLSData->szPath, "rt" )) == NULL )
1089 4 : CPLStrlcpy( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
1090 : }
1091 :
1092 4 : if( fp != NULL )
1093 0 : fclose( fp );
1094 :
1095 4 : return( pTLSData->szPath );
1096 : }
1097 :
1098 : /************************************************************************/
1099 : /* CSVFilename() */
1100 : /* */
1101 : /* Return the full path to a particular CSV file. This will */
1102 : /* eventually be something the application can override. */
1103 : /************************************************************************/
1104 :
1105 : CPL_C_START
1106 : static const char *(*pfnCSVFilenameHook)(const char *) = NULL;
1107 : CPL_C_END
1108 :
1109 2250 : const char * CSVFilename( const char *pszBasename )
1110 :
1111 : {
1112 2250 : if( pfnCSVFilenameHook == NULL )
1113 879 : return GDALDefaultCSVFilename( pszBasename );
1114 : else
1115 1371 : return( pfnCSVFilenameHook( pszBasename ) );
1116 : }
1117 :
1118 : /************************************************************************/
1119 : /* SetCSVFilenameHook() */
1120 : /* */
1121 : /* Applications can use this to set a function that will */
1122 : /* massage CSV filenames. */
1123 : /************************************************************************/
1124 :
1125 : /**
1126 : * Override CSV file search method.
1127 : *
1128 : * @param CSVFileOverride The pointer to a function which will return the
1129 : * full path for a given filename.
1130 : *
1131 :
1132 : This function allows an application to override how the GTIFGetDefn() and related function find the CSV (Comma Separated
1133 : Value) values required. The pfnHook argument should be a pointer to a function that will take in a CSV filename and return a
1134 : full path to the file. The returned string should be to an internal static buffer so that the caller doesn't have to free the result.
1135 :
1136 : <b>Example:</b><br>
1137 :
1138 : The listgeo utility uses the following override function if the user
1139 : specified a CSV file directory with the -t commandline switch (argument
1140 : put into CSVDirName). <p>
1141 :
1142 : <pre>
1143 :
1144 : ...
1145 :
1146 :
1147 : SetCSVFilenameHook( CSVFileOverride );
1148 :
1149 : ...
1150 :
1151 :
1152 : static const char *CSVFileOverride( const char * pszInput )
1153 :
1154 : {
1155 : static char szPath[1024];
1156 :
1157 : #ifdef WIN32
1158 : sprintf( szPath, "%s\\%s", CSVDirName, pszInput );
1159 : #else
1160 : sprintf( szPath, "%s/%s", CSVDirName, pszInput );
1161 : #endif
1162 :
1163 : return( szPath );
1164 : }
1165 : </pre>
1166 :
1167 : */
1168 :
1169 : CPL_C_START
1170 310 : void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) )
1171 :
1172 : {
1173 310 : pfnCSVFilenameHook = pfnNewHook;
1174 310 : }
1175 : CPL_C_END
1176 :
|