1 : /******************************************************************************
2 : * $Id: cpl_csv.cpp 19600 2010-05-02 09:53:35Z rouault $
3 : *
4 : * Project: CPL - Common Portability Library
5 : * Purpose: CSV (comma separated value) file access.
6 : * Author: Frank Warmerdam, warmerdam@pobox.com
7 : *
8 : ******************************************************************************
9 : * Copyright (c) 1999, Frank Warmerdam
10 : *
11 : * Permission is hereby granted, free of charge, to any person obtaining a
12 : * copy of this software and associated documentation files (the "Software"),
13 : * to deal in the Software without restriction, including without limitation
14 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 : * and/or sell copies of the Software, and to permit persons to whom the
16 : * Software is furnished to do so, subject to the following conditions:
17 : *
18 : * The above copyright notice and this permission notice shall be included
19 : * in all copies or substantial portions of the Software.
20 : *
21 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 : * DEALINGS IN THE SOFTWARE.
28 : ****************************************************************************/
29 :
30 : #include "cpl_csv.h"
31 : #include "cpl_conv.h"
32 : #include "cpl_multiproc.h"
33 :
34 : CPL_CVSID("$Id: cpl_csv.cpp 19600 2010-05-02 09:53:35Z rouault $");
35 :
36 : CPL_C_START
37 : const char * GDALDefaultCSVFilename( const char *pszBasename );
38 : CPL_C_END
39 :
40 : /* ==================================================================== */
41 : /* The CSVTable is a persistent set of info about an open CSV */
42 : /* table. While it doesn't currently maintain a record index, */
43 : /* or in-memory copy of the table, it could be changed to do so */
44 : /* in the future. */
45 : /* ==================================================================== */
/* Per-table state kept for every CSV file opened through CSVAccess(). */
struct ctb {
    FILE       *fp;               /* open handle; set to NULL once the file is ingested */
    struct ctb *psNext;           /* next table in the per-thread open table list */
    char       *pszFilename;      /* private copy of the name the table was opened with */
    char      **papszFieldNames;  /* header record split into field names */
    char      **papszRecFields;   /* fields of the most recently returned record */
    int         iLastLine;        /* index into papszLines of the last line handed out */
    int         bNonUniqueKey;    /* TRUE once duplicate keys were seen or are assumed */

    /* Cache for whole file */
    int         nLineCount;       /* number of valid entries in papszLines */
    char      **papszLines;       /* start of each data line inside pszRawData */
    int        *panLineIndex;     /* atoi() of each line, or NULL if not in ascending order */
    char       *pszRawData;       /* complete file contents, zero terminated */
};

typedef struct ctb CSVTable;
67 :
68 : /* It would likely be better to share this list between threads, but
69 : that will require some rework. */
70 :
71 : /************************************************************************/
72 : /* CSVAccess() */
73 : /* */
74 : /* This function will fetch a handle to the requested table. */
75 : /* If not found in the ``open table list'' the table will be */
76 : /* opened and added to the list. Eventually this function may */
77 : /* become public with an abstracted return type so that */
78 : /* applications can set options about the table. For now this */
79 : /* isn't done. */
80 : /************************************************************************/
81 :
82 1870549 : static CSVTable *CSVAccess( const char * pszFilename )
83 :
84 : {
85 : CSVTable *psTable;
86 : FILE *fp;
87 :
88 : /* -------------------------------------------------------------------- */
89 : /* Fetch the table, and allocate the thread-local pointer to it */
90 : /* if there isn't already one. */
91 : /* -------------------------------------------------------------------- */
92 : CSVTable **ppsCSVTableList;
93 :
94 1870549 : ppsCSVTableList = (CSVTable **) CPLGetTLS( CTLS_CSVTABLEPTR );
95 1870549 : if( ppsCSVTableList == NULL )
96 : {
97 33 : ppsCSVTableList = (CSVTable **) CPLCalloc(1,sizeof(CSVTable*));
98 33 : CPLSetTLS( CTLS_CSVTABLEPTR, ppsCSVTableList, TRUE );
99 : }
100 :
101 : /* -------------------------------------------------------------------- */
102 : /* Is the table already in the list. */
103 : /* -------------------------------------------------------------------- */
104 8789519 : for( psTable = *ppsCSVTableList;
105 : psTable != NULL;
106 : psTable = psTable->psNext )
107 : {
108 8789373 : if( EQUAL(psTable->pszFilename,pszFilename) )
109 : {
110 : /*
111 : * Eventually we should consider promoting to the front of
112 : * the list to accelerate frequently accessed tables.
113 : */
114 :
115 1870403 : return( psTable );
116 : }
117 : }
118 :
119 : /* -------------------------------------------------------------------- */
120 : /* If not, try to open it. */
121 : /* -------------------------------------------------------------------- */
122 146 : fp = VSIFOpen( pszFilename, "rb" );
123 146 : if( fp == NULL )
124 0 : return NULL;
125 :
126 : /* -------------------------------------------------------------------- */
127 : /* Create an information structure about this table, and add to */
128 : /* the front of the list. */
129 : /* -------------------------------------------------------------------- */
130 146 : psTable = (CSVTable *) CPLCalloc(sizeof(CSVTable),1);
131 :
132 146 : psTable->fp = fp;
133 146 : psTable->pszFilename = CPLStrdup( pszFilename );
134 146 : psTable->bNonUniqueKey = FALSE; /* as far as we know now */
135 146 : psTable->psNext = *ppsCSVTableList;
136 :
137 146 : *ppsCSVTableList = psTable;
138 :
139 : /* -------------------------------------------------------------------- */
140 : /* Read the table header record containing the field names. */
141 : /* -------------------------------------------------------------------- */
142 146 : psTable->papszFieldNames = CSVReadParseLine( fp );
143 :
144 146 : return( psTable );
145 : }
146 :
147 : /************************************************************************/
148 : /* CSVDeaccess() */
149 : /************************************************************************/
150 :
151 1612 : void CSVDeaccess( const char * pszFilename )
152 :
153 : {
154 : CSVTable *psLast, *psTable;
155 :
156 : /* -------------------------------------------------------------------- */
157 : /* Fetch the table, and allocate the thread-local pointer to it */
158 : /* if there isn't already one. */
159 : /* -------------------------------------------------------------------- */
160 : CSVTable **ppsCSVTableList;
161 :
162 1612 : ppsCSVTableList = (CSVTable **) CPLGetTLS( CTLS_CSVTABLEPTR );
163 1612 : if( ppsCSVTableList == NULL )
164 1391 : return;
165 :
166 : /* -------------------------------------------------------------------- */
167 : /* A NULL means deaccess all tables. */
168 : /* -------------------------------------------------------------------- */
169 221 : if( pszFilename == NULL )
170 : {
171 309 : while( *ppsCSVTableList != NULL )
172 133 : CSVDeaccess( (*ppsCSVTableList)->pszFilename );
173 :
174 88 : return;
175 : }
176 :
177 : /* -------------------------------------------------------------------- */
178 : /* Find this table. */
179 : /* -------------------------------------------------------------------- */
180 133 : psLast = NULL;
181 133 : for( psTable = *ppsCSVTableList;
182 : psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename);
183 : psTable = psTable->psNext )
184 : {
185 0 : psLast = psTable;
186 : }
187 :
188 133 : if( psTable == NULL )
189 : {
190 0 : CPLDebug( "CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename );
191 0 : return;
192 : }
193 :
194 : /* -------------------------------------------------------------------- */
195 : /* Remove the link from the list. */
196 : /* -------------------------------------------------------------------- */
197 133 : if( psLast != NULL )
198 0 : psLast->psNext = psTable->psNext;
199 : else
200 133 : *ppsCSVTableList = psTable->psNext;
201 :
202 : /* -------------------------------------------------------------------- */
203 : /* Free the table. */
204 : /* -------------------------------------------------------------------- */
205 133 : if( psTable->fp != NULL )
206 0 : VSIFClose( psTable->fp );
207 :
208 133 : CSLDestroy( psTable->papszFieldNames );
209 133 : CSLDestroy( psTable->papszRecFields );
210 133 : CPLFree( psTable->pszFilename );
211 133 : CPLFree( psTable->panLineIndex );
212 133 : CPLFree( psTable->pszRawData );
213 133 : CPLFree( psTable->papszLines );
214 :
215 133 : CPLFree( psTable );
216 :
217 133 : CPLReadLine( NULL );
218 : }
219 :
220 : /************************************************************************/
221 : /* CSVSplitLine() */
222 : /* */
223 : /* Tokenize a CSV line into fields in the form of a string */
224 : /* list. This is used instead of the CPLTokenizeString() */
225 : /* because it provides correct CSV escaping and quoting */
226 : /* semantics. */
227 : /************************************************************************/
228 :
229 81005 : static char **CSVSplitLine( const char *pszString, char chDelimiter )
230 :
231 : {
232 81005 : char **papszRetList = NULL;
233 : char *pszToken;
234 : int nTokenMax, nTokenLen;
235 :
236 81005 : pszToken = (char *) CPLCalloc(10,1);
237 81005 : nTokenMax = 10;
238 :
239 1586303 : while( pszString != NULL && *pszString != '\0' )
240 : {
241 1424293 : int bInString = FALSE;
242 :
243 1424293 : nTokenLen = 0;
244 :
245 : /* Try to find the next delimeter, marking end of token */
246 15556609 : for( ; *pszString != '\0'; pszString++ )
247 : {
248 :
249 : /* End if this is a delimeter skip it and break. */
250 15516335 : if( !bInString && *pszString == chDelimiter )
251 : {
252 1384019 : pszString++;
253 1384019 : break;
254 : }
255 :
256 14132316 : if( *pszString == '"' )
257 : {
258 333099 : if( !bInString || pszString[1] != '"' )
259 : {
260 302626 : bInString = !bInString;
261 302626 : continue;
262 : }
263 : else /* doubled quotes in string resolve to one quote */
264 : {
265 30473 : pszString++;
266 : }
267 : }
268 :
269 13829690 : if( nTokenLen >= nTokenMax-2 )
270 : {
271 153132 : nTokenMax = nTokenMax * 2 + 10;
272 153132 : pszToken = (char *) CPLRealloc( pszToken, nTokenMax );
273 : }
274 :
275 13829690 : pszToken[nTokenLen] = *pszString;
276 13829690 : nTokenLen++;
277 : }
278 :
279 1424293 : pszToken[nTokenLen] = '\0';
280 1424293 : papszRetList = CSLAddString( papszRetList, pszToken );
281 :
282 : /* If the last token is an empty token, then we have to catch
283 : * it now, otherwise we won't reenter the loop and it will be lost.
284 : */
285 1424293 : if ( *pszString == '\0' && *(pszString-1) == chDelimiter )
286 : {
287 40731 : papszRetList = CSLAddString( papszRetList, "" );
288 : }
289 : }
290 :
291 81005 : if( papszRetList == NULL )
292 0 : papszRetList = (char **) CPLCalloc(sizeof(char *),1);
293 :
294 81005 : CPLFree( pszToken );
295 :
296 81005 : return papszRetList;
297 : }
298 :
299 : /************************************************************************/
300 : /* CSVFindNextLine() */
301 : /* */
302 : /* Find the start of the next line, while at the same time zero */
303 : /* terminating this line. Take into account that there may be */
304 : /* newline indicators within quoted strings, and that quotes */
305 : /* can be escaped with a backslash. */
306 : /************************************************************************/
307 :
static char *CSVFindNextLine( char *pszThisLine )

{
    enum { CHAR_LF = 10, CHAR_CR = 13 };

    char *pszCur = pszThisLine;
    int   bInsideQuotes = 0;

    /* Advance to the first line terminator that is not inside a quoted */
    /* string.  A quote preceded by a backslash does not toggle quoting. */
    for( ; *pszCur != '\0'; pszCur++ )
    {
        if( *pszCur == '"' )
        {
            if( pszCur == pszThisLine || pszCur[-1] != '\\' )
                bInsideQuotes = !bInsideQuotes;
        }
        else if( !bInsideQuotes
                 && (*pszCur == CHAR_LF || *pszCur == CHAR_CR) )
        {
            break;
        }
    }

    /* Overwrite the whole run of CR/LF characters with terminators. */
    while( *pszCur == CHAR_LF || *pszCur == CHAR_CR )
        *pszCur++ = '\0';

    /* Nothing after the terminators means there is no next line. */
    return ( *pszCur == '\0' ) ? NULL : pszCur;
}
332 :
333 : /************************************************************************/
334 : /* CSVIngest() */
335 : /* */
336 : /* Load entire file into memory and setup index if possible. */
337 : /************************************************************************/
338 :
339 378961 : static void CSVIngest( const char *pszFilename )
340 :
341 : {
342 378961 : CSVTable *psTable = CSVAccess( pszFilename );
343 378961 : int nFileLen, i, nMaxLineCount, iLine = 0;
344 : char *pszThisLine;
345 :
346 378961 : if( psTable->pszRawData != NULL )
347 378815 : return;
348 :
349 : /* -------------------------------------------------------------------- */
350 : /* Ingest whole file. */
351 : /* -------------------------------------------------------------------- */
352 146 : VSIFSeek( psTable->fp, 0, SEEK_END );
353 146 : nFileLen = VSIFTell( psTable->fp );
354 146 : VSIRewind( psTable->fp );
355 :
356 146 : psTable->pszRawData = (char *) CPLMalloc(nFileLen+1);
357 146 : if( (int) VSIFRead( psTable->pszRawData, 1, nFileLen, psTable->fp )
358 : != nFileLen )
359 : {
360 0 : CPLFree( psTable->pszRawData );
361 0 : psTable->pszRawData = NULL;
362 :
363 : CPLError( CE_Failure, CPLE_FileIO, "Read of file %s failed.",
364 0 : psTable->pszFilename );
365 0 : return;
366 : }
367 :
368 146 : psTable->pszRawData[nFileLen] = '\0';
369 :
370 : /* -------------------------------------------------------------------- */
371 : /* Get count of newlines so we can allocate line array. */
372 : /* -------------------------------------------------------------------- */
373 146 : nMaxLineCount = 0;
374 8260803 : for( i = 0; i < nFileLen; i++ )
375 : {
376 8260657 : if( psTable->pszRawData[i] == 10 )
377 58768 : nMaxLineCount++;
378 : }
379 :
380 146 : psTable->papszLines = (char **) CPLCalloc(sizeof(char*),nMaxLineCount);
381 :
382 : /* -------------------------------------------------------------------- */
383 : /* Build a list of record pointers into the raw data buffer */
384 : /* based on line terminators. Zero terminate the line */
385 : /* strings. */
386 : /* -------------------------------------------------------------------- */
387 : /* skip header line */
388 146 : pszThisLine = CSVFindNextLine( psTable->pszRawData );
389 :
390 58886 : while( pszThisLine != NULL && iLine < nMaxLineCount )
391 : {
392 58594 : psTable->papszLines[iLine++] = pszThisLine;
393 58594 : pszThisLine = CSVFindNextLine( pszThisLine );
394 : }
395 :
396 146 : psTable->nLineCount = iLine;
397 :
398 : /* -------------------------------------------------------------------- */
399 : /* Allocate and populate index array. Ensure they are in */
400 : /* ascending order so that binary searches can be done on the */
401 : /* array. */
402 : /* -------------------------------------------------------------------- */
403 146 : psTable->panLineIndex = (int *) CPLMalloc(sizeof(int)*psTable->nLineCount);
404 58740 : for( i = 0; i < psTable->nLineCount; i++ )
405 : {
406 58594 : psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);
407 :
408 58594 : if( i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i-1] )
409 : {
410 0 : CPLFree( psTable->panLineIndex );
411 0 : psTable->panLineIndex = NULL;
412 0 : break;
413 : }
414 : }
415 :
416 146 : psTable->iLastLine = -1;
417 :
418 : /* -------------------------------------------------------------------- */
419 : /* We should never need the file handle against, so close it. */
420 : /* -------------------------------------------------------------------- */
421 146 : VSIFClose( psTable->fp );
422 146 : psTable->fp = NULL;
423 : }
424 :
425 : /************************************************************************/
426 : /* CSVDetectSeperator() */
427 : /************************************************************************/
428 :
429 : /** Detect which field separator is used.
430 : *
431 : * Currently, it can detect comma, semicolon or tabulation. In case of
432 : * ambiguity or no separator found, comma will be considered as the separator.
433 : *
434 : * @return ',', ';' or '\t'
435 : */
436 53 : char CSVDetectSeperator (const char* pszLine)
437 : {
438 53 : int bInString = FALSE;
439 53 : char chDelimiter = '\0';
440 :
441 1973 : for( ; *pszLine != '\0'; pszLine++ )
442 : {
443 2088 : if( !bInString && (*pszLine == ',' || *pszLine == ';' || *pszLine == '\t'))
444 : {
445 168 : if (chDelimiter == '\0')
446 53 : chDelimiter = *pszLine;
447 115 : else if (chDelimiter != *pszLine)
448 : {
449 : /* The separator is not consistant on the line. */
450 : CPLDebug("CSV", "Inconsistant separator. '%c' and '%c' found. Using ',' as default",
451 0 : chDelimiter, *pszLine);
452 0 : chDelimiter = ',';
453 0 : break;
454 : }
455 : }
456 1752 : else if( *pszLine == '"' )
457 : {
458 104 : if( !bInString || pszLine[1] != '"' )
459 : {
460 104 : bInString = !bInString;
461 104 : continue;
462 : }
463 : else /* doubled quotes in string resolve to one quote */
464 : {
465 0 : pszLine++;
466 : }
467 : }
468 : }
469 :
470 53 : if (chDelimiter == '\0')
471 0 : chDelimiter = ',';
472 :
473 53 : return chDelimiter;
474 : }
475 :
476 : /************************************************************************/
477 : /* CSVReadParseLine() */
478 : /* */
479 : /* Read one line, and return split into fields. The return */
480 : /* result is a stringlist, in the sense of the CSL functions. */
481 : /************************************************************************/
482 :
483 44791 : char **CSVReadParseLine( FILE * fp )
484 : {
485 44791 : return CSVReadParseLine2(fp, ',');
486 : }
487 :
488 45264 : char **CSVReadParseLine2( FILE * fp, char chDelimiter )
489 :
490 : {
491 : const char *pszLine;
492 : char *pszWorkLine;
493 : char **papszReturn;
494 :
495 45264 : CPLAssert( fp != NULL );
496 45264 : if( fp == NULL )
497 0 : return( NULL );
498 :
499 45264 : pszLine = CPLReadLine( fp );
500 45264 : if( pszLine == NULL )
501 129 : return( NULL );
502 :
503 : /* -------------------------------------------------------------------- */
504 : /* If there are no quotes, then this is the simple case. */
505 : /* Parse, and return tokens. */
506 : /* -------------------------------------------------------------------- */
507 45135 : if( strchr(pszLine,'\"') == NULL )
508 1135 : return CSVSplitLine( pszLine, chDelimiter );
509 :
510 : /* -------------------------------------------------------------------- */
511 : /* We must now count the quotes in our working string, and as */
512 : /* long as it is odd, keep adding new lines. */
513 : /* -------------------------------------------------------------------- */
514 44000 : pszWorkLine = CPLStrdup( pszLine );
515 :
516 44000 : int i = 0, nCount = 0;
517 44000 : int nWorkLineLength = strlen(pszWorkLine);
518 :
519 695 : while( TRUE )
520 : {
521 11865166 : for( ; pszWorkLine[i] != '\0'; i++ )
522 : {
523 11820471 : if( pszWorkLine[i] == '\"'
524 : && (i == 0 || pszWorkLine[i-1] != '\\') )
525 335316 : nCount++;
526 : }
527 :
528 44695 : if( nCount % 2 == 0 )
529 44000 : break;
530 :
531 695 : pszLine = CPLReadLine( fp );
532 695 : if( pszLine == NULL )
533 0 : break;
534 :
535 695 : int nLineLen = strlen(pszLine);
536 :
537 : char* pszWorkLineTmp = (char *)
538 : VSIRealloc(pszWorkLine,
539 695 : nWorkLineLength + nLineLen + 2);
540 695 : if (pszWorkLineTmp == NULL)
541 0 : break;
542 695 : pszWorkLine = pszWorkLineTmp;
543 695 : strcat( pszWorkLine + nWorkLineLength, "\n" ); // This gets lost in CPLReadLine().
544 695 : strcat( pszWorkLine + nWorkLineLength, pszLine );
545 :
546 695 : nWorkLineLength += nLineLen + 1;
547 : }
548 :
549 44000 : papszReturn = CSVSplitLine( pszWorkLine, chDelimiter );
550 :
551 44000 : CPLFree( pszWorkLine );
552 :
553 44000 : return papszReturn;
554 : }
555 :
556 : /************************************************************************/
557 : /* CSVCompare() */
558 : /* */
559 : /* Compare a field to a search value using a particular */
560 : /* criteria. */
561 : /************************************************************************/
562 :
563 : static int CSVCompare( const char * pszFieldValue, const char * pszTarget,
564 339377 : CSVCompareCriteria eCriteria )
565 :
566 : {
567 339377 : if( eCriteria == CC_ExactString )
568 : {
569 0 : return( strcmp( pszFieldValue, pszTarget ) == 0 );
570 : }
571 339377 : else if( eCriteria == CC_ApproxString )
572 : {
573 93 : return( EQUAL( pszFieldValue, pszTarget ) );
574 : }
575 339284 : else if( eCriteria == CC_Integer )
576 : {
577 339284 : return( atoi(pszFieldValue) == atoi(pszTarget) );
578 : }
579 :
580 0 : return FALSE;
581 : }
582 :
583 : /************************************************************************/
584 : /* CSVScanLines() */
585 : /* */
586 : /* Read the file scanline for lines where the key field equals */
587 : /* the indicated value with the suggested comparison criteria. */
588 : /* Return the first matching line split into fields. */
589 : /************************************************************************/
590 :
591 : char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue,
592 0 : CSVCompareCriteria eCriteria )
593 :
594 : {
595 0 : char **papszFields = NULL;
596 0 : int bSelected = FALSE, nTestValue;
597 :
598 0 : CPLAssert( pszValue != NULL );
599 0 : CPLAssert( iKeyField >= 0 );
600 0 : CPLAssert( fp != NULL );
601 :
602 0 : nTestValue = atoi(pszValue);
603 :
604 0 : while( !bSelected ) {
605 0 : papszFields = CSVReadParseLine( fp );
606 0 : if( papszFields == NULL )
607 0 : return( NULL );
608 :
609 0 : if( CSLCount( papszFields ) < iKeyField+1 )
610 : {
611 : /* not selected */
612 : }
613 0 : else if( eCriteria == CC_Integer
614 : && atoi(papszFields[iKeyField]) == nTestValue )
615 : {
616 0 : bSelected = TRUE;
617 : }
618 : else
619 : {
620 : bSelected = CSVCompare( papszFields[iKeyField], pszValue,
621 0 : eCriteria );
622 : }
623 :
624 0 : if( !bSelected )
625 : {
626 0 : CSLDestroy( papszFields );
627 0 : papszFields = NULL;
628 : }
629 : }
630 :
631 0 : return( papszFields );
632 : }
633 :
634 : /************************************************************************/
635 : /* CSVScanLinesIndexed() */
636 : /* */
637 : /* Read the file scanline for lines where the key field equals */
638 : /* the indicated value with the suggested comparison criteria. */
639 : /* Return the first matching line split into fields. */
640 : /************************************************************************/
641 :
642 : static char **
643 70802 : CSVScanLinesIndexed( CSVTable *psTable, int nKeyValue )
644 :
645 : {
646 70802 : int iTop, iBottom, iMiddle, iResult = -1;
647 :
648 70802 : CPLAssert( psTable->panLineIndex != NULL );
649 :
650 : /* -------------------------------------------------------------------- */
651 : /* Find target record with binary search. */
652 : /* -------------------------------------------------------------------- */
653 70802 : iTop = psTable->nLineCount-1;
654 70802 : iBottom = 0;
655 :
656 505862 : while( iTop >= iBottom )
657 : {
658 395507 : iMiddle = (iTop + iBottom) / 2;
659 395507 : if( psTable->panLineIndex[iMiddle] > nKeyValue )
660 126836 : iTop = iMiddle - 1;
661 268671 : else if( psTable->panLineIndex[iMiddle] < nKeyValue )
662 237422 : iBottom = iMiddle + 1;
663 : else
664 : {
665 31249 : iResult = iMiddle;
666 : // if a key is not unique, select the first instance of it.
667 63560 : while( iResult > 0
668 : && psTable->panLineIndex[iResult-1] == nKeyValue )
669 : {
670 1062 : psTable->bNonUniqueKey = TRUE;
671 1062 : iResult--;
672 : }
673 31249 : break;
674 : }
675 : }
676 :
677 70802 : if( iResult == -1 )
678 39553 : return NULL;
679 :
680 : /* -------------------------------------------------------------------- */
681 : /* Parse target line, and update iLastLine indicator. */
682 : /* -------------------------------------------------------------------- */
683 31249 : psTable->iLastLine = iResult;
684 :
685 31249 : return CSVSplitLine( psTable->papszLines[iResult], ',' );
686 : }
687 :
688 : /************************************************************************/
689 : /* CSVScanLinesIngested() */
690 : /* */
691 : /* Read the file scanline for lines where the key field equals */
692 : /* the indicated value with the suggested comparison criteria. */
693 : /* Return the first matching line split into fields. */
694 : /************************************************************************/
695 :
696 : static char **
697 : CSVScanLinesIngested( CSVTable *psTable, int iKeyField, const char * pszValue,
698 70804 : CSVCompareCriteria eCriteria )
699 :
700 : {
701 70804 : char **papszFields = NULL;
702 70804 : int bSelected = FALSE, nTestValue;
703 :
704 70804 : CPLAssert( pszValue != NULL );
705 70804 : CPLAssert( iKeyField >= 0 );
706 :
707 70804 : nTestValue = atoi(pszValue);
708 :
709 : /* -------------------------------------------------------------------- */
710 : /* Short cut for indexed files. */
711 : /* -------------------------------------------------------------------- */
712 70804 : if( iKeyField == 0 && eCriteria == CC_Integer
713 : && psTable->panLineIndex != NULL )
714 70802 : return CSVScanLinesIndexed( psTable, nTestValue );
715 :
716 : /* -------------------------------------------------------------------- */
717 : /* Scan from in-core lines. */
718 : /* -------------------------------------------------------------------- */
719 75 : while( !bSelected && psTable->iLastLine+1 < psTable->nLineCount ) {
720 71 : psTable->iLastLine++;
721 71 : papszFields = CSVSplitLine( psTable->papszLines[psTable->iLastLine], ',' );
722 :
723 71 : if( CSLCount( papszFields ) < iKeyField+1 )
724 : {
725 : /* not selected */
726 : }
727 71 : else if( eCriteria == CC_Integer
728 : && atoi(papszFields[iKeyField]) == nTestValue )
729 : {
730 0 : bSelected = TRUE;
731 : }
732 : else
733 : {
734 : bSelected = CSVCompare( papszFields[iKeyField], pszValue,
735 71 : eCriteria );
736 : }
737 :
738 71 : if( !bSelected )
739 : {
740 69 : CSLDestroy( papszFields );
741 69 : papszFields = NULL;
742 : }
743 : }
744 :
745 2 : return( papszFields );
746 : }
747 :
748 : /************************************************************************/
749 : /* CSVGetNextLine() */
750 : /* */
751 : /* Fetch the next line of a CSV file based on a passed in */
752 : /* filename. Returns NULL at end of file, or if file is not */
753 : /* really established. */
754 : /************************************************************************/
755 :
756 4550 : char **CSVGetNextLine( const char *pszFilename )
757 :
758 : {
759 : CSVTable *psTable;
760 :
761 : /* -------------------------------------------------------------------- */
762 : /* Get access to the table. */
763 : /* -------------------------------------------------------------------- */
764 4550 : CPLAssert( pszFilename != NULL );
765 :
766 4550 : psTable = CSVAccess( pszFilename );
767 4550 : if( psTable == NULL )
768 0 : return NULL;
769 :
770 : /* -------------------------------------------------------------------- */
771 : /* If we use CSVGetNextLine() we can pretty much assume we have */
772 : /* a non-unique key. */
773 : /* -------------------------------------------------------------------- */
774 4550 : psTable->bNonUniqueKey = TRUE;
775 :
776 : /* -------------------------------------------------------------------- */
777 : /* Do we have a next line available? This only works for */
778 : /* ingested tables I believe. */
779 : /* -------------------------------------------------------------------- */
780 4550 : if( psTable->iLastLine+1 >= psTable->nLineCount )
781 0 : return NULL;
782 :
783 4550 : psTable->iLastLine++;
784 4550 : CSLDestroy( psTable->papszRecFields );
785 : psTable->papszRecFields =
786 4550 : CSVSplitLine( psTable->papszLines[psTable->iLastLine], ',' );
787 :
788 4550 : return psTable->papszRecFields;
789 : }
790 :
791 : /************************************************************************/
792 : /* CSVScanFile() */
793 : /* */
794 : /* Scan a whole file using criteria similar to above, but also */
795 : /* taking care of file opening and closing. */
796 : /************************************************************************/
797 :
798 : char **CSVScanFile( const char * pszFilename, int iKeyField,
799 378961 : const char * pszValue, CSVCompareCriteria eCriteria )
800 :
801 : {
802 : CSVTable *psTable;
803 :
804 : /* -------------------------------------------------------------------- */
805 : /* Get access to the table. */
806 : /* -------------------------------------------------------------------- */
807 378961 : CPLAssert( pszFilename != NULL );
808 :
809 378961 : if( iKeyField < 0 )
810 0 : return NULL;
811 :
812 378961 : psTable = CSVAccess( pszFilename );
813 378961 : if( psTable == NULL )
814 0 : return NULL;
815 :
816 378961 : CSVIngest( pszFilename );
817 :
818 : /* -------------------------------------------------------------------- */
819 : /* Does the current record match the criteria? If so, just */
820 : /* return it again. */
821 : /* -------------------------------------------------------------------- */
822 378961 : if( iKeyField >= 0
823 : && iKeyField < CSLCount(psTable->papszRecFields)
824 : && CSVCompare(pszValue,psTable->papszRecFields[iKeyField],eCriteria)
825 : && !psTable->bNonUniqueKey )
826 : {
827 308157 : return psTable->papszRecFields;
828 : }
829 :
830 : /* -------------------------------------------------------------------- */
831 : /* Scan the file from the beginning, replacing the ``current */
832 : /* record'' in our structure with the one that is found. */
833 : /* -------------------------------------------------------------------- */
834 70804 : psTable->iLastLine = -1;
835 70804 : CSLDestroy( psTable->papszRecFields );
836 :
837 70804 : if( psTable->pszRawData != NULL )
838 : psTable->papszRecFields =
839 70804 : CSVScanLinesIngested( psTable, iKeyField, pszValue, eCriteria );
840 : else
841 : {
842 0 : VSIRewind( psTable->fp );
843 0 : CPLReadLine( psTable->fp ); /* throw away the header line */
844 :
845 : psTable->papszRecFields =
846 0 : CSVScanLines( psTable->fp, iKeyField, pszValue, eCriteria );
847 : }
848 :
849 70804 : return( psTable->papszRecFields );
850 : }
851 :
852 : /************************************************************************/
853 : /* CSVGetFieldId() */
854 : /* */
855 : /* Read the first record of a CSV file (rewinding to be sure), */
856 : /* and find the field with the indicated name. Returns -1 if */
857 : /* it fails to find the field name. Comparison is case */
858 : /* insensitive, but otherwise exact. After this function has */
859 : /* been called the file pointer will be positioned just after */
860 : /* the first record. */
861 : /************************************************************************/
862 :
863 0 : int CSVGetFieldId( FILE * fp, const char * pszFieldName )
864 :
865 : {
866 : char **papszFields;
867 : int i;
868 :
869 0 : CPLAssert( fp != NULL && pszFieldName != NULL );
870 :
871 0 : VSIRewind( fp );
872 :
873 0 : papszFields = CSVReadParseLine( fp );
874 0 : for( i = 0; papszFields != NULL && papszFields[i] != NULL; i++ )
875 : {
876 0 : if( EQUAL(papszFields[i],pszFieldName) )
877 : {
878 0 : CSLDestroy( papszFields );
879 0 : return i;
880 : }
881 : }
882 :
883 0 : CSLDestroy( papszFields );
884 :
885 0 : return -1;
886 : }
887 :
888 : /************************************************************************/
889 : /* CSVGetFileFieldId() */
890 : /* */
891 : /* Same as CSVGetFieldId(), except that we get the file based */
892 : /* on filename, rather than having an existing handle. */
893 : /************************************************************************/
894 :
895 770219 : int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName )
896 :
897 : {
898 : CSVTable *psTable;
899 : int i;
900 :
901 : /* -------------------------------------------------------------------- */
902 : /* Get access to the table. */
903 : /* -------------------------------------------------------------------- */
904 770219 : CPLAssert( pszFilename != NULL );
905 :
906 770219 : psTable = CSVAccess( pszFilename );
907 770219 : if( psTable == NULL )
908 0 : return -1;
909 :
910 : /* -------------------------------------------------------------------- */
911 : /* Find the requested field. */
912 : /* -------------------------------------------------------------------- */
913 4837596 : for( i = 0;
914 : psTable->papszFieldNames != NULL
915 : && psTable->papszFieldNames[i] != NULL;
916 : i++ )
917 : {
918 4830501 : if( EQUAL(psTable->papszFieldNames[i],pszFieldName) )
919 : {
920 763124 : return i;
921 : }
922 : }
923 :
924 7095 : return -1;
925 : }
926 :
927 :
928 : /************************************************************************/
929 : /* CSVScanFileByName() */
930 : /* */
931 : /* Same as CSVScanFile(), but using a field name instead of a */
932 : /* field number. */
933 : /************************************************************************/
934 :
935 : char **CSVScanFileByName( const char * pszFilename,
936 : const char * pszKeyFieldName,
937 378961 : const char * pszValue, CSVCompareCriteria eCriteria )
938 :
939 : {
940 : int iKeyField;
941 :
942 378961 : iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName );
943 378961 : if( iKeyField == -1 )
944 0 : return NULL;
945 :
946 378961 : return( CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria ) );
947 : }
948 :
949 : /************************************************************************/
950 : /* CSVGetField() */
951 : /* */
952 : /* The all-in-one function to fetch a particular field value */
953 : /* from a CSV file. Note this function will return an empty */
954 : /* string, rather than NULL if it fails to find the desired */
955 : /* value for some reason. The caller can't establish that the */
956 : /* fetch failed. */
957 : /************************************************************************/
958 :
959 : const char *CSVGetField( const char * pszFilename,
960 : const char * pszKeyFieldName,
961 : const char * pszKeyFieldValue,
962 : CSVCompareCriteria eCriteria,
963 337858 : const char * pszTargetField )
964 :
965 : {
966 : CSVTable *psTable;
967 : char **papszRecord;
968 : int iTargetField;
969 :
970 : /* -------------------------------------------------------------------- */
971 : /* Find the table. */
972 : /* -------------------------------------------------------------------- */
973 337858 : psTable = CSVAccess( pszFilename );
974 337858 : if( psTable == NULL )
975 0 : return "";
976 :
977 : /* -------------------------------------------------------------------- */
978 : /* Find the correct record. */
979 : /* -------------------------------------------------------------------- */
980 : papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName,
981 337858 : pszKeyFieldValue, eCriteria );
982 :
983 337858 : if( papszRecord == NULL )
984 30498 : return "";
985 :
986 : /* -------------------------------------------------------------------- */
987 : /* Figure out which field we want out of this. */
988 : /* -------------------------------------------------------------------- */
989 307360 : iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField );
990 307360 : if( iTargetField < 0 )
991 0 : return "";
992 :
993 307360 : if( iTargetField >= CSLCount( papszRecord ) )
994 0 : return "";
995 :
996 307360 : return( papszRecord[iTargetField] );
997 : }
998 :
999 : /************************************************************************/
1000 : /* GDALDefaultCSVFilename() */
1001 : /************************************************************************/
1002 :
1003 : typedef struct
1004 : {
1005 : char szPath[512];
1006 : int bCSVFinderInitialized;
1007 : } DefaultCSVFileNameTLS;
1008 :
1009 :
1010 161348 : const char * GDALDefaultCSVFilename( const char *pszBasename )
1011 :
1012 : {
1013 : /* -------------------------------------------------------------------- */
1014 : /* Do we already have this file accessed? If so, just return */
1015 : /* the existing path without any further probing. */
1016 : /* -------------------------------------------------------------------- */
1017 : CSVTable **ppsCSVTableList;
1018 :
1019 161348 : ppsCSVTableList = (CSVTable **) CPLGetTLS( CTLS_CSVTABLEPTR );
1020 161348 : if( ppsCSVTableList != NULL )
1021 : {
1022 : CSVTable *psTable;
1023 161260 : int nBasenameLen = strlen(pszBasename);
1024 :
1025 898005 : for( psTable = *ppsCSVTableList;
1026 : psTable != NULL;
1027 : psTable = psTable->psNext )
1028 : {
1029 897880 : int nFullLen = strlen(psTable->pszFilename);
1030 :
1031 897880 : if( nFullLen > nBasenameLen
1032 : && strcmp(psTable->pszFilename+nFullLen-nBasenameLen,
1033 : pszBasename) == 0
1034 : && strchr("/\\",psTable->pszFilename[+nFullLen-nBasenameLen-1])
1035 : != NULL )
1036 : {
1037 161135 : return psTable->pszFilename;
1038 : }
1039 : }
1040 : }
1041 :
1042 : /* -------------------------------------------------------------------- */
1043 : /* Otherwise we need to look harder for it. */
1044 : /* -------------------------------------------------------------------- */
1045 : DefaultCSVFileNameTLS* pTLSData =
1046 213 : (DefaultCSVFileNameTLS *) CPLGetTLS( CTLS_CSVDEFAULTFILENAME );
1047 213 : if (pTLSData == NULL)
1048 : {
1049 83 : pTLSData = (DefaultCSVFileNameTLS*) CPLCalloc(1, sizeof(DefaultCSVFileNameTLS));
1050 83 : CPLSetTLS( CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE );
1051 : }
1052 :
1053 213 : FILE *fp = NULL;
1054 : const char *pszResult;
1055 :
1056 213 : pszResult = CPLFindFile( "epsg_csv", pszBasename );
1057 :
1058 213 : if( pszResult != NULL )
1059 209 : return pszResult;
1060 :
1061 4 : if( !pTLSData->bCSVFinderInitialized )
1062 : {
1063 1 : pTLSData->bCSVFinderInitialized = TRUE;
1064 :
1065 1 : if( CPLGetConfigOption("GEOTIFF_CSV",NULL) != NULL )
1066 0 : CPLPushFinderLocation( CPLGetConfigOption("GEOTIFF_CSV",NULL));
1067 :
1068 1 : if( CPLGetConfigOption("GDAL_DATA",NULL) != NULL )
1069 1 : CPLPushFinderLocation( CPLGetConfigOption("GDAL_DATA",NULL) );
1070 :
1071 1 : pszResult = CPLFindFile( "epsg_csv", pszBasename );
1072 :
1073 1 : if( pszResult != NULL )
1074 0 : return pszResult;
1075 : }
1076 :
1077 4 : if( (fp = fopen( "csv/horiz_cs.csv", "rt" )) != NULL )
1078 : {
1079 0 : strcpy( pTLSData->szPath, "csv/" );
1080 0 : CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
1081 : }
1082 : else
1083 : {
1084 : #ifdef GDAL_PREFIX
1085 : #ifdef MACOSX_FRAMEWORK
1086 : strcpy( pTLSData->szPath, GDAL_PREFIX "/Resources/epsg_csv/" );
1087 : CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
1088 : #else
1089 4 : strcpy( pTLSData->szPath, GDAL_PREFIX "/share/epsg_csv/" );
1090 4 : CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
1091 : #endif
1092 : #else
1093 : strcpy( pTLSData->szPath, "/usr/local/share/epsg_csv/" );
1094 : CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
1095 : #endif
1096 4 : if( (fp = fopen( pTLSData->szPath, "rt" )) == NULL )
1097 4 : CPLStrlcpy( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
1098 : }
1099 :
1100 4 : if( fp != NULL )
1101 0 : fclose( fp );
1102 :
1103 4 : return( pTLSData->szPath );
1104 : }
1105 :
1106 : /************************************************************************/
1107 : /* CSVFilename() */
1108 : /* */
1109 : /* Return the full path to a particular CSV file. This will */
1110 : /* eventually be something the application can override. */
1111 : /************************************************************************/
1112 :
1113 : CPL_C_START
1114 : static const char *(*pfnCSVFilenameHook)(const char *) = NULL;
1115 : CPL_C_END
1116 :
1117 161348 : const char * CSVFilename( const char *pszBasename )
1118 :
1119 : {
1120 161348 : if( pfnCSVFilenameHook == NULL )
1121 158089 : return GDALDefaultCSVFilename( pszBasename );
1122 : else
1123 3259 : return( pfnCSVFilenameHook( pszBasename ) );
1124 : }
1125 :
1126 : /************************************************************************/
1127 : /* SetCSVFilenameHook() */
1128 : /* */
1129 : /* Applications can use this to set a function that will */
1130 : /* massage CSV filenames. */
1131 : /************************************************************************/
1132 :
1133 : /**
1134 : * Override CSV file search method.
1135 : *
1136 : * @param CSVFileOverride The pointer to a function which will return the
1137 : * full path for a given filename.
1138 : *
1139 :
1140 : This function allows an application to override how the GTIFGetDefn() and related function find the CSV (Comma Separated
1141 : Value) values required. The pfnHook argument should be a pointer to a function that will take in a CSV filename and return a
1142 : full path to the file. The returned string should be to an internal static buffer so that the caller doesn't have to free the result.
1143 :
1144 : <b>Example:</b><br>
1145 :
1146 : The listgeo utility uses the following override function if the user
1147 : specified a CSV file directory with the -t commandline switch (argument
1148 : put into CSVDirName). <p>
1149 :
1150 : <pre>
1151 :
1152 : ...
1153 :
1154 :
1155 : SetCSVFilenameHook( CSVFileOverride );
1156 :
1157 : ...
1158 :
1159 :
1160 : static const char *CSVFileOverride( const char * pszInput )
1161 :
1162 : {
1163 : static char szPath[1024];
1164 :
1165 : #ifdef WIN32
1166 : sprintf( szPath, "%s\\%s", CSVDirName, pszInput );
1167 : #else
1168 : sprintf( szPath, "%s/%s", CSVDirName, pszInput );
1169 : #endif
1170 :
1171 : return( szPath );
1172 : }
1173 : </pre>
1174 :
1175 : */
1176 :
1177 : CPL_C_START
1178 359 : void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) )
1179 :
1180 : {
1181 359 : pfnCSVFilenameHook = pfnNewHook;
1182 359 : }
1183 : CPL_C_END
1184 :
|