1 : /******************************************************************************
2 : * $Id: cpl_csv.cpp 21102 2010-11-08 20:47:38Z rouault $
3 : *
4 : * Project: CPL - Common Portability Library
5 : * Purpose: CSV (comma separated value) file access.
6 : * Author: Frank Warmerdam, warmerdam@pobox.com
7 : *
8 : ******************************************************************************
9 : * Copyright (c) 1999, Frank Warmerdam
10 : *
11 : * Permission is hereby granted, free of charge, to any person obtaining a
12 : * copy of this software and associated documentation files (the "Software"),
13 : * to deal in the Software without restriction, including without limitation
14 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 : * and/or sell copies of the Software, and to permit persons to whom the
16 : * Software is furnished to do so, subject to the following conditions:
17 : *
18 : * The above copyright notice and this permission notice shall be included
19 : * in all copies or substantial portions of the Software.
20 : *
21 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 : * DEALINGS IN THE SOFTWARE.
28 : ****************************************************************************/
29 :
30 : #include "cpl_csv.h"
31 : #include "cpl_conv.h"
32 : #include "cpl_multiproc.h"
33 : #include "gdal_csv.h"
34 :
35 : CPL_CVSID("$Id: cpl_csv.cpp 21102 2010-11-08 20:47:38Z rouault $");
36 :
37 : /* ==================================================================== */
38 : /* The CSVTable is a persistent set of info about an open CSV */
39 : /* table. While it doesn't currently maintain a record index, */
40 : /* or in-memory copy of the table, it could be changed to do so */
41 : /* in the future. */
42 : /* ==================================================================== */
typedef struct ctb CSVTable;

/* One entry of the per-thread list of open CSV tables. */
struct ctb {
    /* Open handle on the file; reset to NULL once the file was ingested. */
    FILE       *fp;

    /* Next table in the singly linked list (NULL at the tail). */
    struct ctb *psNext;

    /* Name the table was opened with; used as the lookup key. */
    char       *pszFilename;

    /* Header record split into field names. */
    char      **papszFieldNames;

    /* Fields of the most recently returned record (owned by the table). */
    char      **papszRecFields;

    /* Index into papszLines of the last line handed out. */
    int         iLastLine;

    /* TRUE once the key is known (or assumed) not to be unique. */
    int         bNonUniqueKey;

    /* Cache for whole file */
    int         nLineCount;     /* number of entries in papszLines */
    char      **papszLines;     /* start of each data line inside pszRawData */
    int        *panLineIndex;   /* atoi() of each line, or NULL if not sorted */
    char       *pszRawData;     /* whole file contents, zero terminated */
};


static void CSVDeaccessInternal( CSVTable **ppsCSVTableList, int bCanUseTLS,
                                 const char * pszFilename );
67 :
68 : /************************************************************************/
69 : /* CSVFreeTLS() */
70 : /************************************************************************/
71 102 : static void CSVFreeTLS(void* pData)
72 : {
73 102 : CSVDeaccessInternal( (CSVTable **)pData, FALSE, NULL );
74 102 : CPLFree(pData);
75 102 : }
76 :
77 : /* It would likely be better to share this list between threads, but
78 : that will require some rework. */
79 :
80 : /************************************************************************/
81 : /* CSVAccess() */
82 : /* */
83 : /* This function will fetch a handle to the requested table. */
84 : /* If not found in the ``open table list'' the table will be */
85 : /* opened and added to the list. Eventually this function may */
86 : /* become public with an abstracted return type so that */
87 : /* applications can set options about the table. For now this */
88 : /* isn't done. */
89 : /************************************************************************/
90 :
91 6004573 : static CSVTable *CSVAccess( const char * pszFilename )
92 :
93 : {
94 : CSVTable *psTable;
95 : FILE *fp;
96 :
97 : /* -------------------------------------------------------------------- */
98 : /* Fetch the table, and allocate the thread-local pointer to it */
99 : /* if there isn't already one. */
100 : /* -------------------------------------------------------------------- */
101 : CSVTable **ppsCSVTableList;
102 :
103 6004573 : ppsCSVTableList = (CSVTable **) CPLGetTLS( CTLS_CSVTABLEPTR );
104 6004573 : if( ppsCSVTableList == NULL )
105 : {
106 102 : ppsCSVTableList = (CSVTable **) CPLCalloc(1,sizeof(CSVTable*));
107 102 : CPLSetTLSWithFreeFunc( CTLS_CSVTABLEPTR, ppsCSVTableList, CSVFreeTLS );
108 : }
109 :
110 : /* -------------------------------------------------------------------- */
111 : /* Is the table already in the list. */
112 : /* -------------------------------------------------------------------- */
113 84967920 : for( psTable = *ppsCSVTableList;
114 : psTable != NULL;
115 : psTable = psTable->psNext )
116 : {
117 84967452 : if( EQUAL(psTable->pszFilename,pszFilename) )
118 : {
119 : /*
120 : * Eventually we should consider promoting to the front of
121 : * the list to accelerate frequently accessed tables.
122 : */
123 :
124 6004105 : return( psTable );
125 : }
126 : }
127 :
128 : /* -------------------------------------------------------------------- */
129 : /* If not, try to open it. */
130 : /* -------------------------------------------------------------------- */
131 468 : fp = VSIFOpen( pszFilename, "rb" );
132 468 : if( fp == NULL )
133 0 : return NULL;
134 :
135 : /* -------------------------------------------------------------------- */
136 : /* Create an information structure about this table, and add to */
137 : /* the front of the list. */
138 : /* -------------------------------------------------------------------- */
139 468 : psTable = (CSVTable *) CPLCalloc(sizeof(CSVTable),1);
140 :
141 468 : psTable->fp = fp;
142 468 : psTable->pszFilename = CPLStrdup( pszFilename );
143 468 : psTable->bNonUniqueKey = FALSE; /* as far as we know now */
144 468 : psTable->psNext = *ppsCSVTableList;
145 :
146 468 : *ppsCSVTableList = psTable;
147 :
148 : /* -------------------------------------------------------------------- */
149 : /* Read the table header record containing the field names. */
150 : /* -------------------------------------------------------------------- */
151 468 : psTable->papszFieldNames = CSVReadParseLine( fp );
152 :
153 468 : return( psTable );
154 : }
155 :
156 : /************************************************************************/
157 : /* CSVDeaccess() */
158 : /************************************************************************/
159 :
160 4661 : static void CSVDeaccessInternal( CSVTable **ppsCSVTableList, int bCanUseTLS, const char * pszFilename )
161 :
162 : {
163 : CSVTable *psLast, *psTable;
164 :
165 4661 : if( ppsCSVTableList == NULL )
166 3903 : return;
167 :
168 : /* -------------------------------------------------------------------- */
169 : /* A NULL means deaccess all tables. */
170 : /* -------------------------------------------------------------------- */
171 758 : if( pszFilename == NULL )
172 : {
173 1048 : while( *ppsCSVTableList != NULL )
174 468 : CSVDeaccessInternal( ppsCSVTableList, bCanUseTLS, (*ppsCSVTableList)->pszFilename );
175 :
176 290 : return;
177 : }
178 :
179 : /* -------------------------------------------------------------------- */
180 : /* Find this table. */
181 : /* -------------------------------------------------------------------- */
182 468 : psLast = NULL;
183 468 : for( psTable = *ppsCSVTableList;
184 : psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename);
185 : psTable = psTable->psNext )
186 : {
187 0 : psLast = psTable;
188 : }
189 :
190 468 : if( psTable == NULL )
191 : {
192 0 : if (bCanUseTLS)
193 0 : CPLDebug( "CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename );
194 0 : return;
195 : }
196 :
197 : /* -------------------------------------------------------------------- */
198 : /* Remove the link from the list. */
199 : /* -------------------------------------------------------------------- */
200 468 : if( psLast != NULL )
201 0 : psLast->psNext = psTable->psNext;
202 : else
203 468 : *ppsCSVTableList = psTable->psNext;
204 :
205 : /* -------------------------------------------------------------------- */
206 : /* Free the table. */
207 : /* -------------------------------------------------------------------- */
208 468 : if( psTable->fp != NULL )
209 0 : VSIFClose( psTable->fp );
210 :
211 468 : CSLDestroy( psTable->papszFieldNames );
212 468 : CSLDestroy( psTable->papszRecFields );
213 468 : CPLFree( psTable->pszFilename );
214 468 : CPLFree( psTable->panLineIndex );
215 468 : CPLFree( psTable->pszRawData );
216 468 : CPLFree( psTable->papszLines );
217 :
218 468 : CPLFree( psTable );
219 :
220 468 : if (bCanUseTLS)
221 468 : CPLReadLine( NULL );
222 : }
223 :
224 4091 : void CSVDeaccess( const char * pszFilename )
225 : {
226 : CSVTable **ppsCSVTableList;
227 : /* -------------------------------------------------------------------- */
228 : /* Fetch the table, and allocate the thread-local pointer to it */
229 : /* if there isn't already one. */
230 : /* -------------------------------------------------------------------- */
231 4091 : ppsCSVTableList = (CSVTable **) CPLGetTLS( CTLS_CSVTABLEPTR );
232 :
233 4091 : CSVDeaccessInternal(ppsCSVTableList, TRUE, pszFilename);
234 4091 : }
235 :
236 : /************************************************************************/
237 : /* CSVSplitLine() */
238 : /* */
239 : /* Tokenize a CSV line into fields in the form of a string */
240 : /* list. This is used instead of the CPLTokenizeString() */
241 : /* because it provides correct CSV escaping and quoting */
242 : /* semantics. */
243 : /************************************************************************/
244 :
245 1056122 : static char **CSVSplitLine( const char *pszString, char chDelimiter )
246 :
247 : {
248 1056122 : char **papszRetList = NULL;
249 : char *pszToken;
250 : int nTokenMax, nTokenLen;
251 :
252 1056122 : pszToken = (char *) CPLCalloc(10,1);
253 1056122 : nTokenMax = 10;
254 :
255 15892888 : while( pszString != NULL && *pszString != '\0' )
256 : {
257 13780644 : int bInString = FALSE;
258 :
259 13780644 : nTokenLen = 0;
260 :
261 : /* Try to find the next delimeter, marking end of token */
262 113474559 : for( ; *pszString != '\0'; pszString++ )
263 : {
264 :
265 : /* End if this is a delimeter skip it and break. */
266 112947987 : if( !bInString && *pszString == chDelimiter )
267 : {
268 13254072 : pszString++;
269 13254072 : break;
270 : }
271 :
272 99693915 : if( *pszString == '"' )
273 : {
274 1600432 : if( !bInString || pszString[1] != '"' )
275 : {
276 1489920 : bInString = !bInString;
277 1489920 : continue;
278 : }
279 : else /* doubled quotes in string resolve to one quote */
280 : {
281 110512 : pszString++;
282 : }
283 : }
284 :
285 98203995 : if( nTokenLen >= nTokenMax-2 )
286 : {
287 1436390 : nTokenMax = nTokenMax * 2 + 10;
288 1436390 : pszToken = (char *) CPLRealloc( pszToken, nTokenMax );
289 : }
290 :
291 98203995 : pszToken[nTokenLen] = *pszString;
292 98203995 : nTokenLen++;
293 : }
294 :
295 13780644 : pszToken[nTokenLen] = '\0';
296 13780644 : papszRetList = CSLAddString( papszRetList, pszToken );
297 :
298 : /* If the last token is an empty token, then we have to catch
299 : * it now, otherwise we won't reenter the loop and it will be lost.
300 : */
301 13780644 : if ( *pszString == '\0' && *(pszString-1) == chDelimiter )
302 : {
303 529550 : papszRetList = CSLAddString( papszRetList, "" );
304 : }
305 : }
306 :
307 1056122 : if( papszRetList == NULL )
308 0 : papszRetList = (char **) CPLCalloc(sizeof(char *),1);
309 :
310 1056122 : CPLFree( pszToken );
311 :
312 1056122 : return papszRetList;
313 : }
314 :
315 : /************************************************************************/
316 : /* CSVFindNextLine() */
317 : /* */
318 : /* Find the start of the next line, while at the same time zero */
319 : /* terminating this line. Take into account that there may be */
320 : /* newline indicators within quoted strings, and that quotes */
321 : /* can be escaped with a backslash. */
322 : /************************************************************************/
323 :
/* Zero-terminate the line starting at pszThisLine and return a pointer to
   the start of the following line, or NULL if nothing follows.  Line
   terminators (bytes 10 and 13) inside a double-quoted section do not end
   the line; a quote preceded by a backslash does not open or close a
   quoted section.  A run of consecutive terminators is overwritten with
   zeros as a whole. */
static char *CSVFindNextLine( char *pszThisLine )
{
    const char chLF = 10;
    const char chCR = 13;
    char *pszCur = pszThisLine;
    int   bInsideQuotes = 0;

    for( ; *pszCur != '\0'; pszCur++ )
    {
        if( *pszCur == '\"'
            && (pszCur == pszThisLine || pszCur[-1] != '\\') )
            bInsideQuotes = !bInsideQuotes;

        if( !bInsideQuotes && (*pszCur == chLF || *pszCur == chCR) )
            break;
    }

    /* Wipe the terminator run so the current line becomes a C string. */
    while( *pszCur == chLF || *pszCur == chCR )
        *pszCur++ = '\0';

    return ( *pszCur != '\0' ) ? pszCur : NULL;
}
348 :
349 : /************************************************************************/
350 : /* CSVIngest() */
351 : /* */
352 : /* Load entire file into memory and setup index if possible. */
353 : /************************************************************************/
354 :
355 1227254 : static void CSVIngest( const char *pszFilename )
356 :
357 : {
358 1227254 : CSVTable *psTable = CSVAccess( pszFilename );
359 1227254 : int nFileLen, i, nMaxLineCount, iLine = 0;
360 : char *pszThisLine;
361 :
362 1227254 : if( psTable->pszRawData != NULL )
363 1226786 : return;
364 :
365 : /* -------------------------------------------------------------------- */
366 : /* Ingest whole file. */
367 : /* -------------------------------------------------------------------- */
368 468 : VSIFSeek( psTable->fp, 0, SEEK_END );
369 468 : nFileLen = VSIFTell( psTable->fp );
370 468 : VSIRewind( psTable->fp );
371 :
372 468 : psTable->pszRawData = (char *) CPLMalloc(nFileLen+1);
373 468 : if( (int) VSIFRead( psTable->pszRawData, 1, nFileLen, psTable->fp )
374 : != nFileLen )
375 : {
376 0 : CPLFree( psTable->pszRawData );
377 0 : psTable->pszRawData = NULL;
378 :
379 : CPLError( CE_Failure, CPLE_FileIO, "Read of file %s failed.",
380 0 : psTable->pszFilename );
381 0 : return;
382 : }
383 :
384 468 : psTable->pszRawData[nFileLen] = '\0';
385 :
386 : /* -------------------------------------------------------------------- */
387 : /* Get count of newlines so we can allocate line array. */
388 : /* -------------------------------------------------------------------- */
389 468 : nMaxLineCount = 0;
390 46576370 : for( i = 0; i < nFileLen; i++ )
391 : {
392 46575902 : if( psTable->pszRawData[i] == 10 )
393 323174 : nMaxLineCount++;
394 : }
395 :
396 468 : psTable->papszLines = (char **) CPLCalloc(sizeof(char*),nMaxLineCount);
397 :
398 : /* -------------------------------------------------------------------- */
399 : /* Build a list of record pointers into the raw data buffer */
400 : /* based on line terminators. Zero terminate the line */
401 : /* strings. */
402 : /* -------------------------------------------------------------------- */
403 : /* skip header line */
404 468 : pszThisLine = CSVFindNextLine( psTable->pszRawData );
405 :
406 323618 : while( pszThisLine != NULL && iLine < nMaxLineCount )
407 : {
408 322682 : psTable->papszLines[iLine++] = pszThisLine;
409 322682 : pszThisLine = CSVFindNextLine( pszThisLine );
410 : }
411 :
412 468 : psTable->nLineCount = iLine;
413 :
414 : /* -------------------------------------------------------------------- */
415 : /* Allocate and populate index array. Ensure they are in */
416 : /* ascending order so that binary searches can be done on the */
417 : /* array. */
418 : /* -------------------------------------------------------------------- */
419 468 : psTable->panLineIndex = (int *) CPLMalloc(sizeof(int)*psTable->nLineCount);
420 322784 : for( i = 0; i < psTable->nLineCount; i++ )
421 : {
422 322388 : psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);
423 :
424 322388 : if( i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i-1] )
425 : {
426 72 : CPLFree( psTable->panLineIndex );
427 72 : psTable->panLineIndex = NULL;
428 72 : break;
429 : }
430 : }
431 :
432 468 : psTable->iLastLine = -1;
433 :
434 : /* -------------------------------------------------------------------- */
435 : /* We should never need the file handle against, so close it. */
436 : /* -------------------------------------------------------------------- */
437 468 : VSIFClose( psTable->fp );
438 468 : psTable->fp = NULL;
439 : }
440 :
441 : /************************************************************************/
442 : /* CSVDetectSeperator() */
443 : /************************************************************************/
444 :
445 : /** Detect which field separator is used.
446 : *
447 : * Currently, it can detect comma, semicolon or tabulation. In case of
448 : * ambiguity or no separator found, comma will be considered as the separator.
449 : *
450 : * @return ',', ';' or '\t'
451 : */
452 192 : char CSVDetectSeperator (const char* pszLine)
453 : {
454 192 : int bInString = FALSE;
455 192 : char chDelimiter = '\0';
456 :
457 6334 : for( ; *pszLine != '\0'; pszLine++ )
458 : {
459 6664 : if( !bInString && (*pszLine == ',' || *pszLine == ';' || *pszLine == '\t'))
460 : {
461 522 : if (chDelimiter == '\0')
462 192 : chDelimiter = *pszLine;
463 330 : else if (chDelimiter != *pszLine)
464 : {
465 : /* The separator is not consistant on the line. */
466 : CPLDebug("CSV", "Inconsistant separator. '%c' and '%c' found. Using ',' as default",
467 0 : chDelimiter, *pszLine);
468 0 : chDelimiter = ',';
469 0 : break;
470 : }
471 : }
472 5620 : else if( *pszLine == '"' )
473 : {
474 276 : if( !bInString || pszLine[1] != '"' )
475 : {
476 276 : bInString = !bInString;
477 276 : continue;
478 : }
479 : else /* doubled quotes in string resolve to one quote */
480 : {
481 0 : pszLine++;
482 : }
483 : }
484 : }
485 :
486 192 : if (chDelimiter == '\0')
487 0 : chDelimiter = ',';
488 :
489 192 : return chDelimiter;
490 : }
491 :
492 : /************************************************************************/
493 : /* CSVReadParseLine() */
494 : /* */
495 : /* Read one line, and return split into fields. The return */
496 : /* result is a stringlist, in the sense of the CSL functions. */
497 : /************************************************************************/
498 :
499 146428 : char **CSVReadParseLine( FILE * fp )
500 : {
501 146428 : return CSVReadParseLine2(fp, ',');
502 : }
503 :
504 146428 : char **CSVReadParseLine2( FILE * fp, char chDelimiter )
505 :
506 : {
507 : const char *pszLine;
508 : char *pszWorkLine;
509 : char **papszReturn;
510 :
511 146428 : CPLAssert( fp != NULL );
512 146428 : if( fp == NULL )
513 0 : return( NULL );
514 :
515 146428 : pszLine = CPLReadLine( fp );
516 146428 : if( pszLine == NULL )
517 226 : return( NULL );
518 :
519 : /* -------------------------------------------------------------------- */
520 : /* If there are no quotes, then this is the simple case. */
521 : /* Parse, and return tokens. */
522 : /* -------------------------------------------------------------------- */
523 146202 : if( strchr(pszLine,'\"') == NULL )
524 1168 : return CSVSplitLine( pszLine, chDelimiter );
525 :
526 : /* -------------------------------------------------------------------- */
527 : /* We must now count the quotes in our working string, and as */
528 : /* long as it is odd, keep adding new lines. */
529 : /* -------------------------------------------------------------------- */
530 145034 : pszWorkLine = CPLStrdup( pszLine );
531 :
532 145034 : int i = 0, nCount = 0;
533 145034 : int nWorkLineLength = strlen(pszWorkLine);
534 :
535 2160 : while( TRUE )
536 : {
537 41728652 : for( ; pszWorkLine[i] != '\0'; i++ )
538 : {
539 42796012 : if( pszWorkLine[i] == '\"'
540 1214554 : && (i == 0 || pszWorkLine[i-1] != '\\') )
541 1219412 : nCount++;
542 : }
543 :
544 147194 : if( nCount % 2 == 0 )
545 145034 : break;
546 :
547 2160 : pszLine = CPLReadLine( fp );
548 2160 : if( pszLine == NULL )
549 0 : break;
550 :
551 2160 : int nLineLen = strlen(pszLine);
552 :
553 : char* pszWorkLineTmp = (char *)
554 : VSIRealloc(pszWorkLine,
555 2160 : nWorkLineLength + nLineLen + 2);
556 2160 : if (pszWorkLineTmp == NULL)
557 0 : break;
558 2160 : pszWorkLine = pszWorkLineTmp;
559 2160 : strcat( pszWorkLine + nWorkLineLength, "\n" ); // This gets lost in CPLReadLine().
560 2160 : strcat( pszWorkLine + nWorkLineLength, pszLine );
561 :
562 2160 : nWorkLineLength += nLineLen + 1;
563 : }
564 :
565 145034 : papszReturn = CSVSplitLine( pszWorkLine, chDelimiter );
566 :
567 145034 : CPLFree( pszWorkLine );
568 :
569 145034 : return papszReturn;
570 : }
571 :
572 : /************************************************************************/
573 : /* CSVCompare() */
574 : /* */
575 : /* Compare a field to a search value using a particular */
576 : /* criteria. */
577 : /************************************************************************/
578 :
579 1856472 : static int CSVCompare( const char * pszFieldValue, const char * pszTarget,
580 : CSVCompareCriteria eCriteria )
581 :
582 : {
583 1856472 : if( eCriteria == CC_ExactString )
584 : {
585 0 : return( strcmp( pszFieldValue, pszTarget ) == 0 );
586 : }
587 1856472 : else if( eCriteria == CC_ApproxString )
588 : {
589 580 : return( EQUAL( pszFieldValue, pszTarget ) );
590 : }
591 1855892 : else if( eCriteria == CC_Integer )
592 : {
593 1855892 : return( atoi(pszFieldValue) == atoi(pszTarget) );
594 : }
595 :
596 0 : return FALSE;
597 : }
598 :
599 : /************************************************************************/
600 : /* CSVScanLines() */
601 : /* */
602 : /* Read the file scanline for lines where the key field equals */
603 : /* the indicated value with the suggested comparison criteria. */
604 : /* Return the first matching line split into fields. */
605 : /************************************************************************/
606 :
607 0 : char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue,
608 : CSVCompareCriteria eCriteria )
609 :
610 : {
611 0 : char **papszFields = NULL;
612 0 : int bSelected = FALSE, nTestValue;
613 :
614 0 : CPLAssert( pszValue != NULL );
615 0 : CPLAssert( iKeyField >= 0 );
616 0 : CPLAssert( fp != NULL );
617 :
618 0 : nTestValue = atoi(pszValue);
619 :
620 0 : while( !bSelected ) {
621 0 : papszFields = CSVReadParseLine( fp );
622 0 : if( papszFields == NULL )
623 0 : return( NULL );
624 :
625 0 : if( CSLCount( papszFields ) < iKeyField+1 )
626 : {
627 : /* not selected */
628 : }
629 0 : else if( eCriteria == CC_Integer
630 0 : && atoi(papszFields[iKeyField]) == nTestValue )
631 : {
632 0 : bSelected = TRUE;
633 : }
634 : else
635 : {
636 0 : bSelected = CSVCompare( papszFields[iKeyField], pszValue,
637 0 : eCriteria );
638 : }
639 :
640 0 : if( !bSelected )
641 : {
642 0 : CSLDestroy( papszFields );
643 0 : papszFields = NULL;
644 : }
645 : }
646 :
647 0 : return( papszFields );
648 : }
649 :
650 : /************************************************************************/
651 : /* CSVScanLinesIndexed() */
652 : /* */
653 : /* Read the file scanline for lines where the key field equals */
654 : /* the indicated value with the suggested comparison criteria. */
655 : /* Return the first matching line split into fields. */
656 : /************************************************************************/
657 :
658 : static char **
659 238751 : CSVScanLinesIndexed( CSVTable *psTable, int nKeyValue )
660 :
661 : {
662 238751 : int iTop, iBottom, iMiddle, iResult = -1;
663 :
664 238751 : CPLAssert( psTable->panLineIndex != NULL );
665 :
666 : /* -------------------------------------------------------------------- */
667 : /* Find target record with binary search. */
668 : /* -------------------------------------------------------------------- */
669 238751 : iTop = psTable->nLineCount-1;
670 238751 : iBottom = 0;
671 :
672 1746827 : while( iTop >= iBottom )
673 : {
674 1383598 : iMiddle = (iTop + iBottom) / 2;
675 1383598 : if( psTable->panLineIndex[iMiddle] > nKeyValue )
676 431519 : iTop = iMiddle - 1;
677 952079 : else if( psTable->panLineIndex[iMiddle] < nKeyValue )
678 837806 : iBottom = iMiddle + 1;
679 : else
680 : {
681 114273 : iResult = iMiddle;
682 : // if a key is not unique, select the first instance of it.
683 344531 : while( iResult > 0
684 115113 : && psTable->panLineIndex[iResult-1] == nKeyValue )
685 : {
686 872 : psTable->bNonUniqueKey = TRUE;
687 872 : iResult--;
688 : }
689 114273 : break;
690 : }
691 : }
692 :
693 238751 : if( iResult == -1 )
694 124478 : return NULL;
695 :
696 : /* -------------------------------------------------------------------- */
697 : /* Parse target line, and update iLastLine indicator. */
698 : /* -------------------------------------------------------------------- */
699 114273 : psTable->iLastLine = iResult;
700 :
701 114273 : return CSVSplitLine( psTable->papszLines[iResult], ',' );
702 : }
703 :
704 : /************************************************************************/
705 : /* CSVScanLinesIngested() */
706 : /* */
707 : /* Read the file scanline for lines where the key field equals */
708 : /* the indicated value with the suggested comparison criteria. */
709 : /* Return the first matching line split into fields. */
710 : /************************************************************************/
711 :
712 : static char **
713 265327 : CSVScanLinesIngested( CSVTable *psTable, int iKeyField, const char * pszValue,
714 : CSVCompareCriteria eCriteria )
715 :
716 : {
717 265327 : char **papszFields = NULL;
718 265327 : int bSelected = FALSE, nTestValue;
719 :
720 265327 : CPLAssert( pszValue != NULL );
721 265327 : CPLAssert( iKeyField >= 0 );
722 :
723 265327 : nTestValue = atoi(pszValue);
724 :
725 : /* -------------------------------------------------------------------- */
726 : /* Short cut for indexed files. */
727 : /* -------------------------------------------------------------------- */
728 265327 : if( iKeyField == 0 && eCriteria == CC_Integer
729 : && psTable->panLineIndex != NULL )
730 238751 : return CSVScanLinesIndexed( psTable, nTestValue );
731 :
732 : /* -------------------------------------------------------------------- */
733 : /* Scan from in-core lines. */
734 : /* -------------------------------------------------------------------- */
735 833728 : while( !bSelected && psTable->iLastLine+1 < psTable->nLineCount ) {
736 780576 : psTable->iLastLine++;
737 780576 : papszFields = CSVSplitLine( psTable->papszLines[psTable->iLastLine], ',' );
738 :
739 780576 : if( CSLCount( papszFields ) < iKeyField+1 )
740 : {
741 : /* not selected */
742 : }
743 1561954 : else if( eCriteria == CC_Integer
744 780046 : && atoi(papszFields[iKeyField]) == nTestValue )
745 : {
746 1356 : bSelected = TRUE;
747 : }
748 : else
749 : {
750 779196 : bSelected = CSVCompare( papszFields[iKeyField], pszValue,
751 1558392 : eCriteria );
752 : }
753 :
754 780576 : if( !bSelected )
755 : {
756 779210 : CSLDestroy( papszFields );
757 779210 : papszFields = NULL;
758 : }
759 : }
760 :
761 26576 : return( papszFields );
762 : }
763 :
764 : /************************************************************************/
765 : /* CSVGetNextLine() */
766 : /* */
767 : /* Fetch the next line of a CSV file based on a passed in */
768 : /* filename. Returns NULL at end of file, or if file is not */
769 : /* really established. */
770 : /************************************************************************/
771 :
772 15071 : char **CSVGetNextLine( const char *pszFilename )
773 :
774 : {
775 : CSVTable *psTable;
776 :
777 : /* -------------------------------------------------------------------- */
778 : /* Get access to the table. */
779 : /* -------------------------------------------------------------------- */
780 15071 : CPLAssert( pszFilename != NULL );
781 :
782 15071 : psTable = CSVAccess( pszFilename );
783 15071 : if( psTable == NULL )
784 0 : return NULL;
785 :
786 : /* -------------------------------------------------------------------- */
787 : /* If we use CSVGetNextLine() we can pretty much assume we have */
788 : /* a non-unique key. */
789 : /* -------------------------------------------------------------------- */
790 15071 : psTable->bNonUniqueKey = TRUE;
791 :
792 : /* -------------------------------------------------------------------- */
793 : /* Do we have a next line available? This only works for */
794 : /* ingested tables I believe. */
795 : /* -------------------------------------------------------------------- */
796 15071 : if( psTable->iLastLine+1 >= psTable->nLineCount )
797 0 : return NULL;
798 :
799 15071 : psTable->iLastLine++;
800 15071 : CSLDestroy( psTable->papszRecFields );
801 : psTable->papszRecFields =
802 15071 : CSVSplitLine( psTable->papszLines[psTable->iLastLine], ',' );
803 :
804 15071 : return psTable->papszRecFields;
805 : }
806 :
807 : /************************************************************************/
808 : /* CSVScanFile() */
809 : /* */
810 : /* Scan a whole file using criteria similar to above, but also */
811 : /* taking care of file opening and closing. */
812 : /************************************************************************/
813 :
814 1227254 : char **CSVScanFile( const char * pszFilename, int iKeyField,
815 : const char * pszValue, CSVCompareCriteria eCriteria )
816 :
817 : {
818 : CSVTable *psTable;
819 :
820 : /* -------------------------------------------------------------------- */
821 : /* Get access to the table. */
822 : /* -------------------------------------------------------------------- */
823 1227254 : CPLAssert( pszFilename != NULL );
824 :
825 1227254 : if( iKeyField < 0 )
826 0 : return NULL;
827 :
828 1227254 : psTable = CSVAccess( pszFilename );
829 1227254 : if( psTable == NULL )
830 0 : return NULL;
831 :
832 1227254 : CSVIngest( pszFilename );
833 :
834 : /* -------------------------------------------------------------------- */
835 : /* Does the current record match the criteria? If so, just */
836 : /* return it again. */
837 : /* -------------------------------------------------------------------- */
838 2304530 : if( iKeyField >= 0
839 : && iKeyField < CSLCount(psTable->papszRecFields)
840 1077276 : && CSVCompare(pszValue,psTable->papszRecFields[iKeyField],eCriteria)
841 : && !psTable->bNonUniqueKey )
842 : {
843 961927 : return psTable->papszRecFields;
844 : }
845 :
846 : /* -------------------------------------------------------------------- */
847 : /* Scan the file from the beginning, replacing the ``current */
848 : /* record'' in our structure with the one that is found. */
849 : /* -------------------------------------------------------------------- */
850 265327 : psTable->iLastLine = -1;
851 265327 : CSLDestroy( psTable->papszRecFields );
852 :
853 265327 : if( psTable->pszRawData != NULL )
854 : psTable->papszRecFields =
855 265327 : CSVScanLinesIngested( psTable, iKeyField, pszValue, eCriteria );
856 : else
857 : {
858 0 : VSIRewind( psTable->fp );
859 0 : CPLReadLine( psTable->fp ); /* throw away the header line */
860 :
861 : psTable->papszRecFields =
862 0 : CSVScanLines( psTable->fp, iKeyField, pszValue, eCriteria );
863 : }
864 :
865 265327 : return( psTable->papszRecFields );
866 : }
867 :
868 : /************************************************************************/
869 : /* CSVGetFieldId() */
870 : /* */
871 : /* Read the first record of a CSV file (rewinding to be sure), */
872 : /* and find the field with the indicated name. Returns -1 if */
873 : /* it fails to find the field name. Comparison is case */
874 : /* insensitive, but otherwise exact. After this function has */
875 : /* been called the file pointer will be positioned just after */
876 : /* the first record. */
877 : /************************************************************************/
878 :
879 0 : int CSVGetFieldId( FILE * fp, const char * pszFieldName )
880 :
881 : {
882 : char **papszFields;
883 : int i;
884 :
885 0 : CPLAssert( fp != NULL && pszFieldName != NULL );
886 :
887 0 : VSIRewind( fp );
888 :
889 0 : papszFields = CSVReadParseLine( fp );
890 0 : for( i = 0; papszFields != NULL && papszFields[i] != NULL; i++ )
891 : {
892 0 : if( EQUAL(papszFields[i],pszFieldName) )
893 : {
894 0 : CSLDestroy( papszFields );
895 0 : return i;
896 : }
897 : }
898 :
899 0 : CSLDestroy( papszFields );
900 :
901 0 : return -1;
902 : }
903 :
904 : /************************************************************************/
905 : /* CSVGetFileFieldId() */
906 : /* */
907 : /* Same as CPLGetFieldId(), except that we get the file based */
908 : /* on filename, rather than having an existing handle. */
909 : /************************************************************************/
910 :
911 2480191 : int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName )
912 :
913 : {
914 : CSVTable *psTable;
915 : int i;
916 :
917 : /* -------------------------------------------------------------------- */
918 : /* Get access to the table. */
919 : /* -------------------------------------------------------------------- */
920 2480191 : CPLAssert( pszFilename != NULL );
921 :
922 2480191 : psTable = CSVAccess( pszFilename );
923 2480191 : if( psTable == NULL )
924 0 : return -1;
925 :
926 : /* -------------------------------------------------------------------- */
927 : /* Find the requested field. */
928 : /* -------------------------------------------------------------------- */
929 32152974 : for( i = 0;
930 : psTable->papszFieldNames != NULL
931 16076487 : && psTable->papszFieldNames[i] != NULL;
932 : i++ )
933 : {
934 16053059 : if( EQUAL(psTable->papszFieldNames[i],pszFieldName) )
935 : {
936 2456763 : return i;
937 : }
938 : }
939 :
940 23428 : return -1;
941 : }
942 :
943 :
944 : /************************************************************************/
945 : /* CSVScanFileByName() */
946 : /* */
947 : /* Same as CSVScanFile(), but using a field name instead of a */
948 : /* field number. */
949 : /************************************************************************/
950 :
951 1227254 : char **CSVScanFileByName( const char * pszFilename,
952 : const char * pszKeyFieldName,
953 : const char * pszValue, CSVCompareCriteria eCriteria )
954 :
955 : {
956 : int iKeyField;
957 :
958 1227254 : iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName );
959 1227254 : if( iKeyField == -1 )
960 0 : return NULL;
961 :
962 1227254 : return( CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria ) );
963 : }
964 :
965 : /************************************************************************/
966 : /* CSVGetField() */
967 : /* */
968 : /* The all-in-one function to fetch a particular field value */
969 : /* from a CSV file. Note this function will return an empty */
970 : /* string, rather than NULL if it fails to find the desired */
971 : /* value for some reason. The caller can't establish that the */
972 : /* fetch failed. */
973 : /************************************************************************/
974 :
975 1054803 : const char *CSVGetField( const char * pszFilename,
976 : const char * pszKeyFieldName,
977 : const char * pszKeyFieldValue,
978 : CSVCompareCriteria eCriteria,
979 : const char * pszTargetField )
980 :
981 : {
982 : CSVTable *psTable;
983 : char **papszRecord;
984 : int iTargetField;
985 :
986 : /* -------------------------------------------------------------------- */
987 : /* Find the table. */
988 : /* -------------------------------------------------------------------- */
989 1054803 : psTable = CSVAccess( pszFilename );
990 1054803 : if( psTable == NULL )
991 0 : return "";
992 :
993 : /* -------------------------------------------------------------------- */
994 : /* Find the correct record. */
995 : /* -------------------------------------------------------------------- */
996 : papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName,
997 1054803 : pszKeyFieldValue, eCriteria );
998 :
999 1054803 : if( papszRecord == NULL )
1000 108042 : return "";
1001 :
1002 : /* -------------------------------------------------------------------- */
1003 : /* Figure out which field we want out of this. */
1004 : /* -------------------------------------------------------------------- */
1005 946761 : iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField );
1006 946761 : if( iTargetField < 0 )
1007 0 : return "";
1008 :
1009 946761 : if( iTargetField >= CSLCount( papszRecord ) )
1010 0 : return "";
1011 :
1012 946761 : return( papszRecord[iTargetField] );
1013 : }
1014 :
1015 : /************************************************************************/
1016 : /* GDALDefaultCSVFilename() */
1017 : /************************************************************************/
1018 :
1019 : typedef struct
1020 : {
1021 : char szPath[512];
1022 : int bCSVFinderInitialized;
1023 : } DefaultCSVFileNameTLS;
1024 :
1025 :
1026 517486 : const char * GDALDefaultCSVFilename( const char *pszBasename )
1027 :
1028 : {
1029 : /* -------------------------------------------------------------------- */
1030 : /* Do we already have this file accessed? If so, just return */
1031 : /* the existing path without any further probing. */
1032 : /* -------------------------------------------------------------------- */
1033 : CSVTable **ppsCSVTableList;
1034 :
1035 517486 : ppsCSVTableList = (CSVTable **) CPLGetTLS( CTLS_CSVTABLEPTR );
1036 517486 : if( ppsCSVTableList != NULL )
1037 : {
1038 : CSVTable *psTable;
1039 517212 : int nBasenameLen = strlen(pszBasename);
1040 :
1041 6924172 : for( psTable = *ppsCSVTableList;
1042 : psTable != NULL;
1043 : psTable = psTable->psNext )
1044 : {
1045 6923686 : int nFullLen = strlen(psTable->pszFilename);
1046 :
1047 7440500 : if( nFullLen > nBasenameLen
1048 : && strcmp(psTable->pszFilename+nFullLen-nBasenameLen,
1049 : pszBasename) == 0
1050 516814 : && strchr("/\\",psTable->pszFilename[+nFullLen-nBasenameLen-1])
1051 : != NULL )
1052 : {
1053 516726 : return psTable->pszFilename;
1054 : }
1055 : }
1056 : }
1057 :
1058 : /* -------------------------------------------------------------------- */
1059 : /* Otherwise we need to look harder for it. */
1060 : /* -------------------------------------------------------------------- */
1061 : DefaultCSVFileNameTLS* pTLSData =
1062 760 : (DefaultCSVFileNameTLS *) CPLGetTLS( CTLS_CSVDEFAULTFILENAME );
1063 760 : if (pTLSData == NULL)
1064 : {
1065 268 : pTLSData = (DefaultCSVFileNameTLS*) CPLCalloc(1, sizeof(DefaultCSVFileNameTLS));
1066 268 : CPLSetTLS( CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE );
1067 : }
1068 :
1069 760 : FILE *fp = NULL;
1070 : const char *pszResult;
1071 :
1072 760 : pszResult = CPLFindFile( "epsg_csv", pszBasename );
1073 :
1074 760 : if( pszResult != NULL )
1075 666 : return pszResult;
1076 :
1077 94 : if( !pTLSData->bCSVFinderInitialized )
1078 : {
1079 2 : pTLSData->bCSVFinderInitialized = TRUE;
1080 :
1081 2 : if( CPLGetConfigOption("GEOTIFF_CSV",NULL) != NULL )
1082 0 : CPLPushFinderLocation( CPLGetConfigOption("GEOTIFF_CSV",NULL));
1083 :
1084 2 : if( CPLGetConfigOption("GDAL_DATA",NULL) != NULL )
1085 2 : CPLPushFinderLocation( CPLGetConfigOption("GDAL_DATA",NULL) );
1086 :
1087 2 : pszResult = CPLFindFile( "epsg_csv", pszBasename );
1088 :
1089 2 : if( pszResult != NULL )
1090 0 : return pszResult;
1091 : }
1092 :
1093 94 : if( (fp = fopen( "csv/horiz_cs.csv", "rt" )) != NULL )
1094 : {
1095 0 : strcpy( pTLSData->szPath, "csv/" );
1096 0 : CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
1097 : }
1098 : else
1099 : {
1100 : #ifdef GDAL_PREFIX
1101 : #ifdef MACOSX_FRAMEWORK
1102 : strcpy( pTLSData->szPath, GDAL_PREFIX "/Resources/epsg_csv/" );
1103 : CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
1104 : #else
1105 94 : strcpy( pTLSData->szPath, GDAL_PREFIX "/share/epsg_csv/" );
1106 94 : CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
1107 : #endif
1108 : #else
1109 : strcpy( pTLSData->szPath, "/usr/local/share/epsg_csv/" );
1110 : CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
1111 : #endif
1112 94 : if( (fp = fopen( pTLSData->szPath, "rt" )) == NULL )
1113 94 : CPLStrlcpy( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
1114 : }
1115 :
1116 94 : if( fp != NULL )
1117 0 : fclose( fp );
1118 :
1119 94 : return( pTLSData->szPath );
1120 : }
1121 :
1122 : /************************************************************************/
1123 : /* CSVFilename() */
1124 : /* */
1125 : /* Return the full path to a particular CSV file. This will */
1126 : /* eventually be something the application can override. */
1127 : /************************************************************************/
1128 :
1129 : CPL_C_START
1130 : static const char *(*pfnCSVFilenameHook)(const char *) = NULL;
1131 : CPL_C_END
1132 :
1133 517486 : const char * CSVFilename( const char *pszBasename )
1134 :
1135 : {
1136 517486 : if( pfnCSVFilenameHook == NULL )
1137 148816 : return GDALDefaultCSVFilename( pszBasename );
1138 : else
1139 368670 : return( pfnCSVFilenameHook( pszBasename ) );
1140 : }
1141 :
1142 : /************************************************************************/
1143 : /* SetCSVFilenameHook() */
1144 : /* */
1145 : /* Applications can use this to set a function that will */
1146 : /* massage CSV filenames. */
1147 : /************************************************************************/
1148 :
1149 : /**
1150 : * Override CSV file search method.
1151 : *
 * @param pfnNewHook The pointer to a function which will return the
 * full path for a given filename.
 *

This function allows an application to override how GTIFGetDefn() and related functions find the CSV (Comma Separated
Value) files they require. The pfnNewHook argument should be a pointer to a function that takes a CSV filename and returns the
full path to the file. The returned string should point to an internal static buffer so that the caller doesn't have to free the result.
1159 :
1160 : <b>Example:</b><br>
1161 :
1162 : The listgeo utility uses the following override function if the user
1163 : specified a CSV file directory with the -t commandline switch (argument
1164 : put into CSVDirName). <p>
1165 :
1166 : <pre>
1167 :
1168 : ...
1169 :
1170 :
1171 : SetCSVFilenameHook( CSVFileOverride );
1172 :
1173 : ...
1174 :
1175 :
1176 : static const char *CSVFileOverride( const char * pszInput )
1177 :
1178 : {
1179 : static char szPath[1024];
1180 :
1181 : #ifdef WIN32
1182 : sprintf( szPath, "%s\\%s", CSVDirName, pszInput );
1183 : #else
1184 : sprintf( szPath, "%s/%s", CSVDirName, pszInput );
1185 : #endif
1186 :
1187 : return( szPath );
1188 : }
1189 : </pre>
1190 :
1191 : */
1192 :
1193 : CPL_C_START
1194 860 : void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) )
1195 :
1196 : {
1197 860 : pfnCSVFilenameHook = pfnNewHook;
1198 860 : }
1199 : CPL_C_END
1200 :
|