1 : /******************************************************************************
2 : * $Id: ogrcsvlayer.cpp 24258 2012-04-18 20:38:02Z rouault $
3 : *
4 : * Project: CSV Translator
5 : * Purpose: Implements OGRCSVLayer class.
6 : * Author: Frank Warmerdam <warmerdam@pobox.com>
7 : *
8 : ******************************************************************************
9 : * Copyright (c) 2004, Frank Warmerdam <warmerdam@pobox.com>
10 : *
11 : * Permission is hereby granted, free of charge, to any person obtaining a
12 : * copy of this software and associated documentation files (the "Software"),
13 : * to deal in the Software without restriction, including without limitation
14 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 : * and/or sell copies of the Software, and to permit persons to whom the
16 : * Software is furnished to do so, subject to the following conditions:
17 : *
18 : * The above copyright notice and this permission notice shall be included
19 : * in all copies or substantial portions of the Software.
20 : *
21 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 : * DEALINGS IN THE SOFTWARE.
28 : ****************************************************************************/
29 :
30 : #include "ogr_csv.h"
31 : #include "cpl_conv.h"
32 : #include "cpl_string.h"
33 : #include "cpl_csv.h"
34 : #include "ogr_p.h"
35 :
36 : CPL_CVSID("$Id: ogrcsvlayer.cpp 24258 2012-04-18 20:38:02Z rouault $");
37 :
38 :
39 :
40 : /************************************************************************/
41 : /* CSVSplitLine() */
42 : /* */
43 : /* Tokenize a CSV line into fields in the form of a string */
44 : /* list. This is used instead of the CPLTokenizeString() */
45 : /* because it provides correct CSV escaping and quoting */
46 : /* semantics. */
47 : /************************************************************************/
48 :
49 19422 : static char **CSVSplitLine( const char *pszString, char chDelimiter )
50 :
51 : {
52 19422 : char **papszRetList = NULL;
53 : char *pszToken;
54 : int nTokenMax, nTokenLen;
55 :
56 19422 : pszToken = (char *) CPLCalloc(10,1);
57 19422 : nTokenMax = 10;
58 :
59 98062 : while( pszString != NULL && *pszString != '\0' )
60 : {
61 59218 : int bInString = FALSE;
62 :
63 59218 : nTokenLen = 0;
64 :
65 : /* Try to find the next delimeter, marking end of token */
66 536234 : for( ; *pszString != '\0'; pszString++ )
67 : {
68 :
69 : /* End if this is a delimeter skip it and break. */
70 516962 : if( !bInString && *pszString == chDelimiter )
71 : {
72 39946 : pszString++;
73 39946 : break;
74 : }
75 :
76 477016 : if( *pszString == '"' )
77 : {
78 1916 : if( !bInString || pszString[1] != '"' )
79 : {
80 1832 : bInString = !bInString;
81 1832 : continue;
82 : }
83 : else /* doubled quotes in string resolve to one quote */
84 : {
85 84 : pszString++;
86 : }
87 : }
88 :
89 475184 : if( nTokenLen >= nTokenMax-2 )
90 : {
91 19204 : nTokenMax = nTokenMax * 2 + 10;
92 19204 : pszToken = (char *) CPLRealloc( pszToken, nTokenMax );
93 : }
94 :
95 475184 : pszToken[nTokenLen] = *pszString;
96 475184 : nTokenLen++;
97 : }
98 :
99 59218 : pszToken[nTokenLen] = '\0';
100 59218 : papszRetList = CSLAddString( papszRetList, pszToken );
101 :
102 : /* If the last token is an empty token, then we have to catch
103 : * it now, otherwise we won't reenter the loop and it will be lost.
104 : */
105 59218 : if ( *pszString == '\0' && *(pszString-1) == chDelimiter )
106 : {
107 150 : papszRetList = CSLAddString( papszRetList, "" );
108 : }
109 : }
110 :
111 19422 : if( papszRetList == NULL )
112 0 : papszRetList = (char **) CPLCalloc(sizeof(char *),1);
113 :
114 19422 : CPLFree( pszToken );
115 :
116 19422 : return papszRetList;
117 : }
118 :
119 : /************************************************************************/
120 : /* OGRCSVReadParseLineL() */
121 : /* */
122 : /* Read one line, and return split into fields. The return */
123 : /* result is a stringlist, in the sense of the CSL functions. */
124 : /************************************************************************/
125 :
126 19638 : char **OGRCSVReadParseLineL( VSILFILE * fp, char chDelimiter, int bDontHonourStrings )
127 :
128 : {
129 : const char *pszLine;
130 : char *pszWorkLine;
131 : char **papszReturn;
132 :
133 19638 : pszLine = CPLReadLineL( fp );
134 19638 : if( pszLine == NULL )
135 216 : return( NULL );
136 :
137 : /* Skip BOM */
138 19422 : GByte* pabyData = (GByte*) pszLine;
139 19422 : if (pabyData[0] == 0xEF && pabyData[1] == 0xBB && pabyData[2] == 0xBF)
140 2 : pszLine += 3;
141 :
142 : /* Special fix to read NdfcFacilities.xls that has non-balanced double quotes */
143 19422 : if (chDelimiter == '\t' && bDontHonourStrings)
144 : {
145 0 : return CSLTokenizeStringComplex(pszLine, "\t", FALSE, TRUE);
146 : }
147 :
148 : /* -------------------------------------------------------------------- */
149 : /* If there are no quotes, then this is the simple case. */
150 : /* Parse, and return tokens. */
151 : /* -------------------------------------------------------------------- */
152 19422 : if( strchr(pszLine,'\"') == NULL )
153 19066 : return CSVSplitLine( pszLine, chDelimiter );
154 :
155 : /* -------------------------------------------------------------------- */
156 : /* We must now count the quotes in our working string, and as */
157 : /* long as it is odd, keep adding new lines. */
158 : /* -------------------------------------------------------------------- */
159 356 : pszWorkLine = CPLStrdup( pszLine );
160 :
161 356 : int i = 0, nCount = 0;
162 356 : int nWorkLineLength = strlen(pszWorkLine);
163 :
164 4 : while( TRUE )
165 : {
166 25428 : for( ; pszWorkLine[i] != '\0'; i++ )
167 : {
168 26944 : if( pszWorkLine[i] == '\"'
169 1876 : && (i == 0 || pszWorkLine[i-1] != '\\') )
170 2000 : nCount++;
171 : }
172 :
173 360 : if( nCount % 2 == 0 )
174 356 : break;
175 :
176 4 : pszLine = CPLReadLineL( fp );
177 4 : if( pszLine == NULL )
178 0 : break;
179 :
180 4 : int nLineLen = strlen(pszLine);
181 :
182 : char* pszWorkLineTmp = (char *)
183 : VSIRealloc(pszWorkLine,
184 4 : nWorkLineLength + nLineLen + 2);
185 4 : if (pszWorkLineTmp == NULL)
186 0 : break;
187 4 : pszWorkLine = pszWorkLineTmp;
188 4 : strcat( pszWorkLine + nWorkLineLength, "\n" ); // This gets lost in CPLReadLine().
189 4 : strcat( pszWorkLine + nWorkLineLength, pszLine );
190 :
191 4 : nWorkLineLength += nLineLen + 1;
192 : }
193 :
194 356 : papszReturn = CSVSplitLine( pszWorkLine, chDelimiter );
195 :
196 356 : CPLFree( pszWorkLine );
197 :
198 356 : return papszReturn;
199 : }
200 :
201 : /************************************************************************/
202 : /* OGRCSVLayer() */
203 : /* */
204 : /* Note that the OGRCSVLayer assumes ownership of the passed */
205 : /* file pointer. */
206 : /************************************************************************/
207 :
208 218 : OGRCSVLayer::OGRCSVLayer( const char *pszLayerNameIn,
209 : VSILFILE * fp, const char *pszFilename, int bNew, int bInWriteMode,
210 : char chDelimiter, const char* pszNfdcGeomField,
211 218 : const char* pszGeonamesGeomFieldPrefix)
212 :
213 : {
214 218 : fpCSV = fp;
215 :
216 218 : iWktGeomReadField = -1;
217 218 : iNfdcLatitudeS = iNfdcLongitudeS = -1;
218 218 : iLatitudeField = iLongitudeField = -1;
219 218 : this->bInWriteMode = bInWriteMode;
220 218 : this->bNew = bNew;
221 218 : this->pszFilename = CPLStrdup(pszFilename);
222 218 : this->chDelimiter = chDelimiter;
223 :
224 218 : bFirstFeatureAppendedDuringSession = TRUE;
225 218 : bUseCRLF = FALSE;
226 218 : bNeedRewindBeforeRead = FALSE;
227 218 : eGeometryFormat = OGR_CSV_GEOM_NONE;
228 :
229 218 : nNextFID = 1;
230 :
231 218 : poFeatureDefn = new OGRFeatureDefn( pszLayerNameIn );
232 218 : poFeatureDefn->Reference();
233 218 : poFeatureDefn->SetGeomType( wkbNone );
234 :
235 218 : bCreateCSVT = FALSE;
236 218 : bDontHonourStrings = FALSE;
237 :
238 218 : nTotalFeatures = -1;
239 :
240 : /* -------------------------------------------------------------------- */
241 : /* If this is not a new file, read ahead to establish if it is */
242 : /* already in CRLF (DOS) mode, or just a normal unix CR mode. */
243 : /* -------------------------------------------------------------------- */
244 218 : if( !bNew && bInWriteMode )
245 : {
246 28 : int nBytesRead = 0;
247 : char chNewByte;
248 :
249 1472 : while( nBytesRead < 10000 && VSIFReadL( &chNewByte, 1, 1, fpCSV ) == 1 )
250 : {
251 1424 : if( chNewByte == 13 )
252 : {
253 8 : bUseCRLF = TRUE;
254 8 : break;
255 : }
256 1416 : nBytesRead ++;
257 : }
258 28 : VSIRewindL( fpCSV );
259 : }
260 :
261 : /* -------------------------------------------------------------------- */
262 : /* Check if the first record seems to be field definitions or */
263 : /* not. We assume it is field definitions if none of the */
264 : /* values are strictly numeric. */
265 : /* -------------------------------------------------------------------- */
266 218 : char **papszTokens = NULL;
267 218 : int nFieldCount=0, iField;
268 : CPLValueType eType;
269 :
270 218 : if( !bNew )
271 : {
272 192 : const char *pszLine = NULL;
273 : char szDelimiter[2];
274 192 : szDelimiter[0] = chDelimiter; szDelimiter[1] = '\0';
275 :
276 192 : pszLine = CPLReadLineL( fpCSV );
277 192 : if ( pszLine != NULL )
278 : {
279 : /* Detect and remove UTF-8 BOM marker if found (#4623) */
280 196 : if (pszLine[0] == (char)0xEF &&
281 2 : pszLine[1] == (char)0xBB &&
282 2 : pszLine[2] == (char)0xBF)
283 : {
284 2 : pszLine += 3;
285 : }
286 :
287 : /* tokenize the strings and preserve quotes, so we can separate string from numeric */
288 : /* this is only used in the test for bHasFeldNames (bug #4361) */
289 : papszTokens = CSLTokenizeString2( pszLine, szDelimiter,
290 192 : CSLT_HONOURSTRINGS | CSLT_PRESERVEQUOTES );
291 192 : nFieldCount = CSLCount( papszTokens );
292 192 : bHasFieldNames = TRUE;
293 :
294 810 : for( iField = 0; iField < nFieldCount && bHasFieldNames; iField++ )
295 : {
296 618 : eType = CPLGetValueType(papszTokens[iField]);
297 618 : if ( (eType == CPL_VALUE_INTEGER ||
298 : eType == CPL_VALUE_REAL) ) {
299 : /* we have a numeric field, therefore do not consider the first line as field names */
300 50 : bHasFieldNames = FALSE;
301 : }
302 : }
303 :
304 : /* tokenize without quotes to get the actual values */
305 192 : CSLDestroy( papszTokens );
306 : // papszTokens = OGRCSVReadParseLineL( fpCSV, chDelimiter, FALSE );
307 : papszTokens = CSLTokenizeString2( pszLine, szDelimiter,
308 192 : CSLT_HONOURSTRINGS);
309 192 : nFieldCount = CSLCount( papszTokens );
310 : }
311 : }
312 : else
313 26 : bHasFieldNames = FALSE;
314 :
315 218 : if( !bNew && !bHasFieldNames )
316 50 : VSIRewindL( fpCSV );
317 :
318 : /* -------------------------------------------------------------------- */
319 : /* Check for geonames.org tables */
320 : /* -------------------------------------------------------------------- */
321 218 : if( !bHasFieldNames && nFieldCount == 19 )
322 : {
323 0 : if (CPLGetValueType(papszTokens[0]) == CPL_VALUE_INTEGER &&
324 0 : CPLGetValueType(papszTokens[4]) == CPL_VALUE_REAL &&
325 0 : CPLGetValueType(papszTokens[5]) == CPL_VALUE_REAL &&
326 0 : CPLAtof(papszTokens[4]) >= -90 && CPLAtof(papszTokens[4]) <= 90 &&
327 0 : CPLAtof(papszTokens[5]) >= -180 && CPLAtof(papszTokens[4]) <= 180)
328 : {
329 0 : bHasFieldNames = TRUE;
330 0 : CSLDestroy(papszTokens);
331 0 : papszTokens = NULL;
332 :
333 : static const struct {
334 : const char* pszName;
335 : OGRFieldType eType;
336 : }
337 : asGeonamesFieldDesc[] =
338 : {
339 : { "GEONAMEID", OFTString },
340 : { "NAME", OFTString },
341 : { "ASCIINAME", OFTString },
342 : { "ALTNAMES", OFTString },
343 : { "LATITUDE", OFTReal },
344 : { "LONGITUDE", OFTReal },
345 : { "FEATCLASS", OFTString },
346 : { "FEATCODE", OFTString },
347 : { "COUNTRY", OFTString },
348 : { "CC2", OFTString },
349 : { "ADMIN1", OFTString },
350 : { "ADMIN2", OFTString },
351 : { "ADMIN3", OFTString },
352 : { "ADMIN4", OFTString },
353 : { "POPULATION", OFTReal },
354 : { "ELEVATION", OFTInteger },
355 : { "GTOPO30", OFTInteger },
356 : { "TIMEZONE", OFTString },
357 : { "MODDATE", OFTString }
358 : };
359 0 : for(iField = 0; iField < nFieldCount; iField++)
360 : {
361 : OGRFieldDefn oFieldDefn(asGeonamesFieldDesc[iField].pszName,
362 0 : asGeonamesFieldDesc[iField].eType);
363 0 : poFeatureDefn->AddFieldDefn(&oFieldDefn);
364 : }
365 :
366 0 : iLatitudeField = 4;
367 0 : iLongitudeField = 5;
368 :
369 0 : nFieldCount = 0;
370 : }
371 : }
372 :
373 :
374 : /* -------------------------------------------------------------------- */
375 : /* Search a csvt file for types */
376 : /* -------------------------------------------------------------------- */
377 218 : char** papszFieldTypes = NULL;
378 218 : if (!bNew) {
379 192 : char* dname = strdup(CPLGetDirname(pszFilename));
380 192 : char* fname = strdup(CPLGetBasename(pszFilename));
381 192 : VSILFILE* fpCSVT = VSIFOpenL(CPLFormFilename(dname, fname, ".csvt"), "r");
382 192 : free(dname);
383 192 : free(fname);
384 192 : if (fpCSVT!=NULL) {
385 40 : VSIRewindL(fpCSVT);
386 40 : papszFieldTypes = OGRCSVReadParseLineL(fpCSVT, ',', FALSE);
387 40 : VSIFCloseL(fpCSVT);
388 : }
389 : }
390 :
391 :
392 : /* -------------------------------------------------------------------- */
393 : /* Build field definitions. */
394 : /* -------------------------------------------------------------------- */
395 932 : for( iField = 0; iField < nFieldCount; iField++ )
396 : {
397 714 : char *pszFieldName = NULL;
398 : char szFieldNameBuffer[100];
399 :
400 714 : if( bHasFieldNames )
401 : {
402 566 : pszFieldName = papszTokens[iField];
403 :
404 : // trim white space.
405 1132 : while( *pszFieldName == ' ' )
406 0 : pszFieldName++;
407 :
408 1698 : while( pszFieldName[0] != '\0'
409 566 : && pszFieldName[strlen(pszFieldName)-1] == ' ' )
410 0 : pszFieldName[strlen(pszFieldName)-1] = '\0';
411 :
412 566 : if (*pszFieldName == '\0')
413 0 : pszFieldName = NULL;
414 : }
415 :
416 714 : if (pszFieldName == NULL)
417 : {
418 148 : pszFieldName = szFieldNameBuffer;
419 148 : sprintf( szFieldNameBuffer, "field_%d", iField+1 );
420 : }
421 :
422 714 : OGRFieldDefn oField(pszFieldName, OFTString);
423 714 : if (papszFieldTypes!=NULL && iField<CSLCount(papszFieldTypes)) {
424 :
425 204 : char* pszLeftParenthesis = strchr(papszFieldTypes[iField], '(');
426 284 : if (pszLeftParenthesis && pszLeftParenthesis != papszFieldTypes[iField] &&
427 80 : pszLeftParenthesis[1] >= '0' && pszLeftParenthesis[1] <= '9')
428 : {
429 40 : int nWidth = 0;
430 40 : int nPrecision = 0;
431 :
432 40 : char* pszDot = strchr(pszLeftParenthesis, '.');
433 40 : if (pszDot) *pszDot = 0;
434 40 : *pszLeftParenthesis = 0;
435 :
436 40 : if (pszLeftParenthesis[-1] == ' ')
437 4 : pszLeftParenthesis[-1] = 0;
438 :
439 40 : nWidth = atoi(pszLeftParenthesis+1);
440 40 : if (pszDot)
441 14 : nPrecision = atoi(pszDot+1);
442 :
443 40 : oField.SetWidth(nWidth);
444 40 : oField.SetPrecision(nPrecision);
445 : }
446 :
447 204 : if (EQUAL(papszFieldTypes[iField], "Integer"))
448 30 : oField.SetType(OFTInteger);
449 174 : else if (EQUAL(papszFieldTypes[iField], "Real"))
450 78 : oField.SetType(OFTReal);
451 96 : else if (EQUAL(papszFieldTypes[iField], "String"))
452 58 : oField.SetType(OFTString);
453 38 : else if (EQUAL(papszFieldTypes[iField], "Date"))
454 12 : oField.SetType(OFTDate);
455 26 : else if (EQUAL(papszFieldTypes[iField], "Time"))
456 12 : oField.SetType(OFTTime);
457 14 : else if (EQUAL(papszFieldTypes[iField], "DateTime"))
458 14 : oField.SetType(OFTDateTime);
459 : else
460 0 : CPLError(CE_Warning, CPLE_NotSupported, "Unknown type : %s", papszFieldTypes[iField]);
461 : }
462 :
463 714 : if( EQUAL(oField.GetNameRef(),"WKT")
464 : && oField.GetType() == OFTString
465 : && iWktGeomReadField == -1 )
466 : {
467 48 : iWktGeomReadField = iField;
468 48 : poFeatureDefn->SetGeomType( wkbUnknown );
469 : }
470 :
471 : /*http://www.faa.gov/airports/airport_safety/airportdata_5010/menu/index.cfm specific */
472 714 : if ( pszNfdcGeomField != NULL &&
473 : EQUALN(oField.GetNameRef(), pszNfdcGeomField, strlen(pszNfdcGeomField)) &&
474 : EQUAL(oField.GetNameRef() + strlen(pszNfdcGeomField), "LatitudeS") )
475 0 : iNfdcLatitudeS = iField;
476 714 : else if ( pszNfdcGeomField != NULL &&
477 : EQUALN(oField.GetNameRef(), pszNfdcGeomField, strlen(pszNfdcGeomField)) &&
478 : EQUAL(oField.GetNameRef() + strlen(pszNfdcGeomField), "LongitudeS") )
479 0 : iNfdcLongitudeS = iField;
480 :
481 : /* GNIS specific */
482 714 : else if ( pszGeonamesGeomFieldPrefix != NULL &&
483 : EQUALN(oField.GetNameRef(), pszGeonamesGeomFieldPrefix, strlen(pszGeonamesGeomFieldPrefix)) &&
484 : (EQUAL(oField.GetNameRef() + strlen(pszGeonamesGeomFieldPrefix), "_LAT_DEC") ||
485 : EQUAL(oField.GetNameRef() + strlen(pszGeonamesGeomFieldPrefix), "_LATITUDE_DEC") ||
486 : EQUAL(oField.GetNameRef() + strlen(pszGeonamesGeomFieldPrefix), "_LATITUDE")) )
487 : {
488 0 : oField.SetType(OFTReal);
489 0 : iLatitudeField = iField;
490 : }
491 714 : else if ( pszGeonamesGeomFieldPrefix != NULL &&
492 : EQUALN(oField.GetNameRef(), pszGeonamesGeomFieldPrefix, strlen(pszGeonamesGeomFieldPrefix)) &&
493 : (EQUAL(oField.GetNameRef() + strlen(pszGeonamesGeomFieldPrefix), "_LONG_DEC") ||
494 : EQUAL(oField.GetNameRef() + strlen(pszGeonamesGeomFieldPrefix), "_LONGITUDE_DEC") ||
495 : EQUAL(oField.GetNameRef() + strlen(pszGeonamesGeomFieldPrefix), "_LONGITUDE")) )
496 : {
497 0 : oField.SetType(OFTReal);
498 0 : iLongitudeField = iField;
499 : }
500 :
501 714 : poFeatureDefn->AddFieldDefn( &oField );
502 :
503 : }
504 :
505 218 : if ( iNfdcLatitudeS != -1 && iNfdcLongitudeS != -1 )
506 : {
507 0 : bDontHonourStrings = TRUE;
508 0 : poFeatureDefn->SetGeomType( wkbPoint );
509 : }
510 218 : else if ( iLatitudeField != -1 && iLongitudeField != -1 )
511 : {
512 0 : poFeatureDefn->SetGeomType( wkbPoint );
513 : }
514 :
515 218 : CSLDestroy( papszTokens );
516 218 : CSLDestroy( papszFieldTypes );
517 218 : }
518 :
519 : /************************************************************************/
520 : /* ~OGRCSVLayer() */
521 : /************************************************************************/
522 :
523 218 : OGRCSVLayer::~OGRCSVLayer()
524 :
525 : {
526 218 : if( m_nFeaturesRead > 0 && poFeatureDefn != NULL )
527 : {
528 : CPLDebug( "CSV", "%d features read on layer '%s'.",
529 : (int) m_nFeaturesRead,
530 158 : poFeatureDefn->GetName() );
531 : }
532 :
533 218 : poFeatureDefn->Release();
534 218 : CPLFree(pszFilename);
535 :
536 218 : if (fpCSV)
537 218 : VSIFCloseL( fpCSV );
538 218 : }
539 :
540 : /************************************************************************/
541 : /* ResetReading() */
542 : /************************************************************************/
543 :
544 326 : void OGRCSVLayer::ResetReading()
545 :
546 : {
547 326 : if (fpCSV)
548 326 : VSIRewindL( fpCSV );
549 :
550 326 : if( bHasFieldNames )
551 276 : CSLDestroy( OGRCSVReadParseLineL( fpCSV, chDelimiter, bDontHonourStrings ) );
552 :
553 326 : bNeedRewindBeforeRead = FALSE;
554 :
555 326 : nNextFID = 1;
556 326 : }
557 :
558 : /************************************************************************/
559 : /* GetNextUnfilteredFeature() */
560 : /************************************************************************/
561 :
562 19114 : OGRFeature * OGRCSVLayer::GetNextUnfilteredFeature()
563 :
564 : {
565 19114 : if (fpCSV == NULL)
566 0 : return NULL;
567 :
568 : /* -------------------------------------------------------------------- */
569 : /* Read the CSV record. */
570 : /* -------------------------------------------------------------------- */
571 : char **papszTokens;
572 :
573 0 : while(TRUE)
574 : {
575 19114 : papszTokens = OGRCSVReadParseLineL( fpCSV, chDelimiter, bDontHonourStrings );
576 19114 : if( papszTokens == NULL )
577 212 : return NULL;
578 :
579 18902 : if( papszTokens[0] != NULL )
580 : break;
581 :
582 0 : CSLDestroy(papszTokens);
583 : }
584 :
585 : /* -------------------------------------------------------------------- */
586 : /* Create the OGR feature. */
587 : /* -------------------------------------------------------------------- */
588 : OGRFeature *poFeature;
589 :
590 37804 : poFeature = new OGRFeature( poFeatureDefn );
591 :
592 : /* -------------------------------------------------------------------- */
593 : /* Set attributes for any indicated attribute records. */
594 : /* -------------------------------------------------------------------- */
595 : int iAttr;
596 18902 : int nAttrCount = MIN(CSLCount(papszTokens),
597 : poFeatureDefn->GetFieldCount() );
598 : CPLValueType eType;
599 :
600 76206 : for( iAttr = 0; iAttr < nAttrCount; iAttr++)
601 : {
602 57304 : if( iAttr == iWktGeomReadField && papszTokens[iAttr][0] != '\0' )
603 : {
604 68 : char *pszWKT = papszTokens[iAttr];
605 68 : OGRGeometry *poGeom = NULL;
606 :
607 68 : if( OGRGeometryFactory::createFromWkt( &pszWKT, NULL, &poGeom )
608 : == OGRERR_NONE )
609 68 : poFeature->SetGeometryDirectly( poGeom );
610 : }
611 :
612 57304 : OGRFieldType eFieldType = poFeatureDefn->GetFieldDefn(iAttr)->GetType();
613 57756 : if ( eFieldType == OFTReal || eFieldType == OFTInteger )
614 : {
615 452 : if (chDelimiter == ';' && eFieldType == OFTReal)
616 : {
617 0 : char* chComma = strchr(papszTokens[iAttr], ',');
618 0 : if (chComma)
619 0 : *chComma = '.';
620 : }
621 452 : eType = CPLGetValueType(papszTokens[iAttr]);
622 452 : if ( (papszTokens[iAttr][0] != '\0') &&
623 : ( eType == CPL_VALUE_INTEGER ||
624 : eType == CPL_VALUE_REAL ) )
625 316 : poFeature->SetField( iAttr, CPLAtof(papszTokens[iAttr]) );
626 : }
627 56852 : else if (eFieldType != OFTString)
628 : {
629 192 : if (papszTokens[iAttr][0] != '\0')
630 114 : poFeature->SetField( iAttr, papszTokens[iAttr] );
631 : }
632 : else
633 56660 : poFeature->SetField( iAttr, papszTokens[iAttr] );
634 :
635 : }
636 :
637 : /* -------------------------------------------------------------------- */
638 : /*http://www.faa.gov/airports/airport_safety/airportdata_5010/menu/index.cfm specific */
639 : /* -------------------------------------------------------------------- */
640 :
641 18902 : if ( iNfdcLatitudeS != -1 &&
642 : iNfdcLongitudeS != -1 &&
643 : nAttrCount > iNfdcLatitudeS &&
644 : nAttrCount > iNfdcLongitudeS &&
645 0 : papszTokens[iNfdcLongitudeS][0] != 0 &&
646 0 : papszTokens[iNfdcLatitudeS][0] != 0)
647 : {
648 0 : double dfLon = atof(papszTokens[iNfdcLongitudeS]) / 3600;
649 0 : if (strchr(papszTokens[iNfdcLongitudeS], 'W'))
650 0 : dfLon *= -1;
651 0 : double dfLat = atof(papszTokens[iNfdcLatitudeS]) / 3600;
652 0 : if (strchr(papszTokens[iNfdcLatitudeS], 'S'))
653 0 : dfLat *= -1;
654 0 : poFeature->SetGeometryDirectly( new OGRPoint(dfLon, dfLat) );
655 : }
656 :
657 : /* -------------------------------------------------------------------- */
658 : /* GNIS specific */
659 : /* -------------------------------------------------------------------- */
660 18902 : else if ( iLatitudeField != -1 &&
661 : iLongitudeField != -1 &&
662 : nAttrCount > iLatitudeField &&
663 : nAttrCount > iLongitudeField &&
664 0 : papszTokens[iLongitudeField][0] != 0 &&
665 0 : papszTokens[iLatitudeField][0] != 0)
666 : {
667 : /* Some records have dummy 0,0 value */
668 0 : if (papszTokens[iLongitudeField][0] != '0' ||
669 0 : papszTokens[iLongitudeField][1] != '\0' ||
670 0 : papszTokens[iLatitudeField][0] != '0' ||
671 0 : papszTokens[iLatitudeField][1] != '\0')
672 : {
673 0 : double dfLon = atof(papszTokens[iLongitudeField]);
674 0 : double dfLat = atof(papszTokens[iLatitudeField]);
675 0 : poFeature->SetGeometryDirectly( new OGRPoint(dfLon, dfLat) );
676 : }
677 : }
678 :
679 18902 : CSLDestroy( papszTokens );
680 :
681 : /* -------------------------------------------------------------------- */
682 : /* Translate the record id. */
683 : /* -------------------------------------------------------------------- */
684 18902 : poFeature->SetFID( nNextFID++ );
685 :
686 18902 : m_nFeaturesRead++;
687 :
688 18902 : return poFeature;
689 : }
690 :
691 :
692 : /************************************************************************/
693 : /* GetNextFeature() */
694 : /************************************************************************/
695 :
696 19044 : OGRFeature *OGRCSVLayer::GetNextFeature()
697 :
698 : {
699 19044 : OGRFeature *poFeature = NULL;
700 :
701 19044 : if( bNeedRewindBeforeRead )
702 6 : ResetReading();
703 :
704 : /* -------------------------------------------------------------------- */
705 : /* Read features till we find one that satisfies our current */
706 : /* spatial criteria. */
707 : /* -------------------------------------------------------------------- */
708 70 : while( TRUE )
709 : {
710 19114 : poFeature = GetNextUnfilteredFeature();
711 19114 : if( poFeature == NULL )
712 212 : break;
713 :
714 18902 : if( (m_poFilterGeom == NULL
715 : || FilterGeometry( poFeature->GetGeometryRef() ) )
716 : && (m_poAttrQuery == NULL
717 : || m_poAttrQuery->Evaluate( poFeature )) )
718 18832 : break;
719 :
720 70 : delete poFeature;
721 : }
722 :
723 19044 : return poFeature;
724 : }
725 :
726 : /************************************************************************/
727 : /* TestCapability() */
728 : /************************************************************************/
729 :
730 18 : int OGRCSVLayer::TestCapability( const char * pszCap )
731 :
732 : {
733 18 : if( EQUAL(pszCap,OLCSequentialWrite) )
734 4 : return bInWriteMode;
735 14 : else if( EQUAL(pszCap,OLCCreateField) )
736 0 : return bNew && !bHasFieldNames;
737 : else
738 14 : return FALSE;
739 : }
740 :
741 : /************************************************************************/
742 : /* CreateField() */
743 : /************************************************************************/
744 :
745 120 : OGRErr OGRCSVLayer::CreateField( OGRFieldDefn *poNewField, int bApproxOK )
746 :
747 : {
748 : /* -------------------------------------------------------------------- */
749 : /* If we have already written our field names, then we are not */
750 : /* allowed to add new fields. */
751 : /* -------------------------------------------------------------------- */
752 120 : if( bHasFieldNames || !bNew )
753 : {
754 : CPLError( CE_Failure, CPLE_AppDefined,
755 0 : "Unable to create new fields after first feature written.");
756 0 : return OGRERR_FAILURE;
757 : }
758 :
759 : /* -------------------------------------------------------------------- */
760 : /* Does this duplicate an existing field? */
761 : /* -------------------------------------------------------------------- */
762 120 : if( poFeatureDefn->GetFieldIndex( poNewField->GetNameRef() ) != -1 )
763 : {
764 : CPLError( CE_Failure, CPLE_AppDefined,
765 : "Attempt to create field %s, but a field with this name already exists.",
766 0 : poNewField->GetNameRef() );
767 :
768 0 : return OGRERR_FAILURE;
769 : }
770 :
771 : /* -------------------------------------------------------------------- */
772 : /* Is this a legal field type for CSV? For now we only allow */
773 : /* simple integer, real and string fields. */
774 : /* -------------------------------------------------------------------- */
775 120 : switch( poNewField->GetType() )
776 : {
777 : case OFTInteger:
778 : case OFTReal:
779 : case OFTString:
780 : // these types are OK.
781 114 : break;
782 :
783 : default:
784 6 : if( bApproxOK )
785 : {
786 : CPLError( CE_Warning, CPLE_AppDefined,
787 : "Attempt to create field of type %s, but this is not supported\n"
788 : "for .csv files. Just treating as a plain string.",
789 6 : poNewField->GetFieldTypeName( poNewField->GetType() ) );
790 : }
791 : else
792 : {
793 : CPLError( CE_Failure, CPLE_AppDefined,
794 : "Attempt to create field of type %s, but this is not supported\n"
795 : "for .csv files.",
796 0 : poNewField->GetFieldTypeName( poNewField->GetType() ) );
797 0 : return OGRERR_FAILURE;
798 : }
799 : }
800 :
801 : /* -------------------------------------------------------------------- */
802 : /* Seems ok, add to field list. */
803 : /* -------------------------------------------------------------------- */
804 120 : poFeatureDefn->AddFieldDefn( poNewField );
805 :
806 120 : return OGRERR_NONE;
807 : }
808 :
809 : /************************************************************************/
810 : /* CreateFeature() */
811 : /************************************************************************/
812 :
813 74 : OGRErr OGRCSVLayer::CreateFeature( OGRFeature *poNewFeature )
814 :
815 : {
816 : int iField;
817 :
818 74 : if( !bInWriteMode )
819 : {
820 : CPLError( CE_Failure, CPLE_AppDefined,
821 0 : "The CreateFeature() operation is not permitted on a read-only CSV." );
822 0 : return OGRERR_FAILURE;
823 : }
824 :
825 : /* If we need rewind, it means that we have just written a feature before */
826 : /* so there's no point seeking to the end of the file, as we're already */
827 : /* at the end */
828 74 : int bNeedSeekEnd = !bNeedRewindBeforeRead;
829 :
830 74 : bNeedRewindBeforeRead = TRUE;
831 :
832 : /* -------------------------------------------------------------------- */
833 : /* Write field names if we haven't written them yet. */
834 : /* Write .csvt file if needed */
835 : /* -------------------------------------------------------------------- */
836 74 : if( !bHasFieldNames )
837 : {
838 26 : bHasFieldNames = TRUE;
839 26 : bNeedSeekEnd = FALSE;
840 :
841 60 : for(int iFile=0;iFile<((bCreateCSVT) ? 2 : 1);iFile++)
842 : {
843 34 : VSILFILE* fpCSVT = NULL;
844 42 : if (bCreateCSVT && iFile == 0)
845 : {
846 8 : char* pszDirName = CPLStrdup(CPLGetDirname(pszFilename));
847 8 : char* pszBaseName = CPLStrdup(CPLGetBasename(pszFilename));
848 8 : fpCSVT = VSIFOpenL(CPLFormFilename(pszDirName, pszBaseName, ".csvt"), "wb");
849 8 : CPLFree(pszDirName);
850 8 : CPLFree(pszBaseName);
851 : }
852 : else
853 : {
854 28 : if( strncmp(pszFilename, "/vsistdout/", 11) == 0 ||
855 : strncmp(pszFilename, "/vsizip/", 8) == 0 )
856 2 : fpCSV = VSIFOpenL( pszFilename, "wb" );
857 : else
858 24 : fpCSV = VSIFOpenL( pszFilename, "w+b" );
859 :
860 26 : if( fpCSV == NULL )
861 : {
862 : CPLError( CE_Failure, CPLE_OpenFailed,
863 : "Failed to create %s:\n%s",
864 0 : pszFilename, VSIStrerror( errno ) );
865 0 : return OGRERR_FAILURE;
866 : }
867 : }
868 :
869 34 : if (eGeometryFormat == OGR_CSV_GEOM_AS_WKT)
870 : {
871 4 : if (fpCSV) VSIFPrintfL( fpCSV, "%s", "WKT");
872 4 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", "String");
873 4 : if (poFeatureDefn->GetFieldCount() > 0)
874 : {
875 4 : if (fpCSV) VSIFPrintfL( fpCSV, "%c", chDelimiter );
876 4 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", ",");
877 : }
878 : }
879 30 : else if (eGeometryFormat == OGR_CSV_GEOM_AS_XYZ)
880 : {
881 4 : if (fpCSV) VSIFPrintfL( fpCSV, "X%cY%cZ", chDelimiter, chDelimiter);
882 4 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", "Real,Real,Real");
883 4 : if (poFeatureDefn->GetFieldCount() > 0)
884 : {
885 4 : if (fpCSV) VSIFPrintfL( fpCSV, "%c", chDelimiter );
886 4 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", ",");
887 : }
888 : }
889 26 : else if (eGeometryFormat == OGR_CSV_GEOM_AS_XY)
890 : {
891 4 : if (fpCSV) VSIFPrintfL( fpCSV, "X%cY", chDelimiter);
892 4 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", "Real,Real");
893 4 : if (poFeatureDefn->GetFieldCount() > 0)
894 : {
895 4 : if (fpCSV) VSIFPrintfL( fpCSV, "%c", chDelimiter );
896 4 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", ",");
897 : }
898 : }
899 22 : else if (eGeometryFormat == OGR_CSV_GEOM_AS_YX)
900 : {
901 4 : if (fpCSV) VSIFPrintfL( fpCSV, "Y%cX", chDelimiter);
902 4 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", "Real,Real");
903 4 : if (poFeatureDefn->GetFieldCount() > 0)
904 : {
905 4 : if (fpCSV) VSIFPrintfL( fpCSV, "%c", chDelimiter );
906 4 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", ",");
907 : }
908 : }
909 :
910 178 : for( iField = 0; iField < poFeatureDefn->GetFieldCount(); iField++ )
911 : {
912 : char *pszEscaped;
913 :
914 144 : if( iField > 0 )
915 : {
916 110 : if (fpCSV) VSIFPrintfL( fpCSV, "%c", chDelimiter );
917 110 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", ",");
918 : }
919 :
920 : pszEscaped =
921 : CPLEscapeString( poFeatureDefn->GetFieldDefn(iField)->GetNameRef(),
922 144 : -1, CPLES_CSV );
923 :
924 144 : if (fpCSV) VSIFPrintfL( fpCSV, "%s", pszEscaped );
925 144 : CPLFree( pszEscaped );
926 :
927 144 : if (fpCSVT)
928 : {
929 24 : switch( poFeatureDefn->GetFieldDefn(iField)->GetType() )
930 : {
931 4 : case OFTInteger: VSIFPrintfL( fpCSVT, "%s", "Integer"); break;
932 4 : case OFTReal: VSIFPrintfL( fpCSVT, "%s", "Real"); break;
933 2 : case OFTDate: VSIFPrintfL( fpCSVT, "%s", "Date"); break;
934 2 : case OFTTime: VSIFPrintfL( fpCSVT, "%s", "Time"); break;
935 2 : case OFTDateTime: VSIFPrintfL( fpCSVT, "%s", "DateTime"); break;
936 10 : default: VSIFPrintfL( fpCSVT, "%s", "String"); break;
937 : }
938 :
939 24 : int nWidth = poFeatureDefn->GetFieldDefn(iField)->GetWidth();
940 24 : int nPrecision = poFeatureDefn->GetFieldDefn(iField)->GetPrecision();
941 24 : if (nWidth != 0)
942 : {
943 6 : if (nPrecision != 0)
944 2 : VSIFPrintfL( fpCSVT, "(%d.%d)", nWidth, nPrecision);
945 : else
946 4 : VSIFPrintfL( fpCSVT, "(%d)", nWidth);
947 : }
948 : }
949 : }
950 34 : if( bUseCRLF )
951 : {
952 2 : if (fpCSV) VSIFPutcL( 13, fpCSV );
953 2 : if (fpCSVT) VSIFPutcL( 13, fpCSVT );
954 : }
955 34 : if (fpCSV) VSIFPutcL( '\n', fpCSV );
956 34 : if (fpCSVT) VSIFPutcL( '\n', fpCSVT );
957 34 : if (fpCSVT) VSIFCloseL(fpCSVT);
958 : }
959 : }
960 :
961 74 : if (fpCSV == NULL)
962 0 : return OGRERR_FAILURE;
963 :
964 : /* -------------------------------------------------------------------- */
965 : /* Make sure we are at the end of the file. */
966 : /* -------------------------------------------------------------------- */
967 74 : if (bNeedSeekEnd)
968 : {
969 8 : if (bFirstFeatureAppendedDuringSession)
970 : {
971 : /* Add a newline character to the end of the file if necessary */
972 8 : bFirstFeatureAppendedDuringSession = FALSE;
973 8 : VSIFSeekL( fpCSV, 0, SEEK_END );
974 8 : VSIFSeekL( fpCSV, VSIFTellL(fpCSV) - 1, SEEK_SET);
975 : char chLast;
976 8 : VSIFReadL( &chLast, 1, 1, fpCSV );
977 8 : VSIFSeekL( fpCSV, 0, SEEK_END );
978 8 : if (chLast != '\n')
979 : {
980 0 : if( bUseCRLF )
981 0 : VSIFPutcL( 13, fpCSV );
982 0 : VSIFPutcL( '\n', fpCSV );
983 : }
984 : }
985 : else
986 : {
987 0 : VSIFSeekL( fpCSV, 0, SEEK_END );
988 : }
989 : }
990 :
991 : /* -------------------------------------------------------------------- */
992 : /* Write out the geometry */
993 : /* -------------------------------------------------------------------- */
994 74 : if (eGeometryFormat == OGR_CSV_GEOM_AS_WKT)
995 : {
996 4 : OGRGeometry *poGeom = poNewFeature->GetGeometryRef();
997 4 : char* pszWKT = NULL;
998 4 : if (poGeom && poGeom->exportToWkt(&pszWKT) == OGRERR_NONE)
999 : {
1000 4 : VSIFPrintfL( fpCSV, "\"%s\"", pszWKT);
1001 : }
1002 : else
1003 : {
1004 0 : VSIFPrintfL( fpCSV, "\"\"");
1005 : }
1006 4 : CPLFree(pszWKT);
1007 4 : if (poFeatureDefn->GetFieldCount() > 0)
1008 4 : VSIFPrintfL( fpCSV, "%c", chDelimiter);
1009 : }
1010 70 : else if (eGeometryFormat == OGR_CSV_GEOM_AS_XYZ ||
1011 : eGeometryFormat == OGR_CSV_GEOM_AS_XY ||
1012 : eGeometryFormat == OGR_CSV_GEOM_AS_YX)
1013 : {
1014 8 : OGRGeometry *poGeom = poNewFeature->GetGeometryRef();
1015 8 : if (poGeom && wkbFlatten(poGeom->getGeometryType()) == wkbPoint)
1016 : {
1017 6 : OGRPoint* poPoint = (OGRPoint*) poGeom;
1018 : char szBuffer[75];
1019 6 : if (eGeometryFormat == OGR_CSV_GEOM_AS_XYZ )
1020 2 : OGRMakeWktCoordinate(szBuffer, poPoint->getX(), poPoint->getY(), poPoint->getZ(), 3);
1021 4 : else if (eGeometryFormat == OGR_CSV_GEOM_AS_XY )
1022 2 : OGRMakeWktCoordinate(szBuffer, poPoint->getX(), poPoint->getY(), 0, 2);
1023 : else
1024 2 : OGRMakeWktCoordinate(szBuffer, poPoint->getY(), poPoint->getX(), 0, 2);
1025 6 : char* pc = szBuffer;
1026 34 : while(*pc != '\0')
1027 : {
1028 22 : if (*pc == ' ')
1029 8 : *pc = chDelimiter;
1030 22 : pc ++;
1031 : }
1032 6 : VSIFPrintfL( fpCSV, "%s", szBuffer );
1033 : }
1034 : else
1035 : {
1036 2 : VSIFPrintfL( fpCSV, "%c", chDelimiter );
1037 2 : if (eGeometryFormat == OGR_CSV_GEOM_AS_XYZ)
1038 0 : VSIFPrintfL( fpCSV, "%c", chDelimiter );
1039 : }
1040 8 : if (poFeatureDefn->GetFieldCount() > 0)
1041 8 : VSIFPrintfL( fpCSV, "%c", chDelimiter );
1042 : }
1043 :
1044 : /* -------------------------------------------------------------------- */
1045 : /* Write out all the field values. */
1046 : /* -------------------------------------------------------------------- */
1047 850 : for( iField = 0; iField < poFeatureDefn->GetFieldCount(); iField++ )
1048 : {
1049 : char *pszEscaped;
1050 :
1051 776 : if( iField > 0 )
1052 702 : VSIFPrintfL( fpCSV, "%c", chDelimiter );
1053 :
1054 : pszEscaped =
1055 : CPLEscapeString( poNewFeature->GetFieldAsString(iField),
1056 776 : -1, CPLES_CSV );
1057 :
1058 776 : if (poFeatureDefn->GetFieldDefn(iField)->GetType() == OFTReal)
1059 : {
1060 : /* Use point as decimal separator */
1061 8 : char* pszComma = strchr(pszEscaped, ',');
1062 8 : if (pszComma)
1063 0 : *pszComma = '.';
1064 : }
1065 :
1066 776 : VSIFWriteL( pszEscaped, 1, strlen(pszEscaped), fpCSV );
1067 776 : CPLFree( pszEscaped );
1068 : }
1069 :
1070 74 : if( bUseCRLF )
1071 10 : VSIFPutcL( 13, fpCSV );
1072 74 : VSIFPutcL( '\n', fpCSV );
1073 :
1074 74 : return OGRERR_NONE;
1075 : }
1076 :
1077 : /************************************************************************/
1078 : /* SetCRLF() */
1079 : /************************************************************************/
1080 :
1081 26 : void OGRCSVLayer::SetCRLF( int bNewValue )
1082 :
1083 : {
1084 26 : bUseCRLF = bNewValue;
1085 26 : }
1086 :
1087 : /************************************************************************/
1088 : /* SetWriteGeometry() */
1089 : /************************************************************************/
1090 :
1091 10 : void OGRCSVLayer::SetWriteGeometry(OGRCSVGeometryFormat eGeometryFormat)
1092 : {
1093 10 : this->eGeometryFormat = eGeometryFormat;
1094 10 : }
1095 :
1096 : /************************************************************************/
1097 : /* SetCreateCSVT() */
1098 : /************************************************************************/
1099 :
1100 8 : void OGRCSVLayer::SetCreateCSVT(int bCreateCSVT)
1101 : {
1102 8 : this->bCreateCSVT = bCreateCSVT;
1103 8 : }
1104 :
1105 : /************************************************************************/
1106 : /* GetFeatureCount() */
1107 : /************************************************************************/
1108 :
1109 10 : int OGRCSVLayer::GetFeatureCount( int bForce )
1110 : {
1111 10 : if (bInWriteMode || m_poFilterGeom != NULL || m_poAttrQuery != NULL)
1112 6 : return OGRLayer::GetFeatureCount(bForce);
1113 :
1114 4 : if (nTotalFeatures >= 0)
1115 0 : return nTotalFeatures;
1116 :
1117 4 : if (fpCSV == NULL)
1118 0 : return 0;
1119 :
1120 4 : ResetReading();
1121 :
1122 : char **papszTokens;
1123 4 : nTotalFeatures = 0;
1124 12 : while(TRUE)
1125 : {
1126 16 : papszTokens = OGRCSVReadParseLineL( fpCSV, chDelimiter, bDontHonourStrings );
1127 16 : if( papszTokens == NULL )
1128 : break;
1129 :
1130 12 : if( papszTokens[0] != NULL )
1131 12 : nTotalFeatures ++;
1132 :
1133 12 : CSLDestroy(papszTokens);
1134 : }
1135 :
1136 4 : ResetReading();
1137 :
1138 4 : return nTotalFeatures;
1139 : }
|