1 : /******************************************************************************
2 : * $Id: ogrcsvlayer.cpp 23503 2011-12-09 20:40:35Z rouault $
3 : *
4 : * Project: CSV Translator
5 : * Purpose: Implements OGRCSVLayer class.
6 : * Author: Frank Warmerdam <warmerdam@pobox.com>
7 : *
8 : ******************************************************************************
9 : * Copyright (c) 2004, Frank Warmerdam <warmerdam@pobox.com>
10 : *
11 : * Permission is hereby granted, free of charge, to any person obtaining a
12 : * copy of this software and associated documentation files (the "Software"),
13 : * to deal in the Software without restriction, including without limitation
14 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 : * and/or sell copies of the Software, and to permit persons to whom the
16 : * Software is furnished to do so, subject to the following conditions:
17 : *
18 : * The above copyright notice and this permission notice shall be included
19 : * in all copies or substantial portions of the Software.
20 : *
21 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 : * DEALINGS IN THE SOFTWARE.
28 : ****************************************************************************/
29 :
30 : #include "ogr_csv.h"
31 : #include "cpl_conv.h"
32 : #include "cpl_string.h"
33 : #include "cpl_csv.h"
34 : #include "ogr_p.h"
35 :
36 : CPL_CVSID("$Id: ogrcsvlayer.cpp 23503 2011-12-09 20:40:35Z rouault $");
37 :
38 :
39 :
40 : /************************************************************************/
41 : /* CSVSplitLine() */
42 : /* */
43 : /* Tokenize a CSV line into fields in the form of a string */
44 : /* list. This is used instead of the CPLTokenizeString() */
45 : /* because it provides correct CSV escaping and quoting */
46 : /* semantics. */
47 : /************************************************************************/
48 :
49 9697 : static char **CSVSplitLine( const char *pszString, char chDelimiter )
50 :
51 : {
52 9697 : char **papszRetList = NULL;
53 : char *pszToken;
54 : int nTokenMax, nTokenLen;
55 :
56 9697 : pszToken = (char *) CPLCalloc(10,1);
57 9697 : nTokenMax = 10;
58 :
59 48959 : while( pszString != NULL && *pszString != '\0' )
60 : {
61 29565 : int bInString = FALSE;
62 :
63 29565 : nTokenLen = 0;
64 :
65 : /* Try to find the next delimeter, marking end of token */
66 267122 : for( ; *pszString != '\0'; pszString++ )
67 : {
68 :
69 : /* End if this is a delimeter skip it and break. */
70 257499 : if( !bInString && *pszString == chDelimiter )
71 : {
72 19942 : pszString++;
73 19942 : break;
74 : }
75 :
76 237557 : if( *pszString == '"' )
77 : {
78 944 : if( !bInString || pszString[1] != '"' )
79 : {
80 908 : bInString = !bInString;
81 908 : continue;
82 : }
83 : else /* doubled quotes in string resolve to one quote */
84 : {
85 36 : pszString++;
86 : }
87 : }
88 :
89 236649 : if( nTokenLen >= nTokenMax-2 )
90 : {
91 9590 : nTokenMax = nTokenMax * 2 + 10;
92 9590 : pszToken = (char *) CPLRealloc( pszToken, nTokenMax );
93 : }
94 :
95 236649 : pszToken[nTokenLen] = *pszString;
96 236649 : nTokenLen++;
97 : }
98 :
99 29565 : pszToken[nTokenLen] = '\0';
100 29565 : papszRetList = CSLAddString( papszRetList, pszToken );
101 :
102 : /* If the last token is an empty token, then we have to catch
103 : * it now, otherwise we won't reenter the loop and it will be lost.
104 : */
105 29565 : if ( *pszString == '\0' && *(pszString-1) == chDelimiter )
106 : {
107 74 : papszRetList = CSLAddString( papszRetList, "" );
108 : }
109 : }
110 :
111 9697 : if( papszRetList == NULL )
112 0 : papszRetList = (char **) CPLCalloc(sizeof(char *),1);
113 :
114 9697 : CPLFree( pszToken );
115 :
116 9697 : return papszRetList;
117 : }
118 :
119 : /************************************************************************/
120 : /* OGRCSVReadParseLineL() */
121 : /* */
122 : /* Read one line, and return split into fields. The return */
123 : /* result is a stringlist, in the sense of the CSL functions. */
124 : /************************************************************************/
125 :
126 9804 : char **OGRCSVReadParseLineL( VSILFILE * fp, char chDelimiter, int bDontHonourStrings )
127 :
128 : {
129 : const char *pszLine;
130 : char *pszWorkLine;
131 : char **papszReturn;
132 :
133 9804 : pszLine = CPLReadLineL( fp );
134 9804 : if( pszLine == NULL )
135 107 : return( NULL );
136 :
137 : /* Skip BOM */
138 9697 : GByte* pabyData = (GByte*) pszLine;
139 9697 : if (pabyData[0] == 0xEF && pabyData[1] == 0xBB && pabyData[2] == 0xBF)
140 0 : pszLine += 3;
141 :
142 : /* Special fix to read NdfcFacilities.xls that has non-balanced double quotes */
143 9697 : if (chDelimiter == '\t' && bDontHonourStrings)
144 : {
145 0 : return CSLTokenizeStringComplex(pszLine, "\t", FALSE, TRUE);
146 : }
147 :
148 : /* -------------------------------------------------------------------- */
149 : /* If there are no quotes, then this is the simple case. */
150 : /* Parse, and return tokens. */
151 : /* -------------------------------------------------------------------- */
152 9697 : if( strchr(pszLine,'\"') == NULL )
153 9518 : return CSVSplitLine( pszLine, chDelimiter );
154 :
155 : /* -------------------------------------------------------------------- */
156 : /* We must now count the quotes in our working string, and as */
157 : /* long as it is odd, keep adding new lines. */
158 : /* -------------------------------------------------------------------- */
159 179 : pszWorkLine = CPLStrdup( pszLine );
160 :
161 179 : int i = 0, nCount = 0;
162 179 : int nWorkLineLength = strlen(pszWorkLine);
163 :
164 2 : while( TRUE )
165 : {
166 11917 : for( ; pszWorkLine[i] != '\0'; i++ )
167 : {
168 12657 : if( pszWorkLine[i] == '\"'
169 921 : && (i == 0 || pszWorkLine[i-1] != '\\') )
170 980 : nCount++;
171 : }
172 :
173 181 : if( nCount % 2 == 0 )
174 179 : break;
175 :
176 2 : pszLine = CPLReadLineL( fp );
177 2 : if( pszLine == NULL )
178 0 : break;
179 :
180 2 : int nLineLen = strlen(pszLine);
181 :
182 : char* pszWorkLineTmp = (char *)
183 : VSIRealloc(pszWorkLine,
184 2 : nWorkLineLength + nLineLen + 2);
185 2 : if (pszWorkLineTmp == NULL)
186 0 : break;
187 2 : pszWorkLine = pszWorkLineTmp;
188 2 : strcat( pszWorkLine + nWorkLineLength, "\n" ); // This gets lost in CPLReadLine().
189 2 : strcat( pszWorkLine + nWorkLineLength, pszLine );
190 :
191 2 : nWorkLineLength += nLineLen + 1;
192 : }
193 :
194 179 : papszReturn = CSVSplitLine( pszWorkLine, chDelimiter );
195 :
196 179 : CPLFree( pszWorkLine );
197 :
198 179 : return papszReturn;
199 : }
200 :
201 : /************************************************************************/
202 : /* OGRCSVLayer() */
203 : /* */
204 : /* Note that the OGRCSVLayer assumes ownership of the passed */
205 : /* file pointer. */
206 : /************************************************************************/
207 :
208 102 : OGRCSVLayer::OGRCSVLayer( const char *pszLayerNameIn,
209 : VSILFILE * fp, const char *pszFilename, int bNew, int bInWriteMode,
210 : char chDelimiter, const char* pszNfdcGeomField,
211 102 : const char* pszGeonamesGeomFieldPrefix)
212 :
213 : {
214 102 : fpCSV = fp;
215 :
216 102 : iWktGeomReadField = -1;
217 102 : iNfdcLatitudeS = iNfdcLongitudeS = -1;
218 102 : iLatitudeField = iLongitudeField = -1;
219 102 : this->bInWriteMode = bInWriteMode;
220 102 : this->bNew = bNew;
221 102 : this->pszFilename = CPLStrdup(pszFilename);
222 102 : this->chDelimiter = chDelimiter;
223 :
224 102 : bFirstFeatureAppendedDuringSession = TRUE;
225 102 : bUseCRLF = FALSE;
226 102 : bNeedRewindBeforeRead = FALSE;
227 102 : eGeometryFormat = OGR_CSV_GEOM_NONE;
228 :
229 102 : nNextFID = 1;
230 :
231 102 : poFeatureDefn = new OGRFeatureDefn( pszLayerNameIn );
232 102 : poFeatureDefn->Reference();
233 102 : poFeatureDefn->SetGeomType( wkbNone );
234 :
235 102 : bCreateCSVT = FALSE;
236 102 : bDontHonourStrings = FALSE;
237 :
238 102 : nTotalFeatures = -1;
239 :
240 : /* -------------------------------------------------------------------- */
241 : /* If this is not a new file, read ahead to establish if it is */
242 : /* already in CRLF (DOS) mode, or just a normal unix CR mode. */
243 : /* -------------------------------------------------------------------- */
244 102 : if( !bNew && bInWriteMode )
245 : {
246 14 : int nBytesRead = 0;
247 : char chNewByte;
248 :
249 736 : while( nBytesRead < 10000 && VSIFReadL( &chNewByte, 1, 1, fpCSV ) == 1 )
250 : {
251 712 : if( chNewByte == 13 )
252 : {
253 4 : bUseCRLF = TRUE;
254 4 : break;
255 : }
256 708 : nBytesRead ++;
257 : }
258 14 : VSIRewindL( fpCSV );
259 : }
260 :
261 : /* -------------------------------------------------------------------- */
262 : /* Check if the first record seems to be field definitions or */
263 : /* not. We assume it is field definitions if none of the */
264 : /* values are strictly numeric. */
265 : /* -------------------------------------------------------------------- */
266 102 : char **papszTokens = NULL;
267 102 : int nFieldCount=0, iField;
268 : CPLValueType eType;
269 :
270 102 : if( !bNew )
271 : {
272 93 : const char *pszLine = NULL;
273 : char szDelimiter[2];
274 93 : szDelimiter[0] = chDelimiter; szDelimiter[1] = '\0';
275 :
276 93 : pszLine = CPLReadLineL( fpCSV );
277 93 : if ( pszLine != NULL )
278 : {
279 : /* tokenize the strings and preserve quotes, so we can separate string from numeric */
280 : /* this is only used in the test for bHasFeldNames (bug #4361) */
281 : papszTokens = CSLTokenizeString2( pszLine, szDelimiter,
282 93 : CSLT_HONOURSTRINGS | CSLT_PRESERVEQUOTES );
283 93 : nFieldCount = CSLCount( papszTokens );
284 93 : bHasFieldNames = TRUE;
285 :
286 393 : for( iField = 0; iField < nFieldCount && bHasFieldNames; iField++ )
287 : {
288 300 : eType = CPLGetValueType(papszTokens[iField]);
289 300 : if ( (eType == CPL_VALUE_INTEGER ||
290 : eType == CPL_VALUE_REAL) ) {
291 : /* we have a numeric field, therefore do not consider the first line as field names */
292 25 : bHasFieldNames = FALSE;
293 : }
294 : }
295 :
296 : /* tokenize without quotes to get the actual values */
297 93 : CSLDestroy( papszTokens );
298 : // papszTokens = OGRCSVReadParseLineL( fpCSV, chDelimiter, FALSE );
299 : papszTokens = CSLTokenizeString2( pszLine, szDelimiter,
300 93 : CSLT_HONOURSTRINGS);
301 93 : nFieldCount = CSLCount( papszTokens );
302 : }
303 : }
304 : else
305 9 : bHasFieldNames = FALSE;
306 :
307 102 : if( !bNew && !bHasFieldNames )
308 25 : VSIRewindL( fpCSV );
309 :
310 : /* -------------------------------------------------------------------- */
311 : /* Check for geonames.org tables */
312 : /* -------------------------------------------------------------------- */
313 102 : if( !bHasFieldNames && nFieldCount == 19 )
314 : {
315 0 : if (CPLGetValueType(papszTokens[0]) == CPL_VALUE_INTEGER &&
316 0 : CPLGetValueType(papszTokens[4]) == CPL_VALUE_REAL &&
317 0 : CPLGetValueType(papszTokens[5]) == CPL_VALUE_REAL &&
318 0 : CPLAtof(papszTokens[4]) >= -90 && CPLAtof(papszTokens[4]) <= 90 &&
319 0 : CPLAtof(papszTokens[5]) >= -180 && CPLAtof(papszTokens[4]) <= 180)
320 : {
321 0 : bHasFieldNames = TRUE;
322 0 : CSLDestroy(papszTokens);
323 0 : papszTokens = NULL;
324 :
325 : static const struct {
326 : const char* pszName;
327 : OGRFieldType eType;
328 : }
329 : asGeonamesFieldDesc[] =
330 : {
331 : { "GEONAMEID", OFTString },
332 : { "NAME", OFTString },
333 : { "ASCIINAME", OFTString },
334 : { "ALTNAMES", OFTString },
335 : { "LATITUDE", OFTReal },
336 : { "LONGITUDE", OFTReal },
337 : { "FEATCLASS", OFTString },
338 : { "FEATCODE", OFTString },
339 : { "COUNTRY", OFTString },
340 : { "CC2", OFTString },
341 : { "ADMIN1", OFTString },
342 : { "ADMIN2", OFTString },
343 : { "ADMIN3", OFTString },
344 : { "ADMIN4", OFTString },
345 : { "POPULATION", OFTReal },
346 : { "ELEVATION", OFTInteger },
347 : { "GTOPO30", OFTInteger },
348 : { "TIMEZONE", OFTString },
349 : { "MODDATE", OFTString }
350 : };
351 0 : for(iField = 0; iField < nFieldCount; iField++)
352 : {
353 : OGRFieldDefn oFieldDefn(asGeonamesFieldDesc[iField].pszName,
354 0 : asGeonamesFieldDesc[iField].eType);
355 0 : poFeatureDefn->AddFieldDefn(&oFieldDefn);
356 : }
357 :
358 0 : iLatitudeField = 4;
359 0 : iLongitudeField = 5;
360 :
361 0 : nFieldCount = 0;
362 : }
363 : }
364 :
365 :
366 : /* -------------------------------------------------------------------- */
367 : /* Search a csvt file for types */
368 : /* -------------------------------------------------------------------- */
369 102 : char** papszFieldTypes = NULL;
370 102 : if (!bNew) {
371 93 : char* dname = strdup(CPLGetDirname(pszFilename));
372 93 : char* fname = strdup(CPLGetBasename(pszFilename));
373 93 : VSILFILE* fpCSVT = VSIFOpenL(CPLFormFilename(dname, fname, ".csvt"), "r");
374 93 : free(dname);
375 93 : free(fname);
376 93 : if (fpCSVT!=NULL) {
377 20 : VSIRewindL(fpCSVT);
378 20 : papszFieldTypes = OGRCSVReadParseLineL(fpCSVT, ',', FALSE);
379 20 : VSIFCloseL(fpCSVT);
380 : }
381 : }
382 :
383 :
384 : /* -------------------------------------------------------------------- */
385 : /* Build field definitions. */
386 : /* -------------------------------------------------------------------- */
387 450 : for( iField = 0; iField < nFieldCount; iField++ )
388 : {
389 348 : char *pszFieldName = NULL;
390 : char szFieldNameBuffer[100];
391 :
392 348 : if( bHasFieldNames )
393 : {
394 274 : pszFieldName = papszTokens[iField];
395 :
396 : // trim white space.
397 548 : while( *pszFieldName == ' ' )
398 0 : pszFieldName++;
399 :
400 822 : while( pszFieldName[0] != '\0'
401 274 : && pszFieldName[strlen(pszFieldName)-1] == ' ' )
402 0 : pszFieldName[strlen(pszFieldName)-1] = '\0';
403 :
404 274 : if (*pszFieldName == '\0')
405 0 : pszFieldName = NULL;
406 : }
407 :
408 348 : if (pszFieldName == NULL)
409 : {
410 74 : pszFieldName = szFieldNameBuffer;
411 74 : sprintf( szFieldNameBuffer, "field_%d", iField+1 );
412 : }
413 :
414 348 : OGRFieldDefn oField(pszFieldName, OFTString);
415 348 : if (papszFieldTypes!=NULL && iField<CSLCount(papszFieldTypes)) {
416 :
417 102 : char* pszLeftParenthesis = strchr(papszFieldTypes[iField], '(');
418 142 : if (pszLeftParenthesis && pszLeftParenthesis != papszFieldTypes[iField] &&
419 40 : pszLeftParenthesis[1] >= '0' && pszLeftParenthesis[1] <= '9')
420 : {
421 20 : int nWidth = 0;
422 20 : int nPrecision = 0;
423 :
424 20 : char* pszDot = strchr(pszLeftParenthesis, '.');
425 20 : if (pszDot) *pszDot = 0;
426 20 : *pszLeftParenthesis = 0;
427 :
428 20 : if (pszLeftParenthesis[-1] == ' ')
429 2 : pszLeftParenthesis[-1] = 0;
430 :
431 20 : nWidth = atoi(pszLeftParenthesis+1);
432 20 : if (pszDot)
433 7 : nPrecision = atoi(pszDot+1);
434 :
435 20 : oField.SetWidth(nWidth);
436 20 : oField.SetPrecision(nPrecision);
437 : }
438 :
439 102 : if (EQUAL(papszFieldTypes[iField], "Integer"))
440 15 : oField.SetType(OFTInteger);
441 87 : else if (EQUAL(papszFieldTypes[iField], "Real"))
442 39 : oField.SetType(OFTReal);
443 48 : else if (EQUAL(papszFieldTypes[iField], "String"))
444 29 : oField.SetType(OFTString);
445 19 : else if (EQUAL(papszFieldTypes[iField], "Date"))
446 6 : oField.SetType(OFTDate);
447 13 : else if (EQUAL(papszFieldTypes[iField], "Time"))
448 6 : oField.SetType(OFTTime);
449 7 : else if (EQUAL(papszFieldTypes[iField], "DateTime"))
450 7 : oField.SetType(OFTDateTime);
451 : else
452 0 : CPLError(CE_Warning, CPLE_NotSupported, "Unknown type : %s", papszFieldTypes[iField]);
453 : }
454 :
455 348 : if( EQUAL(oField.GetNameRef(),"WKT")
456 : && oField.GetType() == OFTString
457 : && iWktGeomReadField == -1 )
458 : {
459 22 : iWktGeomReadField = iField;
460 22 : poFeatureDefn->SetGeomType( wkbUnknown );
461 : }
462 :
463 : /*http://www.faa.gov/airports/airport_safety/airportdata_5010/menu/index.cfm specific */
464 348 : if ( pszNfdcGeomField != NULL &&
465 : EQUALN(oField.GetNameRef(), pszNfdcGeomField, strlen(pszNfdcGeomField)) &&
466 : EQUAL(oField.GetNameRef() + strlen(pszNfdcGeomField), "LatitudeS") )
467 0 : iNfdcLatitudeS = iField;
468 348 : else if ( pszNfdcGeomField != NULL &&
469 : EQUALN(oField.GetNameRef(), pszNfdcGeomField, strlen(pszNfdcGeomField)) &&
470 : EQUAL(oField.GetNameRef() + strlen(pszNfdcGeomField), "LongitudeS") )
471 0 : iNfdcLongitudeS = iField;
472 :
473 : /* GNIS specific */
474 348 : else if ( pszGeonamesGeomFieldPrefix != NULL &&
475 : EQUALN(oField.GetNameRef(), pszGeonamesGeomFieldPrefix, strlen(pszGeonamesGeomFieldPrefix)) &&
476 : (EQUAL(oField.GetNameRef() + strlen(pszGeonamesGeomFieldPrefix), "_LAT_DEC") ||
477 : EQUAL(oField.GetNameRef() + strlen(pszGeonamesGeomFieldPrefix), "_LATITUDE_DEC") ||
478 : EQUAL(oField.GetNameRef() + strlen(pszGeonamesGeomFieldPrefix), "_LATITUDE")) )
479 : {
480 0 : oField.SetType(OFTReal);
481 0 : iLatitudeField = iField;
482 : }
483 348 : else if ( pszGeonamesGeomFieldPrefix != NULL &&
484 : EQUALN(oField.GetNameRef(), pszGeonamesGeomFieldPrefix, strlen(pszGeonamesGeomFieldPrefix)) &&
485 : (EQUAL(oField.GetNameRef() + strlen(pszGeonamesGeomFieldPrefix), "_LONG_DEC") ||
486 : EQUAL(oField.GetNameRef() + strlen(pszGeonamesGeomFieldPrefix), "_LONGITUDE_DEC") ||
487 : EQUAL(oField.GetNameRef() + strlen(pszGeonamesGeomFieldPrefix), "_LONGITUDE")) )
488 : {
489 0 : oField.SetType(OFTReal);
490 0 : iLongitudeField = iField;
491 : }
492 :
493 348 : poFeatureDefn->AddFieldDefn( &oField );
494 :
495 : }
496 :
497 102 : if ( iNfdcLatitudeS != -1 && iNfdcLongitudeS != -1 )
498 : {
499 0 : bDontHonourStrings = TRUE;
500 0 : poFeatureDefn->SetGeomType( wkbPoint );
501 : }
502 102 : else if ( iLatitudeField != -1 && iLongitudeField != -1 )
503 : {
504 0 : poFeatureDefn->SetGeomType( wkbPoint );
505 : }
506 :
507 102 : CSLDestroy( papszTokens );
508 102 : CSLDestroy( papszFieldTypes );
509 102 : }
510 :
511 : /************************************************************************/
512 : /* ~OGRCSVLayer() */
513 : /************************************************************************/
514 :
515 102 : OGRCSVLayer::~OGRCSVLayer()
516 :
517 : {
518 102 : if( m_nFeaturesRead > 0 && poFeatureDefn != NULL )
519 : {
520 : CPLDebug( "CSV", "%d features read on layer '%s'.",
521 : (int) m_nFeaturesRead,
522 74 : poFeatureDefn->GetName() );
523 : }
524 :
525 102 : poFeatureDefn->Release();
526 102 : CPLFree(pszFilename);
527 :
528 102 : if (fpCSV)
529 102 : VSIFCloseL( fpCSV );
530 102 : }
531 :
532 : /************************************************************************/
533 : /* ResetReading() */
534 : /************************************************************************/
535 :
536 154 : void OGRCSVLayer::ResetReading()
537 :
538 : {
539 154 : if (fpCSV)
540 154 : VSIRewindL( fpCSV );
541 :
542 154 : if( bHasFieldNames )
543 129 : CSLDestroy( OGRCSVReadParseLineL( fpCSV, chDelimiter, bDontHonourStrings ) );
544 :
545 154 : bNeedRewindBeforeRead = FALSE;
546 :
547 154 : nNextFID = 1;
548 154 : }
549 :
550 : /************************************************************************/
551 : /* GetNextUnfilteredFeature() */
552 : /************************************************************************/
553 :
554 9554 : OGRFeature * OGRCSVLayer::GetNextUnfilteredFeature()
555 :
556 : {
557 9554 : if (fpCSV == NULL)
558 0 : return NULL;
559 :
560 : /* -------------------------------------------------------------------- */
561 : /* Read the CSV record. */
562 : /* -------------------------------------------------------------------- */
563 : char **papszTokens;
564 :
565 0 : while(TRUE)
566 : {
567 9554 : papszTokens = OGRCSVReadParseLineL( fpCSV, chDelimiter, bDontHonourStrings );
568 9554 : if( papszTokens == NULL )
569 105 : return NULL;
570 :
571 9449 : if( papszTokens[0] != NULL )
572 : break;
573 :
574 0 : CSLDestroy(papszTokens);
575 : }
576 :
577 : /* -------------------------------------------------------------------- */
578 : /* Create the OGR feature. */
579 : /* -------------------------------------------------------------------- */
580 : OGRFeature *poFeature;
581 :
582 18898 : poFeature = new OGRFeature( poFeatureDefn );
583 :
584 : /* -------------------------------------------------------------------- */
585 : /* Set attributes for any indicated attribute records. */
586 : /* -------------------------------------------------------------------- */
587 : int iAttr;
588 9449 : int nAttrCount = MIN(CSLCount(papszTokens),
589 : poFeatureDefn->GetFieldCount() );
590 : CPLValueType eType;
591 :
592 38090 : for( iAttr = 0; iAttr < nAttrCount; iAttr++)
593 : {
594 28641 : if( iAttr == iWktGeomReadField && papszTokens[iAttr][0] != '\0' )
595 : {
596 31 : char *pszWKT = papszTokens[iAttr];
597 31 : OGRGeometry *poGeom = NULL;
598 :
599 31 : if( OGRGeometryFactory::createFromWkt( &pszWKT, NULL, &poGeom )
600 : == OGRERR_NONE )
601 31 : poFeature->SetGeometryDirectly( poGeom );
602 : }
603 :
604 28641 : OGRFieldType eFieldType = poFeatureDefn->GetFieldDefn(iAttr)->GetType();
605 28867 : if ( eFieldType == OFTReal || eFieldType == OFTInteger )
606 : {
607 226 : if (chDelimiter == ';' && eFieldType == OFTReal)
608 : {
609 0 : char* chComma = strchr(papszTokens[iAttr], ',');
610 0 : if (chComma)
611 0 : *chComma = '.';
612 : }
613 226 : eType = CPLGetValueType(papszTokens[iAttr]);
614 226 : if ( (papszTokens[iAttr][0] != '\0') &&
615 : ( eType == CPL_VALUE_INTEGER ||
616 : eType == CPL_VALUE_REAL ) )
617 158 : poFeature->SetField( iAttr, CPLAtof(papszTokens[iAttr]) );
618 : }
619 28415 : else if (eFieldType != OFTString)
620 : {
621 103 : if (papszTokens[iAttr][0] != '\0')
622 64 : poFeature->SetField( iAttr, papszTokens[iAttr] );
623 : }
624 : else
625 28312 : poFeature->SetField( iAttr, papszTokens[iAttr] );
626 :
627 : }
628 :
629 : /* -------------------------------------------------------------------- */
630 : /*http://www.faa.gov/airports/airport_safety/airportdata_5010/menu/index.cfm specific */
631 : /* -------------------------------------------------------------------- */
632 :
633 9449 : if ( iNfdcLatitudeS != -1 &&
634 : iNfdcLongitudeS != -1 &&
635 : nAttrCount > iNfdcLatitudeS &&
636 : nAttrCount > iNfdcLongitudeS &&
637 0 : papszTokens[iNfdcLongitudeS][0] != 0 &&
638 0 : papszTokens[iNfdcLatitudeS][0] != 0)
639 : {
640 0 : double dfLon = atof(papszTokens[iNfdcLongitudeS]) / 3600;
641 0 : if (strchr(papszTokens[iNfdcLongitudeS], 'W'))
642 0 : dfLon *= -1;
643 0 : double dfLat = atof(papszTokens[iNfdcLatitudeS]) / 3600;
644 0 : if (strchr(papszTokens[iNfdcLatitudeS], 'S'))
645 0 : dfLat *= -1;
646 0 : poFeature->SetGeometryDirectly( new OGRPoint(dfLon, dfLat) );
647 : }
648 :
649 : /* -------------------------------------------------------------------- */
650 : /* GNIS specific */
651 : /* -------------------------------------------------------------------- */
652 9449 : else if ( iLatitudeField != -1 &&
653 : iLongitudeField != -1 &&
654 : nAttrCount > iLatitudeField &&
655 : nAttrCount > iLongitudeField &&
656 0 : papszTokens[iLongitudeField][0] != 0 &&
657 0 : papszTokens[iLatitudeField][0] != 0)
658 : {
659 : /* Some records have dummy 0,0 value */
660 0 : if (papszTokens[iLongitudeField][0] != '0' ||
661 0 : papszTokens[iLongitudeField][1] != '\0' ||
662 0 : papszTokens[iLatitudeField][0] != '0' ||
663 0 : papszTokens[iLatitudeField][1] != '\0')
664 : {
665 0 : double dfLon = atof(papszTokens[iLongitudeField]);
666 0 : double dfLat = atof(papszTokens[iLatitudeField]);
667 0 : poFeature->SetGeometryDirectly( new OGRPoint(dfLon, dfLat) );
668 : }
669 : }
670 :
671 9449 : CSLDestroy( papszTokens );
672 :
673 : /* -------------------------------------------------------------------- */
674 : /* Translate the record id. */
675 : /* -------------------------------------------------------------------- */
676 9449 : poFeature->SetFID( nNextFID++ );
677 :
678 9449 : m_nFeaturesRead++;
679 :
680 9449 : return poFeature;
681 : }
682 :
683 :
684 : /************************************************************************/
685 : /* GetNextFeature() */
686 : /************************************************************************/
687 :
688 9514 : OGRFeature *OGRCSVLayer::GetNextFeature()
689 :
690 : {
691 9514 : OGRFeature *poFeature = NULL;
692 :
693 9514 : if( bNeedRewindBeforeRead )
694 3 : ResetReading();
695 :
696 : /* -------------------------------------------------------------------- */
697 : /* Read features till we find one that satisfies our current */
698 : /* spatial criteria. */
699 : /* -------------------------------------------------------------------- */
700 40 : while( TRUE )
701 : {
702 9554 : poFeature = GetNextUnfilteredFeature();
703 9554 : if( poFeature == NULL )
704 105 : break;
705 :
706 9449 : if( (m_poFilterGeom == NULL
707 : || FilterGeometry( poFeature->GetGeometryRef() ) )
708 : && (m_poAttrQuery == NULL
709 : || m_poAttrQuery->Evaluate( poFeature )) )
710 9409 : break;
711 :
712 40 : delete poFeature;
713 : }
714 :
715 9514 : return poFeature;
716 : }
717 :
718 : /************************************************************************/
719 : /* TestCapability() */
720 : /************************************************************************/
721 :
722 8 : int OGRCSVLayer::TestCapability( const char * pszCap )
723 :
724 : {
725 8 : if( EQUAL(pszCap,OLCSequentialWrite) )
726 2 : return bInWriteMode;
727 6 : else if( EQUAL(pszCap,OLCCreateField) )
728 0 : return bNew && !bHasFieldNames;
729 : else
730 6 : return FALSE;
731 : }
732 :
733 : /************************************************************************/
734 : /* CreateField() */
735 : /************************************************************************/
736 :
737 21 : OGRErr OGRCSVLayer::CreateField( OGRFieldDefn *poNewField, int bApproxOK )
738 :
739 : {
740 : /* -------------------------------------------------------------------- */
741 : /* If we have already written our field names, then we are not */
742 : /* allowed to add new fields. */
743 : /* -------------------------------------------------------------------- */
744 21 : if( bHasFieldNames || !bNew )
745 : {
746 : CPLError( CE_Failure, CPLE_AppDefined,
747 0 : "Unable to create new fields after first feature written.");
748 0 : return OGRERR_FAILURE;
749 : }
750 :
751 : /* -------------------------------------------------------------------- */
752 : /* Does this duplicate an existing field? */
753 : /* -------------------------------------------------------------------- */
754 21 : if( poFeatureDefn->GetFieldIndex( poNewField->GetNameRef() ) != -1 )
755 : {
756 : CPLError( CE_Failure, CPLE_AppDefined,
757 : "Attempt to create field %s, but a field with this name already exists.",
758 0 : poNewField->GetNameRef() );
759 :
760 0 : return OGRERR_FAILURE;
761 : }
762 :
763 : /* -------------------------------------------------------------------- */
764 : /* Is this a legal field type for CSV? For now we only allow */
765 : /* simple integer, real and string fields. */
766 : /* -------------------------------------------------------------------- */
767 21 : switch( poNewField->GetType() )
768 : {
769 : case OFTInteger:
770 : case OFTReal:
771 : case OFTString:
772 : // these types are OK.
773 18 : break;
774 :
775 : default:
776 3 : if( bApproxOK )
777 : {
778 : CPLError( CE_Warning, CPLE_AppDefined,
779 : "Attempt to create field of type %s, but this is not supported\n"
780 : "for .csv files. Just treating as a plain string.",
781 3 : poNewField->GetFieldTypeName( poNewField->GetType() ) );
782 : }
783 : else
784 : {
785 : CPLError( CE_Failure, CPLE_AppDefined,
786 : "Attempt to create field of type %s, but this is not supported\n"
787 : "for .csv files.",
788 0 : poNewField->GetFieldTypeName( poNewField->GetType() ) );
789 0 : return OGRERR_FAILURE;
790 : }
791 : }
792 :
793 : /* -------------------------------------------------------------------- */
794 : /* Seems ok, add to field list. */
795 : /* -------------------------------------------------------------------- */
796 21 : poFeatureDefn->AddFieldDefn( poNewField );
797 :
798 21 : return OGRERR_NONE;
799 : }
800 :
801 : /************************************************************************/
802 : /* CreateFeature() */
803 : /************************************************************************/
804 :
805 24 : OGRErr OGRCSVLayer::CreateFeature( OGRFeature *poNewFeature )
806 :
807 : {
808 : int iField;
809 :
810 24 : if( !bInWriteMode )
811 : {
812 : CPLError( CE_Failure, CPLE_AppDefined,
813 0 : "The CreateFeature() operation is not permitted on a read-only CSV." );
814 0 : return OGRERR_FAILURE;
815 : }
816 :
817 : /* If we need rewind, it means that we have just written a feature before */
818 : /* so there's no point seeking to the end of the file, as we're already */
819 : /* at the end */
820 24 : int bNeedSeekEnd = !bNeedRewindBeforeRead;
821 :
822 24 : bNeedRewindBeforeRead = TRUE;
823 :
824 : /* -------------------------------------------------------------------- */
825 : /* Write field names if we haven't written them yet. */
826 : /* Write .csvt file if needed */
827 : /* -------------------------------------------------------------------- */
828 24 : if( !bHasFieldNames )
829 : {
830 9 : bHasFieldNames = TRUE;
831 9 : bNeedSeekEnd = FALSE;
832 :
833 22 : for(int iFile=0;iFile<((bCreateCSVT) ? 2 : 1);iFile++)
834 : {
835 13 : VSILFILE* fpCSVT = NULL;
836 17 : if (bCreateCSVT && iFile == 0)
837 : {
838 4 : char* pszDirName = CPLStrdup(CPLGetDirname(pszFilename));
839 4 : char* pszBaseName = CPLStrdup(CPLGetBasename(pszFilename));
840 4 : fpCSVT = VSIFOpenL(CPLFormFilename(pszDirName, pszBaseName, ".csvt"), "wb");
841 4 : CPLFree(pszDirName);
842 4 : CPLFree(pszBaseName);
843 : }
844 : else
845 : {
846 10 : if( strncmp(pszFilename, "/vsistdout/", 11) == 0 ||
847 : strncmp(pszFilename, "/vsizip/", 8) == 0 )
848 1 : fpCSV = VSIFOpenL( pszFilename, "wb" );
849 : else
850 8 : fpCSV = VSIFOpenL( pszFilename, "w+b" );
851 :
852 9 : if( fpCSV == NULL )
853 : {
854 : CPLError( CE_Failure, CPLE_OpenFailed,
855 : "Failed to create %s:\n%s",
856 0 : pszFilename, VSIStrerror( errno ) );
857 0 : return OGRERR_FAILURE;
858 : }
859 : }
860 :
861 13 : if (eGeometryFormat == OGR_CSV_GEOM_AS_WKT)
862 : {
863 2 : if (fpCSV) VSIFPrintfL( fpCSV, "%s", "WKT");
864 2 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", "String");
865 2 : if (poFeatureDefn->GetFieldCount() > 0)
866 : {
867 2 : if (fpCSV) VSIFPrintfL( fpCSV, "%c", chDelimiter );
868 2 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", ",");
869 : }
870 : }
871 11 : else if (eGeometryFormat == OGR_CSV_GEOM_AS_XYZ)
872 : {
873 2 : if (fpCSV) VSIFPrintfL( fpCSV, "X%cY%cZ", chDelimiter, chDelimiter);
874 2 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", "Real,Real,Real");
875 2 : if (poFeatureDefn->GetFieldCount() > 0)
876 : {
877 2 : if (fpCSV) VSIFPrintfL( fpCSV, "%c", chDelimiter );
878 2 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", ",");
879 : }
880 : }
881 9 : else if (eGeometryFormat == OGR_CSV_GEOM_AS_XY)
882 : {
883 2 : if (fpCSV) VSIFPrintfL( fpCSV, "X%cY", chDelimiter);
884 2 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", "Real,Real");
885 2 : if (poFeatureDefn->GetFieldCount() > 0)
886 : {
887 2 : if (fpCSV) VSIFPrintfL( fpCSV, "%c", chDelimiter );
888 2 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", ",");
889 : }
890 : }
891 7 : else if (eGeometryFormat == OGR_CSV_GEOM_AS_YX)
892 : {
893 2 : if (fpCSV) VSIFPrintfL( fpCSV, "Y%cX", chDelimiter);
894 2 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", "Real,Real");
895 2 : if (poFeatureDefn->GetFieldCount() > 0)
896 : {
897 2 : if (fpCSV) VSIFPrintfL( fpCSV, "%c", chDelimiter );
898 2 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", ",");
899 : }
900 : }
901 :
902 46 : for( iField = 0; iField < poFeatureDefn->GetFieldCount(); iField++ )
903 : {
904 : char *pszEscaped;
905 :
906 33 : if( iField > 0 )
907 : {
908 20 : if (fpCSV) VSIFPrintfL( fpCSV, "%c", chDelimiter );
909 20 : if (fpCSVT) VSIFPrintfL( fpCSVT, "%s", ",");
910 : }
911 :
912 : pszEscaped =
913 : CPLEscapeString( poFeatureDefn->GetFieldDefn(iField)->GetNameRef(),
914 33 : -1, CPLES_CSV );
915 :
916 33 : if (fpCSV) VSIFPrintfL( fpCSV, "%s", pszEscaped );
917 33 : CPLFree( pszEscaped );
918 :
919 33 : if (fpCSVT)
920 : {
921 12 : switch( poFeatureDefn->GetFieldDefn(iField)->GetType() )
922 : {
923 2 : case OFTInteger: VSIFPrintfL( fpCSVT, "%s", "Integer"); break;
924 2 : case OFTReal: VSIFPrintfL( fpCSVT, "%s", "Real"); break;
925 1 : case OFTDate: VSIFPrintfL( fpCSVT, "%s", "Date"); break;
926 1 : case OFTTime: VSIFPrintfL( fpCSVT, "%s", "Time"); break;
927 1 : case OFTDateTime: VSIFPrintfL( fpCSVT, "%s", "DateTime"); break;
928 5 : default: VSIFPrintfL( fpCSVT, "%s", "String"); break;
929 : }
930 :
931 12 : int nWidth = poFeatureDefn->GetFieldDefn(iField)->GetWidth();
932 12 : int nPrecision = poFeatureDefn->GetFieldDefn(iField)->GetPrecision();
933 12 : if (nWidth != 0)
934 : {
935 3 : if (nPrecision != 0)
936 1 : VSIFPrintfL( fpCSVT, "(%d.%d)", nWidth, nPrecision);
937 : else
938 2 : VSIFPrintfL( fpCSVT, "(%d)", nWidth);
939 : }
940 : }
941 : }
942 13 : if( bUseCRLF )
943 : {
944 1 : if (fpCSV) VSIFPutcL( 13, fpCSV );
945 1 : if (fpCSVT) VSIFPutcL( 13, fpCSVT );
946 : }
947 13 : if (fpCSV) VSIFPutcL( '\n', fpCSV );
948 13 : if (fpCSVT) VSIFPutcL( '\n', fpCSVT );
949 13 : if (fpCSVT) VSIFCloseL(fpCSVT);
950 : }
951 : }
952 :
953 24 : if (fpCSV == NULL)
954 0 : return OGRERR_FAILURE;
955 :
956 : /* -------------------------------------------------------------------- */
957 : /* Make sure we are at the end of the file. */
958 : /* -------------------------------------------------------------------- */
959 24 : if (bNeedSeekEnd)
960 : {
961 4 : if (bFirstFeatureAppendedDuringSession)
962 : {
963 : /* Add a newline character to the end of the file if necessary */
964 4 : bFirstFeatureAppendedDuringSession = FALSE;
965 4 : VSIFSeekL( fpCSV, 0, SEEK_END );
966 4 : VSIFSeekL( fpCSV, VSIFTellL(fpCSV) - 1, SEEK_SET);
967 : char chLast;
968 4 : VSIFReadL( &chLast, 1, 1, fpCSV );
969 4 : VSIFSeekL( fpCSV, 0, SEEK_END );
970 4 : if (chLast != '\n')
971 : {
972 0 : if( bUseCRLF )
973 0 : VSIFPutcL( 13, fpCSV );
974 0 : VSIFPutcL( '\n', fpCSV );
975 : }
976 : }
977 : else
978 : {
979 0 : VSIFSeekL( fpCSV, 0, SEEK_END );
980 : }
981 : }
982 :
983 : /* -------------------------------------------------------------------- */
984 : /* Write out the geometry */
985 : /* -------------------------------------------------------------------- */
986 24 : if (eGeometryFormat == OGR_CSV_GEOM_AS_WKT)
987 : {
988 2 : OGRGeometry *poGeom = poNewFeature->GetGeometryRef();
989 2 : char* pszWKT = NULL;
990 2 : if (poGeom && poGeom->exportToWkt(&pszWKT) == OGRERR_NONE)
991 : {
992 2 : VSIFPrintfL( fpCSV, "\"%s\"", pszWKT);
993 : }
994 : else
995 : {
996 0 : VSIFPrintfL( fpCSV, "\"\"");
997 : }
998 2 : CPLFree(pszWKT);
999 2 : if (poFeatureDefn->GetFieldCount() > 0)
1000 2 : VSIFPrintfL( fpCSV, "%c", chDelimiter);
1001 : }
1002 22 : else if (eGeometryFormat == OGR_CSV_GEOM_AS_XYZ ||
1003 : eGeometryFormat == OGR_CSV_GEOM_AS_XY ||
1004 : eGeometryFormat == OGR_CSV_GEOM_AS_YX)
1005 : {
1006 4 : OGRGeometry *poGeom = poNewFeature->GetGeometryRef();
1007 4 : if (poGeom && wkbFlatten(poGeom->getGeometryType()) == wkbPoint)
1008 : {
1009 3 : OGRPoint* poPoint = (OGRPoint*) poGeom;
1010 : char szBuffer[75];
1011 3 : if (eGeometryFormat == OGR_CSV_GEOM_AS_XYZ )
1012 1 : OGRMakeWktCoordinate(szBuffer, poPoint->getX(), poPoint->getY(), poPoint->getZ(), 3);
1013 2 : else if (eGeometryFormat == OGR_CSV_GEOM_AS_XY )
1014 1 : OGRMakeWktCoordinate(szBuffer, poPoint->getX(), poPoint->getY(), 0, 2);
1015 : else
1016 1 : OGRMakeWktCoordinate(szBuffer, poPoint->getY(), poPoint->getX(), 0, 2);
1017 3 : char* pc = szBuffer;
1018 17 : while(*pc != '\0')
1019 : {
1020 11 : if (*pc == ' ')
1021 4 : *pc = chDelimiter;
1022 11 : pc ++;
1023 : }
1024 3 : VSIFPrintfL( fpCSV, "%s", szBuffer );
1025 : }
1026 : else
1027 : {
1028 1 : VSIFPrintfL( fpCSV, "%c", chDelimiter );
1029 1 : if (eGeometryFormat == OGR_CSV_GEOM_AS_XYZ)
1030 0 : VSIFPrintfL( fpCSV, "%c", chDelimiter );
1031 : }
1032 4 : if (poFeatureDefn->GetFieldCount() > 0)
1033 4 : VSIFPrintfL( fpCSV, "%c", chDelimiter );
1034 : }
1035 :
1036 : /* -------------------------------------------------------------------- */
1037 : /* Write out all the field values. */
1038 : /* -------------------------------------------------------------------- */
1039 85 : for( iField = 0; iField < poFeatureDefn->GetFieldCount(); iField++ )
1040 : {
1041 : char *pszEscaped;
1042 :
1043 61 : if( iField > 0 )
1044 37 : VSIFPrintfL( fpCSV, "%c", chDelimiter );
1045 :
1046 : pszEscaped =
1047 : CPLEscapeString( poNewFeature->GetFieldAsString(iField),
1048 61 : -1, CPLES_CSV );
1049 :
1050 61 : if (poFeatureDefn->GetFieldDefn(iField)->GetType() == OFTReal)
1051 : {
1052 : /* Use point as decimal separator */
1053 4 : char* pszComma = strchr(pszEscaped, ',');
1054 4 : if (pszComma)
1055 0 : *pszComma = '.';
1056 : }
1057 :
1058 61 : VSIFWriteL( pszEscaped, 1, strlen(pszEscaped), fpCSV );
1059 61 : CPLFree( pszEscaped );
1060 : }
1061 :
1062 24 : if( bUseCRLF )
1063 5 : VSIFPutcL( 13, fpCSV );
1064 24 : VSIFPutcL( '\n', fpCSV );
1065 :
1066 24 : return OGRERR_NONE;
1067 : }
1068 :
1069 : /************************************************************************/
1070 : /* SetCRLF() */
1071 : /************************************************************************/
1072 :
1073 9 : void OGRCSVLayer::SetCRLF( int bNewValue )
1074 :
1075 : {
1076 9 : bUseCRLF = bNewValue;
1077 9 : }
1078 :
1079 : /************************************************************************/
1080 : /* SetWriteGeometry() */
1081 : /************************************************************************/
1082 :
1083 5 : void OGRCSVLayer::SetWriteGeometry(OGRCSVGeometryFormat eGeometryFormat)
1084 : {
1085 5 : this->eGeometryFormat = eGeometryFormat;
1086 5 : }
1087 :
1088 : /************************************************************************/
1089 : /* SetCreateCSVT() */
1090 : /************************************************************************/
1091 :
1092 4 : void OGRCSVLayer::SetCreateCSVT(int bCreateCSVT)
1093 : {
1094 4 : this->bCreateCSVT = bCreateCSVT;
1095 4 : }
1096 :
1097 : /************************************************************************/
1098 : /* GetFeatureCount() */
1099 : /************************************************************************/
1100 :
1101 6 : int OGRCSVLayer::GetFeatureCount( int bForce )
1102 : {
1103 6 : if (bInWriteMode || m_poFilterGeom != NULL || m_poAttrQuery != NULL)
1104 4 : return OGRLayer::GetFeatureCount(bForce);
1105 :
1106 2 : if (nTotalFeatures >= 0)
1107 0 : return nTotalFeatures;
1108 :
1109 2 : if (fpCSV == NULL)
1110 0 : return 0;
1111 :
1112 2 : ResetReading();
1113 :
1114 : char **papszTokens;
1115 2 : nTotalFeatures = 0;
1116 6 : while(TRUE)
1117 : {
1118 8 : papszTokens = OGRCSVReadParseLineL( fpCSV, chDelimiter, bDontHonourStrings );
1119 8 : if( papszTokens == NULL )
1120 : break;
1121 :
1122 6 : if( papszTokens[0] != NULL )
1123 6 : nTotalFeatures ++;
1124 :
1125 6 : CSLDestroy(papszTokens);
1126 : }
1127 :
1128 2 : ResetReading();
1129 :
1130 2 : return nTotalFeatures;
1131 : }
|