1 : /**********************************************************************
2 : * $Id: e00read.c,v 1.10 2009-02-24 20:03:50 aboudreault Exp $
3 : *
4 : * Name: e00read.c
5 : * Project: Compressed E00 Read/Write library
6 : * Language: ANSI C
7 : * Purpose: Functions to read Compressed E00 files and return a stream
8 : * of uncompressed lines.
9 : * Author: Daniel Morissette, dmorissette@mapgears.com
10 : *
11 : * $Log: e00read.c,v $
12 : * Revision 1.10 2009-02-24 20:03:50 aboudreault
13 : * Added a short manual pages (#1875)
14 : * Updated documentation and code examples (#247)
15 : *
16 : * Revision 1.9 2005-09-17 14:22:05 daniel
17 : * Switch to MIT license, update refs to website and email address, and
18 : * prepare for 1.0.0 release.
19 : *
20 : * Revision 1.8 1999/02/25 18:45:56 daniel
21 : * Now use CPL for Error handling, Memory allocation, and File access
22 : *
23 : * Revision 1.7 1999/01/08 17:39:08 daniel
24 : * Added E00ReadCallbackOpen()
25 : *
26 : * Revision 1.6 1998/11/13 16:34:08 daniel
27 : * Fixed '\r' problem when reading E00 files from a PC under Unix
28 : *
29 : * Revision 1.5 1998/11/13 15:48:08 daniel
30 : * Simplified the decoding of the compression codes for numbers
31 : * (use a logical rule instead of going case by case)
32 : *
33 : * Revision 1.4 1998/11/02 18:34:29 daniel
34 : * Added E00ErrorReset() calls. Replace "EXP 1" by "EXP 0" on read.
35 : *
36 : * Revision 1.1 1998/10/29 13:26:00 daniel
37 : * Initial revision
38 : *
39 : **********************************************************************
40 : * Copyright (c) 1998-2005, Daniel Morissette
41 : *
42 : * Permission is hereby granted, free of charge, to any person obtaining a
43 : * copy of this software and associated documentation files (the "Software"),
44 : * to deal in the Software without restriction, including without limitation
45 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
46 : * and/or sell copies of the Software, and to permit persons to whom the
47 : * Software is furnished to do so, subject to the following conditions:
48 : *
49 : * The above copyright notice and this permission notice shall be included
50 : * in all copies or substantial portions of the Software.
51 : *
52 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
53 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
54 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
55 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
56 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
57 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
58 : * DEALINGS IN THE SOFTWARE.
59 : *
60 : **********************************************************************/
61 :
62 : #include <stdlib.h>
63 : #include <string.h>
64 : #include <ctype.h>
65 : #include <errno.h>
66 :
67 : #include "e00compr.h"
68 :
69 : static void _ReadNextSourceLine(E00ReadPtr psInfo);
70 : static const char *_UncompressNextLine(E00ReadPtr psInfo);
71 :
72 : /**********************************************************************
73 : * _E00ReadTestOpen()
74 : *
75 : * Given a pre-initialized E00ReadPtr, this function will make sure
76 : * that the file is really a E00 file, and also establish if it is
77 : * compressed or not... setting the structure members by the same way.
78 : *
79 : * Returns NULL (and destroys the E00ReadPtr) if the file does not
80 : * appear to be a valid E00 file.
81 : **********************************************************************/
82 2 : static E00ReadPtr _E00ReadTestOpen(E00ReadPtr psInfo)
83 : {
84 :
85 : /* Check that the file is in E00 format.
86 : */
87 2 : _ReadNextSourceLine(psInfo);
88 4 : if (!psInfo->bEOF && strncmp(psInfo->szInBuf, "EXP ", 4) == 0)
89 : {
90 : /* We should be in presence of a valid E00 file...
91 : * Is the file compressed or not?
92 : *
93 : * Note: we cannot really rely on the number that follows the EXP to
94 : * establish if the file is compressed since we sometimes encounter
95 : * uncompressed files that start with a "EXP 1" line!!!
96 : *
97 : * The best test is to read the first non-empty line: if the file is
98 : * compressed, the first line of data should be 79 or 80 characters
99 : * long and contain several '~' characters.
100 : */
101 6 : do
102 : {
103 2 : _ReadNextSourceLine(psInfo);
104 : }while(!psInfo->bEOF &&
105 4 : (psInfo->szInBuf[0] == '\0' || isspace(psInfo->szInBuf[0])) );
106 :
107 2 : if (!psInfo->bEOF &&
108 : (strlen(psInfo->szInBuf)==79 || strlen(psInfo->szInBuf)==80) &&
109 : strchr(psInfo->szInBuf, '~') != NULL )
110 2 : psInfo->bIsCompressed = 1;
111 :
112 : /* Move the Read ptr ready to read at the beginning of the file
113 : */
114 2 : E00ReadRewind(psInfo);
115 : }
116 : else
117 : {
118 0 : CPLFree(psInfo);
119 0 : psInfo = NULL;
120 : }
121 :
122 2 : return psInfo;
123 : }
124 :
125 : /**********************************************************************
126 : * E00ReadOpen()
127 : *
128 : * Try to open a E00 file given its filename and return a E00ReadPtr handle.
129 : *
130 : * Returns NULL if the file could not be opened or if it does not
131 : * appear to be a valid E00 file.
132 : **********************************************************************/
133 0 : E00ReadPtr E00ReadOpen(const char *pszFname)
134 : {
135 0 : E00ReadPtr psInfo = NULL;
136 : FILE *fp;
137 :
138 0 : CPLErrorReset();
139 :
140 : /* Open the file
141 : */
142 0 : fp = VSIFOpen(pszFname, "rt");
143 0 : if (fp == NULL)
144 : {
145 : CPLError(CE_Failure, CPLE_OpenFailed,
146 0 : "Failed to open %s: %s", pszFname, strerror(errno));
147 0 : return NULL;
148 : }
149 :
150 : /* File was succesfully opened, allocate and initialize a
151 : * E00ReadPtr handle and check that the file is valid.
152 : */
153 0 : psInfo = (E00ReadPtr)CPLCalloc(1, sizeof(struct _E00ReadInfo));
154 :
155 0 : psInfo->fp = fp;
156 :
157 0 : psInfo = _E00ReadTestOpen(psInfo);
158 :
159 0 : if (psInfo == NULL)
160 : {
161 : CPLError(CE_Failure, CPLE_OpenFailed,
162 0 : "%s is not a valid E00 file.", pszFname);
163 : }
164 :
165 0 : return psInfo;
166 : }
167 :
168 : /**********************************************************************
169 : * E00ReadCallbackOpen()
170 : *
171 : * This is an alternative to E00ReadOpen() for cases where you want to
172 : * do all the file management yourself. You open/close the file yourself
173 : * and provide 2 callback functions: to read from the file and rewind the
174 : * file pointer. pRefData is your handle on the physical file and can
175 : * be whatever you want... it is not used by the library, it will be
176 : * passed directly to your 2 callback functions when they are called.
177 : *
178 : * The callback functions must have the following C prototype:
179 : *
180 : * const char *myReadNextLine(void *pRefData);
181 : * void myReadRewind(void *pRefData);
182 : *
183 : * myReadNextLine() should return a reference to its own internal
184 : * buffer, or NULL if an error happens or EOF is reached.
185 : *
186 : * E00ReadCallbackOpen() returns a E00ReadPtr handle or NULL if the file
187 : * does not appear to be a valid E00 file.
188 : **********************************************************************/
189 2 : E00ReadPtr E00ReadCallbackOpen(void *pRefData,
190 : const char * (*pfnReadNextLine)(void *),
191 : void (*pfnReadRewind)(void *))
192 : {
193 2 : E00ReadPtr psInfo = NULL;
194 :
195 2 : CPLErrorReset();
196 :
197 : /* Make sure we received valid function pointers
198 : */
199 2 : if (pfnReadNextLine == NULL || pfnReadRewind == NULL)
200 : {
201 : CPLError(CE_Failure, CPLE_IllegalArg,
202 0 : "Invalid function pointers!");
203 0 : return NULL;
204 : }
205 :
206 : /* Allocate and initialize a
207 : * E00ReadPtr handle and check that the file is valid.
208 : */
209 2 : psInfo = (E00ReadPtr)CPLCalloc(1, sizeof(struct _E00ReadInfo));
210 :
211 2 : psInfo->pRefData = pRefData;
212 2 : psInfo->pfnReadNextLine = pfnReadNextLine;
213 2 : psInfo->pfnReadRewind = pfnReadRewind;
214 :
215 2 : psInfo = _E00ReadTestOpen(psInfo);
216 :
217 2 : if (psInfo == NULL)
218 : {
219 : CPLError(CE_Failure, CPLE_OpenFailed,
220 0 : "This is not a valid E00 file.");
221 : }
222 :
223 2 : return psInfo;
224 : }
225 :
226 : /**********************************************************************
227 : * E00ReadClose()
228 : *
229 : * Close input file and release any memory used by the E00ReadPtr.
230 : **********************************************************************/
231 4 : void E00ReadClose(E00ReadPtr psInfo)
232 : {
233 4 : CPLErrorReset();
234 :
235 4 : if (psInfo)
236 : {
237 2 : if (psInfo->fp)
238 0 : VSIFClose(psInfo->fp);
239 2 : CPLFree(psInfo);
240 : }
241 4 : }
242 :
243 : /**********************************************************************
244 : * E00ReadRewind()
245 : *
246 : * Rewind the E00ReadPtr. Allows to start another read pass on the
247 : * input file.
248 : **********************************************************************/
249 4 : void E00ReadRewind(E00ReadPtr psInfo)
250 : {
251 4 : CPLErrorReset();
252 :
253 4 : psInfo->szInBuf[0] = psInfo->szOutBuf[0] = '\0';
254 4 : psInfo->iInBufPtr = 0;
255 :
256 4 : psInfo->nInputLineNo = 0;
257 :
258 4 : if (psInfo->pfnReadRewind == NULL)
259 0 : VSIRewind(psInfo->fp);
260 : else
261 4 : psInfo->pfnReadRewind(psInfo->pRefData);
262 :
263 4 : psInfo->bEOF = 0;
264 4 : }
265 :
266 : /**********************************************************************
267 : * E00ReadNextLine()
268 : *
269 : * Return the next line of input from the E00 file or NULL if we reached EOF.
270 : *
271 : * Returns a reference to an internal buffer whose contents will be valid
272 : * only until the next call to this function.
273 : **********************************************************************/
274 87 : const char *E00ReadNextLine(E00ReadPtr psInfo)
275 : {
276 87 : const char *pszLine = NULL;
277 : char *pszPtr;
278 :
279 87 : CPLErrorReset();
280 :
281 87 : if (psInfo && !psInfo->bEOF)
282 : {
283 87 : if (!psInfo->bIsCompressed)
284 : {
285 : /* Uncompressed file... return line directly.
286 : */
287 0 : _ReadNextSourceLine(psInfo);
288 0 : pszLine = psInfo->szInBuf;
289 : }
290 91 : else if (psInfo->bIsCompressed && psInfo->nInputLineNo == 0)
291 : {
292 : /* Header line in a compressed file... return line
293 : * after replacing "EXP 1" with "EXP 0". E00ReadOpen()
294 : * has already verified that this line starts with "EXP "
295 : */
296 4 : _ReadNextSourceLine(psInfo);
297 4 : if ( (pszPtr = strstr(psInfo->szInBuf, " 1")) != NULL)
298 4 : pszPtr[1] = '0';
299 4 : pszLine = psInfo->szInBuf;
300 : }
301 : else
302 : {
303 83 : if (psInfo->nInputLineNo == 1)
304 : {
305 : /* We just read the header line... reload the input buffer
306 : */
307 4 : _ReadNextSourceLine(psInfo);
308 : }
309 :
310 : /* Uncompress the next line of input and return it
311 : */
312 83 : pszLine = _UncompressNextLine(psInfo);
313 : }
314 :
315 : /* If we just reached EOF then make sure we don't add an extra
316 : * empty line at the end of the uncompressed oputput.
317 : */
318 87 : if (psInfo->bEOF && strlen(pszLine) == 0)
319 0 : pszLine = NULL;
320 : }
321 :
322 87 : return pszLine;
323 : }
324 :
325 : /**********************************************************************
326 : * _ReadNextSourceLine()
327 : *
328 : * Loads the next line from the source file in psInfo.
329 : *
330 : * psInfo->bEOF should be checked after this call.
331 : **********************************************************************/
332 34 : static void _ReadNextSourceLine(E00ReadPtr psInfo)
333 : {
334 34 : if (!psInfo->bEOF)
335 : {
336 34 : psInfo->iInBufPtr = 0;
337 34 : psInfo->szInBuf[0] = '\0';
338 :
339 : /* Read either using fgets() or psInfo->pfnReadNextLine()
340 : * depending on the way the file was opened...
341 : */
342 34 : if (psInfo->pfnReadNextLine == NULL)
343 : {
344 0 : if (VSIFGets(psInfo->szInBuf,E00_READ_BUF_SIZE,psInfo->fp) == NULL)
345 : {
346 : /* We reached EOF
347 : */
348 0 : psInfo->bEOF = 1;
349 : }
350 : }
351 : else
352 : {
353 : const char *pszLine;
354 34 : pszLine = psInfo->pfnReadNextLine(psInfo->pRefData);
355 34 : if (pszLine)
356 : {
357 34 : strncpy(psInfo->szInBuf, pszLine, E00_READ_BUF_SIZE);
358 : }
359 : else
360 : {
361 : /* We reached EOF
362 : */
363 0 : psInfo->bEOF = 1;
364 : }
365 : }
366 :
367 34 : if (!psInfo->bEOF)
368 : {
369 : /* A new line was succesfully read. Remove trailing '\n' if any.
370 : * (Note: For Unix systems, we also have to check for '\r')
371 : */
372 : int nLen;
373 34 : nLen = strlen(psInfo->szInBuf);
374 102 : while(nLen > 0 && (psInfo->szInBuf[nLen-1] == '\n' ||
375 34 : psInfo->szInBuf[nLen-1] == '\r' ) )
376 : {
377 0 : nLen--;
378 0 : psInfo->szInBuf[nLen] = '\0';
379 : }
380 :
381 34 : psInfo->nInputLineNo++;
382 : }
383 : }
384 34 : }
385 :
386 :
387 : /**********************************************************************
388 : * _GetNextSourceChar()
389 : *
390 : * Returns the next char from the source file input buffer... and
391 : * reload the input buffer when necessary... this function makes the
392 : * whole input file appear as one huge null-terminated string with
393 : * no line delimiters.
394 : *
395 : * Will return '\0' when EOF is reached.
396 : **********************************************************************/
397 1815 : static char _GetNextSourceChar(E00ReadPtr psInfo)
398 : {
399 1815 : char c = '\0';
400 :
401 1815 : if (!psInfo->bEOF)
402 : {
403 1815 : if (psInfo->szInBuf[psInfo->iInBufPtr] == '\0')
404 : {
405 22 : _ReadNextSourceLine(psInfo);
406 22 : c = _GetNextSourceChar(psInfo);
407 : }
408 : else
409 : {
410 1793 : c = psInfo->szInBuf[psInfo->iInBufPtr++];
411 : }
412 : }
413 :
414 1815 : return c;
415 : }
416 :
417 : /**********************************************************************
418 : * _UngetSourceChar()
419 : *
420 : * Reverse the effect of the previous call to _GetNextSourceChar() by
421 : * moving the input buffer pointer back 1 character.
422 : *
423 : * This function can be called only once per call to _GetNextSourceChar()
424 : * (i.e. you cannot unget more than one character) otherwise the pointer
425 : * could move before the beginning of the input buffer.
426 : **********************************************************************/
427 79 : static void _UngetSourceChar(E00ReadPtr psInfo)
428 : {
429 79 : if (psInfo->iInBufPtr > 0)
430 79 : psInfo->iInBufPtr--;
431 : else
432 : {
433 : /* This error can happen only if _UngetSourceChar() is called
434 : * twice in a row (which should never happen!).
435 : */
436 : CPLError(CE_Failure, CPLE_AssertionFailed,
437 : "UNEXPECTED INTERNAL ERROR: _UngetSourceChar() "
438 0 : "failed while reading line %d.", psInfo->nInputLineNo);
439 : }
440 79 : }
441 :
442 : /**********************************************************************
443 : * _UncompressNextLine()
444 : *
445 : * Uncompress one line of input and return a reference to an internal
446 : * buffer containing the uncompressed output.
447 : **********************************************************************/
448 83 : static const char *_UncompressNextLine(E00ReadPtr psInfo)
449 : {
450 : char c;
451 83 : int bEOL = 0; /* Set to 1 when End of Line reached */
452 83 : int iOutBufPtr = 0, i, n;
453 : int iDecimalPoint, bOddNumDigits, iCurDigit;
454 : char *pszExp;
455 83 : int bPreviousCodeWasNumeric = 0;
456 :
457 1033 : while(!bEOL && (c=_GetNextSourceChar(psInfo)) != '\0')
458 : {
459 867 : if (c != '~')
460 : {
461 : /* Normal character... just copy it
462 : */
463 611 : psInfo->szOutBuf[iOutBufPtr++] = c;
464 611 : bPreviousCodeWasNumeric = 0;
465 : }
466 : else /* c == '~' */
467 : {
468 : /* ========================================================
469 : * Found an encoded sequence.
470 : * =======================================================*/
471 256 : c = _GetNextSourceChar(psInfo);
472 :
473 : /* --------------------------------------------------------
474 : * Compression level 1: only spaces, '~' and '\n' are encoded
475 : * -------------------------------------------------------*/
476 256 : if (c == ' ')
477 : {
478 : /* "~ " followed by number of spaces
479 : */
480 52 : c = _GetNextSourceChar(psInfo);
481 52 : n = c - ' ';
482 592 : for(i=0; i<n; i++)
483 540 : psInfo->szOutBuf[iOutBufPtr++] = ' ';
484 52 : bPreviousCodeWasNumeric = 0;
485 : }
486 204 : else if (c == '}')
487 : {
488 : /* "~}" == '\n'
489 : */
490 83 : bEOL = 1;
491 83 : bPreviousCodeWasNumeric = 0;
492 : }
493 121 : else if (bPreviousCodeWasNumeric)
494 : {
495 : /* If the previous code was numeric, then the only valid code
496 : * sequences are the ones above: "~ " and "~}". If we end up
497 : * here, it is because the number was followed by a '~' but
498 : * this '~' was not a code, it only marked the end of a
499 : * number that was not followed by any space.
500 : *
501 : * We should simply ignore the '~' and return the character
502 : * that follows it directly.
503 : */
504 24 : psInfo->szOutBuf[iOutBufPtr++] = c;
505 24 : bPreviousCodeWasNumeric = 0;
506 : }
507 115 : else if (c == '~' || c == '-')
508 : {
509 : /* "~~" and "~-" are simple escape sequences for '~' and '-'
510 : */
511 18 : psInfo->szOutBuf[iOutBufPtr++] = c;
512 : }
513 : /* --------------------------------------------------------
514 : * Compression level 2: numeric values are encoded.
515 : *
516 : * All codes for this level are in the form "~ c0 c1 c2 ... cn"
517 : * where:
518 : *
519 : * ~ marks the beginning of a new code sequence
520 : *
521 : * c0 is a single character code defining the format
522 : * of the number (decimal position, exponent,
523 : * and even or odd number of digits)
524 : *
525 : * c1 c2 ... cn each of these characters represent a pair of
526 : * digits of the encoded value with '!' == 00
527 : * values 92..99 are encoded on 2 chars that
528 : * must be added to each other
529 : * (i.e. 92 == }!, 93 == }", ...)
530 : *
531 : * The sequence ends with a ' ' or a '~' character
532 : * -------------------------------------------------------*/
533 158 : else if (c >= '!' && c <= 'z')
534 : {
535 : /* The format code defines 3 characteristics of the final number:
536 : * - Presence of a decimal point and its position
537 : * - Presence of an exponent, and its sign
538 : * - Odd or even number of digits
539 : */
540 79 : n = c - '!';
541 79 : iDecimalPoint = n % 15; /* 0 = no decimal point */
542 79 : bOddNumDigits = n / 45; /* 0 = even num.digits, 1 = odd */
543 79 : n = n / 15;
544 79 : if ( n % 3 == 1 )
545 71 : pszExp = "E+";
546 8 : else if (n % 3 == 2 )
547 0 : pszExp = "E-";
548 : else
549 8 : pszExp = NULL;
550 :
551 : /* Decode the c1 c2 ... cn value and apply the format.
552 : * Read characters until we encounter a ' ' or a '~'
553 : */
554 79 : iCurDigit = 0;
555 693 : while((c=_GetNextSourceChar(psInfo)) != '\0' &&
556 : c != ' ' && c != '~')
557 : {
558 535 : n = c - '!';
559 535 : if (n == 92 && (c=_GetNextSourceChar(psInfo)) != '\0')
560 4 : n += c - '!';
561 :
562 535 : psInfo->szOutBuf[iOutBufPtr++] = '0' + n/10;
563 :
564 535 : if (++iCurDigit == iDecimalPoint)
565 75 : psInfo->szOutBuf[iOutBufPtr++] = '.';
566 :
567 535 : psInfo->szOutBuf[iOutBufPtr++] = '0' + n%10;
568 :
569 535 : if (++iCurDigit == iDecimalPoint)
570 0 : psInfo->szOutBuf[iOutBufPtr++] = '.';
571 : }
572 :
573 79 : if (c == '~' || c == ' ')
574 : {
575 79 : bPreviousCodeWasNumeric = 1;
576 79 : _UngetSourceChar(psInfo);
577 : }
578 :
579 : /* If odd number of digits, then flush the last one
580 : */
581 79 : if (bOddNumDigits)
582 32 : iOutBufPtr--;
583 :
584 : /* Insert the exponent string before the 2 last digits
585 : * (we assume the exponent string is 2 chars. long)
586 : */
587 79 : if (pszExp)
588 : {
589 213 : for(i=0; i<2;i++)
590 : {
591 142 : psInfo->szOutBuf[iOutBufPtr] =
592 142 : psInfo->szOutBuf[iOutBufPtr-2];
593 142 : psInfo->szOutBuf[iOutBufPtr-2] = pszExp[i];
594 142 : iOutBufPtr++;
595 : }
596 : }
597 : }
598 : else
599 : {
600 : /* Unsupported code sequence... this is a possibility
601 : * given the fact that this library was written by
602 : * reverse-engineering the format!
603 : *
604 : * Send an error to the user and abort.
605 : *
606 : * If this error ever happens, and you are convinced that
607 : * the input file is not corrupted, then please report it to
608 : * me at dmorissette@mapgears.com, quoting the section of the input
609 : * file that produced it, and I'll do my best to add support
610 : * for this code sequence.
611 : */
612 : CPLError(CE_Failure, CPLE_NotSupported,
613 : "Unexpected code \"~%c\" encountered in line %d.",
614 0 : c, psInfo->nInputLineNo);
615 :
616 : /* Force the program to abort by simulating a EOF
617 : */
618 0 : psInfo->bEOF = 1;
619 0 : bEOL = 1;
620 : }
621 :
622 : }/* if c == '~' */
623 :
624 : /* E00 lines should NEVER be longer than 80 chars. if we passed
625 : * that limit, then the input file is likely corrupt.
626 : */
627 867 : if (iOutBufPtr > 80)
628 : {
629 : CPLError(CE_Failure, CPLE_FileIO,
630 : "Uncompressed line longer than 80 chars. "
631 : "Input file possibly corrupt around line %d.",
632 0 : psInfo->nInputLineNo);
633 : /* Force the program to abort by simulating a EOF
634 : */
635 0 : psInfo->bEOF = 1;
636 0 : bEOL = 1;
637 : }
638 :
639 : }/* while !EOL */
640 :
641 83 : psInfo->szOutBuf[iOutBufPtr++] = '\0';
642 :
643 83 : return psInfo->szOutBuf;
644 : }
645 :
646 :
|