1 : /* $Id: tif_lzw.c,v 1.45 2011-04-02 20:54:09 bfriesen Exp $ */
2 :
3 : /*
4 : * Copyright (c) 1988-1997 Sam Leffler
5 : * Copyright (c) 1991-1997 Silicon Graphics, Inc.
6 : *
7 : * Permission to use, copy, modify, distribute, and sell this software and
8 : * its documentation for any purpose is hereby granted without fee, provided
9 : * that (i) the above copyright notices and this permission notice appear in
10 : * all copies of the software and related documentation, and (ii) the names of
11 : * Sam Leffler and Silicon Graphics may not be used in any advertising or
12 : * publicity relating to the software without the specific, prior written
13 : * permission of Sam Leffler and Silicon Graphics.
14 : *
15 : * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
16 : * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
17 : * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
18 : *
19 : * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
20 : * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
21 : * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
22 : * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF
23 : * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
24 : * OF THIS SOFTWARE.
25 : */
26 :
27 : #include "tiffiop.h"
28 : #ifdef LZW_SUPPORT
29 : /*
30 : * TIFF Library.
31 : * Rev 5.0 Lempel-Ziv & Welch Compression Support
32 : *
33 : * This code is derived from the compress program whose code is
34 : * derived from software contributed to Berkeley by James A. Woods,
35 : * derived from original work by Spencer Thomas and Joseph Orost.
36 : *
37 : * The original Berkeley copyright notice appears below in its entirety.
38 : */
39 : #include "tif_predict.h"
40 :
41 : #include <stdio.h>
42 :
43 : /*
44 : * NB: The 5.0 spec describes a different algorithm than Aldus
45 : * implements. Specifically, Aldus does code length transitions
46 : * one code earlier than should be done (for real LZW).
47 : * Earlier versions of this library implemented the correct
48 : * LZW algorithm, but emitted codes in a bit order opposite
49 : * to the TIFF spec. Thus, to maintain compatibility w/ Aldus
50 : * we interpret MSB-LSB ordered codes to be images written w/
51 : * old versions of this library, but otherwise adhere to the
52 : * Aldus "off by one" algorithm.
53 : *
54 : * Future revisions to the TIFF spec are expected to "clarify this issue".
55 : */
56 : #define LZW_COMPAT /* include backwards compatibility code */
57 : /*
58 : * Each strip of data is supposed to be terminated by a CODE_EOI.
59 : * If the following #define is included, the decoder will also
60 : * check for end-of-strip w/o seeing this code. This makes the
61 : * library more robust, but also slower.
62 : */
63 : #define LZW_CHECKEOS /* include checks for strips w/o EOI code */
64 :
65 : #define MAXCODE(n) ((1L<<(n))-1)
66 : /*
67 : * The TIFF spec specifies that encoded bit
68 : * strings range from 9 to 12 bits.
69 : */
70 : #define BITS_MIN 9 /* start with 9 bits */
71 : #define BITS_MAX 12 /* max of 12 bit strings */
72 : /* predefined codes */
73 : #define CODE_CLEAR 256 /* code to clear string table */
74 : #define CODE_EOI 257 /* end-of-information code */
75 : #define CODE_FIRST 258 /* first free code entry */
76 : #define CODE_MAX MAXCODE(BITS_MAX)
77 : #define HSIZE 9001L /* 91% occupancy */
78 : #define HSHIFT (13-8)
79 : #ifdef LZW_COMPAT
80 : /* NB: +1024 is for compatibility with old files */
81 : #define CSIZE (MAXCODE(BITS_MAX)+1024L)
82 : #else
83 : #define CSIZE (MAXCODE(BITS_MAX)+1L)
84 : #endif
85 :
86 : /*
87 : * State block for each open TIFF file using LZW
88 : * compression/decompression. Note that the predictor
89 : * state block must be first in this data structure.
90 : */
91 : typedef struct {
92 : TIFFPredictorState predict; /* predictor super class */
93 :
94 : unsigned short nbits; /* # of bits/code */
95 : unsigned short maxcode; /* maximum code for lzw_nbits */
96 : unsigned short free_ent; /* next free entry in hash table */
97 : long nextdata; /* next bits of i/o */
98 : long nextbits; /* # of valid bits in lzw_nextdata */
99 :
100 : int rw_mode; /* preserve rw_mode from init */
101 : } LZWBaseState;
102 :
103 : #define lzw_nbits base.nbits
104 : #define lzw_maxcode base.maxcode
105 : #define lzw_free_ent base.free_ent
106 : #define lzw_nextdata base.nextdata
107 : #define lzw_nextbits base.nextbits
108 :
109 : /*
110 : * Encoding-specific state.
111 : */
112 : typedef uint16 hcode_t; /* codes fit in 16 bits */
113 : typedef struct {
114 : long hash;
115 : hcode_t code;
116 : } hash_t;
117 :
118 : /*
119 : * Decoding-specific state.
120 : */
121 : typedef struct code_ent {
122 : struct code_ent *next;
123 : unsigned short length; /* string len, including this token */
124 : unsigned char value; /* data value */
125 : unsigned char firstchar; /* first token of string */
126 : } code_t;
127 :
128 : typedef int (*decodeFunc)(TIFF*, uint8*, tmsize_t, uint16);
129 :
130 : typedef struct {
131 : LZWBaseState base;
132 :
133 : /* Decoding specific data */
134 : long dec_nbitsmask; /* lzw_nbits 1 bits, right adjusted */
135 : long dec_restart; /* restart count */
136 : #ifdef LZW_CHECKEOS
137 : uint64 dec_bitsleft; /* available bits in raw data */
138 : #endif
139 : decodeFunc dec_decode; /* regular or backwards compatible */
140 : code_t* dec_codep; /* current recognized code */
141 : code_t* dec_oldcodep; /* previously recognized code */
142 : code_t* dec_free_entp; /* next free entry */
143 : code_t* dec_maxcodep; /* max available entry */
144 : code_t* dec_codetab; /* kept separate for small machines */
145 :
146 : /* Encoding specific data */
147 : int enc_oldcode; /* last code encountered */
148 : long enc_checkpoint; /* point at which to clear table */
149 : #define CHECK_GAP 10000 /* enc_ratio check interval */
150 : long enc_ratio; /* current compression ratio */
151 : long enc_incount; /* (input) data bytes encoded */
152 : long enc_outcount; /* encoded (output) bytes */
153 : uint8* enc_rawlimit; /* bound on tif_rawdata buffer */
154 : hash_t* enc_hashtab; /* kept separate for small machines */
155 : } LZWCodecState;
156 :
157 : #define LZWState(tif) ((LZWBaseState*) (tif)->tif_data)
158 : #define DecoderState(tif) ((LZWCodecState*) LZWState(tif))
159 : #define EncoderState(tif) ((LZWCodecState*) LZWState(tif))
160 :
161 : static int LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s);
162 : #ifdef LZW_COMPAT
163 : static int LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s);
164 : #endif
165 : static void cl_hash(LZWCodecState*);
166 :
167 : /*
168 : * LZW Decoder.
169 : */
170 :
171 : #ifdef LZW_CHECKEOS
172 : /*
173 : * This check shouldn't be necessary because each
174 : * strip is suppose to be terminated with CODE_EOI.
175 : */
176 : #define NextCode(_tif, _sp, _bp, _code, _get) { \
177 : if ((_sp)->dec_bitsleft < (uint64)nbits) { \
178 : TIFFWarningExt(_tif->tif_clientdata, module, \
179 : "LZWDecode: Strip %d not terminated with EOI code", \
180 : _tif->tif_curstrip); \
181 : _code = CODE_EOI; \
182 : } else { \
183 : _get(_sp,_bp,_code); \
184 : (_sp)->dec_bitsleft -= nbits; \
185 : } \
186 : }
187 : #else
188 : #define NextCode(tif, sp, bp, code, get) get(sp, bp, code)
189 : #endif
190 :
191 : static int
192 83 : LZWFixupTags(TIFF* tif)
193 : {
194 : (void) tif;
195 83 : return (1);
196 : }
197 :
198 : static int
199 17 : LZWSetupDecode(TIFF* tif)
200 : {
201 : static const char module[] = "LZWSetupDecode";
202 17 : LZWCodecState* sp = DecoderState(tif);
203 : int code;
204 :
205 17 : if( sp == NULL )
206 : {
207 : /*
208 : * Allocate state block so tag methods have storage to record
209 : * values.
210 : */
211 0 : tif->tif_data = (uint8*) _TIFFmalloc(sizeof(LZWCodecState));
212 0 : if (tif->tif_data == NULL)
213 : {
214 0 : TIFFErrorExt(tif->tif_clientdata, module, "No space for LZW state block");
215 0 : return (0);
216 : }
217 :
218 0 : DecoderState(tif)->dec_codetab = NULL;
219 0 : DecoderState(tif)->dec_decode = NULL;
220 :
221 : /*
222 : * Setup predictor setup.
223 : */
224 0 : (void) TIFFPredictorInit(tif);
225 :
226 0 : sp = DecoderState(tif);
227 : }
228 :
229 17 : assert(sp != NULL);
230 :
231 17 : if (sp->dec_codetab == NULL) {
232 17 : sp->dec_codetab = (code_t*)_TIFFmalloc(CSIZE*sizeof (code_t));
233 17 : if (sp->dec_codetab == NULL) {
234 0 : TIFFErrorExt(tif->tif_clientdata, module,
235 : "No space for LZW code table");
236 0 : return (0);
237 : }
238 : /*
239 : * Pre-load the table.
240 : */
241 17 : code = 255;
242 : do {
243 4352 : sp->dec_codetab[code].value = code;
244 4352 : sp->dec_codetab[code].firstchar = code;
245 4352 : sp->dec_codetab[code].length = 1;
246 4352 : sp->dec_codetab[code].next = NULL;
247 4352 : } while (code--);
248 : /*
249 : * Zero-out the unused entries
250 : */
251 17 : _TIFFmemset(&sp->dec_codetab[CODE_CLEAR], 0,
252 : (CODE_FIRST - CODE_CLEAR) * sizeof (code_t));
253 : }
254 17 : return (1);
255 : }
256 :
257 : /*
258 : * Setup state for decoding a strip.
259 : */
260 : static int
261 92 : LZWPreDecode(TIFF* tif, uint16 s)
262 : {
263 : static const char module[] = "LZWPreDecode";
264 92 : LZWCodecState *sp = DecoderState(tif);
265 :
266 : (void) s;
267 92 : assert(sp != NULL);
268 92 : if( sp->dec_codetab == NULL )
269 : {
270 6 : tif->tif_setupdecode( tif );
271 : }
272 :
273 : /*
274 : * Check for old bit-reversed codes.
275 : */
276 92 : if (tif->tif_rawdata[0] == 0 && (tif->tif_rawdata[1] & 0x1)) {
277 : #ifdef LZW_COMPAT
278 0 : if (!sp->dec_decode) {
279 0 : TIFFWarningExt(tif->tif_clientdata, module,
280 : "Old-style LZW codes, convert file");
281 : /*
282 : * Override default decoding methods with
283 : * ones that deal with the old coding.
284 : * Otherwise the predictor versions set
285 : * above will call the compatibility routines
286 : * through the dec_decode method.
287 : */
288 0 : tif->tif_decoderow = LZWDecodeCompat;
289 0 : tif->tif_decodestrip = LZWDecodeCompat;
290 0 : tif->tif_decodetile = LZWDecodeCompat;
291 : /*
292 : * If doing horizontal differencing, must
293 : * re-setup the predictor logic since we
294 : * switched the basic decoder methods...
295 : */
296 0 : (*tif->tif_setupdecode)(tif);
297 0 : sp->dec_decode = LZWDecodeCompat;
298 : }
299 0 : sp->lzw_maxcode = MAXCODE(BITS_MIN);
300 : #else /* !LZW_COMPAT */
301 : if (!sp->dec_decode) {
302 : TIFFErrorExt(tif->tif_clientdata, module,
303 : "Old-style LZW codes not supported");
304 : sp->dec_decode = LZWDecode;
305 : }
306 : return (0);
307 : #endif/* !LZW_COMPAT */
308 : } else {
309 92 : sp->lzw_maxcode = MAXCODE(BITS_MIN)-1;
310 92 : sp->dec_decode = LZWDecode;
311 : }
312 92 : sp->lzw_nbits = BITS_MIN;
313 92 : sp->lzw_nextbits = 0;
314 92 : sp->lzw_nextdata = 0;
315 :
316 92 : sp->dec_restart = 0;
317 92 : sp->dec_nbitsmask = MAXCODE(BITS_MIN);
318 : #ifdef LZW_CHECKEOS
319 92 : sp->dec_bitsleft = ((uint64)tif->tif_rawcc) << 3;
320 : #endif
321 92 : sp->dec_free_entp = sp->dec_codetab + CODE_FIRST;
322 : /*
323 : * Zero entries that are not yet filled in. We do
324 : * this to guard against bogus input data that causes
325 : * us to index into undefined entries. If you can
326 : * come up with a way to safely bounds-check input codes
327 : * while decoding then you can remove this operation.
328 : */
329 92 : _TIFFmemset(sp->dec_free_entp, 0, (CSIZE-CODE_FIRST)*sizeof (code_t));
330 92 : sp->dec_oldcodep = &sp->dec_codetab[-1];
331 92 : sp->dec_maxcodep = &sp->dec_codetab[sp->dec_nbitsmask-1];
332 92 : return (1);
333 : }
334 :
335 : /*
336 : * Decode a "hunk of data".
337 : */
338 : #define GetNextCode(sp, bp, code) { \
339 : nextdata = (nextdata<<8) | *(bp)++; \
340 : nextbits += 8; \
341 : if (nextbits < nbits) { \
342 : nextdata = (nextdata<<8) | *(bp)++; \
343 : nextbits += 8; \
344 : } \
345 : code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask); \
346 : nextbits -= nbits; \
347 : }
348 :
349 : static void
350 0 : codeLoop(TIFF* tif, const char* module)
351 : {
352 0 : TIFFErrorExt(tif->tif_clientdata, module,
353 : "Bogus encoding, loop in the code table; scanline %d",
354 : tif->tif_row);
355 0 : }
356 :
357 : static int
358 106306 : LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s)
359 : {
360 : static const char module[] = "LZWDecode";
361 106306 : LZWCodecState *sp = DecoderState(tif);
362 106306 : char *op = (char*) op0;
363 106306 : long occ = (long) occ0;
364 : char *tp;
365 : unsigned char *bp;
366 : hcode_t code;
367 : int len;
368 : long nbits, nextbits, nextdata, nbitsmask;
369 : code_t *codep, *free_entp, *maxcodep, *oldcodep;
370 :
371 : (void) s;
372 106306 : assert(sp != NULL);
373 106306 : assert(sp->dec_codetab != NULL);
374 :
375 : /*
376 : Fail if value does not fit in long.
377 : */
378 106306 : if ((tmsize_t) occ != occ0)
379 0 : return (0);
380 : /*
381 : * Restart interrupted output operation.
382 : */
383 106306 : if (sp->dec_restart) {
384 : long residue;
385 :
386 103886 : codep = sp->dec_codep;
387 103886 : residue = codep->length - sp->dec_restart;
388 103886 : if (residue > occ) {
389 : /*
390 : * Residue from previous decode is sufficient
391 : * to satisfy decode request. Skip to the
392 : * start of the decoded string, place decoded
393 : * values in the output buffer, and return.
394 : */
395 99868 : sp->dec_restart += occ;
396 : do {
397 3160750 : codep = codep->next;
398 3160750 : } while (--residue > occ && codep);
399 99868 : if (codep) {
400 99868 : tp = op + occ;
401 : do {
402 176900 : *--tp = codep->value;
403 176900 : codep = codep->next;
404 176900 : } while (--occ && codep);
405 : }
406 99868 : return (1);
407 : }
408 : /*
409 : * Residue satisfies only part of the decode request.
410 : */
411 4018 : op += residue, occ -= residue;
412 4018 : tp = op;
413 : do {
414 : int t;
415 6582 : --tp;
416 6582 : t = codep->value;
417 6582 : codep = codep->next;
418 6582 : *tp = t;
419 6582 : } while (--residue && codep);
420 4018 : sp->dec_restart = 0;
421 : }
422 :
423 6438 : bp = (unsigned char *)tif->tif_rawcp;
424 6438 : nbits = sp->lzw_nbits;
425 6438 : nextdata = sp->lzw_nextdata;
426 6438 : nextbits = sp->lzw_nextbits;
427 6438 : nbitsmask = sp->dec_nbitsmask;
428 6438 : oldcodep = sp->dec_oldcodep;
429 6438 : free_entp = sp->dec_free_entp;
430 6438 : maxcodep = sp->dec_maxcodep;
431 :
432 3846996 : while (occ > 0) {
433 3838176 : NextCode(tif, sp, bp, code, GetNextCode);
434 3838176 : if (code == CODE_EOI)
435 0 : break;
436 3838176 : if (code == CODE_CLEAR) {
437 1065 : free_entp = sp->dec_codetab + CODE_FIRST;
438 1065 : _TIFFmemset(free_entp, 0,
439 : (CSIZE - CODE_FIRST) * sizeof (code_t));
440 1065 : nbits = BITS_MIN;
441 1065 : nbitsmask = MAXCODE(BITS_MIN);
442 1065 : maxcodep = sp->dec_codetab + nbitsmask-1;
443 1065 : NextCode(tif, sp, bp, code, GetNextCode);
444 1065 : if (code == CODE_EOI)
445 0 : break;
446 1065 : if (code >= CODE_CLEAR) {
447 0 : TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
448 : "LZWDecode: Corrupted LZW table at scanline %d",
449 : tif->tif_row);
450 0 : return (0);
451 : }
452 1065 : *op++ = (char)code, occ--;
453 1065 : oldcodep = sp->dec_codetab + code;
454 1065 : continue;
455 : }
456 3837111 : codep = sp->dec_codetab + code;
457 :
458 : /*
459 : * Add the new entry to the code table.
460 : */
461 7674222 : if (free_entp < &sp->dec_codetab[0] ||
462 3837111 : free_entp >= &sp->dec_codetab[CSIZE]) {
463 0 : TIFFErrorExt(tif->tif_clientdata, module,
464 : "Corrupted LZW table at scanline %d",
465 : tif->tif_row);
466 0 : return (0);
467 : }
468 :
469 3837111 : free_entp->next = oldcodep;
470 7674222 : if (free_entp->next < &sp->dec_codetab[0] ||
471 3837111 : free_entp->next >= &sp->dec_codetab[CSIZE]) {
472 0 : TIFFErrorExt(tif->tif_clientdata, module,
473 : "Corrupted LZW table at scanline %d",
474 : tif->tif_row);
475 0 : return (0);
476 : }
477 3837111 : free_entp->firstchar = free_entp->next->firstchar;
478 3837111 : free_entp->length = free_entp->next->length+1;
479 3837111 : free_entp->value = (codep < free_entp) ?
480 : codep->firstchar : free_entp->firstchar;
481 3837111 : if (++free_entp > maxcodep) {
482 3303 : if (++nbits > BITS_MAX) /* should not happen */
483 257 : nbits = BITS_MAX;
484 3303 : nbitsmask = MAXCODE(nbits);
485 3303 : maxcodep = sp->dec_codetab + nbitsmask-1;
486 : }
487 3837111 : oldcodep = codep;
488 3837111 : if (code >= 256) {
489 : /*
490 : * Code maps to a string, copy string
491 : * value to output (written in reverse).
492 : */
493 1772683 : if(codep->length == 0) {
494 0 : TIFFErrorExt(tif->tif_clientdata, module,
495 : "Wrong length of decoded string: "
496 : "data probably corrupted at scanline %d",
497 : tif->tif_row);
498 0 : return (0);
499 : }
500 1772683 : if (codep->length > occ) {
501 : /*
502 : * String is too long for decode buffer,
503 : * locate portion that will fit, copy to
504 : * the decode buffer, and setup restart
505 : * logic for the next decoding call.
506 : */
507 4056 : sp->dec_codep = codep;
508 : do {
509 184168 : codep = codep->next;
510 184168 : } while (codep && codep->length > occ);
511 4056 : if (codep) {
512 4056 : sp->dec_restart = (long)occ;
513 4056 : tp = op + occ;
514 : do {
515 7179 : *--tp = codep->value;
516 7179 : codep = codep->next;
517 7179 : } while (--occ && codep);
518 4056 : if (codep)
519 0 : codeLoop(tif, module);
520 : }
521 4056 : break;
522 : }
523 1768627 : len = codep->length;
524 1768627 : tp = op + len;
525 : do {
526 : int t;
527 5275065 : --tp;
528 5275065 : t = codep->value;
529 5275065 : codep = codep->next;
530 5275065 : *tp = t;
531 5275065 : } while (codep && tp > op);
532 1768627 : if (codep) {
533 0 : codeLoop(tif, module);
534 0 : break;
535 : }
536 1768627 : assert(occ >= len);
537 1768627 : op += len, occ -= len;
538 : } else
539 2064428 : *op++ = (char)code, occ--;
540 : }
541 :
542 6438 : tif->tif_rawcp = (uint8*) bp;
543 6438 : sp->lzw_nbits = (unsigned short) nbits;
544 6438 : sp->lzw_nextdata = nextdata;
545 6438 : sp->lzw_nextbits = nextbits;
546 6438 : sp->dec_nbitsmask = nbitsmask;
547 6438 : sp->dec_oldcodep = oldcodep;
548 6438 : sp->dec_free_entp = free_entp;
549 6438 : sp->dec_maxcodep = maxcodep;
550 :
551 6438 : if (occ > 0) {
552 : #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__))
553 : TIFFErrorExt(tif->tif_clientdata, module,
554 : "Not enough data at scanline %d (short %I64d bytes)",
555 : tif->tif_row, (unsigned __int64) occ);
556 : #else
557 0 : TIFFErrorExt(tif->tif_clientdata, module,
558 : "Not enough data at scanline %d (short %llu bytes)",
559 : tif->tif_row, (unsigned long long) occ);
560 : #endif
561 0 : return (0);
562 : }
563 6438 : return (1);
564 : }
565 :
566 : #ifdef LZW_COMPAT
567 : /*
568 : * Decode a "hunk of data" for old images.
569 : */
570 : #define GetNextCodeCompat(sp, bp, code) { \
571 : nextdata |= (unsigned long) *(bp)++ << nextbits; \
572 : nextbits += 8; \
573 : if (nextbits < nbits) { \
574 : nextdata |= (unsigned long) *(bp)++ << nextbits;\
575 : nextbits += 8; \
576 : } \
577 : code = (hcode_t)(nextdata & nbitsmask); \
578 : nextdata >>= nbits; \
579 : nextbits -= nbits; \
580 : }
581 :
582 : static int
583 0 : LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s)
584 : {
585 : static const char module[] = "LZWDecodeCompat";
586 0 : LZWCodecState *sp = DecoderState(tif);
587 0 : char *op = (char*) op0;
588 0 : long occ = (long) occ0;
589 : char *tp;
590 : unsigned char *bp;
591 : int code, nbits;
592 : long nextbits, nextdata, nbitsmask;
593 : code_t *codep, *free_entp, *maxcodep, *oldcodep;
594 :
595 : (void) s;
596 0 : assert(sp != NULL);
597 :
598 : /*
599 : Fail if value does not fit in long.
600 : */
601 0 : if ((tmsize_t) occ != occ0)
602 0 : return (0);
603 :
604 : /*
605 : * Restart interrupted output operation.
606 : */
607 0 : if (sp->dec_restart) {
608 : long residue;
609 :
610 0 : codep = sp->dec_codep;
611 0 : residue = codep->length - sp->dec_restart;
612 0 : if (residue > occ) {
613 : /*
614 : * Residue from previous decode is sufficient
615 : * to satisfy decode request. Skip to the
616 : * start of the decoded string, place decoded
617 : * values in the output buffer, and return.
618 : */
619 0 : sp->dec_restart += occ;
620 : do {
621 0 : codep = codep->next;
622 0 : } while (--residue > occ);
623 0 : tp = op + occ;
624 : do {
625 0 : *--tp = codep->value;
626 0 : codep = codep->next;
627 0 : } while (--occ);
628 0 : return (1);
629 : }
630 : /*
631 : * Residue satisfies only part of the decode request.
632 : */
633 0 : op += residue, occ -= residue;
634 0 : tp = op;
635 : do {
636 0 : *--tp = codep->value;
637 0 : codep = codep->next;
638 0 : } while (--residue);
639 0 : sp->dec_restart = 0;
640 : }
641 :
642 0 : bp = (unsigned char *)tif->tif_rawcp;
643 0 : nbits = sp->lzw_nbits;
644 0 : nextdata = sp->lzw_nextdata;
645 0 : nextbits = sp->lzw_nextbits;
646 0 : nbitsmask = sp->dec_nbitsmask;
647 0 : oldcodep = sp->dec_oldcodep;
648 0 : free_entp = sp->dec_free_entp;
649 0 : maxcodep = sp->dec_maxcodep;
650 :
651 0 : while (occ > 0) {
652 0 : NextCode(tif, sp, bp, code, GetNextCodeCompat);
653 0 : if (code == CODE_EOI)
654 0 : break;
655 0 : if (code == CODE_CLEAR) {
656 0 : free_entp = sp->dec_codetab + CODE_FIRST;
657 0 : _TIFFmemset(free_entp, 0,
658 : (CSIZE - CODE_FIRST) * sizeof (code_t));
659 0 : nbits = BITS_MIN;
660 0 : nbitsmask = MAXCODE(BITS_MIN);
661 0 : maxcodep = sp->dec_codetab + nbitsmask;
662 0 : NextCode(tif, sp, bp, code, GetNextCodeCompat);
663 0 : if (code == CODE_EOI)
664 0 : break;
665 0 : if (code >= CODE_CLEAR) {
666 0 : TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
667 : "LZWDecode: Corrupted LZW table at scanline %d",
668 : tif->tif_row);
669 0 : return (0);
670 : }
671 0 : *op++ = code, occ--;
672 0 : oldcodep = sp->dec_codetab + code;
673 0 : continue;
674 : }
675 0 : codep = sp->dec_codetab + code;
676 :
677 : /*
678 : * Add the new entry to the code table.
679 : */
680 0 : if (free_entp < &sp->dec_codetab[0] ||
681 0 : free_entp >= &sp->dec_codetab[CSIZE]) {
682 0 : TIFFErrorExt(tif->tif_clientdata, module,
683 : "Corrupted LZW table at scanline %d", tif->tif_row);
684 0 : return (0);
685 : }
686 :
687 0 : free_entp->next = oldcodep;
688 0 : if (free_entp->next < &sp->dec_codetab[0] ||
689 0 : free_entp->next >= &sp->dec_codetab[CSIZE]) {
690 0 : TIFFErrorExt(tif->tif_clientdata, module,
691 : "Corrupted LZW table at scanline %d", tif->tif_row);
692 0 : return (0);
693 : }
694 0 : free_entp->firstchar = free_entp->next->firstchar;
695 0 : free_entp->length = free_entp->next->length+1;
696 0 : free_entp->value = (codep < free_entp) ?
697 : codep->firstchar : free_entp->firstchar;
698 0 : if (++free_entp > maxcodep) {
699 0 : if (++nbits > BITS_MAX) /* should not happen */
700 0 : nbits = BITS_MAX;
701 0 : nbitsmask = MAXCODE(nbits);
702 0 : maxcodep = sp->dec_codetab + nbitsmask;
703 : }
704 0 : oldcodep = codep;
705 0 : if (code >= 256) {
706 : /*
707 : * Code maps to a string, copy string
708 : * value to output (written in reverse).
709 : */
710 0 : if(codep->length == 0) {
711 0 : TIFFErrorExt(tif->tif_clientdata, module,
712 : "Wrong length of decoded "
713 : "string: data probably corrupted at scanline %d",
714 : tif->tif_row);
715 0 : return (0);
716 : }
717 0 : if (codep->length > occ) {
718 : /*
719 : * String is too long for decode buffer,
720 : * locate portion that will fit, copy to
721 : * the decode buffer, and setup restart
722 : * logic for the next decoding call.
723 : */
724 0 : sp->dec_codep = codep;
725 : do {
726 0 : codep = codep->next;
727 0 : } while (codep->length > occ);
728 0 : sp->dec_restart = occ;
729 0 : tp = op + occ;
730 : do {
731 0 : *--tp = codep->value;
732 0 : codep = codep->next;
733 0 : } while (--occ);
734 0 : break;
735 : }
736 0 : assert(occ >= codep->length);
737 0 : op += codep->length, occ -= codep->length;
738 0 : tp = op;
739 : do {
740 0 : *--tp = codep->value;
741 0 : } while( (codep = codep->next) != NULL );
742 : } else
743 0 : *op++ = code, occ--;
744 : }
745 :
746 0 : tif->tif_rawcp = (uint8*) bp;
747 0 : sp->lzw_nbits = nbits;
748 0 : sp->lzw_nextdata = nextdata;
749 0 : sp->lzw_nextbits = nextbits;
750 0 : sp->dec_nbitsmask = nbitsmask;
751 0 : sp->dec_oldcodep = oldcodep;
752 0 : sp->dec_free_entp = free_entp;
753 0 : sp->dec_maxcodep = maxcodep;
754 :
755 0 : if (occ > 0) {
756 : #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__))
757 : TIFFErrorExt(tif->tif_clientdata, module,
758 : "Not enough data at scanline %d (short %I64d bytes)",
759 : tif->tif_row, (unsigned __int64) occ);
760 : #else
761 0 : TIFFErrorExt(tif->tif_clientdata, module,
762 : "Not enough data at scanline %d (short %llu bytes)",
763 : tif->tif_row, (unsigned long long) occ);
764 : #endif
765 0 : return (0);
766 : }
767 0 : return (1);
768 : }
769 : #endif /* LZW_COMPAT */
770 :
771 : /*
772 : * LZW Encoding.
773 : */
774 :
775 : static int
776 21 : LZWSetupEncode(TIFF* tif)
777 : {
778 : static const char module[] = "LZWSetupEncode";
779 21 : LZWCodecState* sp = EncoderState(tif);
780 :
781 21 : assert(sp != NULL);
782 21 : sp->enc_hashtab = (hash_t*) _TIFFmalloc(HSIZE*sizeof (hash_t));
783 21 : if (sp->enc_hashtab == NULL) {
784 0 : TIFFErrorExt(tif->tif_clientdata, module,
785 : "No space for LZW hash table");
786 0 : return (0);
787 : }
788 21 : return (1);
789 : }
790 :
791 : /*
792 : * Reset encoding state at the start of a strip.
793 : */
794 : static int
795 73 : LZWPreEncode(TIFF* tif, uint16 s)
796 : {
797 73 : LZWCodecState *sp = EncoderState(tif);
798 :
799 : (void) s;
800 73 : assert(sp != NULL);
801 :
802 73 : if( sp->enc_hashtab == NULL )
803 : {
804 0 : tif->tif_setupencode( tif );
805 : }
806 :
807 73 : sp->lzw_nbits = BITS_MIN;
808 73 : sp->lzw_maxcode = MAXCODE(BITS_MIN);
809 73 : sp->lzw_free_ent = CODE_FIRST;
810 73 : sp->lzw_nextbits = 0;
811 73 : sp->lzw_nextdata = 0;
812 73 : sp->enc_checkpoint = CHECK_GAP;
813 73 : sp->enc_ratio = 0;
814 73 : sp->enc_incount = 0;
815 73 : sp->enc_outcount = 0;
816 : /*
817 : * The 4 here insures there is space for 2 max-sized
818 : * codes in LZWEncode and LZWPostDecode.
819 : */
820 73 : sp->enc_rawlimit = tif->tif_rawdata + tif->tif_rawdatasize-1 - 4;
821 73 : cl_hash(sp); /* clear hash table */
822 73 : sp->enc_oldcode = (hcode_t) -1; /* generates CODE_CLEAR in LZWEncode */
823 73 : return (1);
824 : }
825 :
826 : #define CALCRATIO(sp, rat) { \
827 : if (incount > 0x007fffff) { /* NB: shift will overflow */\
828 : rat = outcount >> 8; \
829 : rat = (rat == 0 ? 0x7fffffff : incount/rat); \
830 : } else \
831 : rat = (incount<<8) / outcount; \
832 : }
833 : #define PutNextCode(op, c) { \
834 : nextdata = (nextdata << nbits) | c; \
835 : nextbits += nbits; \
836 : *op++ = (unsigned char)(nextdata >> (nextbits-8)); \
837 : nextbits -= 8; \
838 : if (nextbits >= 8) { \
839 : *op++ = (unsigned char)(nextdata >> (nextbits-8)); \
840 : nextbits -= 8; \
841 : } \
842 : outcount += nbits; \
843 : }
844 :
845 : /*
846 : * Encode a chunk of pixels.
847 : *
848 : * Uses an open addressing double hashing (no chaining) on the
849 : * prefix code/next character combination. We do a variant of
850 : * Knuth's algorithm D (vol. 3, sec. 6.4) along with G. Knott's
851 : * relatively-prime secondary probe. Here, the modular division
852 : * first probe is gives way to a faster exclusive-or manipulation.
853 : * Also do block compression with an adaptive reset, whereby the
854 : * code table is cleared when the compression ratio decreases,
855 : * but after the table fills. The variable-length output codes
856 : * are re-sized at this point, and a CODE_CLEAR is generated
857 : * for the decoder.
858 : */
859 : static int
860 20069 : LZWEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s)
861 : {
862 20069 : register LZWCodecState *sp = EncoderState(tif);
863 : register long fcode;
864 : register hash_t *hp;
865 : register int h, c;
866 : hcode_t ent;
867 : long disp;
868 : long incount, outcount, checkpoint;
869 : long nextdata, nextbits;
870 : int free_ent, maxcode, nbits;
871 : uint8* op;
872 : uint8* limit;
873 :
874 : (void) s;
875 20069 : if (sp == NULL)
876 0 : return (0);
877 :
878 20069 : assert(sp->enc_hashtab != NULL);
879 :
880 : /*
881 : * Load local state.
882 : */
883 20069 : incount = sp->enc_incount;
884 20069 : outcount = sp->enc_outcount;
885 20069 : checkpoint = sp->enc_checkpoint;
886 20069 : nextdata = sp->lzw_nextdata;
887 20069 : nextbits = sp->lzw_nextbits;
888 20069 : free_ent = sp->lzw_free_ent;
889 20069 : maxcode = sp->lzw_maxcode;
890 20069 : nbits = sp->lzw_nbits;
891 20069 : op = tif->tif_rawcp;
892 20069 : limit = sp->enc_rawlimit;
893 20069 : ent = sp->enc_oldcode;
894 :
895 20069 : if (ent == (hcode_t) -1 && cc > 0) {
896 : /*
897 : * NB: This is safe because it can only happen
898 : * at the start of a strip where we know there
899 : * is space in the data buffer.
900 : */
901 73 : PutNextCode(op, CODE_CLEAR);
902 73 : ent = *bp++; cc--; incount++;
903 : }
904 4993996 : while (cc > 0) {
905 4953858 : c = *bp++; cc--; incount++;
906 4953858 : fcode = ((long)c << BITS_MAX) + ent;
907 4953858 : h = (c << HSHIFT) ^ ent; /* xor hashing */
908 : #ifdef _WINDOWS
909 : /*
910 : * Check hash index for an overflow.
911 : */
912 : if (h >= HSIZE)
913 : h -= HSIZE;
914 : #endif
915 4953858 : hp = &sp->enc_hashtab[h];
916 4953858 : if (hp->hash == fcode) {
917 2147019 : ent = hp->code;
918 2147019 : continue;
919 : }
920 2806839 : if (hp->hash >= 0) {
921 : /*
922 : * Primary hash failed, check secondary hash.
923 : */
924 1058838 : disp = HSIZE - h;
925 1058838 : if (h == 0)
926 131 : disp = 1;
927 : do {
928 : /*
929 : * Avoid pointer arithmetic 'cuz of
930 : * wraparound problems with segments.
931 : */
932 1636434 : if ((h -= disp) < 0)
933 662657 : h += HSIZE;
934 1636434 : hp = &sp->enc_hashtab[h];
935 1636434 : if (hp->hash == fcode) {
936 478305 : ent = hp->code;
937 478305 : goto hit;
938 : }
939 1158129 : } while (hp->hash >= 0);
940 : }
941 : /*
942 : * New entry, emit code and add to table.
943 : */
944 : /*
945 : * Verify there is space in the buffer for the code
946 : * and any potential Clear code that might be emitted
947 : * below. The value of limit is setup so that there
948 : * are at least 4 bytes free--room for 2 codes.
949 : */
950 2328534 : if (op > limit) {
951 0 : tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata);
952 0 : TIFFFlushData1(tif);
953 0 : op = tif->tif_rawdata;
954 : }
955 2328534 : PutNextCode(op, ent);
956 2328534 : ent = c;
957 2328534 : hp->code = free_ent++;
958 2328534 : hp->hash = fcode;
959 2328534 : if (free_ent == CODE_MAX-1) {
960 : /* table is full, emit clear code and reset */
961 583 : cl_hash(sp);
962 583 : sp->enc_ratio = 0;
963 583 : incount = 0;
964 583 : outcount = 0;
965 583 : free_ent = CODE_FIRST;
966 583 : PutNextCode(op, CODE_CLEAR);
967 583 : nbits = BITS_MIN;
968 583 : maxcode = MAXCODE(BITS_MIN);
969 : } else {
970 : /*
971 : * If the next entry is going to be too big for
972 : * the code size, then increase it, if possible.
973 : */
974 2327951 : if (free_ent > maxcode) {
975 1865 : nbits++;
976 1865 : assert(nbits <= BITS_MAX);
977 1865 : maxcode = (int) MAXCODE(nbits);
978 2326086 : } else if (incount >= checkpoint) {
979 : long rat;
980 : /*
981 : * Check compression ratio and, if things seem
982 : * to be slipping, clear the hash table and
983 : * reset state. The compression ratio is a
984 : * 24+8-bit fractional number.
985 : */
986 40 : checkpoint = incount+CHECK_GAP;
987 40 : CALCRATIO(sp, rat);
988 40 : if (rat <= sp->enc_ratio) {
989 0 : cl_hash(sp);
990 0 : sp->enc_ratio = 0;
991 0 : incount = 0;
992 0 : outcount = 0;
993 0 : free_ent = CODE_FIRST;
994 0 : PutNextCode(op, CODE_CLEAR);
995 0 : nbits = BITS_MIN;
996 0 : maxcode = MAXCODE(BITS_MIN);
997 : } else
998 40 : sp->enc_ratio = rat;
999 : }
1000 : }
1001 : hit:
1002 : ;
1003 : }
1004 :
1005 : /*
1006 : * Restore global state.
1007 : */
1008 20069 : sp->enc_incount = incount;
1009 20069 : sp->enc_outcount = outcount;
1010 20069 : sp->enc_checkpoint = checkpoint;
1011 20069 : sp->enc_oldcode = ent;
1012 20069 : sp->lzw_nextdata = nextdata;
1013 20069 : sp->lzw_nextbits = nextbits;
1014 20069 : sp->lzw_free_ent = free_ent;
1015 20069 : sp->lzw_maxcode = maxcode;
1016 20069 : sp->lzw_nbits = nbits;
1017 20069 : tif->tif_rawcp = op;
1018 20069 : return (1);
1019 : }
1020 :
1021 : /*
1022 : * Finish off an encoded strip by flushing the last
1023 : * string and tacking on an End Of Information code.
1024 : */
1025 : static int
1026 73 : LZWPostEncode(TIFF* tif)
1027 : {
1028 73 : register LZWCodecState *sp = EncoderState(tif);
1029 73 : uint8* op = tif->tif_rawcp;
1030 73 : long nextbits = sp->lzw_nextbits;
1031 73 : long nextdata = sp->lzw_nextdata;
1032 73 : long outcount = sp->enc_outcount;
1033 73 : int nbits = sp->lzw_nbits;
1034 :
1035 73 : if (op > sp->enc_rawlimit) {
1036 0 : tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata);
1037 0 : TIFFFlushData1(tif);
1038 0 : op = tif->tif_rawdata;
1039 : }
1040 73 : if (sp->enc_oldcode != (hcode_t) -1) {
1041 73 : PutNextCode(op, sp->enc_oldcode);
1042 73 : sp->enc_oldcode = (hcode_t) -1;
1043 : }
1044 73 : PutNextCode(op, CODE_EOI);
1045 73 : if (nextbits > 0)
1046 71 : *op++ = (unsigned char)(nextdata << (8-nextbits));
1047 73 : tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata);
1048 73 : return (1);
1049 : }
1050 :
1051 : /*
1052 : * Reset encoding hash table.
1053 : */
1054 : static void
1055 656 : cl_hash(LZWCodecState* sp)
1056 : {
1057 656 : register hash_t *hp = &sp->enc_hashtab[HSIZE-1];
1058 656 : register long i = HSIZE-8;
1059 :
1060 : do {
1061 738000 : i -= 8;
1062 738000 : hp[-7].hash = -1;
1063 738000 : hp[-6].hash = -1;
1064 738000 : hp[-5].hash = -1;
1065 738000 : hp[-4].hash = -1;
1066 738000 : hp[-3].hash = -1;
1067 738000 : hp[-2].hash = -1;
1068 738000 : hp[-1].hash = -1;
1069 738000 : hp[ 0].hash = -1;
1070 738000 : hp -= 8;
1071 738000 : } while (i >= 0);
1072 1312 : for (i += 8; i > 0; i--, hp--)
1073 656 : hp->hash = -1;
1074 656 : }
1075 :
1076 : static void
1077 100 : LZWCleanup(TIFF* tif)
1078 : {
1079 100 : (void)TIFFPredictorCleanup(tif);
1080 :
1081 100 : assert(tif->tif_data != 0);
1082 :
1083 100 : if (DecoderState(tif)->dec_codetab)
1084 17 : _TIFFfree(DecoderState(tif)->dec_codetab);
1085 :
1086 100 : if (EncoderState(tif)->enc_hashtab)
1087 21 : _TIFFfree(EncoderState(tif)->enc_hashtab);
1088 :
1089 100 : _TIFFfree(tif->tif_data);
1090 100 : tif->tif_data = NULL;
1091 :
1092 100 : _TIFFSetDefaultCompressionState(tif);
1093 100 : }
1094 :
1095 : int
1096 104 : TIFFInitLZW(TIFF* tif, int scheme)
1097 : {
1098 : static const char module[] = "TIFFInitLZW";
1099 104 : assert(scheme == COMPRESSION_LZW);
1100 : /*
1101 : * Allocate state block so tag methods have storage to record values.
1102 : */
1103 104 : tif->tif_data = (uint8*) _TIFFmalloc(sizeof (LZWCodecState));
1104 104 : if (tif->tif_data == NULL)
1105 0 : goto bad;
1106 104 : DecoderState(tif)->dec_codetab = NULL;
1107 104 : DecoderState(tif)->dec_decode = NULL;
1108 104 : EncoderState(tif)->enc_hashtab = NULL;
1109 104 : LZWState(tif)->rw_mode = tif->tif_mode;
1110 :
1111 : /*
1112 : * Install codec methods.
1113 : */
1114 104 : tif->tif_fixuptags = LZWFixupTags;
1115 104 : tif->tif_setupdecode = LZWSetupDecode;
1116 104 : tif->tif_predecode = LZWPreDecode;
1117 104 : tif->tif_decoderow = LZWDecode;
1118 104 : tif->tif_decodestrip = LZWDecode;
1119 104 : tif->tif_decodetile = LZWDecode;
1120 104 : tif->tif_setupencode = LZWSetupEncode;
1121 104 : tif->tif_preencode = LZWPreEncode;
1122 104 : tif->tif_postencode = LZWPostEncode;
1123 104 : tif->tif_encoderow = LZWEncode;
1124 104 : tif->tif_encodestrip = LZWEncode;
1125 104 : tif->tif_encodetile = LZWEncode;
1126 104 : tif->tif_cleanup = LZWCleanup;
1127 : /*
1128 : * Setup predictor setup.
1129 : */
1130 104 : (void) TIFFPredictorInit(tif);
1131 104 : return (1);
1132 : bad:
1133 0 : TIFFErrorExt(tif->tif_clientdata, module,
1134 : "No space for LZW state block");
1135 0 : return (0);
1136 : }
1137 :
1138 : /*
1139 : * Copyright (c) 1985, 1986 The Regents of the University of California.
1140 : * All rights reserved.
1141 : *
1142 : * This code is derived from software contributed to Berkeley by
1143 : * James A. Woods, derived from original work by Spencer Thomas
1144 : * and Joseph Orost.
1145 : *
1146 : * Redistribution and use in source and binary forms are permitted
1147 : * provided that the above copyright notice and this paragraph are
1148 : * duplicated in all such forms and that any documentation,
1149 : * advertising materials, and other materials related to such
1150 : * distribution and use acknowledge that the software was developed
1151 : * by the University of California, Berkeley. The name of the
1152 : * University may not be used to endorse or promote products derived
1153 : * from this software without specific prior written permission.
1154 : * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
1155 : * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
1156 : * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
1157 : */
1158 : #endif /* LZW_SUPPORT */
1159 :
1160 : /* vim: set ts=8 sts=8 sw=8 noet: */
1161 : /*
1162 : * Local Variables:
1163 : * mode: c
1164 : * c-basic-offset: 8
1165 : * fill-column: 78
1166 : * End:
1167 : */
|