3acecba520
This adds two optimizations for ARM:
NEON optimized Adler(-)32 checksum algorithm (ARMv7 and newer NEON CPUs)
ARM(v7+) specific optimization for inflate
I've also connected inflate optimization to the build using the following
source as template.
0397489124 (diff-a62ad2db6c83dbc205d34bb9a8884f16)
Additional info:
https://codereview.chromium.org/2676493007/
https://codereview.chromium.org/2722063002/
Sources:
https://github.com/madler/zlib/pull/251 (only the first commit)
https://github.com/madler/zlib/pull/256
Signed-off-by: Daniel Engberg <daniel.engberg.lists@pyret.net>
2411 lines
89 KiB
Diff
2411 lines
89 KiB
Diff
From 6bac7a3e0ebcd3147294b73acb34606eba18ae7f Mon Sep 17 00:00:00 2001
|
|
From: Simon Hosie <simon.hosie@arm.com>
|
|
Date: Wed, 12 Apr 2017 12:52:33 -0700
|
|
Subject: [PATCH 1/2] Prepare ARM-specific contrib directory.
|
|
|
|
Change-Id: Id4cda552b39bfb39ab35ec499dbe122b43b6d1a1
|
|
---
|
|
contrib/arm/inffast.c | 323 ++++++++++
|
|
contrib/arm/inflate.c | 1561 +++++++++++++++++++++++++++++++++++++++++++++++++
|
|
2 files changed, 1884 insertions(+)
|
|
create mode 100644 contrib/arm/inffast.c
|
|
create mode 100644 contrib/arm/inflate.c
|
|
|
|
diff --git a/contrib/arm/inffast.c b/contrib/arm/inffast.c
|
|
new file mode 100644
|
|
index 00000000..0dbd1dbc
|
|
--- /dev/null
|
|
+++ b/contrib/arm/inffast.c
|
|
@@ -0,0 +1,323 @@
|
|
+/* inffast.c -- fast decoding
|
|
+ * Copyright (C) 1995-2017 Mark Adler
|
|
+ * For conditions of distribution and use, see copyright notice in zlib.h
|
|
+ */
|
|
+
|
|
+#include "zutil.h"
|
|
+#include "inftrees.h"
|
|
+#include "inflate.h"
|
|
+#include "inffast.h"
|
|
+
|
|
+#ifdef ASMINF
|
|
+# pragma message("Assembler code may have bugs -- use at your own risk")
|
|
+#else
|
|
+
|
|
+/*
|
|
+ Decode literal, length, and distance codes and write out the resulting
|
|
+ literal and match bytes until either not enough input or output is
|
|
+ available, an end-of-block is encountered, or a data error is encountered.
|
|
+ When large enough input and output buffers are supplied to inflate(), for
|
|
+ example, a 16K input buffer and a 64K output buffer, more than 95% of the
|
|
+ inflate execution time is spent in this routine.
|
|
+
|
|
+ Entry assumptions:
|
|
+
|
|
+ state->mode == LEN
|
|
+ strm->avail_in >= 6
|
|
+ strm->avail_out >= 258
|
|
+ start >= strm->avail_out
|
|
+ state->bits < 8
|
|
+
|
|
+ On return, state->mode is one of:
|
|
+
|
|
+ LEN -- ran out of enough output space or enough available input
|
|
+ TYPE -- reached end of block code, inflate() to interpret next block
|
|
+ BAD -- error in block data
|
|
+
|
|
+ Notes:
|
|
+
|
|
+ - The maximum input bits used by a length/distance pair is 15 bits for the
|
|
+ length code, 5 bits for the length extra, 15 bits for the distance code,
|
|
+ and 13 bits for the distance extra. This totals 48 bits, or six bytes.
|
|
+ Therefore if strm->avail_in >= 6, then there is enough input to avoid
|
|
+ checking for available input while decoding.
|
|
+
|
|
+ - The maximum bytes that a single length/distance pair can output is 258
|
|
+ bytes, which is the maximum length that can be coded. inflate_fast()
|
|
+ requires strm->avail_out >= 258 for each loop to avoid checking for
|
|
+ output space.
|
|
+ */
|
|
+void ZLIB_INTERNAL inflate_fast(strm, start)
|
|
+z_streamp strm;
|
|
+unsigned start; /* inflate()'s starting value for strm->avail_out */
|
|
+{
|
|
+ struct inflate_state FAR *state;
|
|
+ z_const unsigned char FAR *in; /* local strm->next_in */
|
|
+ z_const unsigned char FAR *last; /* have enough input while in < last */
|
|
+ unsigned char FAR *out; /* local strm->next_out */
|
|
+ unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
|
|
+ unsigned char FAR *end; /* while out < end, enough space available */
|
|
+#ifdef INFLATE_STRICT
|
|
+ unsigned dmax; /* maximum distance from zlib header */
|
|
+#endif
|
|
+ unsigned wsize; /* window size or zero if not using window */
|
|
+ unsigned whave; /* valid bytes in the window */
|
|
+ unsigned wnext; /* window write index */
|
|
+ unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
|
|
+ unsigned long hold; /* local strm->hold */
|
|
+ unsigned bits; /* local strm->bits */
|
|
+ code const FAR *lcode; /* local strm->lencode */
|
|
+ code const FAR *dcode; /* local strm->distcode */
|
|
+ unsigned lmask; /* mask for first level of length codes */
|
|
+ unsigned dmask; /* mask for first level of distance codes */
|
|
+ code here; /* retrieved table entry */
|
|
+ unsigned op; /* code bits, operation, extra bits, or */
|
|
+ /* window position, window bytes to copy */
|
|
+ unsigned len; /* match length, unused bytes */
|
|
+ unsigned dist; /* match distance */
|
|
+ unsigned char FAR *from; /* where to copy match from */
|
|
+
|
|
+ /* copy state to local variables */
|
|
+ state = (struct inflate_state FAR *)strm->state;
|
|
+ in = strm->next_in;
|
|
+ last = in + (strm->avail_in - 5);
|
|
+ out = strm->next_out;
|
|
+ beg = out - (start - strm->avail_out);
|
|
+ end = out + (strm->avail_out - 257);
|
|
+#ifdef INFLATE_STRICT
|
|
+ dmax = state->dmax;
|
|
+#endif
|
|
+ wsize = state->wsize;
|
|
+ whave = state->whave;
|
|
+ wnext = state->wnext;
|
|
+ window = state->window;
|
|
+ hold = state->hold;
|
|
+ bits = state->bits;
|
|
+ lcode = state->lencode;
|
|
+ dcode = state->distcode;
|
|
+ lmask = (1U << state->lenbits) - 1;
|
|
+ dmask = (1U << state->distbits) - 1;
|
|
+
|
|
+ /* decode literals and length/distances until end-of-block or not enough
|
|
+ input data or output space */
|
|
+ do {
|
|
+ if (bits < 15) {
|
|
+ hold += (unsigned long)(*in++) << bits;
|
|
+ bits += 8;
|
|
+ hold += (unsigned long)(*in++) << bits;
|
|
+ bits += 8;
|
|
+ }
|
|
+ here = lcode[hold & lmask];
|
|
+ dolen:
|
|
+ op = (unsigned)(here.bits);
|
|
+ hold >>= op;
|
|
+ bits -= op;
|
|
+ op = (unsigned)(here.op);
|
|
+ if (op == 0) { /* literal */
|
|
+ Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
|
|
+ "inflate: literal '%c'\n" :
|
|
+ "inflate: literal 0x%02x\n", here.val));
|
|
+ *out++ = (unsigned char)(here.val);
|
|
+ }
|
|
+ else if (op & 16) { /* length base */
|
|
+ len = (unsigned)(here.val);
|
|
+ op &= 15; /* number of extra bits */
|
|
+ if (op) {
|
|
+ if (bits < op) {
|
|
+ hold += (unsigned long)(*in++) << bits;
|
|
+ bits += 8;
|
|
+ }
|
|
+ len += (unsigned)hold & ((1U << op) - 1);
|
|
+ hold >>= op;
|
|
+ bits -= op;
|
|
+ }
|
|
+ Tracevv((stderr, "inflate: length %u\n", len));
|
|
+ if (bits < 15) {
|
|
+ hold += (unsigned long)(*in++) << bits;
|
|
+ bits += 8;
|
|
+ hold += (unsigned long)(*in++) << bits;
|
|
+ bits += 8;
|
|
+ }
|
|
+ here = dcode[hold & dmask];
|
|
+ dodist:
|
|
+ op = (unsigned)(here.bits);
|
|
+ hold >>= op;
|
|
+ bits -= op;
|
|
+ op = (unsigned)(here.op);
|
|
+ if (op & 16) { /* distance base */
|
|
+ dist = (unsigned)(here.val);
|
|
+ op &= 15; /* number of extra bits */
|
|
+ if (bits < op) {
|
|
+ hold += (unsigned long)(*in++) << bits;
|
|
+ bits += 8;
|
|
+ if (bits < op) {
|
|
+ hold += (unsigned long)(*in++) << bits;
|
|
+ bits += 8;
|
|
+ }
|
|
+ }
|
|
+ dist += (unsigned)hold & ((1U << op) - 1);
|
|
+#ifdef INFLATE_STRICT
|
|
+ if (dist > dmax) {
|
|
+ strm->msg = (char *)"invalid distance too far back";
|
|
+ state->mode = BAD;
|
|
+ break;
|
|
+ }
|
|
+#endif
|
|
+ hold >>= op;
|
|
+ bits -= op;
|
|
+ Tracevv((stderr, "inflate: distance %u\n", dist));
|
|
+ op = (unsigned)(out - beg); /* max distance in output */
|
|
+ if (dist > op) { /* see if copy from window */
|
|
+ op = dist - op; /* distance back in window */
|
|
+ if (op > whave) {
|
|
+ if (state->sane) {
|
|
+ strm->msg =
|
|
+ (char *)"invalid distance too far back";
|
|
+ state->mode = BAD;
|
|
+ break;
|
|
+ }
|
|
+#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
|
|
+ if (len <= op - whave) {
|
|
+ do {
|
|
+ *out++ = 0;
|
|
+ } while (--len);
|
|
+ continue;
|
|
+ }
|
|
+ len -= op - whave;
|
|
+ do {
|
|
+ *out++ = 0;
|
|
+ } while (--op > whave);
|
|
+ if (op == 0) {
|
|
+ from = out - dist;
|
|
+ do {
|
|
+ *out++ = *from++;
|
|
+ } while (--len);
|
|
+ continue;
|
|
+ }
|
|
+#endif
|
|
+ }
|
|
+ from = window;
|
|
+ if (wnext == 0) { /* very common case */
|
|
+ from += wsize - op;
|
|
+ if (op < len) { /* some from window */
|
|
+ len -= op;
|
|
+ do {
|
|
+ *out++ = *from++;
|
|
+ } while (--op);
|
|
+ from = out - dist; /* rest from output */
|
|
+ }
|
|
+ }
|
|
+ else if (wnext < op) { /* wrap around window */
|
|
+ from += wsize + wnext - op;
|
|
+ op -= wnext;
|
|
+ if (op < len) { /* some from end of window */
|
|
+ len -= op;
|
|
+ do {
|
|
+ *out++ = *from++;
|
|
+ } while (--op);
|
|
+ from = window;
|
|
+ if (wnext < len) { /* some from start of window */
|
|
+ op = wnext;
|
|
+ len -= op;
|
|
+ do {
|
|
+ *out++ = *from++;
|
|
+ } while (--op);
|
|
+ from = out - dist; /* rest from output */
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ else { /* contiguous in window */
|
|
+ from += wnext - op;
|
|
+ if (op < len) { /* some from window */
|
|
+ len -= op;
|
|
+ do {
|
|
+ *out++ = *from++;
|
|
+ } while (--op);
|
|
+ from = out - dist; /* rest from output */
|
|
+ }
|
|
+ }
|
|
+ while (len > 2) {
|
|
+ *out++ = *from++;
|
|
+ *out++ = *from++;
|
|
+ *out++ = *from++;
|
|
+ len -= 3;
|
|
+ }
|
|
+ if (len) {
|
|
+ *out++ = *from++;
|
|
+ if (len > 1)
|
|
+ *out++ = *from++;
|
|
+ }
|
|
+ }
|
|
+ else {
|
|
+ from = out - dist; /* copy direct from output */
|
|
+ do { /* minimum length is three */
|
|
+ *out++ = *from++;
|
|
+ *out++ = *from++;
|
|
+ *out++ = *from++;
|
|
+ len -= 3;
|
|
+ } while (len > 2);
|
|
+ if (len) {
|
|
+ *out++ = *from++;
|
|
+ if (len > 1)
|
|
+ *out++ = *from++;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ else if ((op & 64) == 0) { /* 2nd level distance code */
|
|
+ here = dcode[here.val + (hold & ((1U << op) - 1))];
|
|
+ goto dodist;
|
|
+ }
|
|
+ else {
|
|
+ strm->msg = (char *)"invalid distance code";
|
|
+ state->mode = BAD;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ else if ((op & 64) == 0) { /* 2nd level length code */
|
|
+ here = lcode[here.val + (hold & ((1U << op) - 1))];
|
|
+ goto dolen;
|
|
+ }
|
|
+ else if (op & 32) { /* end-of-block */
|
|
+ Tracevv((stderr, "inflate: end of block\n"));
|
|
+ state->mode = TYPE;
|
|
+ break;
|
|
+ }
|
|
+ else {
|
|
+ strm->msg = (char *)"invalid literal/length code";
|
|
+ state->mode = BAD;
|
|
+ break;
|
|
+ }
|
|
+ } while (in < last && out < end);
|
|
+
|
|
+ /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
|
|
+ len = bits >> 3;
|
|
+ in -= len;
|
|
+ bits -= len << 3;
|
|
+ hold &= (1U << bits) - 1;
|
|
+
|
|
+ /* update state and return */
|
|
+ strm->next_in = in;
|
|
+ strm->next_out = out;
|
|
+ strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
|
|
+ strm->avail_out = (unsigned)(out < end ?
|
|
+ 257 + (end - out) : 257 - (out - end));
|
|
+ state->hold = hold;
|
|
+ state->bits = bits;
|
|
+ return;
|
|
+}
|
|
+
|
|
+/*
|
|
+ inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
|
|
+ - Using bit fields for code structure
|
|
+ - Different op definition to avoid & for extra bits (do & for table bits)
|
|
+ - Three separate decoding do-loops for direct, window, and wnext == 0
|
|
+ - Special case for distance > 1 copies to do overlapped load and store copy
|
|
+ - Explicit branch predictions (based on measured branch probabilities)
|
|
+ - Deferring match copy and interspersing it with decoding subsequent codes
|
|
+ - Swapping literal/length else
|
|
+ - Swapping window/direct else
|
|
+ - Larger unrolled copy loops (three is about right)
|
|
+ - Moving len -= 3 statement into middle of loop
|
|
+ */
|
|
+
|
|
+#endif /* !ASMINF */
|
|
diff --git a/contrib/arm/inflate.c b/contrib/arm/inflate.c
|
|
new file mode 100644
|
|
index 00000000..ac333e8c
|
|
--- /dev/null
|
|
+++ b/contrib/arm/inflate.c
|
|
@@ -0,0 +1,1561 @@
|
|
+/* inflate.c -- zlib decompression
|
|
+ * Copyright (C) 1995-2016 Mark Adler
|
|
+ * For conditions of distribution and use, see copyright notice in zlib.h
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * Change history:
|
|
+ *
|
|
+ * 1.2.beta0 24 Nov 2002
|
|
+ * - First version -- complete rewrite of inflate to simplify code, avoid
|
|
+ * creation of window when not needed, minimize use of window when it is
|
|
+ * needed, make inffast.c even faster, implement gzip decoding, and to
|
|
+ * improve code readability and style over the previous zlib inflate code
|
|
+ *
|
|
+ * 1.2.beta1 25 Nov 2002
|
|
+ * - Use pointers for available input and output checking in inffast.c
|
|
+ * - Remove input and output counters in inffast.c
|
|
+ * - Change inffast.c entry and loop from avail_in >= 7 to >= 6
|
|
+ * - Remove unnecessary second byte pull from length extra in inffast.c
|
|
+ * - Unroll direct copy to three copies per loop in inffast.c
|
|
+ *
|
|
+ * 1.2.beta2 4 Dec 2002
|
|
+ * - Change external routine names to reduce potential conflicts
|
|
+ * - Correct filename to inffixed.h for fixed tables in inflate.c
|
|
+ * - Make hbuf[] unsigned char to match parameter type in inflate.c
|
|
+ * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset)
|
|
+ * to avoid negation problem on Alphas (64 bit) in inflate.c
|
|
+ *
|
|
+ * 1.2.beta3 22 Dec 2002
|
|
+ * - Add comments on state->bits assertion in inffast.c
|
|
+ * - Add comments on op field in inftrees.h
|
|
+ * - Fix bug in reuse of allocated window after inflateReset()
|
|
+ * - Remove bit fields--back to byte structure for speed
|
|
+ * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths
|
|
+ * - Change post-increments to pre-increments in inflate_fast(), PPC biased?
|
|
+ * - Add compile time option, POSTINC, to use post-increments instead (Intel?)
|
|
+ * - Make MATCH copy in inflate() much faster for when inflate_fast() not used
|
|
+ * - Use local copies of stream next and avail values, as well as local bit
|
|
+ * buffer and bit count in inflate()--for speed when inflate_fast() not used
|
|
+ *
|
|
+ * 1.2.beta4 1 Jan 2003
|
|
+ * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings
|
|
+ * - Move a comment on output buffer sizes from inffast.c to inflate.c
|
|
+ * - Add comments in inffast.c to introduce the inflate_fast() routine
|
|
+ * - Rearrange window copies in inflate_fast() for speed and simplification
|
|
+ * - Unroll last copy for window match in inflate_fast()
|
|
+ * - Use local copies of window variables in inflate_fast() for speed
|
|
+ * - Pull out common wnext == 0 case for speed in inflate_fast()
|
|
+ * - Make op and len in inflate_fast() unsigned for consistency
|
|
+ * - Add FAR to lcode and dcode declarations in inflate_fast()
|
|
+ * - Simplified bad distance check in inflate_fast()
|
|
+ * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new
|
|
+ * source file infback.c to provide a call-back interface to inflate for
|
|
+ * programs like gzip and unzip -- uses window as output buffer to avoid
|
|
+ * window copying
|
|
+ *
|
|
+ * 1.2.beta5 1 Jan 2003
|
|
+ * - Improved inflateBack() interface to allow the caller to provide initial
|
|
+ * input in strm.
|
|
+ * - Fixed stored blocks bug in inflateBack()
|
|
+ *
|
|
+ * 1.2.beta6 4 Jan 2003
|
|
+ * - Added comments in inffast.c on effectiveness of POSTINC
|
|
+ * - Typecasting all around to reduce compiler warnings
|
|
+ * - Changed loops from while (1) or do {} while (1) to for (;;), again to
|
|
+ * make compilers happy
|
|
+ * - Changed type of window in inflateBackInit() to unsigned char *
|
|
+ *
|
|
+ * 1.2.beta7 27 Jan 2003
|
|
+ * - Changed many types to unsigned or unsigned short to avoid warnings
|
|
+ * - Added inflateCopy() function
|
|
+ *
|
|
+ * 1.2.0 9 Mar 2003
|
|
+ * - Changed inflateBack() interface to provide separate opaque descriptors
|
|
+ * for the in() and out() functions
|
|
+ * - Changed inflateBack() argument and in_func typedef to swap the length
|
|
+ * and buffer address return values for the input function
|
|
+ * - Check next_in and next_out for Z_NULL on entry to inflate()
|
|
+ *
|
|
+ * The history for versions after 1.2.0 is in ChangeLog in the zlib distribution.
|
|
+ */
|
|
+
|
|
+#include "zutil.h"
|
|
+#include "inftrees.h"
|
|
+#include "inflate.h"
|
|
+#include "inffast.h"
|
|
+
|
|
+#ifdef MAKEFIXED
|
|
+# ifndef BUILDFIXED
|
|
+# define BUILDFIXED
|
|
+# endif
|
|
+#endif
|
|
+
|
|
+/* function prototypes */
|
|
+local int inflateStateCheck OF((z_streamp strm));
|
|
+local void fixedtables OF((struct inflate_state FAR *state));
|
|
+local int updatewindow OF((z_streamp strm, const unsigned char FAR *end,
|
|
+ unsigned copy));
|
|
+#ifdef BUILDFIXED
|
|
+ void makefixed OF((void));
|
|
+#endif
|
|
+local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf,
|
|
+ unsigned len));
|
|
+
|
|
+local int inflateStateCheck(strm)
|
|
+z_streamp strm;
|
|
+{
|
|
+ struct inflate_state FAR *state;
|
|
+ if (strm == Z_NULL ||
|
|
+ strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
|
|
+ return 1;
|
|
+ state = (struct inflate_state FAR *)strm->state;
|
|
+ if (state == Z_NULL || state->strm != strm ||
|
|
+ state->mode < HEAD || state->mode > SYNC)
|
|
+ return 1;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int ZEXPORT inflateResetKeep(strm)
|
|
+z_streamp strm;
|
|
+{
|
|
+ struct inflate_state FAR *state;
|
|
+
|
|
+ if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
|
|
+ state = (struct inflate_state FAR *)strm->state;
|
|
+ strm->total_in = strm->total_out = state->total = 0;
|
|
+ strm->msg = Z_NULL;
|
|
+ if (state->wrap) /* to support ill-conceived Java test suite */
|
|
+ strm->adler = state->wrap & 1;
|
|
+ state->mode = HEAD;
|
|
+ state->last = 0;
|
|
+ state->havedict = 0;
|
|
+ state->dmax = 32768U;
|
|
+ state->head = Z_NULL;
|
|
+ state->hold = 0;
|
|
+ state->bits = 0;
|
|
+ state->lencode = state->distcode = state->next = state->codes;
|
|
+ state->sane = 1;
|
|
+ state->back = -1;
|
|
+ Tracev((stderr, "inflate: reset\n"));
|
|
+ return Z_OK;
|
|
+}
|
|
+
|
|
+int ZEXPORT inflateReset(strm)
|
|
+z_streamp strm;
|
|
+{
|
|
+ struct inflate_state FAR *state;
|
|
+
|
|
+ if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
|
|
+ state = (struct inflate_state FAR *)strm->state;
|
|
+ state->wsize = 0;
|
|
+ state->whave = 0;
|
|
+ state->wnext = 0;
|
|
+ return inflateResetKeep(strm);
|
|
+}
|
|
+
|
|
+int ZEXPORT inflateReset2(strm, windowBits)
+z_streamp strm;
+int windowBits;
+{
+    int wrap;
+    struct inflate_state FAR *state;
+
+    /* reject windowBits < -15 up front so the negation below cannot overflow */
+    if (windowBits < -15 || inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+
+    /* negative windowBits selects wrap 0; its magnitude is the window bits */
+    if (windowBits < 0) {
+        wrap = 0;
+        windowBits = -windowBits;
+    }
+    else {
+        wrap = (windowBits >> 4) + 5;
+#ifdef GUNZIP
+        if (windowBits < 48)
+            windowBits &= 15;
+#endif
+    }
+
+    /* accept 0 or 8..15 window bits; free an existing window if bits differ */
+    if (windowBits && (windowBits < 8 || windowBits > 15))
+        return Z_STREAM_ERROR;
+    if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) {
+        ZFREE(strm, state->window);
+        state->window = Z_NULL;
+    }
+
+    /* record wrap and window bits, then let inflateReset() reset the rest */
+    state->wrap = wrap;
+    state->wbits = (unsigned)windowBits;
+    return inflateReset(strm);
+}
|
|
+
|
|
+int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size)
|
|
+z_streamp strm;
|
|
+int windowBits;
|
|
+const char *version;
|
|
+int stream_size;
|
|
+{
|
|
+ int ret;
|
|
+ struct inflate_state FAR *state;
|
|
+
|
|
+ if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
|
|
+ stream_size != (int)(sizeof(z_stream)))
|
|
+ return Z_VERSION_ERROR;
|
|
+ if (strm == Z_NULL) return Z_STREAM_ERROR;
|
|
+ strm->msg = Z_NULL; /* in case we return an error */
|
|
+ if (strm->zalloc == (alloc_func)0) {
|
|
+#ifdef Z_SOLO
|
|
+ return Z_STREAM_ERROR;
|
|
+#else
|
|
+ strm->zalloc = zcalloc;
|
|
+ strm->opaque = (voidpf)0;
|
|
+#endif
|
|
+ }
|
|
+ if (strm->zfree == (free_func)0)
|
|
+#ifdef Z_SOLO
|
|
+ return Z_STREAM_ERROR;
|
|
+#else
|
|
+ strm->zfree = zcfree;
|
|
+#endif
|
|
+ state = (struct inflate_state FAR *)
|
|
+ ZALLOC(strm, 1, sizeof(struct inflate_state));
|
|
+ if (state == Z_NULL) return Z_MEM_ERROR;
|
|
+ Tracev((stderr, "inflate: allocated\n"));
|
|
+ strm->state = (struct internal_state FAR *)state;
|
|
+ state->strm = strm;
|
|
+ state->window = Z_NULL;
|
|
+ state->mode = HEAD; /* to pass state test in inflateReset2() */
|
|
+ ret = inflateReset2(strm, windowBits);
|
|
+ if (ret != Z_OK) {
|
|
+ ZFREE(strm, state);
|
|
+ strm->state = Z_NULL;
|
|
+ }
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int ZEXPORT inflateInit_(strm, version, stream_size)
|
|
+z_streamp strm;
|
|
+const char *version;
|
|
+int stream_size;
|
|
+{
|
|
+ return inflateInit2_(strm, DEF_WBITS, version, stream_size);
|
|
+}
|
|
+
|
|
+int ZEXPORT inflatePrime(strm, bits, value)
+z_streamp strm;
+int bits;
+int value;
+{
+    struct inflate_state FAR *state;
+
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    if (bits < 0) {                     /* negative bits: empty the accumulator */
+        state->hold = 0;
+        state->bits = 0;
+        return Z_OK;
+    }
+    if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR;
+    if (bits == 0) return Z_OK;         /* no-op; avoids a shift by 32 when full */
+    state->hold += ((unsigned)value & ((1U << bits) - 1)) << state->bits;
+    state->bits += (uInt)bits;
+    return Z_OK;
+}
|
|
+
|
|
+/*
|
|
+ Return state with length and distance decoding tables and index sizes set to
|
|
+ fixed code decoding. Normally this returns fixed tables from inffixed.h.
|
|
+ If BUILDFIXED is defined, then instead this routine builds the tables the
|
|
+ first time it's called, and returns those tables the first time and
|
|
+ thereafter. This reduces the size of the code by about 2K bytes, in
|
|
+ exchange for a little execution time. However, BUILDFIXED should not be
|
|
+ used for threaded applications, since the rewriting of the tables and virgin
|
|
+ may not be thread-safe.
|
|
+ */
|
|
+local void fixedtables(state)
|
|
+struct inflate_state FAR *state;
|
|
+{
|
|
+#ifdef BUILDFIXED
|
|
+ static int virgin = 1;
|
|
+ static code *lenfix, *distfix;
|
|
+ static code fixed[544];
|
|
+
|
|
+ /* build fixed huffman tables if first call (may not be thread safe) */
|
|
+ if (virgin) {
|
|
+ unsigned sym, bits;
|
|
+ static code *next;
|
|
+
|
|
+ /* literal/length table */
|
|
+ sym = 0;
|
|
+ while (sym < 144) state->lens[sym++] = 8;
|
|
+ while (sym < 256) state->lens[sym++] = 9;
|
|
+ while (sym < 280) state->lens[sym++] = 7;
|
|
+ while (sym < 288) state->lens[sym++] = 8;
|
|
+ next = fixed;
|
|
+ lenfix = next;
|
|
+ bits = 9;
|
|
+ inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work);
|
|
+
|
|
+ /* distance table */
|
|
+ sym = 0;
|
|
+ while (sym < 32) state->lens[sym++] = 5;
|
|
+ distfix = next;
|
|
+ bits = 5;
|
|
+ inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work);
|
|
+
|
|
+ /* do this just once */
|
|
+ virgin = 0;
|
|
+ }
|
|
+#else /* !BUILDFIXED */
|
|
+# include "inffixed.h"
|
|
+#endif /* BUILDFIXED */
|
|
+ state->lencode = lenfix;
|
|
+ state->lenbits = 9;
|
|
+ state->distcode = distfix;
|
|
+ state->distbits = 5;
|
|
+}
|
|
+
|
|
+#ifdef MAKEFIXED
|
|
+#include <stdio.h>
|
|
+
|
|
+/*
|
|
+ Write out the inffixed.h that is #include'd above. Defining MAKEFIXED also
|
|
+ defines BUILDFIXED, so the tables are built on the fly. makefixed() writes
|
|
+ those tables to stdout, which would be piped to inffixed.h. A small program
|
|
+ can simply call makefixed to do this:
|
|
+
|
|
+ void makefixed(void);
|
|
+
|
|
+ int main(void)
|
|
+ {
|
|
+ makefixed();
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ Then that can be linked with zlib built with MAKEFIXED defined and run:
|
|
+
|
|
+ a.out > inffixed.h
|
|
+ */
|
|
+void makefixed()
|
|
+{
|
|
+ unsigned low, size;
|
|
+ struct inflate_state state;
|
|
+
|
|
+ fixedtables(&state);
|
|
+ puts(" /* inffixed.h -- table for decoding fixed codes");
|
|
+ puts(" * Generated automatically by makefixed().");
|
|
+ puts(" */");
|
|
+ puts("");
|
|
+ puts(" /* WARNING: this file should *not* be used by applications.");
|
|
+ puts(" It is part of the implementation of this library and is");
|
|
+ puts(" subject to change. Applications should only use zlib.h.");
|
|
+ puts(" */");
|
|
+ puts("");
|
|
+ size = 1U << 9;
|
|
+ printf(" static const code lenfix[%u] = {", size);
|
|
+ low = 0;
|
|
+ for (;;) {
|
|
+ if ((low % 7) == 0) printf("\n ");
|
|
+ printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op,
|
|
+ state.lencode[low].bits, state.lencode[low].val);
|
|
+ if (++low == size) break;
|
|
+ putchar(',');
|
|
+ }
|
|
+ puts("\n };");
|
|
+ size = 1U << 5;
|
|
+ printf("\n static const code distfix[%u] = {", size);
|
|
+ low = 0;
|
|
+ for (;;) {
|
|
+ if ((low % 6) == 0) printf("\n ");
|
|
+ printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits,
|
|
+ state.distcode[low].val);
|
|
+ if (++low == size) break;
|
|
+ putchar(',');
|
|
+ }
|
|
+ puts("\n };");
|
|
+}
|
|
+#endif /* MAKEFIXED */
|
|
+
|
|
+/*
|
|
+ Update the window with the last wsize (normally 32K) bytes written before
|
|
+ returning. If window does not exist yet, create it. This is only called
|
|
+ when a window is already in use, or when output has been written during this
|
|
+ inflate call, but the end of the deflate stream has not been reached yet.
|
|
+ It is also called to create a window for dictionary data when a dictionary
|
|
+ is loaded.
|
|
+
|
|
+ Providing output buffers larger than 32K to inflate() should provide a speed
|
|
+ advantage, since only the last 32K of output is copied to the sliding window
|
|
+ upon return from inflate(), and since all distances after the first 32K of
|
|
+ output will fall in the output data, making match copies simpler and faster.
|
|
+ The advantage may be dependent on the size of the processor's data caches.
|
|
+ */
|
|
+local int updatewindow(strm, end, copy)
|
|
+z_streamp strm;
|
|
+const Bytef *end;
|
|
+unsigned copy;
|
|
+{
|
|
+ struct inflate_state FAR *state;
|
|
+ unsigned dist;
|
|
+
|
|
+ state = (struct inflate_state FAR *)strm->state;
|
|
+
|
|
+ /* if it hasn't been done already, allocate space for the window */
|
|
+ if (state->window == Z_NULL) {
|
|
+ state->window = (unsigned char FAR *)
|
|
+ ZALLOC(strm, 1U << state->wbits,
|
|
+ sizeof(unsigned char));
|
|
+ if (state->window == Z_NULL) return 1;
|
|
+ }
|
|
+
|
|
+ /* if window not in use yet, initialize */
|
|
+ if (state->wsize == 0) {
|
|
+ state->wsize = 1U << state->wbits;
|
|
+ state->wnext = 0;
|
|
+ state->whave = 0;
|
|
+ }
|
|
+
|
|
+ /* copy state->wsize or less output bytes into the circular window */
|
|
+ if (copy >= state->wsize) {
|
|
+ zmemcpy(state->window, end - state->wsize, state->wsize);
|
|
+ state->wnext = 0;
|
|
+ state->whave = state->wsize;
|
|
+ }
|
|
+ else {
|
|
+ dist = state->wsize - state->wnext;
|
|
+ if (dist > copy) dist = copy;
|
|
+ zmemcpy(state->window + state->wnext, end - copy, dist);
|
|
+ copy -= dist;
|
|
+ if (copy) {
|
|
+ zmemcpy(state->window, end - copy, copy);
|
|
+ state->wnext = copy;
|
|
+ state->whave = state->wsize;
|
|
+ }
|
|
+ else {
|
|
+ state->wnext += dist;
|
|
+ if (state->wnext == state->wsize) state->wnext = 0;
|
|
+ if (state->whave < state->wsize) state->whave += dist;
|
|
+ }
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* Macros for inflate(): */
|
|
+
|
|
+/* check function to use adler32() for zlib or crc32() for gzip */
|
|
+#ifdef GUNZIP
|
|
+# define UPDATE(check, buf, len) \
|
|
+ (state->flags ? crc32(check, buf, len) : adler32(check, buf, len))
|
|
+#else
|
|
+# define UPDATE(check, buf, len) adler32(check, buf, len)
|
|
+#endif
|
|
+
|
|
+/* check macros for header crc */
|
|
+#ifdef GUNZIP
|
|
+# define CRC2(check, word) \
|
|
+ do { \
|
|
+ hbuf[0] = (unsigned char)(word); \
|
|
+ hbuf[1] = (unsigned char)((word) >> 8); \
|
|
+ check = crc32(check, hbuf, 2); \
|
|
+ } while (0)
|
|
+
|
|
+# define CRC4(check, word) \
|
|
+ do { \
|
|
+ hbuf[0] = (unsigned char)(word); \
|
|
+ hbuf[1] = (unsigned char)((word) >> 8); \
|
|
+ hbuf[2] = (unsigned char)((word) >> 16); \
|
|
+ hbuf[3] = (unsigned char)((word) >> 24); \
|
|
+ check = crc32(check, hbuf, 4); \
|
|
+ } while (0)
|
|
+#endif
|
|
+
|
|
+/* Load registers with state in inflate() for speed */
|
|
+#define LOAD() \
|
|
+ do { \
|
|
+ put = strm->next_out; \
|
|
+ left = strm->avail_out; \
|
|
+ next = strm->next_in; \
|
|
+ have = strm->avail_in; \
|
|
+ hold = state->hold; \
|
|
+ bits = state->bits; \
|
|
+ } while (0)
|
|
+
|
|
+/* Restore state from registers in inflate() */
|
|
+#define RESTORE() \
|
|
+ do { \
|
|
+ strm->next_out = put; \
|
|
+ strm->avail_out = left; \
|
|
+ strm->next_in = next; \
|
|
+ strm->avail_in = have; \
|
|
+ state->hold = hold; \
|
|
+ state->bits = bits; \
|
|
+ } while (0)
|
|
+
|
|
+/* Clear the input bit accumulator */
|
|
+#define INITBITS() \
|
|
+ do { \
|
|
+ hold = 0; \
|
|
+ bits = 0; \
|
|
+ } while (0)
|
|
+
|
|
+/* Get a byte of input into the bit accumulator, or return from inflate()
|
|
+ if there is no input available. */
|
|
+#define PULLBYTE() \
|
|
+ do { \
|
|
+ if (have == 0) goto inf_leave; \
|
|
+ have--; \
|
|
+ hold += (unsigned long)(*next++) << bits; \
|
|
+ bits += 8; \
|
|
+ } while (0)
|
|
+
|
|
+/* Assure that there are at least n bits in the bit accumulator. If there is
|
|
+ not enough available input to do that, then return from inflate(). */
|
|
+#define NEEDBITS(n) \
|
|
+ do { \
|
|
+ while (bits < (unsigned)(n)) \
|
|
+ PULLBYTE(); \
|
|
+ } while (0)
|
|
+
|
|
+/* Return the low n bits of the bit accumulator (n < 16) */
|
|
+#define BITS(n) \
|
|
+ ((unsigned)hold & ((1U << (n)) - 1))
|
|
+
|
|
+/* Remove n bits from the bit accumulator */
|
|
+#define DROPBITS(n) \
|
|
+ do { \
|
|
+ hold >>= (n); \
|
|
+ bits -= (unsigned)(n); \
|
|
+ } while (0)
|
|
+
|
|
+/* Remove zero to seven bits as needed to go to a byte boundary */
|
|
+#define BYTEBITS() \
|
|
+ do { \
|
|
+ hold >>= bits & 7; \
|
|
+ bits -= bits & 7; \
|
|
+ } while (0)
|
|
+
|
|
+/*
|
|
+ inflate() uses a state machine to process as much input data and generate as
|
|
+ much output data as possible before returning. The state machine is
|
|
+ structured roughly as follows:
|
|
+
|
|
+ for (;;) switch (state) {
|
|
+ ...
|
|
+ case STATEn:
|
|
+ if (not enough input data or output space to make progress)
|
|
+ return;
|
|
+ ... make progress ...
|
|
+ state = STATEm;
|
|
+ break;
|
|
+ ...
|
|
+ }
|
|
+
|
|
+ so when inflate() is called again, the same case is attempted again, and
|
|
+ if the appropriate resources are provided, the machine proceeds to the
|
|
+ next state. The NEEDBITS() macro is usually the way the state evaluates
|
|
+ whether it can proceed or should return. NEEDBITS() does the return if
|
|
+ the requested bits are not available. The typical use of the BITS macros
|
|
+ is:
|
|
+
|
|
+ NEEDBITS(n);
|
|
+ ... do something with BITS(n) ...
|
|
+ DROPBITS(n);
|
|
+
|
|
+ where NEEDBITS(n) either returns from inflate() if there isn't enough
|
|
+ input left to load n bits into the accumulator, or it continues. BITS(n)
|
|
+ gives the low n bits in the accumulator. When done, DROPBITS(n) drops
|
|
+ the low n bits off the accumulator. INITBITS() clears the accumulator
|
|
+ and sets the number of available bits to zero. BYTEBITS() discards just
|
|
+ enough bits to put the accumulator on a byte boundary. After BYTEBITS()
|
|
+ and a NEEDBITS(8), then BITS(8) would return the next byte in the stream.
|
|
+
|
|
+ NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return
|
|
+ if there is no input available. The decoding of variable length codes uses
|
|
+ PULLBYTE() directly in order to pull just enough bytes to decode the next
|
|
+ code, and no more.
|
|
+
|
|
+ Some states loop until they get enough input, making sure that enough
|
|
+ state information is maintained to continue the loop where it left off
|
|
+ if NEEDBITS() returns in the loop. For example, want, need, and keep
|
|
+ would all have to actually be part of the saved state in case NEEDBITS()
|
|
+ returns:
|
|
+
|
|
+ case STATEw:
|
|
+ while (want < need) {
|
|
+ NEEDBITS(n);
|
|
+ keep[want++] = BITS(n);
|
|
+ DROPBITS(n);
|
|
+ }
|
|
+ state = STATEx;
|
|
+ case STATEx:
|
|
+
|
|
+ As shown above, if the next state is also the next case, then the break
|
|
+ is omitted.
|
|
+
|
|
+ A state may also return if there is not enough output space available to
|
|
+ complete that state. Those states are copying stored data, writing a
|
|
+ literal byte, and copying a matching string.
|
|
+
|
|
+ When returning, a "goto inf_leave" is used to update the total counters,
|
|
+ update the check value, and determine whether any progress has been made
|
|
+ during that inflate() call in order to return the proper return code.
|
|
+ Progress is defined as a change in either strm->avail_in or strm->avail_out.
|
|
+ When there is a window, goto inf_leave will update the window with the last
|
|
+ output written. If a goto inf_leave occurs in the middle of decompression
|
|
+ and there is no window currently, goto inf_leave will create one and copy
|
|
+ output to the window for the next call of inflate().
|
|
+
|
|
+ In this implementation, the flush parameter of inflate() only affects the
|
|
+ return code (per zlib.h). inflate() always writes as much as possible to
|
|
+ strm->next_out, given the space available and the provided input--the effect
|
|
+ documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers
|
|
+ the allocation of and copying into a sliding window until necessary, which
|
|
+ provides the effect documented in zlib.h for Z_FINISH when the entire input
|
|
+ stream is available. So the only thing the flush parameter actually does is:
|
|
+ when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it
|
|
+ will return Z_BUF_ERROR if it has not reached the end of the stream.
|
|
+ */
|
|
+
|
|
+/*
+   Run the inflate state machine on strm until input or output is exhausted,
+   the stream ends, or an error is found.
+
+   strm   stream whose next_in/avail_in supply compressed data and whose
+          next_out/avail_out receive decompressed data
+   flush  Z_BLOCK and Z_TREES make the call stop at block/table boundaries;
+          Z_FINISH turns a would-be Z_OK result into Z_BUF_ERROR
+
+   Returns:
+     Z_STREAM_ERROR  bad stream state, next_out is Z_NULL, next_in is Z_NULL
+                     with avail_in != 0, or the state is SYNC/unknown
+     Z_NEED_DICT     a preset dictionary is required and has not been set
+     Z_STREAM_END    the DONE state was reached
+     Z_DATA_ERROR    the BAD state was reached (strm->msg names the reason)
+     Z_MEM_ERROR     updatewindow() failed, now or on an earlier call
+     Z_BUF_ERROR     no input was consumed and no output produced, or
+                     flush == Z_FINISH and the result would have been Z_OK
+     Z_OK            otherwise
+ */
+int ZEXPORT inflate(strm, flush)
+z_streamp strm;
+int flush;
+{
+    struct inflate_state FAR *state;
+    z_const unsigned char FAR *next;    /* next input */
+    unsigned char FAR *put;     /* next output */
+    unsigned have, left;        /* available input and output */
+    unsigned long hold;         /* bit buffer */
+    unsigned bits;              /* bits in bit buffer */
+    unsigned in, out;           /* save starting available input and output */
+    unsigned copy;              /* number of stored or match bytes to copy */
+    unsigned char FAR *from;    /* where to copy match bytes from */
+    code here;                  /* current decoding table entry */
+    code last;                  /* parent table entry */
+    unsigned len;               /* length to copy for repeats, bits to drop */
+    int ret;                    /* return code */
+#ifdef GUNZIP
+    unsigned char hbuf[4];      /* buffer for gzip header crc calculation */
+#endif
+    static const unsigned short order[19] = /* permutation of code lengths */
+        {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+    if (inflateStateCheck(strm) || strm->next_out == Z_NULL ||
+        (strm->next_in == Z_NULL && strm->avail_in != 0))
+        return Z_STREAM_ERROR;
+
+    state = (struct inflate_state FAR *)strm->state;
+    if (state->mode == TYPE) state->mode = TYPEDO;      /* skip check */
+    LOAD();
+    in = have;
+    out = left;
+    ret = Z_OK;
+    for (;;)
+        switch (state->mode) {
+        case HEAD:
+            if (state->wrap == 0) {
+                state->mode = TYPEDO;
+                break;
+            }
+            NEEDBITS(16);
+#ifdef GUNZIP
+            if ((state->wrap & 2) && hold == 0x8b1f) {  /* gzip header */
+                if (state->wbits == 0)
+                    state->wbits = 15;
+                state->check = crc32(0L, Z_NULL, 0);
+                CRC2(state->check, hold);
+                INITBITS();
+                state->mode = FLAGS;
+                break;
+            }
+            state->flags = 0;           /* expect zlib header */
+            if (state->head != Z_NULL)
+                state->head->done = -1;
+            if (!(state->wrap & 1) ||   /* check if zlib header allowed */
+#else
+            if (
+#endif
+                ((BITS(8) << 8) + (hold >> 8)) % 31) {
+                strm->msg = (char *)"incorrect header check";
+                state->mode = BAD;
+                break;
+            }
+            if (BITS(4) != Z_DEFLATED) {
+                strm->msg = (char *)"unknown compression method";
+                state->mode = BAD;
+                break;
+            }
+            DROPBITS(4);
+            len = BITS(4) + 8;
+            if (state->wbits == 0)
+                state->wbits = len;
+            if (len > 15 || len > state->wbits) {
+                strm->msg = (char *)"invalid window size";
+                state->mode = BAD;
+                break;
+            }
+            state->dmax = 1U << len;
+            Tracev((stderr, "inflate: zlib header ok\n"));
+            strm->adler = state->check = adler32(0L, Z_NULL, 0);
+            state->mode = hold & 0x200 ? DICTID : TYPE;
+            INITBITS();
+            break;
+#ifdef GUNZIP
+        case FLAGS:
+            NEEDBITS(16);
+            state->flags = (int)(hold);
+            if ((state->flags & 0xff) != Z_DEFLATED) {
+                strm->msg = (char *)"unknown compression method";
+                state->mode = BAD;
+                break;
+            }
+            if (state->flags & 0xe000) {
+                strm->msg = (char *)"unknown header flags set";
+                state->mode = BAD;
+                break;
+            }
+            if (state->head != Z_NULL)
+                state->head->text = (int)((hold >> 8) & 1);
+            if ((state->flags & 0x0200) && (state->wrap & 4))
+                CRC2(state->check, hold);
+            INITBITS();
+            state->mode = TIME;
+            /* fallthrough */
+        case TIME:
+            NEEDBITS(32);
+            if (state->head != Z_NULL)
+                state->head->time = hold;
+            if ((state->flags & 0x0200) && (state->wrap & 4))
+                CRC4(state->check, hold);
+            INITBITS();
+            state->mode = OS;
+            /* fallthrough */
+        case OS:
+            NEEDBITS(16);
+            if (state->head != Z_NULL) {
+                state->head->xflags = (int)(hold & 0xff);
+                state->head->os = (int)(hold >> 8);
+            }
+            if ((state->flags & 0x0200) && (state->wrap & 4))
+                CRC2(state->check, hold);
+            INITBITS();
+            state->mode = EXLEN;
+            /* fallthrough */
+        case EXLEN:
+            if (state->flags & 0x0400) {
+                NEEDBITS(16);
+                state->length = (unsigned)(hold);
+                if (state->head != Z_NULL)
+                    state->head->extra_len = (unsigned)hold;
+                if ((state->flags & 0x0200) && (state->wrap & 4))
+                    CRC2(state->check, hold);
+                INITBITS();
+            }
+            else if (state->head != Z_NULL)
+                state->head->extra = Z_NULL;
+            state->mode = EXTRA;
+            /* fallthrough */
+        case EXTRA:
+            if (state->flags & 0x0400) {
+                copy = state->length;
+                if (copy > have) copy = have;
+                if (copy) {
+                    /* len is how much of the extra field earlier calls have
+                       already consumed.  Only copy while that offset is
+                       still inside the caller's buffer: once len reaches
+                       extra_max, extra + len points past the buffer and
+                       extra_max - len is no longer a valid byte count, so
+                       copying would write out of bounds. */
+                    if (state->head != Z_NULL &&
+                        state->head->extra != Z_NULL &&
+                        (len = state->head->extra_len - state->length) <
+                            state->head->extra_max) {
+                        zmemcpy(state->head->extra + len, next,
+                                len + copy > state->head->extra_max ?
+                                state->head->extra_max - len : copy);
+                    }
+                    if ((state->flags & 0x0200) && (state->wrap & 4))
+                        state->check = crc32(state->check, next, copy);
+                    have -= copy;
+                    next += copy;
+                    state->length -= copy;
+                }
+                if (state->length) goto inf_leave;
+            }
+            state->length = 0;
+            state->mode = NAME;
+            /* fallthrough */
+        case NAME:
+            if (state->flags & 0x0800) {
+                if (have == 0) goto inf_leave;
+                copy = 0;
+                do {
+                    len = (unsigned)(next[copy++]);
+                    if (state->head != Z_NULL &&
+                            state->head->name != Z_NULL &&
+                            state->length < state->head->name_max)
+                        state->head->name[state->length++] = (Bytef)len;
+                } while (len && copy < have);
+                if ((state->flags & 0x0200) && (state->wrap & 4))
+                    state->check = crc32(state->check, next, copy);
+                have -= copy;
+                next += copy;
+                if (len) goto inf_leave;
+            }
+            else if (state->head != Z_NULL)
+                state->head->name = Z_NULL;
+            state->length = 0;
+            state->mode = COMMENT;
+            /* fallthrough */
+        case COMMENT:
+            if (state->flags & 0x1000) {
+                if (have == 0) goto inf_leave;
+                copy = 0;
+                do {
+                    len = (unsigned)(next[copy++]);
+                    if (state->head != Z_NULL &&
+                            state->head->comment != Z_NULL &&
+                            state->length < state->head->comm_max)
+                        state->head->comment[state->length++] = (Bytef)len;
+                } while (len && copy < have);
+                if ((state->flags & 0x0200) && (state->wrap & 4))
+                    state->check = crc32(state->check, next, copy);
+                have -= copy;
+                next += copy;
+                if (len) goto inf_leave;
+            }
+            else if (state->head != Z_NULL)
+                state->head->comment = Z_NULL;
+            state->mode = HCRC;
+            /* fallthrough */
+        case HCRC:
+            if (state->flags & 0x0200) {
+                NEEDBITS(16);
+                if ((state->wrap & 4) && hold != (state->check & 0xffff)) {
+                    strm->msg = (char *)"header crc mismatch";
+                    state->mode = BAD;
+                    break;
+                }
+                INITBITS();
+            }
+            if (state->head != Z_NULL) {
+                state->head->hcrc = (int)((state->flags >> 9) & 1);
+                state->head->done = 1;
+            }
+            strm->adler = state->check = crc32(0L, Z_NULL, 0);
+            state->mode = TYPE;
+            break;
+#endif
+        case DICTID:
+            NEEDBITS(32);
+            strm->adler = state->check = ZSWAP32(hold);
+            INITBITS();
+            state->mode = DICT;
+            /* fallthrough */
+        case DICT:
+            if (state->havedict == 0) {
+                RESTORE();
+                return Z_NEED_DICT;
+            }
+            strm->adler = state->check = adler32(0L, Z_NULL, 0);
+            state->mode = TYPE;
+            /* fallthrough */
+        case TYPE:
+            if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave;
+            /* fallthrough */
+        case TYPEDO:
+            if (state->last) {
+                BYTEBITS();
+                state->mode = CHECK;
+                break;
+            }
+            NEEDBITS(3);
+            state->last = BITS(1);
+            DROPBITS(1);
+            switch (BITS(2)) {
+            case 0:                             /* stored block */
+                Tracev((stderr, "inflate: stored block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = STORED;
+                break;
+            case 1:                             /* fixed block */
+                fixedtables(state);
+                Tracev((stderr, "inflate: fixed codes block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = LEN_;             /* decode codes */
+                if (flush == Z_TREES) {
+                    DROPBITS(2);
+                    goto inf_leave;
+                }
+                break;
+            case 2:                             /* dynamic block */
+                Tracev((stderr, "inflate: dynamic codes block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = TABLE;
+                break;
+            case 3:
+                strm->msg = (char *)"invalid block type";
+                state->mode = BAD;
+            }
+            DROPBITS(2);
+            break;
+        case STORED:
+            BYTEBITS();                         /* go to byte boundary */
+            NEEDBITS(32);
+            if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) {
+                strm->msg = (char *)"invalid stored block lengths";
+                state->mode = BAD;
+                break;
+            }
+            state->length = (unsigned)hold & 0xffff;
+            Tracev((stderr, "inflate: stored length %u\n",
+                    state->length));
+            INITBITS();
+            state->mode = COPY_;
+            if (flush == Z_TREES) goto inf_leave;
+            /* fallthrough */
+        case COPY_:
+            state->mode = COPY;
+            /* fallthrough */
+        case COPY:
+            copy = state->length;
+            if (copy) {
+                if (copy > have) copy = have;
+                if (copy > left) copy = left;
+                if (copy == 0) goto inf_leave;
+                zmemcpy(put, next, copy);
+                have -= copy;
+                next += copy;
+                left -= copy;
+                put += copy;
+                state->length -= copy;
+                break;
+            }
+            Tracev((stderr, "inflate: stored end\n"));
+            state->mode = TYPE;
+            break;
+        case TABLE:
+            NEEDBITS(14);
+            state->nlen = BITS(5) + 257;
+            DROPBITS(5);
+            state->ndist = BITS(5) + 1;
+            DROPBITS(5);
+            state->ncode = BITS(4) + 4;
+            DROPBITS(4);
+#ifndef PKZIP_BUG_WORKAROUND
+            if (state->nlen > 286 || state->ndist > 30) {
+                strm->msg = (char *)"too many length or distance symbols";
+                state->mode = BAD;
+                break;
+            }
+#endif
+            Tracev((stderr, "inflate: table sizes ok\n"));
+            state->have = 0;
+            state->mode = LENLENS;
+            /* fallthrough */
+        case LENLENS:
+            while (state->have < state->ncode) {
+                NEEDBITS(3);
+                state->lens[order[state->have++]] = (unsigned short)BITS(3);
+                DROPBITS(3);
+            }
+            while (state->have < 19)
+                state->lens[order[state->have++]] = 0;
+            state->next = state->codes;
+            state->lencode = (const code FAR *)(state->next);
+            state->lenbits = 7;
+            ret = inflate_table(CODES, state->lens, 19, &(state->next),
+                                &(state->lenbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid code lengths set";
+                state->mode = BAD;
+                break;
+            }
+            Tracev((stderr, "inflate: code lengths ok\n"));
+            state->have = 0;
+            state->mode = CODELENS;
+            /* fallthrough */
+        case CODELENS:
+            while (state->have < state->nlen + state->ndist) {
+                for (;;) {
+                    here = state->lencode[BITS(state->lenbits)];
+                    if ((unsigned)(here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                if (here.val < 16) {
+                    DROPBITS(here.bits);
+                    state->lens[state->have++] = here.val;
+                }
+                else {
+                    if (here.val == 16) {
+                        NEEDBITS(here.bits + 2);
+                        DROPBITS(here.bits);
+                        if (state->have == 0) {
+                            strm->msg = (char *)"invalid bit length repeat";
+                            state->mode = BAD;
+                            break;
+                        }
+                        len = state->lens[state->have - 1];
+                        copy = 3 + BITS(2);
+                        DROPBITS(2);
+                    }
+                    else if (here.val == 17) {
+                        NEEDBITS(here.bits + 3);
+                        DROPBITS(here.bits);
+                        len = 0;
+                        copy = 3 + BITS(3);
+                        DROPBITS(3);
+                    }
+                    else {
+                        NEEDBITS(here.bits + 7);
+                        DROPBITS(here.bits);
+                        len = 0;
+                        copy = 11 + BITS(7);
+                        DROPBITS(7);
+                    }
+                    if (state->have + copy > state->nlen + state->ndist) {
+                        strm->msg = (char *)"invalid bit length repeat";
+                        state->mode = BAD;
+                        break;
+                    }
+                    while (copy--)
+                        state->lens[state->have++] = (unsigned short)len;
+                }
+            }
+
+            /* handle error breaks in while */
+            if (state->mode == BAD) break;
+
+            /* check for end-of-block code (better have one) */
+            if (state->lens[256] == 0) {
+                strm->msg = (char *)"invalid code -- missing end-of-block";
+                state->mode = BAD;
+                break;
+            }
+
+            /* build code tables -- note: do not change the lenbits or distbits
+               values here (9 and 6) without reading the comments in inftrees.h
+               concerning the ENOUGH constants, which depend on those values */
+            state->next = state->codes;
+            state->lencode = (const code FAR *)(state->next);
+            state->lenbits = 9;
+            ret = inflate_table(LENS, state->lens, state->nlen, &(state->next),
+                                &(state->lenbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid literal/lengths set";
+                state->mode = BAD;
+                break;
+            }
+            state->distcode = (const code FAR *)(state->next);
+            state->distbits = 6;
+            ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist,
+                            &(state->next), &(state->distbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid distances set";
+                state->mode = BAD;
+                break;
+            }
+            Tracev((stderr, "inflate: codes ok\n"));
+            state->mode = LEN_;
+            if (flush == Z_TREES) goto inf_leave;
+            /* fallthrough */
+        case LEN_:
+            state->mode = LEN;
+            /* fallthrough */
+        case LEN:
+            if (have >= 6 && left >= 258) {
+                RESTORE();
+                inflate_fast(strm, out);
+                LOAD();
+                if (state->mode == TYPE)
+                    state->back = -1;
+                break;
+            }
+            state->back = 0;
+            for (;;) {
+                here = state->lencode[BITS(state->lenbits)];
+                if ((unsigned)(here.bits) <= bits) break;
+                PULLBYTE();
+            }
+            if (here.op && (here.op & 0xf0) == 0) {
+                last = here;
+                for (;;) {
+                    here = state->lencode[last.val +
+                            (BITS(last.bits + last.op) >> last.bits)];
+                    if ((unsigned)(last.bits + here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                DROPBITS(last.bits);
+                state->back += last.bits;
+            }
+            DROPBITS(here.bits);
+            state->back += here.bits;
+            state->length = (unsigned)here.val;
+            if ((int)(here.op) == 0) {
+                Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
+                        "inflate: literal '%c'\n" :
+                        "inflate: literal 0x%02x\n", here.val));
+                state->mode = LIT;
+                break;
+            }
+            if (here.op & 32) {
+                Tracevv((stderr, "inflate: end of block\n"));
+                state->back = -1;
+                state->mode = TYPE;
+                break;
+            }
+            if (here.op & 64) {
+                strm->msg = (char *)"invalid literal/length code";
+                state->mode = BAD;
+                break;
+            }
+            state->extra = (unsigned)(here.op) & 15;
+            state->mode = LENEXT;
+            /* fallthrough */
+        case LENEXT:
+            if (state->extra) {
+                NEEDBITS(state->extra);
+                state->length += BITS(state->extra);
+                DROPBITS(state->extra);
+                state->back += state->extra;
+            }
+            Tracevv((stderr, "inflate: length %u\n", state->length));
+            state->was = state->length;
+            state->mode = DIST;
+            /* fallthrough */
+        case DIST:
+            for (;;) {
+                here = state->distcode[BITS(state->distbits)];
+                if ((unsigned)(here.bits) <= bits) break;
+                PULLBYTE();
+            }
+            if ((here.op & 0xf0) == 0) {
+                last = here;
+                for (;;) {
+                    here = state->distcode[last.val +
+                            (BITS(last.bits + last.op) >> last.bits)];
+                    if ((unsigned)(last.bits + here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                DROPBITS(last.bits);
+                state->back += last.bits;
+            }
+            DROPBITS(here.bits);
+            state->back += here.bits;
+            if (here.op & 64) {
+                strm->msg = (char *)"invalid distance code";
+                state->mode = BAD;
+                break;
+            }
+            state->offset = (unsigned)here.val;
+            state->extra = (unsigned)(here.op) & 15;
+            state->mode = DISTEXT;
+            /* fallthrough */
+        case DISTEXT:
+            if (state->extra) {
+                NEEDBITS(state->extra);
+                state->offset += BITS(state->extra);
+                DROPBITS(state->extra);
+                state->back += state->extra;
+            }
+#ifdef INFLATE_STRICT
+            if (state->offset > state->dmax) {
+                strm->msg = (char *)"invalid distance too far back";
+                state->mode = BAD;
+                break;
+            }
+#endif
+            Tracevv((stderr, "inflate: distance %u\n", state->offset));
+            state->mode = MATCH;
+            /* fallthrough */
+        case MATCH:
+            if (left == 0) goto inf_leave;
+            copy = out - left;
+            if (state->offset > copy) {         /* copy from window */
+                copy = state->offset - copy;
+                if (copy > state->whave) {
+                    if (state->sane) {
+                        strm->msg = (char *)"invalid distance too far back";
+                        state->mode = BAD;
+                        break;
+                    }
+#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
+                    Trace((stderr, "inflate.c too far\n"));
+                    copy -= state->whave;
+                    if (copy > state->length) copy = state->length;
+                    if (copy > left) copy = left;
+                    left -= copy;
+                    state->length -= copy;
+                    do {
+                        *put++ = 0;
+                    } while (--copy);
+                    if (state->length == 0) state->mode = LEN;
+                    break;
+#endif
+                }
+                if (copy > state->wnext) {
+                    copy -= state->wnext;
+                    from = state->window + (state->wsize - copy);
+                }
+                else
+                    from = state->window + (state->wnext - copy);
+                if (copy > state->length) copy = state->length;
+            }
+            else {                              /* copy from output */
+                from = put - state->offset;
+                copy = state->length;
+            }
+            if (copy > left) copy = left;
+            left -= copy;
+            state->length -= copy;
+            do {
+                *put++ = *from++;
+            } while (--copy);
+            if (state->length == 0) state->mode = LEN;
+            break;
+        case LIT:
+            if (left == 0) goto inf_leave;
+            *put++ = (unsigned char)(state->length);
+            left--;
+            state->mode = LEN;
+            break;
+        case CHECK:
+            if (state->wrap) {
+                NEEDBITS(32);
+                out -= left;
+                strm->total_out += out;
+                state->total += out;
+                if ((state->wrap & 4) && out)
+                    strm->adler = state->check =
+                        UPDATE(state->check, put - out, out);
+                out = left;
+                if ((state->wrap & 4) && (
+#ifdef GUNZIP
+                     state->flags ? hold :
+#endif
+                     ZSWAP32(hold)) != state->check) {
+                    strm->msg = (char *)"incorrect data check";
+                    state->mode = BAD;
+                    break;
+                }
+                INITBITS();
+                Tracev((stderr, "inflate: check matches trailer\n"));
+            }
+#ifdef GUNZIP
+            state->mode = LENGTH;
+            /* fallthrough */
+        case LENGTH:
+            if (state->wrap && state->flags) {
+                NEEDBITS(32);
+                if (hold != (state->total & 0xffffffffUL)) {
+                    strm->msg = (char *)"incorrect length check";
+                    state->mode = BAD;
+                    break;
+                }
+                INITBITS();
+                Tracev((stderr, "inflate: length matches trailer\n"));
+            }
+#endif
+            state->mode = DONE;
+            /* fallthrough */
+        case DONE:
+            ret = Z_STREAM_END;
+            goto inf_leave;
+        case BAD:
+            ret = Z_DATA_ERROR;
+            goto inf_leave;
+        case MEM:
+            return Z_MEM_ERROR;
+        case SYNC:
+            /* fallthrough */
+        default:
+            return Z_STREAM_ERROR;
+        }
+
+    /*
+       Return from inflate(), updating the total counts and the check value.
+       If there was no progress during the inflate() call, return a buffer
+       error.  Call updatewindow() to create and/or update the window state.
+       Note: a memory error from inflate() is non-recoverable.
+     */
+  inf_leave:
+    RESTORE();
+    if (state->wsize || (out != strm->avail_out && state->mode < BAD &&
+            (state->mode < CHECK || flush != Z_FINISH)))
+        if (updatewindow(strm, strm->next_out, out - strm->avail_out)) {
+            state->mode = MEM;
+            return Z_MEM_ERROR;
+        }
+    in -= strm->avail_in;
+    out -= strm->avail_out;
+    strm->total_in += in;
+    strm->total_out += out;
+    state->total += out;
+    if ((state->wrap & 4) && out)
+        strm->adler = state->check =
+            UPDATE(state->check, strm->next_out - out, out);
+    strm->data_type = (int)state->bits + (state->last ? 64 : 0) +
+                      (state->mode == TYPE ? 128 : 0) +
+                      (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0);
+    if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK)
+        ret = Z_BUF_ERROR;
+    return ret;
+}
|
|
+
|
|
+/*
+   Release the sliding window (if one was allocated) and the inflate state
+   attached to strm, then clear strm->state.  Returns Z_STREAM_ERROR when the
+   stream state fails inflateStateCheck(), Z_OK otherwise.
+ */
+int ZEXPORT inflateEnd(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *istate;
+
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+
+    istate = (struct inflate_state FAR *)strm->state;
+    if (istate->window != Z_NULL) {
+        ZFREE(strm, istate->window);
+    }
+    ZFREE(strm, strm->state);
+    strm->state = Z_NULL;
+    Tracev((stderr, "inflate: end\n"));
+    return Z_OK;
+}
|
|
+
|
|
+/*
+   Copy the current window contents into dictionary (when it is not Z_NULL
+   and the window holds data) and store the number of window bytes in
+   *dictLength (when it is not Z_NULL).  The window is circular: the part
+   from wnext to whave is copied first, followed by the part before wnext.
+   Returns Z_STREAM_ERROR for an invalid stream state, Z_OK otherwise.
+ */
+int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength)
+z_streamp strm;
+Bytef *dictionary;
+uInt *dictLength;
+{
+    struct inflate_state FAR *st;
+
+    /* validate the stream before touching its state */
+    if (inflateStateCheck(strm))
+        return Z_STREAM_ERROR;
+    st = (struct inflate_state FAR *)strm->state;
+
+    /* unwrap the circular window into the caller's buffer */
+    if (dictionary != Z_NULL && st->whave) {
+        zmemcpy(dictionary, st->window + st->wnext,
+                st->whave - st->wnext);
+        zmemcpy(dictionary + st->whave - st->wnext,
+                st->window, st->wnext);
+    }
+
+    /* report the length even when no copy was requested */
+    if (dictLength != Z_NULL) {
+        *dictLength = st->whave;
+    }
+    return Z_OK;
+}
|
|
+
|
|
+/*
+   Load dictLength bytes of dictionary into the sliding window.
+
+   For a wrapped stream (wrap != 0) this is only accepted in the DICT state,
+   and the Adler-32 of the dictionary must equal the identifier saved in
+   state->check.  Returns Z_STREAM_ERROR for a bad state or wrong mode,
+   Z_DATA_ERROR for an identifier mismatch, Z_MEM_ERROR if updatewindow()
+   fails (the state is then left in MEM mode), and Z_OK on success.
+ */
+int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength)
+z_streamp strm;
+const Bytef *dictionary;
+uInt dictLength;
+{
+    struct inflate_state FAR *st;
+    unsigned long id;
+    int failed;
+
+    /* check state */
+    if (inflateStateCheck(strm))
+        return Z_STREAM_ERROR;
+    st = (struct inflate_state FAR *)strm->state;
+    if (st->wrap != 0 && st->mode != DICT)
+        return Z_STREAM_ERROR;
+
+    /* check for correct dictionary identifier */
+    if (st->mode == DICT) {
+        id = adler32(adler32(0L, Z_NULL, 0), dictionary, dictLength);
+        if (id != st->check)
+            return Z_DATA_ERROR;
+    }
+
+    /* updatewindow() takes a pointer to the END of the data to copy and will
+       amend the existing dictionary if appropriate */
+    failed = updatewindow(strm, dictionary + dictLength, dictLength);
+    if (failed) {
+        st->mode = MEM;
+        return Z_MEM_ERROR;
+    }
+
+    st->havedict = 1;
+    Tracev((stderr, "inflate: dictionary set\n"));
+    return Z_OK;
+}
|
|
+
|
|
+/*
+   Register head as the structure that inflate() fills in while it parses a
+   gzip header, and mark it as not yet done.  Returns Z_STREAM_ERROR when the
+   stream state is invalid or gzip decoding (wrap bit 2) is not enabled,
+   Z_OK otherwise.
+ */
+int ZEXPORT inflateGetHeader(strm, head)
+z_streamp strm;
+gz_headerp head;
+{
+    struct inflate_state FAR *st;
+
+    /* check state */
+    if (inflateStateCheck(strm))
+        return Z_STREAM_ERROR;
+    st = (struct inflate_state FAR *)strm->state;
+    if (!(st->wrap & 2))
+        return Z_STREAM_ERROR;
+
+    /* save header structure */
+    head->done = 0;
+    st->head = head;
+    return Z_OK;
+}
|
|
+
|
|
+/*
|
|
+ Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found
|
|
+ or when out of input. When called, *have is the number of pattern bytes
|
|
+ found in order so far, in 0..3. On return *have is updated to the new
|
|
+ state. If on return *have equals four, then the pattern was found and the
|
|
+ return value is how many bytes were read including the last byte of the
|
|
+ pattern. If *have is less than four, then the pattern has not been found
|
|
+ yet and the return value is len. In the latter case, syncsearch() can be
|
|
+ called again with more data and the *have state. *have is initialized to
|
|
+ zero for the first call.
|
|
+ */
|
|
+local unsigned syncsearch(have, buf, len)
+unsigned FAR *have;
+const unsigned char FAR *buf;
+unsigned len;
+{
+    unsigned matched = *have;   /* pattern bytes matched so far, 0..4 */
+    unsigned pos;               /* index of the next byte to examine */
+
+    for (pos = 0; pos < len && matched < 4; pos++) {
+        int byte = (int)(buf[pos]);
+        int wanted = matched < 2 ? 0 : 0xff;    /* pattern is 0,0,0xff,0xff */
+
+        if (byte == wanted) {
+            matched++;
+        }
+        else if (byte != 0) {
+            matched = 0;                /* non-zero mismatch: start over */
+        }
+        else {
+            matched = 4 - matched;      /* a zero while 0xff was expected */
+        }
+    }
+    *have = matched;
+    return pos;
+}
|
|
+
|
|
+/*
+   Search the buffered bits and then the available input for the byte
+   pattern 0, 0, 0xff, 0xff (see syncsearch()).  All input examined is
+   consumed and counted in total_in.
+
+   Returns Z_STREAM_ERROR for an invalid stream state, Z_BUF_ERROR when there
+   is no input and fewer than 8 bits are buffered, Z_DATA_ERROR when the
+   pattern has not been found yet (the search state is kept, so the call can
+   be repeated with more input), and Z_OK once it is found.  On Z_OK the
+   stream has been reset with total_in/total_out preserved and the mode set
+   to TYPE so that inflate() resumes at a block header.
+ */
+int ZEXPORT inflateSync(strm)
+z_streamp strm;
+{
+    unsigned len;               /* number of bytes to look at or looked at */
+    unsigned long in, out;      /* temporary to save total_in and total_out */
+    unsigned char buf[4];       /* to restore bit buffer to byte string */
+    struct inflate_state FAR *state;
+
+    /* check parameters */
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
+
+    /* if first time, start search in bit buffer */
+    if (state->mode != SYNC) {
+        state->mode = SYNC;
+        /* Drop the partial byte so that whole input bytes remain.  The
+           unconsumed bits sit at the low end of hold (BYTEBITS() aligns the
+           same way), so they must be shifted out to the right; shifting
+           left would keep them and misalign every byte extracted below. */
+        state->hold >>= state->bits & 7;
+        state->bits -= state->bits & 7;
+        len = 0;
+        while (state->bits >= 8) {
+            buf[len++] = (unsigned char)(state->hold);
+            state->hold >>= 8;
+            state->bits -= 8;
+        }
+        state->have = 0;
+        syncsearch(&(state->have), buf, len);
+    }
+
+    /* search available input */
+    len = syncsearch(&(state->have), strm->next_in, strm->avail_in);
+    strm->avail_in -= len;
+    strm->next_in += len;
+    strm->total_in += len;
+
+    /* return no joy or set up to restart inflate() on a new block */
+    if (state->have != 4) return Z_DATA_ERROR;
+    in = strm->total_in;  out = strm->total_out;
+    inflateReset(strm);
+    strm->total_in = in;  strm->total_out = out;
+    state->mode = TYPE;
+    return Z_OK;
+}
|
|
+
|
|
+/*
|
|
+ Returns true if inflate is currently at the end of a block generated by
|
|
+ Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP
|
|
+ implementation to provide an additional safety check. PPP uses
|
|
+ Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored
|
|
+ block. When decompressing, PPP checks that at the end of input packet,
|
|
+ inflate is waiting for these length bytes.
|
|
+ */
|
|
+int ZEXPORT inflateSyncPoint(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *st;
+
+    if (inflateStateCheck(strm))
+        return Z_STREAM_ERROR;
+    st = (struct inflate_state FAR *)strm->state;
+
+    /* true only while waiting for stored-block lengths with an empty bit
+       buffer */
+    if (st->mode != STORED)
+        return 0;
+    return st->bits == 0;
+}
|
|
+
|
|
+/*
+   Duplicate the inflate stream source into dest: the z_stream itself, the
+   inflate state and, when one exists, the sliding window.  Pointers into the
+   state's own codes[] array are re-based onto the copy.  Memory comes from
+   source's allocator.  Returns Z_STREAM_ERROR for an invalid source state or
+   a Z_NULL dest, Z_MEM_ERROR if an allocation fails (nothing is leaked and
+   dest is untouched), and Z_OK on success.
+ */
+int ZEXPORT inflateCopy(dest, source)
+z_streamp dest;
+z_streamp source;
+{
+    struct inflate_state FAR *state;
+    struct inflate_state FAR *copy;
+    unsigned char FAR *window;
+    unsigned wsize;
+
+    /* check input */
+    if (dest == Z_NULL || inflateStateCheck(source))
+        return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)source->state;
+
+    /* allocate space */
+    copy = (struct inflate_state FAR *)
+           ZALLOC(source, 1, sizeof(struct inflate_state));
+    if (copy == Z_NULL)
+        return Z_MEM_ERROR;
+    window = Z_NULL;
+    if (state->window != Z_NULL) {
+        window = (unsigned char FAR *)
+                 ZALLOC(source, 1U << state->wbits, sizeof(unsigned char));
+        if (window == Z_NULL) {
+            ZFREE(source, copy);
+            return Z_MEM_ERROR;
+        }
+    }
+
+    /* copy state */
+    zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream));
+    zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state));
+    copy->strm = dest;
+
+    /* lencode/distcode are re-based only when lencode points into codes[];
+       otherwise the values copied by zmemcpy above are kept */
+    if (state->lencode >= state->codes &&
+        state->lencode <= state->codes + ENOUGH - 1) {
+        copy->lencode = copy->codes + (state->lencode - state->codes);
+        copy->distcode = copy->codes + (state->distcode - state->codes);
+    }
+    copy->next = copy->codes + (state->next - state->codes);
+
+    /* duplicate the window contents */
+    if (window != Z_NULL) {
+        wsize = 1U << state->wbits;
+        zmemcpy(window, state->window, wsize);
+    }
+    copy->window = window;
+    dest->state = (struct internal_state FAR *)copy;
+    return Z_OK;
+}
|
|
+
|
|
+/*
+   Set or clear the state's "sane" flag, which inflate() consults when a
+   match distance reaches further back than the window holds.  When
+   INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR is defined, sane becomes
+   !subvert and Z_OK is returned.  Otherwise subvert is ignored, sane is
+   forced to 1 and Z_DATA_ERROR is returned.  An invalid stream state yields
+   Z_STREAM_ERROR.
+ */
+int ZEXPORT inflateUndermine(strm, subvert)
+z_streamp strm;
+int subvert;
+{
+    struct inflate_state FAR *st;
+
+    if (inflateStateCheck(strm))
+        return Z_STREAM_ERROR;
+    st = (struct inflate_state FAR *)strm->state;
+#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
+    st->sane = subvert ? 0 : 1;
+    return Z_OK;
+#else
+    (void)subvert;
+    st->sane = 1;
+    return Z_DATA_ERROR;
+#endif
+}
|
|
+
|
|
+/*
+   Turn computation and verification of the stream's check value on
+   (check != 0) or off (check == 0) by setting or clearing bit 4 of
+   state->wrap.  Returns Z_STREAM_ERROR for an invalid stream state, Z_OK
+   otherwise.
+ */
+int ZEXPORT inflateValidate(strm, check)
+z_streamp strm;
+int check;
+{
+    struct inflate_state FAR *state;
+
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    /* Only enable checking when the stream is wrapped.  inflate() treats
+       wrap == 0 as "raw deflate, no header or trailer"; setting bit 4 on
+       such a stream would make wrap non-zero, so inflate() would stop
+       treating the stream as raw and look for a header and trailer. */
+    if (check && state->wrap)
+        state->wrap |= 4;
+    else
+        state->wrap &= ~4;
+    return Z_OK;
+}
|
|
+
|
|
+/*
+   Pack two values into one long: state->back shifted left by 16, plus a
+   mode-dependent count -- state->length while in COPY mode,
+   state->was - state->length while in MATCH mode, and zero in any other
+   mode.  Returns -(1L << 16) when the stream state is invalid.
+ */
+long ZEXPORT inflateMark(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *st;
+    long upper;
+
+    if (inflateStateCheck(strm))
+        return -(1L << 16);
+    st = (struct inflate_state FAR *)strm->state;
+
+    /* shift as unsigned long so that a negative back value is well defined */
+    upper = (long)(((unsigned long)((long)st->back)) << 16);
+    if (st->mode == COPY)
+        return upper + st->length;
+    if (st->mode == MATCH)
+        return upper + (st->was - st->length);
+    return upper;
+}
|
|
+
|
|
+/*
+   Return how many entries of state->codes lie before state->next, or
+   (unsigned long)-1 when the stream state is invalid.
+ */
+unsigned long ZEXPORT inflateCodesUsed(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *st;
+
+    if (inflateStateCheck(strm))
+        return (unsigned long)-1;
+    st = (struct inflate_state FAR *)strm->state;
+    return (unsigned long)(st->next - st->codes);
+}
|
|
|
|
From 247147654fe5cd11cf15d8dff91440405ea57040 Mon Sep 17 00:00:00 2001
|
|
From: Simon Hosie <simon.hosie@arm.com>
|
|
Date: Wed, 12 Apr 2017 15:44:21 -0700
|
|
Subject: [PATCH 2/2] Inflate using wider loads and stores
|
|
|
|
In inflate_fast() the output pointer always has plenty of room to write. This
|
|
means that so long as the target is capable, wide un-aligned loads and stores
|
|
can be used to transfer several bytes at once. When the reference distance is
|
|
too short simply unroll the data a little to increase the distance.
|
|
|
|
Change-Id: I59854eb25d2b1e43561c8a2afaf9175bf10cf674
|
|
---
|
|
contrib/arm/chunkcopy.h | 279 ++++++++++++++++++++++++++++++++++++++++++++++++
|
|
contrib/arm/inffast.c | 96 +++++++----------
|
|
contrib/arm/inflate.c | 22 ++--
|
|
3 files changed, 335 insertions(+), 62 deletions(-)
|
|
create mode 100644 contrib/arm/chunkcopy.h
|
|
|
|
diff --git a/contrib/arm/chunkcopy.h b/contrib/arm/chunkcopy.h
|
|
new file mode 100644
|
|
index 00000000..2d6fd6f9
|
|
--- /dev/null
|
|
+++ b/contrib/arm/chunkcopy.h
|
|
@@ -0,0 +1,279 @@
|
|
+/* chunkcopy.h -- fast copies and sets
|
|
+ * Copyright (C) 2017 ARM, Inc.
|
|
+ * For conditions of distribution and use, see copyright notice in zlib.h
|
|
+ */
|
|
+
|
|
+#ifndef CHUNKCOPY_H
|
|
+#define CHUNKCOPY_H
|
|
+
|
|
+#include "zutil.h"
|
|
+#include <arm_neon.h>
|
|
+
|
|
+#if __STDC_VERSION__ >= 199901L
|
|
+#define Z_RESTRICT restrict
|
|
+#else
|
|
+#define Z_RESTRICT
|
|
+#endif
|
|
+
|
|
+typedef uint8x16_t chunkcopy_chunk_t;
|
|
+#define CHUNKCOPY_CHUNK_SIZE sizeof(chunkcopy_chunk_t)
|
|
+
|
|
+/*
|
|
+ Ask the compiler to perform a wide, unaligned load with a machine
|
|
+ instruction appropriate for the chunkcopy_chunk_t type.
|
|
+ */
|
|
+static inline chunkcopy_chunk_t loadchunk(const unsigned char FAR *s) {
|
|
+ chunkcopy_chunk_t c;
|
|
+ __builtin_memcpy(&c, s, sizeof(c));
|
|
+ return c;
|
|
+}
|
|
+
|
|
+/*
|
|
+ Ask the compiler to perform a wide, unaligned store with a machine
|
|
+ instruction appropriate for the chunkcopy_chunk_t type.
|
|
+ */
|
|
+static inline void storechunk(unsigned char FAR *d, chunkcopy_chunk_t c) {
|
|
+ __builtin_memcpy(d, &c, sizeof(c));
|
|
+}
|
|
+
|
|
+/*
|
|
+ Perform a memcpy-like operation, but assume that length is non-zero and that
|
|
+ it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if
|
|
+ the length is shorter than this.
|
|
+
|
|
+ It also guarantees that it will properly unroll the data if the distance
|
|
+ between `out` and `from` is at least CHUNKCOPY_CHUNK_SIZE, which we rely on
|
|
+ in chunkcopy_relaxed().
|
|
+
|
|
+ Aside from better memory bus utilisation, this means that short copies
|
|
+ (CHUNKCOPY_CHUNK_SIZE bytes or fewer) will fall straight through the loop
|
|
+ without iteration, which will hopefully make the branch prediction more
|
|
+ reliable.
|
|
+ */
|
|
+static inline unsigned char FAR *chunkcopy_core(unsigned char FAR *out,
|
|
+ const unsigned char FAR *from,
|
|
+ unsigned len) {
|
|
+ int bump = (--len % CHUNKCOPY_CHUNK_SIZE) + 1;
|
|
+ storechunk(out, loadchunk(from));
|
|
+ out += bump;
|
|
+ from += bump;
|
|
+ len /= CHUNKCOPY_CHUNK_SIZE;
|
|
+ while (len-- > 0) {
|
|
+ storechunk(out, loadchunk(from));
|
|
+ out += CHUNKCOPY_CHUNK_SIZE;
|
|
+ from += CHUNKCOPY_CHUNK_SIZE;
|
|
+ }
|
|
+ return out;
|
|
+}
|
|
+
|
|
+/*
|
|
+ Like chunkcopy_core, but avoid writing past the end of legal output.
|
|
+
|
|
+ Accepts an additional pointer to the end of safe output. A generic safe
|
|
+ copy would use (out + len), but it's normally the case that the end of the
|
|
+ output buffer is beyond the end of the current copy, and this can still be
|
|
+ exploited.
|
|
+ */
|
|
+static inline unsigned char FAR *chunkcopy_core_safe(unsigned char FAR *out,
|
|
+ const unsigned char FAR * from,
|
|
+ unsigned len,
|
|
+ unsigned char FAR *limit) {
|
|
+ Assert(out + len <= limit, "chunk copy exceeds safety limit");
|
|
+ if (limit - out < CHUNKCOPY_CHUNK_SIZE) {
|
|
+ const unsigned char FAR * Z_RESTRICT rfrom = from;
|
|
+ if (len & 8) { __builtin_memcpy(out, rfrom, 8); out += 8; rfrom += 8; }
|
|
+ if (len & 4) { __builtin_memcpy(out, rfrom, 4); out += 4; rfrom += 4; }
|
|
+ if (len & 2) { __builtin_memcpy(out, rfrom, 2); out += 2; rfrom += 2; }
|
|
+ if (len & 1) { *out++ = *rfrom++; }
|
|
+ return out;
|
|
+ }
|
|
+ return chunkcopy_core(out, from, len);
|
|
+}
|
|
+
|
|
+/*
|
|
+ Perform short copies until distance can be rewritten as being at least
|
|
+ CHUNKCOPY_CHUNK_SIZE.
|
|
+
|
|
+ This assumes that it's OK to overwrite at least the first
|
|
+ 2*CHUNKCOPY_CHUNK_SIZE bytes of output even if the copy is shorter than
|
|
+ this. This assumption holds within inflate_fast() which starts every
|
|
+ iteration with at least 258 bytes of output space available (258 being the
|
|
+ maximum length output from a single token; see inffast.c).
|
|
+ */
|
|
+static inline unsigned char FAR *chunkunroll_relaxed(unsigned char FAR *out,
|
|
+ unsigned FAR *dist,
|
|
+ unsigned FAR *len) {
|
|
+ const unsigned char FAR *from = out - *dist;
|
|
+ while (*dist < *len && *dist < CHUNKCOPY_CHUNK_SIZE) {
|
|
+ storechunk(out, loadchunk(from));
|
|
+ out += *dist;
|
|
+ *len -= *dist;
|
|
+ *dist += *dist;
|
|
+ }
|
|
+ return out;
|
|
+}
|
|
+
|
|
+
|
|
+static inline uint8x16_t chunkset_vld1q_dup_u8x8(const unsigned char FAR * Z_RESTRICT from) {
|
|
+#if defined(__clang__) || defined(__aarch64__)
|
|
+ return vreinterpretq_u8_u64(vld1q_dup_u64((void *)from));
|
|
+#else
|
|
+ /* 32-bit GCC uses an alignment hint for vld1q_dup_u64, even when given a
|
|
+ * void pointer, so here's an alternate implementation.
|
|
+ */
|
|
+ uint8x8_t h = vld1_u8(from);
|
|
+ return vcombine_u8(h, h);
|
|
+#endif
|
|
+}
|
|
+
|
|
+/*
|
|
+ Perform an overlapping copy which behaves as a memset() operation, but
|
|
+ supporting periods other than one, and assume that length is non-zero and
|
|
+ that it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE*3 bytes of output
|
|
+ even if the length is shorter than this.
|
|
+ */
|
|
+static inline unsigned char FAR *chunkset_core(unsigned char FAR *out,
|
|
+ unsigned period,
|
|
+ unsigned len) {
|
|
+ uint8x16_t f;
|
|
+ int bump = ((len - 1) % sizeof(f)) + 1;
|
|
+
|
|
+ switch (period) {
|
|
+ case 1:
|
|
+ f = vld1q_dup_u8(out - 1);
|
|
+ vst1q_u8(out, f);
|
|
+ out += bump;
|
|
+ len -= bump;
|
|
+ while (len > 0) {
|
|
+ vst1q_u8(out, f);
|
|
+ out += sizeof(f);
|
|
+ len -= sizeof(f);
|
|
+ }
|
|
+ return out;
|
|
+ case 2:
|
|
+ f = vreinterpretq_u8_u16(vld1q_dup_u16((void *)(out - 2)));
|
|
+ vst1q_u8(out, f);
|
|
+ out += bump;
|
|
+ len -= bump;
|
|
+ if (len > 0) {
|
|
+ f = vreinterpretq_u8_u16(vld1q_dup_u16((void *)(out - 2)));
|
|
+ do {
|
|
+ vst1q_u8(out, f);
|
|
+ out += sizeof(f);
|
|
+ len -= sizeof(f);
|
|
+ } while (len > 0);
|
|
+ }
|
|
+ return out;
|
|
+ case 4:
|
|
+ f = vreinterpretq_u8_u32(vld1q_dup_u32((void *)(out - 4)));
|
|
+ vst1q_u8(out, f);
|
|
+ out += bump;
|
|
+ len -= bump;
|
|
+ if (len > 0) {
|
|
+ f = vreinterpretq_u8_u32(vld1q_dup_u32((void *)(out - 4)));
|
|
+ do {
|
|
+ vst1q_u8(out, f);
|
|
+ out += sizeof(f);
|
|
+ len -= sizeof(f);
|
|
+ } while (len > 0);
|
|
+ }
|
|
+ return out;
|
|
+ case 8:
|
|
+ f = chunkset_vld1q_dup_u8x8(out - 8);
|
|
+ vst1q_u8(out, f);
|
|
+ out += bump;
|
|
+ len -= bump;
|
|
+ if (len > 0) {
|
|
+ f = chunkset_vld1q_dup_u8x8(out - 8);
|
|
+ do {
|
|
+ vst1q_u8(out, f);
|
|
+ out += sizeof(f);
|
|
+ len -= sizeof(f);
|
|
+ } while (len > 0);
|
|
+ }
|
|
+ return out;
|
|
+ }
|
|
+ out = chunkunroll_relaxed(out, &period, &len);
|
|
+ return chunkcopy_core(out, out - period, len);
|
|
+}
|
|
+
|
|
+/*
|
|
+ Perform a memcpy-like operation, but assume that length is non-zero and that
|
|
+ it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if
|
|
+ the length is shorter than this.
|
|
+
|
|
+ Unlike chunkcopy_core() above, no guarantee is made regarding the behaviour
|
|
+ of overlapping buffers, regardless of the distance between the pointers.
|
|
+ This is reflected in the `restrict`-qualified pointers, allowing the
|
|
+ compiler to reorder loads and stores.
|
|
+ */
|
|
+static inline unsigned char FAR *chunkcopy_relaxed(unsigned char FAR * Z_RESTRICT out,
|
|
+ const unsigned char FAR * Z_RESTRICT from,
|
|
+ unsigned len) {
|
|
+ return chunkcopy_core(out, from, len);
|
|
+}
|
|
+
|
|
+/*
|
|
+ Like chunkcopy_relaxed, but avoid writing past the end of legal output.
|
|
+
|
|
+ Unlike chunkcopy_core_safe() above, no guarantee is made regarding the
|
|
+ behaviour of overlapping buffers, regardless of the distance between the
|
|
+ pointers. This is reflected in the `restrict`-qualified pointers, allowing
|
|
+ the compiler to reorder loads and stores.
|
|
+
|
|
+ Accepts an additional pointer to the end of safe output. A generic safe
|
|
+ copy would use (out + len), but it's normally the case that the end of the
|
|
+ output buffer is beyond the end of the current copy, and this can still be
|
|
+ exploited.
|
|
+ */
|
|
+static inline unsigned char FAR *chunkcopy_safe(unsigned char FAR *out,
|
|
+ const unsigned char FAR * Z_RESTRICT from,
|
|
+ unsigned len,
|
|
+ unsigned char FAR *limit) {
|
|
+ Assert(out + len <= limit, "chunk copy exceeds safety limit");
|
|
+ return chunkcopy_core_safe(out, from, len, limit);
|
|
+}
|
|
+
|
|
+/*
|
|
+ Perform chunky copy within the same buffer, where the source and destination
|
|
+ may potentially overlap.
|
|
+
|
|
+ Assumes that len > 0 on entry, and that it's safe to write at least
|
|
+ CHUNKCOPY_CHUNK_SIZE*3 bytes to the output.
|
|
+ */
|
|
+static inline unsigned char FAR *chunkcopy_lapped_relaxed(unsigned char FAR *out,
|
|
+ unsigned dist,
|
|
+ unsigned len) {
|
|
+ if (dist < len && dist < CHUNKCOPY_CHUNK_SIZE) {
|
|
+ return chunkset_core(out, dist, len);
|
|
+ }
|
|
+ return chunkcopy_core(out, out - dist, len);
|
|
+}
|
|
+
|
|
+/*
|
|
+ Behave like chunkcopy_lapped_relaxed, but avoid writing past the end of legal output.
|
|
+
|
|
+ Accepts an additional pointer to the end of safe output. A generic safe
|
|
+ copy would use (out + len), but it's normally the case that the end of the
|
|
+ output buffer is beyond the end of the current copy, and this can still be
|
|
+ exploited.
|
|
+ */
|
|
+static inline unsigned char FAR *chunkcopy_lapped_safe(unsigned char FAR *out,
|
|
+ unsigned dist,
|
|
+ unsigned len,
|
|
+ unsigned char FAR *limit) {
|
|
+ Assert(out + len <= limit, "chunk copy exceeds safety limit");
|
|
+ if (limit - out < CHUNKCOPY_CHUNK_SIZE * 3) {
|
|
+ /* TODO: try harder to optimise this */
|
|
+ while (len-- > 0) {
|
|
+ *out = *(out - dist);
|
|
+ out++;
|
|
+ }
|
|
+ return out;
|
|
+ }
|
|
+ return chunkcopy_lapped_relaxed(out, dist, len);
|
|
+}
|
|
+
|
|
+#undef Z_RESTRICT
|
|
+
|
|
+#endif /* CHUNKCOPY_H */
|
|
diff --git a/contrib/arm/inffast.c b/contrib/arm/inffast.c
|
|
index 0dbd1dbc..f7f50071 100644
|
|
--- a/contrib/arm/inffast.c
|
|
+++ b/contrib/arm/inffast.c
|
|
@@ -7,6 +7,7 @@
|
|
#include "inftrees.h"
|
|
#include "inflate.h"
|
|
#include "inffast.h"
|
|
+#include "chunkcopy.h"
|
|
|
|
#ifdef ASMINF
|
|
# pragma message("Assembler code may have bugs -- use at your own risk")
|
|
@@ -57,6 +58,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
|
|
unsigned char FAR *out; /* local strm->next_out */
|
|
unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
|
|
unsigned char FAR *end; /* while out < end, enough space available */
|
|
+ unsigned char FAR *limit; /* safety limit for chunky copies */
|
|
#ifdef INFLATE_STRICT
|
|
unsigned dmax; /* maximum distance from zlib header */
|
|
#endif
|
|
@@ -84,12 +86,13 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
|
|
out = strm->next_out;
|
|
beg = out - (start - strm->avail_out);
|
|
end = out + (strm->avail_out - 257);
|
|
+ limit = out + strm->avail_out;
|
|
#ifdef INFLATE_STRICT
|
|
dmax = state->dmax;
|
|
#endif
|
|
wsize = state->wsize;
|
|
whave = state->whave;
|
|
- wnext = state->wnext;
|
|
+ wnext = (state->wnext == 0 && whave >= wsize) ? wsize : state->wnext;
|
|
window = state->window;
|
|
hold = state->hold;
|
|
bits = state->bits;
|
|
@@ -197,70 +200,51 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
|
|
#endif
|
|
}
|
|
from = window;
|
|
- if (wnext == 0) { /* very common case */
|
|
- from += wsize - op;
|
|
- if (op < len) { /* some from window */
|
|
- len -= op;
|
|
- do {
|
|
- *out++ = *from++;
|
|
- } while (--op);
|
|
- from = out - dist; /* rest from output */
|
|
- }
|
|
+ if (wnext >= op) { /* contiguous in window */
|
|
+ from += wnext - op;
|
|
}
|
|
- else if (wnext < op) { /* wrap around window */
|
|
- from += wsize + wnext - op;
|
|
+ else { /* wrap around window */
|
|
op -= wnext;
|
|
+ from += wsize - op;
|
|
if (op < len) { /* some from end of window */
|
|
len -= op;
|
|
- do {
|
|
- *out++ = *from++;
|
|
- } while (--op);
|
|
- from = window;
|
|
- if (wnext < len) { /* some from start of window */
|
|
- op = wnext;
|
|
- len -= op;
|
|
- do {
|
|
- *out++ = *from++;
|
|
- } while (--op);
|
|
- from = out - dist; /* rest from output */
|
|
- }
|
|
+ out = chunkcopy_safe(out, from, op, limit);
|
|
+ from = window; /* more from start of window */
|
|
+ op = wnext;
|
|
+ /* This (rare) case can create a situation where
|
|
+ the first chunkcopy below must be checked.
|
|
+ */
|
|
}
|
|
}
|
|
- else { /* contiguous in window */
|
|
- from += wnext - op;
|
|
- if (op < len) { /* some from window */
|
|
- len -= op;
|
|
- do {
|
|
- *out++ = *from++;
|
|
- } while (--op);
|
|
- from = out - dist; /* rest from output */
|
|
- }
|
|
- }
|
|
- while (len > 2) {
|
|
- *out++ = *from++;
|
|
- *out++ = *from++;
|
|
- *out++ = *from++;
|
|
- len -= 3;
|
|
- }
|
|
- if (len) {
|
|
- *out++ = *from++;
|
|
- if (len > 1)
|
|
- *out++ = *from++;
|
|
+ if (op < len) { /* still need some from output */
|
|
+ out = chunkcopy_safe(out, from, op, limit);
|
|
+ len -= op;
|
|
+ /* When dist is small the amount of data that can be
|
|
+ copied from the window is also small, and progress
|
|
+ towards the dangerous end of the output buffer is
|
|
+ also small. This means that for trivial memsets and
|
|
+ for chunkunroll_relaxed() a safety check is
|
|
+ unnecessary. However, these conditions may not be
|
|
+ entered at all, and in that case it's possible that
|
|
+ the main copy is near the end.
|
|
+ */
|
|
+ out = chunkunroll_relaxed(out, &dist, &len);
|
|
+ out = chunkcopy_safe(out, out - dist, len, limit);
|
|
+ } else {
|
|
+ /* from points to window, so there is no risk of
|
|
+ overlapping pointers requiring memset-like behaviour
|
|
+ */
|
|
+ out = chunkcopy_safe(out, from, len, limit);
|
|
}
|
|
}
|
|
else {
|
|
- from = out - dist; /* copy direct from output */
|
|
- do { /* minimum length is three */
|
|
- *out++ = *from++;
|
|
- *out++ = *from++;
|
|
- *out++ = *from++;
|
|
- len -= 3;
|
|
- } while (len > 2);
|
|
- if (len) {
|
|
- *out++ = *from++;
|
|
- if (len > 1)
|
|
- *out++ = *from++;
|
|
- }
|
|
+ /* Whole reference is in range of current output. No
|
|
+ range checks are necessary because we start with room
|
|
+ for at least 258 bytes of output, so unroll and roundoff
|
|
+ operations can write beyond `out+len` so long as they
|
|
+ stay within 258 bytes of `out`.
|
|
+ */
|
|
+ out = chunkcopy_lapped_relaxed(out, dist, len);
|
|
}
|
|
}
|
|
else if ((op & 64) == 0) { /* 2nd level distance code */
|
|
diff --git a/contrib/arm/inflate.c b/contrib/arm/inflate.c
|
|
index ac333e8c..e40322c3 100644
|
|
--- a/contrib/arm/inflate.c
|
|
+++ b/contrib/arm/inflate.c
|
|
@@ -84,6 +84,7 @@
|
|
#include "inftrees.h"
|
|
#include "inflate.h"
|
|
#include "inffast.h"
|
|
+#include "contrib/arm/chunkcopy.h"
|
|
|
|
#ifdef MAKEFIXED
|
|
# ifndef BUILDFIXED
|
|
@@ -405,10 +406,20 @@ unsigned copy;
|
|
|
|
/* if it hasn't been done already, allocate space for the window */
|
|
if (state->window == Z_NULL) {
|
|
+ unsigned wsize = 1U << state->wbits;
|
|
state->window = (unsigned char FAR *)
|
|
- ZALLOC(strm, 1U << state->wbits,
|
|
+ ZALLOC(strm, wsize + CHUNKCOPY_CHUNK_SIZE,
|
|
sizeof(unsigned char));
|
|
if (state->window == Z_NULL) return 1;
|
|
+#ifdef INFLATE_CLEAR_UNUSED_UNDEFINED
|
|
+ /* Copies from the overflow portion of this buffer are undefined and
|
|
+ may cause analysis tools to raise a warning if we don't initialize
|
|
+ it. However, this undefined data overwrites other undefined data
|
|
+ and is subsequently either overwritten or left deliberately
|
|
+ undefined at the end of decode; so there's really no point.
|
|
+ */
|
|
+ memset(state->window + wsize, 0, CHUNKCOPY_CHUNK_SIZE);
|
|
+#endif
|
|
}
|
|
|
|
/* if window not in use yet, initialize */
|
|
@@ -1175,17 +1186,16 @@ int flush;
|
|
else
|
|
from = state->window + (state->wnext - copy);
|
|
if (copy > state->length) copy = state->length;
|
|
+ if (copy > left) copy = left;
|
|
+ put = chunkcopy_safe(put, from, copy, put + left);
|
|
}
|
|
else { /* copy from output */
|
|
- from = put - state->offset;
|
|
copy = state->length;
|
|
+ if (copy > left) copy = left;
|
|
+ put = chunkcopy_lapped_safe(put, state->offset, copy, put + left);
|
|
}
|
|
- if (copy > left) copy = left;
|
|
left -= copy;
|
|
state->length -= copy;
|
|
- do {
|
|
- *put++ = *from++;
|
|
- } while (--copy);
|
|
if (state->length == 0) state->mode = LEN;
|
|
break;
|
|
case LIT:
|