// ---- src/snappy.ts ----

/**
 * Read an unsigned little-endian integer made of `byteCount` bytes.
 *
 * Multiplication is used instead of `<<` because JavaScript shifts operate on
 * signed 32-bit integers: a set top bit in the fourth byte would otherwise
 * yield a negative value. The caller must ensure the bytes are in range.
 *
 * @param array source bytes
 * @param pos index of the least significant byte
 * @param byteCount number of bytes to read (1 to 4)
 * @returns non-negative integer value
 */
function readUintLE(array: Uint8Array, pos: number, byteCount: number): number {
  let value = 0
  for (let i = byteCount - 1; i >= 0; i--) {
    value = value * 256 + array[pos + i]
  }
  return value
}

/**
 * Copy bytes from one array to another.
 * The caller must ensure both ranges are in bounds (`set` throws otherwise).
 */
function copyBytes(fromArray: Uint8Array, fromPos: number, toArray: Uint8Array, toPos: number, length: number): void {
  toArray.set(fromArray.subarray(fromPos, fromPos + length), toPos)
}

/**
 * Copy bytes within an array, from `offset` bytes behind `pos` to `pos`.
 *
 * This is deliberately a forward byte-by-byte loop and not `copyWithin`:
 * when `offset < length` the source range overlaps the bytes being written,
 * and the copy must observe them so that a short pattern gets repeated.
 */
function selfCopyBytes(array: Uint8Array, pos: number, offset: number, length: number): void {
  for (let i = 0; i < length; i++) {
    array[pos + i] = array[pos - offset + i]
  }
}

/**
 * Decompress snappy data.
 * Accepts an output buffer to avoid allocating a new buffer for each call.
 * The output buffer may be larger than the decompressed data; only the first
 * bytes (as many as the stream's preamble declares) are written.
 *
 * Failure is reported through the return value only; nothing is logged.
 * On failure the output buffer may have been partially written.
 *
 * @param inputArray compressed data
 * @param outputArray output buffer
 * @returns true if the whole input was decoded and produced exactly the
 *   number of bytes declared in its preamble; false if the input is
 *   truncated or malformed, or the output buffer is too small
 */
export function snappyUncompress(inputArray: Uint8Array, outputArray: Uint8Array): boolean {
  const inputLength = inputArray.byteLength
  const outputLength = outputArray.byteLength

  let pos = 0
  let outPos = 0

  // Preamble: uncompressed length as a little-endian base-128 varint.
  // Accumulate with multiplication so large values cannot wrap negative.
  let uncompressedLength = 0
  let scale = 1
  while (pos < inputLength) {
    const c = inputArray[pos]
    pos += 1
    uncompressedLength += (c & 0x7f) * scale
    if (c < 128) {
      break
    }
    scale *= 128
  }
  if (uncompressedLength > outputLength) {
    return false // output buffer cannot hold the declared length
  }

  while (pos < inputLength) {
    const tag = inputArray[pos]
    pos += 1
    const elementType = tag & 0x3
    let len: number

    // There are two types of elements, literals and copies (back references)
    if (elementType === 0) {
      // Literals are uncompressed data stored directly in the byte stream
      len = (tag >>> 2) + 1
      // Longer literal length is encoded in 1 to 4 extra bytes
      if (len > 60) {
        const lengthSize = len - 60 // number of length bytes that follow
        if (pos + lengthSize > inputLength) {
          return false // length bytes run past end of input
        }
        len = readUintLE(inputArray, pos, lengthSize) + 1
        pos += lengthSize
      }
      if (pos + len > inputLength) {
        return false // literal exceeds input length
      }
      if (outPos + len > uncompressedLength) {
        return false // literal exceeds declared output length
      }
      copyBytes(inputArray, pos, outputArray, outPos, len)
      pos += len
      outPos += len
    } else {
      // Copy elements
      let offset: number // offset back from current position to read
      switch (elementType) {
        case 1:
          // Copy with 1-byte offset (high 3 offset bits live in the tag)
          if (pos + 1 > inputLength) {
            return false // end of input
          }
          len = ((tag >>> 2) & 0x7) + 4
          offset = inputArray[pos] + ((tag >>> 5) << 8)
          pos += 1
          break
        case 2:
          // Copy with 2-byte offset
          if (pos + 2 > inputLength) {
            return false // end of input
          }
          len = (tag >>> 2) + 1
          offset = readUintLE(inputArray, pos, 2)
          pos += 2
          break
        default:
          // elementType === 3: copy with 4-byte offset
          if (pos + 4 > inputLength) {
            return false // end of input
          }
          len = (tag >>> 2) + 1
          offset = readUintLE(inputArray, pos, 4)
          pos += 4
          break
      }
      if (offset === 0) {
        return false // invalid offset
      }
      if (offset > outPos) {
        return false // cannot copy from before start of buffer
      }
      if (outPos + len > uncompressedLength) {
        return false // copy exceeds declared output length
      }
      selfCopyBytes(outputArray, outPos, offset, len)
      outPos += len
    }
  }

  // A well-formed stream decodes to exactly the length its preamble declares
  return outPos === uncompressedLength
}

// ---- test/snappy.test.ts ----
// Companion vitest suite added by the same patch. It belongs in its own file
// and is reproduced here as a comment so that its cases are not lost.
//
// import { describe, expect, it } from 'vitest'
// import { snappyUncompress } from '../src/snappy'
//
// describe('snappy uncompress', () => {
//   it('decompresses valid input correctly', () => {
//     const testCases = [
//       { compressed: new Uint8Array([0x02, 0x04, 0x68, 0x79]), expected: 'hy' },
//       { compressed: new Uint8Array([0x03, 0x08, 0x68, 0x79, 0x70]), expected: 'hyp' },
//       { compressed: new Uint8Array([0x05, 0x10, 0x68, 0x79, 0x70, 0x65, 0x72]), expected: 'hyper' },
//       { compressed: new Uint8Array([0x0a, 0x24, 0x68, 0x79, 0x70, 0x65, 0x72, 0x70, 0x61, 0x72, 0x61, 0x6d]), expected: 'hyperparam' },
//       { compressed: new Uint8Array([0x15, 0x08, 0x68, 0x79, 0x70, 0x46, 0x03, 0x00]), expected: 'hyphyphyphyphyphyphyp' },
//     ]
//
//     testCases.forEach(({ compressed, expected }) => {
//       const outputArray = new Uint8Array(expected.length)
//       const result = snappyUncompress(compressed, outputArray)
//       const outputStr = new TextDecoder().decode(outputArray)
//       expect(result).toBe(true)
//       expect(outputStr).toBe(expected)
//     })
//   })
//
//   it('returns false for invalid input', () => {
//     const outputArray = new Uint8Array(10)
//     expect(snappyUncompress(new Uint8Array([0x03, 0x61]), outputArray)).toBe(false)
//     expect(snappyUncompress(new Uint8Array([0x03, 0xf1]), outputArray)).toBe(false)
//   })
// })