diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0ec5c83..3edd8f5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,4 +23,4 @@ jobs: steps: - uses: actions/checkout@v3 - run: npm i - - run: npm test + - run: npm run coverage diff --git a/package.json b/package.json index b71ae1d..bb1081b 100644 --- a/package.json +++ b/package.json @@ -27,8 +27,8 @@ "typecheck": "tsc" }, "devDependencies": { - "@types/node": "20.12.11", - "@typescript-eslint/eslint-plugin": "7.8.0", + "@types/node": "20.12.12", + "@typescript-eslint/eslint-plugin": "7.9.0", "@vitest/coverage-v8": "1.6.0", "eslint": "8.57.0", "eslint-plugin-import": "2.29.1", diff --git a/src/column.js b/src/column.js index 901bf8a..f4b2817 100644 --- a/src/column.js +++ b/src/column.js @@ -60,6 +60,7 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata, ) const { definitionLevels, repetitionLevels, dataPage } = readDataPage(page, daph, schemaPath, columnMetadata) valuesSeen += daph.num_values + // assert(!daph.statistics || daph.statistics.null_count === BigInt(daph.num_values - dataPage.length)) // construct output values: skip nulls and construct lists if (repetitionLevels.length) { @@ -83,10 +84,7 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata, dereferenceDictionary(dictionary, dataPage) values = convert(dataPage, element) } - - // TODO: check that we are at the end of the page - // values.length !== daph.num_values isn't right. In cases like arrays, - // you need the total number of children, not the number of top-level values. + // assert(BigInt(values.length) === rowGroup.num_rows) concat(rowData, values) } else if (header.type === 'DICTIONARY_PAGE') { diff --git a/src/convert.js b/src/convert.js index ad9e1e0..8057eb1 100644 --- a/src/convert.js +++ b/src/convert.js @@ -12,29 +12,34 @@ export function convert(data, schemaElement) { const ctype = schemaElement.converted_type if (ctype === 'UTF8') { const decoder = new TextDecoder() - return data.map(v => v && decoder.decode(v)) + const arr = new Array(data.length) + for (let i = 0; i < arr.length; i++) { + arr[i] = data[i] && decoder.decode(data[i]) + } + return arr } if (ctype === 'DECIMAL') { const scale = schemaElement.scale || 0 const factor = Math.pow(10, -scale) - if (typeof data[0] === 'number') { - if (factor === 1) return data - return Array.from(data).map(v => v * factor) - } else if (typeof data[0] === 'bigint') { - if (factor === 1) return data - return Array.from(data).map(v => Number(v) * factor) - } else { - return Array.from(data).map(v => parseDecimal(v) * factor) + const arr = new Array(data.length) + for (let i = 0; i < arr.length; i++) { + if (data[0] instanceof Uint8Array) { + arr[i] = parseDecimal(data[i]) * factor + } else { + arr[i] = Number(data[i]) * factor + } } - } - if (ctype === 'DATE') { - return Array.from(data).map(v => new Date(v * dayMillis)) + return arr } if (ctype === undefined && schemaElement.type === 'INT96') { return Array.from(data).map(parseInt96Date) } - if (ctype === 'TIME_MILLIS') { - return Array.from(data).map(v => new Date(v)) + if (ctype === 'DATE') { + const arr = new Array(data.length) + for (let i = 0; i < arr.length; i++) { + arr[i] = new Date(data[i] * dayMillis) + } + return arr } if (ctype === 'JSON') { return data.map(v => JSON.parse(v)) @@ -45,10 +50,12 @@ export function convert(data, schemaElement) { if (ctype === 'INTERVAL') { throw new Error('parquet interval not supported') } + // TODO: ctype UINT const logicalType = schemaElement.logical_type?.type if (logicalType === 'FLOAT16') { return Array.from(data).map(parseFloat16) } + // TODO: logical types return data } diff --git a/test/convert.test.js b/test/convert.test.js index 563e0b3..23d947b 100644 --- a/test/convert.test.js +++ b/test/convert.test.js @@ -38,7 +38,7 @@ describe('convert function', () => { const data = [BigInt(1000), BigInt(2000)] /** @type {SchemaElement} */ const schemaElement = { name, converted_type: 'DECIMAL' } - expect(convert(data, schemaElement)).toEqual([1000n, 2000n]) + expect(convert(data, schemaElement)).toEqual([1000, 2000]) }) it('converts bigint to DECIMAL with scale', () => { @@ -62,14 +62,6 @@ describe('convert function', () => { expect(convert(data, schemaElement)).toEqual([new Date(86400000), new Date(86400000 * 2)]) }) - it('converts milliseconds to TIME_MILLIS', () => { - const now = Date.now() - const data = [now] - /** @type {SchemaElement} */ - const schemaElement = { name, converted_type: 'TIME_MILLIS' } - expect(convert(data, schemaElement)).toEqual([new Date(now)]) - }) - it('converts INT96 to DATE', () => { // from alltypes_plain.parquet const data = [45284764452596988585705472n, 45284764452597048585705472n]