Introduce 'custom parsers' option for decoding dates (#87)

LiraNuna 2025-06-09 20:02:31 -05:00 committed by GitHub
parent 67ab9d5e1a
commit 8609192b23
8 changed files with 195 additions and 64 deletions
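
A minimal usage sketch of the new option (import paths and file name are illustrative; at runtime, partial parser overrides are merged with `DEFAULT_PARSERS`):

```js
import { parquetReadObjects } from 'hyparquet'
import { asyncBufferFromFile } from 'hyparquet/src/node.js'

// Hypothetical input file; any AsyncBuffer source works.
const file = await asyncBufferFromFile('example.parquet')
const rows = await parquetReadObjects({
  file,
  parsers: {
    // Keep DATE columns as plain day counts instead of Date objects.
    dateFromDays: days => days,
    // Return millisecond timestamps as numbers rather than Dates.
    timestampFromMilliseconds: millis => Number(millis),
  },
})
```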

@@ -1,4 +1,26 @@
const dayMillis = 86400000 // 1 day in milliseconds
/**
* @import {ColumnDecoder, DecodedArray, Encoding, ParquetParsers, SchemaElement} from '../src/types.d.ts'
*/
/**
* Default type parsers when no custom ones are given
* @type ParquetParsers
*/
export const DEFAULT_PARSERS = {
timestampFromMilliseconds(millis) {
return new Date(Number(millis))
},
timestampFromMicroseconds(micros) {
return new Date(Number(micros / 1000n))
},
timestampFromNanoseconds(nanos) {
return new Date(Number(nanos / 1000000n))
},
dateFromDays(days) {
const dayInMillis = 86400000
return new Date(days * dayInMillis)
},
}
/**
* Convert known types from primitive to rich, and dereference dictionary.
@@ -29,11 +51,11 @@ export function convertWithDictionary(data, dictionary, encoding, columnDecoder)
* Convert known types from primitive to rich.
*
* @param {DecodedArray} data series of primitive types
* @param {Pick<ColumnDecoder, "element" | "utf8">} columnDecoder
* @param {Pick<ColumnDecoder, "element" | "utf8" | "parsers">} columnDecoder
* @returns {DecodedArray} series of rich types
*/
export function convert(data, columnDecoder) {
const { element, utf8 = true } = columnDecoder
const { element, parsers, utf8 = true } = columnDecoder
const { type, converted_type: ctype, logical_type: ltype } = element
if (ctype === 'DECIMAL') {
const scale = element.scale || 0
@@ -49,26 +71,30 @@ export function convert(data, columnDecoder) {
return arr
}
if (!ctype && type === 'INT96') {
return Array.from(data).map(parseInt96Date)
const arr = new Array(data.length)
for (let i = 0; i < arr.length; i++) {
arr[i] = parsers.timestampFromNanoseconds(parseInt96Nanos(data[i]))
}
return arr
}
if (ctype === 'DATE') {
const arr = new Array(data.length)
for (let i = 0; i < arr.length; i++) {
arr[i] = new Date(data[i] * dayMillis)
arr[i] = parsers.dateFromDays(data[i])
}
return arr
}
if (ctype === 'TIMESTAMP_MILLIS') {
const arr = new Array(data.length)
for (let i = 0; i < arr.length; i++) {
arr[i] = new Date(Number(data[i]))
arr[i] = parsers.timestampFromMilliseconds(data[i])
}
return arr
}
if (ctype === 'TIMESTAMP_MICROS') {
const arr = new Array(data.length)
for (let i = 0; i < arr.length; i++) {
arr[i] = new Date(Number(data[i] / 1000n))
arr[i] = parsers.timestampFromMicroseconds(data[i])
}
return arr
}
@@ -111,12 +137,13 @@ export function convert(data, columnDecoder) {
}
if (ltype?.type === 'TIMESTAMP') {
const { unit } = ltype
let factor = 1n
if (unit === 'MICROS') factor = 1000n
if (unit === 'NANOS') factor = 1000000n
/** @type {ParquetParsers[keyof ParquetParsers]} */
let parser = parsers.timestampFromMilliseconds
if (unit === 'MICROS') parser = parsers.timestampFromMicroseconds
if (unit === 'NANOS') parser = parsers.timestampFromNanoseconds
const arr = new Array(data.length)
for (let i = 0; i < arr.length; i++) {
arr[i] = new Date(Number(data[i] / factor))
arr[i] = parser(data[i])
}
return arr
}
@@ -143,15 +170,14 @@ export function parseDecimal(bytes) {
}
/**
* @import {ColumnDecoder, DecodedArray, Encoding, SchemaElement} from '../src/types.d.ts'
* Converts INT96 date format (high 32 bits: Julian day, low 64 bits: nanoseconds within the day) to nanoseconds since epoch
* @param {bigint} value
* @returns {Date}
* @returns {bigint}
*/
function parseInt96Date(value) {
const days = Number((value >> 64n) - 2440588n)
const nano = Number((value & 0xffffffffffffffffn) / 1000000n)
const millis = days * dayMillis + nano
return new Date(millis)
function parseInt96Nanos(value) {
const days = (value >> 64n) - 2440588n
const nano = value & 0xffffffffffffffffn
return days * 86400000000000n + nano
}
/**

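To make the Julian-day arithmetic above concrete, a worked sketch (constants chosen to reproduce the `alltypes_plain.parquet` value used in the tests below):

```js
// INT96 layout: high 32 bits = Julian day number, low 64 bits = nanoseconds
// within that day. 2440588 is the Julian day of 1970-01-01, so subtracting
// it rebases the day count onto the Unix epoch.
const julianDay = 2454892n // 2009-03-01
const nanosInDay = 60n * 1000000000n // one minute past midnight
const int96 = (julianDay << 64n) | nanosInDay
const epochNanos = ((int96 >> 64n) - 2440588n) * 86400000000000n
  + (int96 & 0xffffffffffffffffn)
console.log(new Date(Number(epochNanos / 1000000n)).toISOString())
// -> 2009-03-01T00:01:00.000Z
```
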
@@ -60,4 +60,6 @@ export function parquetReadObjects(options) {
* @typedef {import('../src/types.d.ts').BoundaryOrder} BoundaryOrder
* @typedef {import('../src/types.d.ts').ColumnData} ColumnData
* @typedef {import('../src/types.d.ts').ParquetReadOptions} ParquetReadOptions
* @typedef {import('../src/types.d.ts').MetadataOptions} MetadataOptions
* @typedef {import('../src/types.d.ts').ParquetParsers} ParquetParsers
*/

@@ -1,18 +1,22 @@
import { BoundaryOrder } from './constants.js'
import { DEFAULT_PARSERS } from './convert.js'
import { convertMetadata } from './metadata.js'
import { deserializeTCompactProtocol } from './thrift.js'
/**
* @param {DataReader} reader
* @param {SchemaElement} schema
* @param {ParquetParsers | undefined} parsers
* @returns {ColumnIndex}
*/
export function readColumnIndex(reader, schema) {
export function readColumnIndex(reader, schema, parsers = undefined) {
parsers = { ...DEFAULT_PARSERS, ...parsers }
const thrift = deserializeTCompactProtocol(reader)
return {
null_pages: thrift.field_1,
min_values: thrift.field_2.map((/** @type {any} */ m) => convertMetadata(m, schema)),
max_values: thrift.field_3.map((/** @type {any} */ m) => convertMetadata(m, schema)),
min_values: thrift.field_2.map((/** @type {any} */ m) => convertMetadata(m, schema, parsers)),
max_values: thrift.field_3.map((/** @type {any} */ m) => convertMetadata(m, schema, parsers)),
boundary_order: BoundaryOrder[thrift.field_4],
null_counts: thrift.field_5,
repetition_level_histograms: thrift.field_6,
@@ -33,7 +37,7 @@ export function readOffsetIndex(reader) {
}
/**
* @import {ColumnIndex, DataReader, OffsetIndex, PageLocation, SchemaElement} from '../src/types.d.ts'
* @import {ColumnIndex, DataReader, OffsetIndex, PageLocation, ParquetParsers, SchemaElement} from '../src/types.d.ts'
* @param {any} loc
* @returns {PageLocation}
*/

@@ -1,5 +1,5 @@
import { CompressionCodec, ConvertedType, Encoding, FieldRepetitionType, PageType, ParquetType } from './constants.js'
import { parseDecimal, parseFloat16 } from './convert.js'
import { DEFAULT_PARSERS, parseDecimal, parseFloat16 } from './convert.js'
import { getSchemaPath } from './schema.js'
import { deserializeTCompactProtocol } from './thrift.js'
@@ -26,10 +26,10 @@ export const defaultInitialFetchSize = 1 << 19 // 512kb
* or a second request for up to the metadata size.
*
* @param {AsyncBuffer} asyncBuffer parquet file contents
* @param {number} initialFetchSize initial fetch size in bytes (default 512kb)
* @param {MetadataOptions & { initialFetchSize?: number }} options metadata parsing options and initial fetch size in bytes (default 512kb)
* @returns {Promise<FileMetaData>} parquet metadata object
*/
export async function parquetMetadataAsync(asyncBuffer, initialFetchSize = defaultInitialFetchSize) {
export async function parquetMetadataAsync(asyncBuffer, { parsers, initialFetchSize = defaultInitialFetchSize } = {}) {
if (!asyncBuffer || !(asyncBuffer.byteLength >= 0)) throw new Error('parquet expected AsyncBuffer')
// fetch last bytes (footer) of the file
@@ -59,10 +59,10 @@ export async function parquetMetadataAsync(asyncBuffer, initialFetchSize = defau
const combinedView = new Uint8Array(combinedBuffer)
combinedView.set(new Uint8Array(metadataBuffer))
combinedView.set(new Uint8Array(footerBuffer), footerOffset - metadataOffset)
return parquetMetadata(combinedBuffer)
return parquetMetadata(combinedBuffer, { parsers })
} else {
// parse metadata from the footer
return parquetMetadata(footerBuffer)
return parquetMetadata(footerBuffer, { parsers })
}
}
@@ -70,12 +70,16 @@ export async function parquetMetadataAsync(asyncBuffer, initialFetchSize = defau
* Read parquet metadata from a buffer synchronously.
*
* @param {ArrayBuffer} arrayBuffer parquet file footer
* @param {MetadataOptions} options metadata parsing options
* @returns {FileMetaData} parquet metadata object
*/
export function parquetMetadata(arrayBuffer) {
export function parquetMetadata(arrayBuffer, { parsers } = {}) {
if (!(arrayBuffer instanceof ArrayBuffer)) throw new Error('parquet expected ArrayBuffer')
const view = new DataView(arrayBuffer)
// Use default parsers if not given
parsers = { ...DEFAULT_PARSERS, ...parsers }
// Validate footer magic number "PAR1"
if (view.byteLength < 8) {
throw new Error('parquet file is too short')
@@ -135,7 +139,7 @@ export function parquetMetadata(arrayBuffer) {
data_page_offset: column.field_3.field_9,
index_page_offset: column.field_3.field_10,
dictionary_page_offset: column.field_3.field_11,
statistics: convertStats(column.field_3.field_12, columnSchema[columnIndex]),
statistics: convertStats(column.field_3.field_12, columnSchema[columnIndex], parsers),
encoding_stats: column.field_3.field_13?.map((/** @type {any} */ encodingStat) => ({
page_type: PageType[encodingStat.field_1],
encoding: Encoding[encodingStat.field_2],
@@ -246,19 +250,20 @@ function timeUnit(unit) {
/**
* Convert column statistics based on column type.
*
* @import {AsyncBuffer, FileMetaData, LogicalType, MinMaxType, SchemaElement, SchemaTree, Statistics, TimeUnit} from '../src/types.d.ts'
* @import {AsyncBuffer, FileMetaData, LogicalType, MetadataOptions, MinMaxType, ParquetParsers, SchemaElement, SchemaTree, Statistics, TimeUnit} from '../src/types.d.ts'
* @param {any} stats
* @param {SchemaElement} schema
* @param {ParquetParsers} parsers
* @returns {Statistics}
*/
function convertStats(stats, schema) {
function convertStats(stats, schema, parsers) {
return stats && {
max: convertMetadata(stats.field_1, schema),
min: convertMetadata(stats.field_2, schema),
max: convertMetadata(stats.field_1, schema, parsers),
min: convertMetadata(stats.field_2, schema, parsers),
null_count: stats.field_3,
distinct_count: stats.field_4,
max_value: convertMetadata(stats.field_5, schema),
min_value: convertMetadata(stats.field_6, schema),
max_value: convertMetadata(stats.field_5, schema, parsers),
min_value: convertMetadata(stats.field_6, schema, parsers),
is_max_value_exact: stats.field_7,
is_min_value_exact: stats.field_8,
}
@@ -267,9 +272,10 @@ function convertStats(stats, schema) {
/**
* @param {Uint8Array | undefined} value
* @param {SchemaElement} schema
* @param {ParquetParsers} parsers
* @returns {MinMaxType | undefined}
*/
export function convertMetadata(value, schema) {
export function convertMetadata(value, schema, parsers) {
const { type, converted_type, logical_type } = schema
if (value === undefined) return value
if (type === 'BOOLEAN') return value[0] === 1
@@ -277,12 +283,12 @@ export function convertMetadata(value, schema) {
const view = new DataView(value.buffer, value.byteOffset, value.byteLength)
if (type === 'FLOAT' && view.byteLength === 4) return view.getFloat32(0, true)
if (type === 'DOUBLE' && view.byteLength === 8) return view.getFloat64(0, true)
if (type === 'INT32' && converted_type === 'DATE') return new Date(view.getInt32(0, true) * 86400000)
if (type === 'INT64' && converted_type === 'TIMESTAMP_MICROS') return new Date(Number(view.getBigInt64(0, true) / 1000n))
if (type === 'INT64' && converted_type === 'TIMESTAMP_MILLIS') return new Date(Number(view.getBigInt64(0, true)))
if (type === 'INT64' && logical_type?.type === 'TIMESTAMP' && logical_type?.unit === 'NANOS') return new Date(Number(view.getBigInt64(0, true) / 1000000n))
if (type === 'INT64' && logical_type?.type === 'TIMESTAMP' && logical_type?.unit === 'MICROS') return new Date(Number(view.getBigInt64(0, true) / 1000n))
if (type === 'INT64' && logical_type?.type === 'TIMESTAMP') return new Date(Number(view.getBigInt64(0, true)))
if (type === 'INT32' && converted_type === 'DATE') return parsers.dateFromDays(view.getInt32(0, true))
if (type === 'INT64' && converted_type === 'TIMESTAMP_MILLIS') return parsers.timestampFromMilliseconds(view.getBigInt64(0, true))
if (type === 'INT64' && converted_type === 'TIMESTAMP_MICROS') return parsers.timestampFromMicroseconds(view.getBigInt64(0, true))
if (type === 'INT64' && logical_type?.type === 'TIMESTAMP' && logical_type?.unit === 'NANOS') return parsers.timestampFromNanoseconds(view.getBigInt64(0, true))
if (type === 'INT64' && logical_type?.type === 'TIMESTAMP' && logical_type?.unit === 'MICROS') return parsers.timestampFromMicroseconds(view.getBigInt64(0, true))
if (type === 'INT64' && logical_type?.type === 'TIMESTAMP') return parsers.timestampFromMilliseconds(view.getBigInt64(0, true))
if (type === 'INT32' && view.byteLength === 4) return view.getInt32(0, true)
if (type === 'INT64' && view.byteLength === 8) return view.getBigInt64(0, true)
if (converted_type === 'DECIMAL') return parseDecimal(value) * 10 ** -(schema.scale || 0)
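
The `parsers` option reaches statistics decoding too; a minimal sketch, assuming `footerBuffer` is an `ArrayBuffer` holding the file footer and that `FileMetaData` mirrors the Thrift `row_groups` layout:

```js
import { parquetMetadata } from 'hyparquet'

// Keep date/timestamp statistics as raw integers instead of Date objects.
const metadata = parquetMetadata(footerBuffer, {
  parsers: {
    timestampFromMilliseconds: millis => millis,
    timestampFromMicroseconds: micros => micros / 1000n,
    timestampFromNanoseconds: nanos => nanos / 1000000n,
    dateFromDays: days => days,
  },
})
// min/max statistics for timestamp columns are now bigints
const stats = metadata.row_groups[0]?.columns[0]?.meta_data?.statistics
```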

@@ -1,11 +1,12 @@
import { assembleNested } from './assemble.js'
import { readColumn } from './column.js'
import { DEFAULT_PARSERS } from './convert.js'
import { getColumnRange } from './plan.js'
import { getSchemaPath } from './schema.js'
import { flatten } from './utils.js'
/**
* @import {AsyncColumn, AsyncRowGroup, DecodedArray, GroupPlan, ParquetReadOptions, QueryPlan, RowGroup, SchemaTree} from './types.js'
* @import {AsyncColumn, AsyncRowGroup, DecodedArray, GroupPlan, ParquetParsers, ParquetReadOptions, QueryPlan, RowGroup, SchemaTree} from './types.js'
*/
/**
* Read a row group from a file-like object.
@@ -20,6 +21,8 @@ export function readRowGroup(options, { metadata, columns }, groupPlan) {
/** @type {AsyncColumn[]} */
const asyncColumns = []
/** @type {ParquetParsers} */
const parsers = { ...DEFAULT_PARSERS, ...options.parsers }
// read column data
for (const { file_path, meta_data } of groupPlan.rowGroup.columns) {
@@ -58,6 +61,7 @@ export function readRowGroup(options, { metadata, columns }, groupPlan) {
element: schemaPath[schemaPath.length - 1].element,
schemaPath,
codec: meta_data.codec,
parsers,
compressors,
utf8,
}

src/types.d.ts

@@ -1,3 +1,20 @@
/**
* Custom parsers for columns
*/
export interface ParquetParsers {
timestampFromMilliseconds(millis: bigint): any;
timestampFromMicroseconds(micros: bigint): any;
timestampFromNanoseconds(nanos: bigint): any;
dateFromDays(days: number): any;
}
/**
* Parquet metadata parsing options
*/
export interface MetadataOptions {
parsers?: ParquetParsers // custom parsers to decode advanced types
}
/**
* Parquet query options for reading data
*/
@@ -13,6 +30,7 @@ export interface ParquetReadOptions {
onComplete?: (rows: any[][]) => void // called when all requested rows and columns are parsed
compressors?: Compressors // custom decompressors
utf8?: boolean // decode byte arrays as utf8 strings (default true)
parsers?: ParquetParsers // custom parsers to decode advanced types
}
/**
@@ -409,6 +427,7 @@ export interface ColumnDecoder {
element: SchemaElement
schemaPath: SchemaTree[]
codec: CompressionCodec
parsers: ParquetParsers
compressors?: Compressors
utf8?: boolean
}
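
The interface members are not optional, so a fully typed parser set supplies all four methods; a sketch that maps everything to ISO strings (the `@type` import path is assumed to mirror this repo's layout):

```js
/** @type {import('./src/types.d.ts').ParquetParsers} */
const isoParsers = {
  timestampFromMilliseconds: millis => new Date(Number(millis)).toISOString(),
  timestampFromMicroseconds: micros => new Date(Number(micros / 1000n)).toISOString(),
  timestampFromNanoseconds: nanos => new Date(Number(nanos / 1000000n)).toISOString(),
  // days since epoch -> 'YYYY-MM-DD'
  dateFromDays: days => new Date(days * 86400000).toISOString().slice(0, 10),
}
```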

@@ -1,5 +1,6 @@
import { describe, expect, it } from 'vitest'
import { readColumn } from '../src/column.js'
import { DEFAULT_PARSERS } from '../src/convert.js'
import { parquetMetadata } from '../src/index.js'
import { asyncBufferFromFile } from '../src/node.js'
import { getColumnRange } from '../src/plan.js'
@@ -29,6 +30,7 @@ describe('readColumn', () => {
type: column.meta_data.type,
element: schemaPath[schemaPath.length - 1].element,
schemaPath,
parsers: DEFAULT_PARSERS,
codec: column.meta_data.codec,
}
const rowGroupSelect = {
@@ -59,6 +61,7 @@ describe('readColumn', () => {
type: column.meta_data.type,
element: schemaPath[schemaPath.length - 1].element,
schemaPath,
parsers: DEFAULT_PARSERS,
codec: column.meta_data.codec,
}
const rowGroupSelect = {

@@ -1,5 +1,5 @@
import { describe, expect, it } from 'vitest'
import { convert, parseDecimal, parseFloat16 } from '../src/convert.js'
import { DEFAULT_PARSERS, convert, parseDecimal, parseFloat16 } from '../src/convert.js'
/**
* @import {ColumnDecoder, SchemaElement} from '../src/types.js'
@@ -7,31 +7,33 @@ import { convert, parseDecimal, parseFloat16 } from '../src/convert.js'
describe('convert function', () => {
const name = 'name'
const parsers = DEFAULT_PARSERS
it('returns the same data if converted_type is undefined', () => {
const data = [1, 2, 3]
const element = { name }
expect(convert(data, { element })).toEqual(data)
expect(convert(data, { element, parsers })).toEqual(data)
})
it('converts byte arrays to utf8', () => {
const data = [new TextEncoder().encode('foo'), new TextEncoder().encode('bar')]
/** @type {SchemaElement} */
const element = { name, converted_type: 'UTF8' }
expect(convert(data, { element })).toEqual(['foo', 'bar'])
expect(convert(data, { element, parsers })).toEqual(['foo', 'bar'])
})
it('converts byte arrays to utf8 default true', () => {
const data = [new TextEncoder().encode('foo'), new TextEncoder().encode('bar')]
/** @type {SchemaElement} */
const element = { name, type: 'BYTE_ARRAY' }
expect(convert(data, { element })).toEqual(['foo', 'bar'])
expect(convert(data, { element, parsers })).toEqual(['foo', 'bar'])
})
it('preserves byte arrays utf8=false', () => {
const data = [new TextEncoder().encode('foo'), new TextEncoder().encode('bar')]
/** @type {SchemaElement} */
const element = { name, type: 'BYTE_ARRAY' }
expect(convert(data, { element, utf8: false })).toEqual([
expect(convert(data, { element, parsers, utf8: false })).toEqual([
new Uint8Array([102, 111, 111]), new Uint8Array([98, 97, 114]),
])
})
@@ -40,49 +42,49 @@ describe('convert function', () => {
const data = [100, 200]
/** @type {SchemaElement} */
const element = { name, converted_type: 'DECIMAL' }
expect(convert(data, { element })).toEqual([100, 200])
expect(convert(data, { element, parsers })).toEqual([100, 200])
})
it('converts numbers to DECIMAL with scale', () => {
const data = [100, 200]
/** @type {SchemaElement} */
const element = { name, converted_type: 'DECIMAL', scale: 2 }
expect(convert(data, { element })).toEqual([1, 2])
expect(convert(data, { element, parsers })).toEqual([1, 2])
})
it('converts bigint to DECIMAL', () => {
const data = [1000n, 2000n]
/** @type {SchemaElement} */
const element = { name, converted_type: 'DECIMAL' }
expect(convert(data, { element })).toEqual([1000, 2000])
expect(convert(data, { element, parsers })).toEqual([1000, 2000])
})
it('converts bigint to DECIMAL with scale', () => {
const data = [10n, 20n]
/** @type {SchemaElement} */
const element = { name, converted_type: 'DECIMAL', scale: 2 }
expect(convert(data, { element })).toEqual([0.1, 0.2])
expect(convert(data, { element, parsers })).toEqual([0.1, 0.2])
})
it('converts byte arrays to DECIMAL', () => {
const data = [new Uint8Array([0, 0, 0, 100]), new Uint8Array([0, 0, 0, 200])]
/** @type {SchemaElement} */
const element = { name, converted_type: 'DECIMAL', scale: 0 }
expect(convert(data, { element })).toEqual([100, 200])
expect(convert(data, { element, parsers })).toEqual([100, 200])
})
it('converts byte array from issue #59 to DECIMAL', () => {
const data = [new Uint8Array([18, 83, 137, 151, 156, 0])]
/** @type {SchemaElement} */
const element = { name, converted_type: 'DECIMAL', scale: 10, precision: 14 }
expect(convert(data, { element })).toEqual([2015])
expect(convert(data, { element, parsers })).toEqual([2015])
})
it('converts epoch time to DATE', () => {
const data = [1, 2] // days since epoch
/** @type {SchemaElement} */
const element = { name, converted_type: 'DATE' }
expect(convert(data, { element })).toEqual([new Date(86400000), new Date(86400000 * 2)])
expect(convert(data, { element, parsers })).toEqual([new Date(86400000), new Date(86400000 * 2)])
})
it('converts INT96 to DATE', () => {
@@ -90,14 +92,14 @@ describe('convert function', () => {
const data = [45284764452596988585705472n, 45284764452597048585705472n]
/** @type {SchemaElement} */
const element = { name, type: 'INT96' }
expect(convert(data, { element })).toEqual([new Date('2009-03-01T00:00:00.000Z'), new Date('2009-03-01T00:01:00.000Z')])
expect(convert(data, { element, parsers })).toEqual([new Date('2009-03-01T00:00:00.000Z'), new Date('2009-03-01T00:01:00.000Z')])
})
it('converts epoch time to TIMESTAMP_MILLIS', () => {
const data = [1716506900000n, 1716507000000n]
/** @type {SchemaElement} */
const element = { name, converted_type: 'TIMESTAMP_MILLIS' }
expect(convert(data, { element })).toEqual([
expect(convert(data, { element, parsers })).toEqual([
new Date('2024-05-23T23:28:20.000Z'), new Date('2024-05-23T23:30:00.000Z'),
])
})
@@ -106,7 +108,7 @@ describe('convert function', () => {
const data = [1716506900000000n, 1716507000000000n]
/** @type {SchemaElement} */
const element = { name, converted_type: 'TIMESTAMP_MICROS' }
expect(convert(data, { element })).toEqual([
expect(convert(data, { element, parsers })).toEqual([
new Date('2024-05-23T23:28:20.000Z'), new Date('2024-05-23T23:30:00.000Z'),
])
})
@@ -117,28 +119,28 @@ describe('convert function', () => {
.map(str => encoder.encode(str))
/** @type {SchemaElement} */
const element = { name, converted_type: 'JSON' }
expect(convert(data, { element })).toEqual([{ key: true }, { quay: 314 }])
expect(convert(data, { element, parsers })).toEqual([{ key: true }, { quay: 314 }])
})
it('converts uint64', () => {
const data = [100n, -100n]
/** @type {SchemaElement} */
const element = { name, converted_type: 'UINT_64' }
expect(convert(data, { element })).toEqual(new BigUint64Array([100n, 18446744073709551516n]))
expect(convert(data, { element, parsers })).toEqual(new BigUint64Array([100n, 18446744073709551516n]))
})
it('converts to float16', () => {
const data = [new Uint8Array([0x00, 0x3c]), new Uint8Array([0x00, 0x40])]
/** @type {SchemaElement} */
const element = { name, logical_type: { type: 'FLOAT16' } }
expect(convert(data, { element })).toEqual([1, 2])
expect(convert(data, { element, parsers })).toEqual([1, 2])
})
it('converts timestamp with units', () => {
const data = [1716506900000000n, 1716507000000000n]
/** @type {SchemaElement} */
const element = { name, logical_type: { type: 'TIMESTAMP', isAdjustedToUTC: true, unit: 'MICROS' } }
expect(convert(data, { element })).toEqual([
expect(convert(data, { element, parsers })).toEqual([
new Date('2024-05-23T23:28:20.000Z'), new Date('2024-05-23T23:30:00.000Z'),
])
})
@@ -147,7 +149,7 @@ describe('convert function', () => {
const data = [{}]
/** @type {SchemaElement} */
const element = { name, converted_type: 'BSON' }
expect(() => convert(data, { element }))
expect(() => convert(data, { element, parsers }))
.toThrow('parquet bson not supported')
})
@@ -155,9 +157,74 @@ describe('convert function', () => {
const data = [{}]
/** @type {SchemaElement} */
const element = { name, converted_type: 'INTERVAL' }
expect(() => convert(data, { element }))
expect(() => convert(data, { element, parsers }))
.toThrow('parquet interval not supported')
})
it('respects custom parsers - dateFromDays', () => {
const data = [1, 2] // days since epoch
/** @type {SchemaElement} */
const element = { name, converted_type: 'DATE' }
/** @type {Pick<ColumnDecoder, "element" | "utf8" | "parsers">} */
const columnParser = {
element,
parsers: {
...parsers,
dateFromDays: days => days,
},
}
expect(convert(data, columnParser)).toEqual([ 1, 2 ])
})
it('respects custom parsers - timestampFromMilliseconds', () => {
const data = [1716506900000n, 1716507000000n]
/** @type {SchemaElement} */
const element = { name, converted_type: 'TIMESTAMP_MILLIS' }
/** @type {Pick<ColumnDecoder, "element" | "utf8" | "parsers">} */
const columnParser = {
element,
parsers: {
...parsers,
timestampFromMilliseconds: millis => Number(millis / 100000n),
},
}
expect(convert(data, columnParser)).toEqual([ 17165069, 17165070 ])
})
it('respects custom parsers - timestampFromMicroseconds', () => {
const data = [1716506900000000n, 1716507000000000n]
/** @type {SchemaElement} */
const element = { name, logical_type: { type: 'TIMESTAMP', isAdjustedToUTC: true, unit: 'MICROS' } }
/** @type {Pick<ColumnDecoder, "element" | "utf8" | "parsers">} */
const columnParser = {
element,
parsers: {
...parsers,
timestampFromMicroseconds: micros => Number(micros / 100000000n),
},
}
expect(convert(data, columnParser)).toEqual([ 17165069, 17165070 ])
})
it('respects custom parsers - timestampFromNanoseconds', () => {
// from alltypes_plain.parquet
const data = [45284764452596988585705472n, 45284764452597048585705472n]
/** @type {SchemaElement} */
const element = { name, type: 'INT96' }
/** @type {Pick<ColumnDecoder, "element" | "utf8" | "parsers">} */
const columnParser = {
element,
parsers: {
...parsers,
timestampFromNanoseconds: nanos => Number(nanos / 100000000000n),
},
}
expect(convert(data, columnParser)).toEqual([ 12358656, 12358656 ])
})
})
describe('parseFloat16', () => {