From 786816c55fedcb4f40e4556c02461b5c99081950 Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Sat, 19 Apr 2025 01:15:13 -0700 Subject: [PATCH] Set dictionary threshold to 2 --- src/column.js | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/column.js b/src/column.js index 7320c3e..3197448 100644 --- a/src/column.js +++ b/src/column.js @@ -25,7 +25,7 @@ export function writeColumn(writer, schemaPath, values, compressed, stats) { const statistics = stats ? getStatistics(values) : undefined // dictionary encoding - let dictionary_page_offset = undefined + let dictionary_page_offset let data_page_offset = BigInt(writer.offset) /** @type {DecodedArray | undefined} */ const dictionary = useDictionary(values, type) @@ -81,11 +81,9 @@ function useDictionary(values, type) { const unique = new Set(values) unique.delete(undefined) unique.delete(null) - if (values.length > 10 && values.length / unique.size > 0.1) { - if (unique.size < values.length) { - // TODO: sort by frequency - return Array.from(unique) - } + if (values.length / unique.size > 2) { + // TODO: sort by frequency + return Array.from(unique) } }