mirror of
https://github.com/asadbek064/hyparquet-writer.git
synced 2025-12-05 23:31:54 +00:00
Don't write file_path, duckdb doesn't like it
This commit is contained in:
parent
0e680e2706
commit
de03425587
@ -48,7 +48,7 @@ ParquetWriter.prototype.write = function({ columnData, rowGroupSize = 100000 })
|
||||
|
||||
// write columns
|
||||
for (let j = 0; j < columnData.length; j++) {
|
||||
const { name, data } = columnData[j]
|
||||
const { data } = columnData[j]
|
||||
const schemaPath = [this.schema[0], this.schema[j + 1]]
|
||||
const groupData = data.slice(groupStartIndex, groupStartIndex + groupSize)
|
||||
const file_offset = BigInt(this.writer.offset)
|
||||
@ -56,7 +56,6 @@ ParquetWriter.prototype.write = function({ columnData, rowGroupSize = 100000 })
|
||||
|
||||
// save column chunk metadata
|
||||
columns.push({
|
||||
file_path: name,
|
||||
file_offset,
|
||||
meta_data,
|
||||
})
|
||||
|
||||
@ -31,7 +31,6 @@ export const exampleMetadata = {
|
||||
row_groups: [{
|
||||
columns: [
|
||||
{
|
||||
file_path: 'bool',
|
||||
file_offset: 4n,
|
||||
meta_data: {
|
||||
type: 'BOOLEAN',
|
||||
@ -50,7 +49,6 @@ export const exampleMetadata = {
|
||||
},
|
||||
},
|
||||
{
|
||||
file_path: 'int',
|
||||
file_offset: 28n,
|
||||
meta_data: {
|
||||
type: 'INT32',
|
||||
@ -69,7 +67,6 @@ export const exampleMetadata = {
|
||||
},
|
||||
},
|
||||
{
|
||||
file_path: 'bigint',
|
||||
file_offset: 67n,
|
||||
meta_data: {
|
||||
type: 'INT64',
|
||||
@ -88,7 +85,6 @@ export const exampleMetadata = {
|
||||
},
|
||||
},
|
||||
{
|
||||
file_path: 'float',
|
||||
file_offset: 110n,
|
||||
meta_data: {
|
||||
type: 'FLOAT',
|
||||
@ -107,7 +103,6 @@ export const exampleMetadata = {
|
||||
},
|
||||
},
|
||||
{
|
||||
file_path: 'double',
|
||||
file_offset: 149n,
|
||||
meta_data: {
|
||||
type: 'DOUBLE',
|
||||
@ -126,7 +121,6 @@ export const exampleMetadata = {
|
||||
},
|
||||
},
|
||||
{
|
||||
file_path: 'string',
|
||||
file_offset: 200n,
|
||||
meta_data: {
|
||||
type: 'BYTE_ARRAY',
|
||||
@ -145,7 +139,6 @@ export const exampleMetadata = {
|
||||
},
|
||||
},
|
||||
{
|
||||
file_path: 'nullable',
|
||||
file_offset: 242n,
|
||||
meta_data: {
|
||||
type: 'BOOLEAN',
|
||||
@ -167,5 +160,5 @@ export const exampleMetadata = {
|
||||
total_byte_size: 264n,
|
||||
num_rows: 4n,
|
||||
}],
|
||||
metadata_length: 497,
|
||||
metadata_length: 445,
|
||||
}
|
||||
|
||||
@ -24,7 +24,7 @@ describe('writeMetadata', () => {
|
||||
{ key: 'key1', value: 'value1' },
|
||||
{ key: 'key2', value: 'value2' },
|
||||
],
|
||||
metadata_length: 529,
|
||||
metadata_length: 477,
|
||||
}
|
||||
writeMetadata(writer, withKvMetadata)
|
||||
|
||||
|
||||
@ -39,9 +39,9 @@ describe('parquetWriteBuffer', () => {
|
||||
bool[500] = true
|
||||
bool[9999] = false
|
||||
const file = parquetWriteBuffer({ columnData: [{ name: 'bool', data: bool }] })
|
||||
expect(file.byteLength).toBe(160)
|
||||
expect(file.byteLength).toBe(154)
|
||||
const metadata = parquetMetadata(file)
|
||||
expect(metadata.metadata_length).toBe(98)
|
||||
expect(metadata.metadata_length).toBe(92)
|
||||
const result = await parquetReadObjects({ file })
|
||||
expect(result.length).toBe(10000)
|
||||
expect(result[0]).toEqual({ bool: null })
|
||||
@ -55,14 +55,14 @@ describe('parquetWriteBuffer', () => {
|
||||
it('efficiently serializes long string', () => {
|
||||
const str = 'a'.repeat(10000)
|
||||
const file = parquetWriteBuffer({ columnData: [{ name: 'string', data: [str] }] })
|
||||
expect(file.byteLength).toBe(646)
|
||||
expect(file.byteLength).toBe(638)
|
||||
})
|
||||
|
||||
it('less efficiently serializes string without compression', () => {
|
||||
const str = 'a'.repeat(10000)
|
||||
const columnData = [{ name: 'string', data: [str] }]
|
||||
const file = parquetWriteBuffer({ columnData, compressed: false })
|
||||
expect(file.byteLength).toBe(10176)
|
||||
expect(file.byteLength).toBe(10168)
|
||||
})
|
||||
|
||||
it('efficiently serializes column with few distinct values', async () => {
|
||||
@ -70,7 +70,7 @@ describe('parquetWriteBuffer', () => {
|
||||
.fill('aaaa', 0, 50000)
|
||||
.fill('bbbb', 50000, 100000)
|
||||
const file = parquetWriteBuffer({ columnData: [{ name: 'string', data }], statistics: false })
|
||||
expect(file.byteLength).toBe(178)
|
||||
expect(file.byteLength).toBe(170)
|
||||
// round trip
|
||||
const result = await parquetReadObjects({ file })
|
||||
expect(result.length).toBe(100000)
|
||||
@ -81,8 +81,8 @@ describe('parquetWriteBuffer', () => {
|
||||
it('writes statistics when enabled', () => {
|
||||
const withStats = parquetWriteBuffer({ columnData: exampleData, statistics: true })
|
||||
const noStats = parquetWriteBuffer({ columnData: exampleData, statistics: false })
|
||||
expect(withStats.byteLength).toBe(773)
|
||||
expect(noStats.byteLength).toBe(663)
|
||||
expect(withStats.byteLength).toBe(721)
|
||||
expect(noStats.byteLength).toBe(611)
|
||||
})
|
||||
|
||||
it('serializes list types', async () => {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user