Split out schemaTree tests

2026-02-22 04:11:32 +00:00 · 2024-01-20 12:17:11 -08:00 · 2024-01-20 12:17:11 -08:00 · 8484426bc8
commit 8484426bc8
parent a40e678214
4 changed files with 92 additions and 107 deletions
--- a/test/helpers.js
+++ b/test/helpers.js
@@ -0,0 +1,26 @@
+import fs from 'fs'
+
+/**
+ * Read a file from disk into an ArrayBuffer containing only that file's bytes.
+ * Slices by byteOffset/byteLength since a Node Buffer may view a larger ArrayBuffer.
+ * @param {string} filePath path passed to fs.promises.readFile
+ * @returns {Promise<ArrayBuffer>} copy of the bytes read from filePath
+ */
+export async function readFileToArrayBuffer(filePath) {
+  const buffer = await fs.promises.readFile(filePath)
+  return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength)
+}
+
+/**
+ * Wrap a file on disk in an AsyncBuffer: byteLength comes from a synchronous stat,
+ * and every slice() call re-reads the whole file before slicing it.
+ * @typedef {import('../src/types.js').AsyncBuffer} AsyncBuffer
+ * @param {string} filePath path to the file (stat'ed immediately, read on each slice)
+ * @returns {AsyncBuffer} object with byteLength and an async slice(start, end)
+ */
+export function fileToAsyncBuffer(filePath) {
+  return {
+    byteLength: fs.statSync(filePath).size,
+    slice: async (start, end) => (await readFileToArrayBuffer(filePath)).slice(start, end),
+  }
+}
--- a/test/metadata.test.js
+++ b/test/metadata.test.js
@@ -1,32 +1,7 @@
-import fs from 'fs'
 import { describe, expect, it } from 'vitest'
-import { parquetMetadata, parquetMetadataAsync, parquetSchema } from '../src/hyparquet.js'
+import { parquetMetadata, parquetMetadataAsync } from '../src/hyparquet.js'
 import { toJson } from '../src/toJson.js'
-
-/**
- * Helper function to read .parquet file into ArrayBuffer
- *
- * @param {string} filePath
- * @returns {Promise<ArrayBuffer>}
- */
-async function readFileToArrayBuffer(filePath) {
-  const buffer = await fs.promises.readFile(filePath)
-  return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength)
-}
-
-/**
- * Wrap .parquet file in an AsyncBuffer
- *
- * @typedef {import('../src/types.js').AsyncBuffer} AsyncBuffer
- * @param {string} filePath
- * @returns {AsyncBuffer}
- */
-function fileToAsyncBuffer(filePath) {
-  return {
-    byteLength: fs.statSync(filePath).size,
-    slice: async (start, end) => (await readFileToArrayBuffer(filePath)).slice(start, end),
-  }
-}
+import { fileToAsyncBuffer, readFileToArrayBuffer } from './helpers.js'

 describe('parquetMetadata', () => {
  it('should parse metadata from addrtype-missing-value.parquet', async () => {
@@ -77,59 +52,6 @@ describe('parquetMetadataAsync', () => {
  })
 })

-describe('parquetSchema', () => {
-  it('should parse schema from addrtype-missing-value.parquet', async () => {
-    const arrayBuffer = await readFileToArrayBuffer('test/files/addrtype-missing-value.parquet')
-    const metadata = parquetMetadata(arrayBuffer)
-    const result = parquetSchema(metadata)
-    expect(toJson(result)).toEqual({
-      children: [
-        {
-          children: [],
-          count: 1,
-          element: {
-            converted_type: 0,
-            name: 'ADDRTYPE',
-            repetition_type: 1,
-            type: 6,
-          },
-        },
-      ],
-      count: 2,
-      element: {
-        name: 'duckdb_schema',
-        num_children: 1,
-        repetition_type: 0,
-      },
-    })
-  })
-
-  it('should parse schema from rowgroups.parquet', async () => {
-    const arrayBuffer = await readFileToArrayBuffer('test/files/rowgroups.parquet')
-    const metadata = parquetMetadata(arrayBuffer)
-    const result = parquetSchema(metadata)
-    expect(toJson(result)).toEqual({
-      children: [
-        {
-          children: [],
-          count: 1,
-          element: {
-            name: 'numbers',
-            repetition_type: 1,
-            type: 2,
-          },
-        },
-      ],
-      count: 2,
-      element: {
-        name: 'schema',
-        num_children: 1,
-        repetition_type: 0,
-      },
-    })
-  })
-})
-
 // Parquet v1 from DuckDB
 const addrtypeMetadata = {
  version: 1,
--- a/test/read.test.js
+++ b/test/read.test.js
@@ -1,32 +1,7 @@
-import fs from 'fs'
 import { describe, expect, it } from 'vitest'
 import { parquetRead } from '../src/hyparquet.js'
 import { toJson } from '../src/toJson.js'
-
-/**
- * Helper function to read .parquet file into ArrayBuffer
- *
- * @param {string} filePath
- * @returns {Promise<ArrayBuffer>}
- */
-async function readFileToArrayBuffer(filePath) {
-  const buffer = await fs.promises.readFile(filePath)
-  return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength)
-}
-
-/**
- * Wrap .parquet file in an AsyncBuffer
- *
- * @typedef {import('../src/types.js').AsyncBuffer} AsyncBuffer
- * @param {string} filePath
- * @returns {AsyncBuffer}
- */
-function fileToAsyncBuffer(filePath) {
-  return {
-    byteLength: fs.statSync(filePath).size,
-    slice: async (start, end) => (await readFileToArrayBuffer(filePath)).slice(start, end),
-  }
-}
+import { fileToAsyncBuffer } from './helpers.js'

 describe('parquetMetadataAsync', () => {
  it('should parse data from addrtype-missing-value.parquet', async () => {
@@ -34,7 +9,7 @@ describe('parquetMetadataAsync', () => {
    await parquetRead({
      file: asyncBuffer,
      onComplete: (rows) => {
-        expect(toJson(rows)).toEqual(addrtypeData)
+        expect(rows).toEqual(addrtypeData)
      },
    })
  })
--- a/test/schemaTree.test.js
+++ b/test/schemaTree.test.js
@@ -0,0 +1,62 @@
+import { describe, expect, it } from 'vitest'
+import { parquetMetadata, parquetSchema } from '../src/hyparquet.js'
+import { readFileToArrayBuffer } from './helpers.js'
+
+describe('schemaTree', () => {
+  it('should parse schema tree from addrtype-missing-value.parquet', async () => {
+    const arrayBuffer = await readFileToArrayBuffer('test/files/addrtype-missing-value.parquet')
+    const metadata = parquetMetadata(arrayBuffer)
+    const result = parquetSchema(metadata)
+    expect(result).toEqual(addrtypeSchema) // fixture declared further down in this file
+  })
+
+  it('should parse schema tree from rowgroups.parquet', async () => {
+    const arrayBuffer = await readFileToArrayBuffer('test/files/rowgroups.parquet')
+    const metadata = parquetMetadata(arrayBuffer)
+    const result = parquetSchema(metadata)
+    expect(result).toEqual(rowgroupsSchema) // fixture declared further down in this file
+  })
+})
+
+// Expected parquetSchema() result for addrtype-missing-value.parquet (Parquet v1 from DuckDB)
+const addrtypeSchema = {
+  children: [
+    {
+      children: [],
+      count: 1,
+      element: {
+        converted_type: 0,
+        name: 'ADDRTYPE',
+        repetition_type: 1,
+        type: 6,
+      },
+    },
+  ],
+  count: 2,
+  element: {
+    name: 'duckdb_schema',
+    num_children: 1,
+    repetition_type: 0,
+  },
+}
+
+// Expected parquetSchema() result for rowgroups.parquet (Parquet v2 from pandas with 2 row groups)
+const rowgroupsSchema = {
+  children: [
+    {
+      children: [],
+      count: 1,
+      element: {
+        name: 'numbers',
+        repetition_type: 1,
+        type: 2,
+      },
+    },
+  ],
+  count: 2,
+  element: {
+    name: 'schema',
+    num_children: 1,
+    repetition_type: 0,
+  },
+}