mirror of
https://github.com/asadbek064/hyparquet.git
synced 2026-02-21 20:01:33 +00:00
schemaElement returns trees
This commit is contained in:
parent
8a98407734
commit
c6ad30b59a
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,7 +1,6 @@
|
||||
node_modules
|
||||
package-lock.json
|
||||
coverage
|
||||
dist
|
||||
*.tgz
|
||||
example.parquet
|
||||
.vscode
|
||||
|
||||
@ -27,15 +27,15 @@
|
||||
"typecheck": "tsc"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "20.11.21",
|
||||
"@typescript-eslint/eslint-plugin": "7.1.0",
|
||||
"@types/node": "20.11.26",
|
||||
"@typescript-eslint/eslint-plugin": "7.2.0",
|
||||
"@vitest/coverage-v8": "1.3.1",
|
||||
"eslint": "8.57.0",
|
||||
"eslint-plugin-import": "2.29.1",
|
||||
"eslint-plugin-jsdoc": "48.2.0",
|
||||
"eslint-plugin-jsdoc": "48.2.1",
|
||||
"http-server": "14.1.1",
|
||||
"hysnappy": "0.3.0",
|
||||
"typescript": "5.3.3",
|
||||
"typescript": "5.4.2",
|
||||
"vitest": "1.3.1"
|
||||
}
|
||||
}
|
||||
|
||||
@ -79,10 +79,10 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata,
|
||||
} else {
|
||||
if (dictionaryEncoding && dictionary) {
|
||||
dereferenceDictionary(dictionary, dataPage)
|
||||
values = convert(dataPage, schemaElement(schema, columnMetadata.path_in_schema))
|
||||
values = convert(dataPage, schemaElement(schema, columnMetadata.path_in_schema).element)
|
||||
} else if (Array.isArray(dataPage)) {
|
||||
// convert primitive types to rich types
|
||||
values = convert(dataPage, schemaElement(schema, columnMetadata.path_in_schema))
|
||||
values = convert(dataPage, schemaElement(schema, columnMetadata.path_in_schema).element)
|
||||
} else {
|
||||
values = dataPage // TODO: data page shouldn't be a fixed byte array?
|
||||
}
|
||||
|
||||
@ -61,8 +61,8 @@ export function readDataPage(bytes, daph, schema, columnMetadata) {
|
||||
// read values based on encoding
|
||||
const nValues = daph.num_values - numNulls
|
||||
if (daph.encoding === 'PLAIN') {
|
||||
const se = schemaElement(schema, columnMetadata.path_in_schema)
|
||||
const utf8 = se.converted_type === 'UTF8'
|
||||
const { element } = schemaElement(schema, columnMetadata.path_in_schema)
|
||||
const utf8 = element.converted_type === 'UTF8'
|
||||
const plainObj = readPlain(dataView, columnMetadata.type, nValues, offset, utf8)
|
||||
values = Array.isArray(plainObj.value) ? plainObj.value : Array.from(plainObj.value)
|
||||
offset += plainObj.byteLength
|
||||
|
||||
@ -47,8 +47,8 @@ export function readDataPageV2(compressedBytes, ph, schema, columnMetadata, comp
|
||||
// read values based on encoding
|
||||
const nValues = daph2.num_values - daph2.num_nulls
|
||||
if (daph2.encoding === 'PLAIN') {
|
||||
const se = schemaElement(schema, columnMetadata.path_in_schema)
|
||||
const utf8 = se.converted_type === 'UTF8'
|
||||
const { element } = schemaElement(schema, columnMetadata.path_in_schema)
|
||||
const utf8 = element.converted_type === 'UTF8'
|
||||
let page = compressedBytes.slice(offset)
|
||||
if (daph2.is_compressed && columnMetadata.codec !== 'UNCOMPRESSED') {
|
||||
page = decompressPage(page, uncompressedPageSize, columnMetadata.codec, compressors)
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import { CompressionCodec, ConvertedType, Encoding, FieldRepetitionType, ParquetType } from './constants.js'
|
||||
import { schemaTree } from './schema.js'
|
||||
import { schemaElement } from './schema.js'
|
||||
import { deserializeTCompactProtocol } from './thrift.js'
|
||||
|
||||
/**
|
||||
@ -170,7 +170,7 @@ export function parquetMetadata(arrayBuffer) {
|
||||
* @returns {SchemaTree} tree of schema elements
|
||||
*/
|
||||
export function parquetSchema(metadata) {
|
||||
return schemaTree(metadata.schema, 0)
|
||||
return schemaElement(metadata.schema, [])
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -10,7 +10,7 @@
|
||||
* @param {number} rootIndex index of the root element
|
||||
* @returns {SchemaTree} tree of schema elements
|
||||
*/
|
||||
export function schemaTree(schema, rootIndex) {
|
||||
function schemaTree(schema, rootIndex) {
|
||||
const root = schema[rootIndex]
|
||||
const children = []
|
||||
let count = 1
|
||||
@ -32,7 +32,7 @@ export function schemaTree(schema, rootIndex) {
|
||||
*
|
||||
* @param {SchemaElement[]} schema
|
||||
* @param {string[]} name path to the element
|
||||
* @returns {SchemaElement} schema element
|
||||
* @returns {SchemaTree} schema element
|
||||
*/
|
||||
export function schemaElement(schema, name) {
|
||||
let tree = schemaTree(schema, 0)
|
||||
@ -42,7 +42,7 @@ export function schemaElement(schema, name) {
|
||||
if (!child) throw new Error(`parquet schema element not found: ${name}`)
|
||||
tree = child
|
||||
}
|
||||
return tree.element
|
||||
return tree
|
||||
}
|
||||
|
||||
/**
|
||||
@ -77,7 +77,7 @@ export function isRequired(schema, name) {
|
||||
export function getMaxRepetitionLevel(schema, parts) {
|
||||
let maxLevel = 0
|
||||
parts.forEach((part, i) => {
|
||||
const element = schemaElement(schema, parts.slice(0, i + 1))
|
||||
const { element } = schemaElement(schema, parts.slice(0, i + 1))
|
||||
if (element.repetition_type === 'REPEATED') {
|
||||
maxLevel += 1
|
||||
}
|
||||
@ -95,7 +95,7 @@ export function getMaxRepetitionLevel(schema, parts) {
|
||||
export function getMaxDefinitionLevel(schema, parts) {
|
||||
let maxLevel = 0
|
||||
parts.forEach((part, i) => {
|
||||
const element = schemaElement(schema, parts.slice(0, i + 1))
|
||||
const { element } = schemaElement(schema, parts.slice(0, i + 1))
|
||||
if (element.repetition_type !== 'REQUIRED') {
|
||||
maxLevel += 1
|
||||
}
|
||||
|
||||
@ -20,7 +20,11 @@ describe('Parquet schema utils', () => {
|
||||
|
||||
describe('schemaElement', () => {
|
||||
it('should return the correct schema element', () => {
|
||||
expect(schemaElement(schema, ['child1'])).toEqual(schema[1])
|
||||
expect(schemaElement(schema, ['child1'])).toEqual({
|
||||
children: [],
|
||||
count: 1,
|
||||
element: { name: 'child1', repetition_type: 'OPTIONAL' },
|
||||
})
|
||||
})
|
||||
|
||||
it('should throw an error if element not found', () => {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user