Fix for issue #23 nested struct assembly

This commit is contained in:
Kenny Daniel 2024-07-30 16:19:09 -07:00
parent 23f6529a73
commit c6c79c05ca
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
4 changed files with 3541 additions and 8 deletions

@ -125,7 +125,7 @@ export function assembleNested(subcolumnData, schema, depth = 0) {
const subcolumn = sublist.path.join('.')
const values = subcolumnData.get(subcolumn)
if (!values) throw new Error('parquet list-like column missing values')
if (!values) throw new Error('parquet list column missing values')
if (optional) flattenAtDepth(values, depth)
subcolumnData.set(path, values)
subcolumnData.delete(subcolumn)
@ -142,10 +142,10 @@ export function assembleNested(subcolumnData, schema, depth = 0) {
const keys = subcolumnData.get(`${path}.${mapName}.key`)
const values = subcolumnData.get(`${path}.${mapName}.value`)
if (!keys) throw new Error('parquet map-like column missing keys')
if (!values) throw new Error('parquet map-like column missing values')
if (!keys) throw new Error('parquet map column missing keys')
if (!values) throw new Error('parquet map column missing values')
if (keys.length !== values.length) {
throw new Error('parquet map-like column key/value length mismatch')
throw new Error('parquet map column key/value length mismatch')
}
const out = assembleMaps(keys, values, nextDepth)
@ -160,12 +160,13 @@ export function assembleNested(subcolumnData, schema, depth = 0) {
// Struct-like column
if (schema.children.length) {
// construct a meta struct and then invert
const invertDepth = schema.element.repetition_type === 'REQUIRED' ? depth : depth + 1
/** @type {Record<string, any>} */
const struct = {}
for (const child of schema.children) {
assembleNested(subcolumnData, child, nextDepth)
assembleNested(subcolumnData, child, invertDepth)
const childData = subcolumnData.get(child.path.join('.'))
if (!childData) throw new Error('parquet struct-like column missing child data')
if (!childData) throw new Error('parquet struct missing child data')
struct[child.element.name] = childData
}
// remove children
@ -173,12 +174,10 @@ export function assembleNested(subcolumnData, schema, depth = 0) {
subcolumnData.delete(child.path.join('.'))
}
// invert struct by depth
const invertDepth = schema.element.repetition_type === 'REQUIRED' ? depth : depth + 1
const inverted = invertStruct(struct, invertDepth)
if (optional) flattenAtDepth(inverted, depth)
subcolumnData.set(path, inverted)
}
// assert(schema.element.repetition_type !== 'REPEATED')
}
/**
@ -238,6 +237,7 @@ function invertStruct(struct, depth) {
/** @type {Record<string, any>} */
const obj = {}
for (const key of keys) {
if (struct[key].length !== length) throw new Error('parquet struct parsing error')
obj[key] = struct[key][i]
}
if (depth) {

335
test/files/issue23.json Normal file

@ -0,0 +1,335 @@
[
[
{
"categories": [
{
"key": "22428702",
"name": [
{
"key": "PL",
"value": "Test category 23"
}
],
"dishes": [
{
"key": "22428878",
"name": [
{
"key": "PL",
"value": "Test"
}
],
"addons": [
{
"key": "22806893",
"name": [
{
"key": "PL",
"value": "Dish extra 1"
}
],
"required": false,
"maxSelected": -1,
"items": [
{
"key": "21985972",
"ingredient": {
"key": "861005",
"name": [
{
"key": "UK",
"value": "Цебуля"
},
{
"key": "EN",
"value": "Onion"
},
{
"key": "PL",
"value": "Cebula"
}
]
},
"unitSize": "KG",
"price": {
"scale": 1,
"int_val": {
"value": "7"
}
},
"volume": {
"scale": 0
}
},
{
"key": "21985948",
"ingredient": {
"key": "20997760",
"name": [
{
"key": "PL",
"value": "asd"
}
]
},
"unitSize": "KG",
"price": {
"scale": 1,
"int_val": {
"value": "\u000d"
}
},
"volume": {
"scale": 0
}
}
]
},
{
"key": "22806869",
"name": [
{
"key": "PL",
"value": "Dodatki do śniadania"
}
],
"required": false,
"maxSelected": -1,
"items": [
{
"key": "22861716",
"ingredient": {
"key": "11742617",
"name": [
{
"key": "PL",
"value": "Śmietana 12%"
}
]
},
"unitSize": "KG",
"price": {
"scale": 0,
"int_val": {
"value": "\u0001"
}
},
"volume": {
"scale": 0
}
},
{
"key": "22673128",
"ingredient": {
"key": "895653",
"name": [
{
"key": "UK",
"value": "Картопля"
},
{
"key": "EN",
"value": "Potato"
},
{
"key": "PL",
"value": "Ziemnaik"
}
]
},
"unitSize": "KG",
"price": {
"scale": 0,
"int_val": {
"value": "o"
}
},
"volume": {
"scale": 0
}
},
{
"key": "22672907",
"ingredient": {
"key": "861005",
"name": [
{
"key": "UK",
"value": "Цебуля"
},
{
"key": "EN",
"value": "Onion"
},
{
"key": "PL",
"value": "Cebula"
}
]
},
"unitSize": "KG",
"price": {
"scale": 0
},
"volume": {
"scale": 0
}
}
]
}
],
"description": [
{
"key": "PL",
"value": "asd"
}
],
"components": [
{
"key": "PL",
"value": ""
}
],
"unitSize": "KG",
"priceSizes": [
{
"volume": {
"scale": 0,
"int_val": {
"value": "d"
}
},
"price": {
"scale": 0,
"int_val": {
"value": "\u000a"
}
}
}
],
"orderIndex": 0,
"popular": false,
"tags": [
"VEGETARIAN",
"VEGAN",
"HALAL",
"KOSHER",
"LACTOSE_FREE",
"GLUTEN_FREE",
"ORGANIC"
],
"images": [
{
"url": "https://file-storage.itasty.eu/image/dish/d92c8ad1-1679-41e4-8596-4652c0ec9131/d92c8ad1-1679-41e4-8596-4652c0ec9131.jpeg",
"transformations": [
{
"label": "THUMBNAIL_SMALL",
"url": "https://file-storage.itasty.eu/image/dish/d92c8ad1-1679-41e4-8596-4652c0ec9131/d92c8ad1-1679-41e4-8596-4652c0ec9131_thumbnail_small_w285.jpeg"
},
{
"label": "THUMBNAIL",
"url": "https://file-storage.itasty.eu/image/dish/d92c8ad1-1679-41e4-8596-4652c0ec9131/d92c8ad1-1679-41e4-8596-4652c0ec9131_thumbnail_w400.jpeg"
},
{
"label": "LARGE",
"url": "https://file-storage.itasty.eu/image/dish/d92c8ad1-1679-41e4-8596-4652c0ec9131/d92c8ad1-1679-41e4-8596-4652c0ec9131_large_w800.jpeg"
}
]
}
]
}
]
},
{
"key": "22425554",
"name": [
{
"key": "PL",
"value": "Test catergory"
}
],
"dishes": [
{
"key": "22425677",
"name": [
{
"key": "UK",
"value": "Duży awokado tost z jajkiem"
},
{
"key": "PL",
"value": "Duży awokado tost z jajkiem"
}
],
"addons": [],
"description": [
{
"key": "UK",
"value": "Duży awokado tost z jajkiem"
},
{
"key": "PL",
"value": "Rzemieślnicze pieczywo, dwa jajka sadzone lub jajecznica, guacamole, serek kremowy, podawane z sałatką"
}
],
"components": [
{
"key": "UK",
"value": ""
},
{
"key": "PL",
"value": ""
}
],
"unitSize": "KG",
"priceSizes": [
{
"volume": {
"scale": 0,
"int_val": {
"value": "\u0001"
}
},
"price": {
"scale": 0,
"int_val": {
"value": "\u000a"
}
}
}
],
"orderIndex": 0,
"popular": false,
"tags": [
"VEGETARIAN"
],
"images": []
}
]
}
],
"description": [
{
"key": "PL",
"value": "Test Fable"
}
],
"key": "22425501",
"languages": [
"PL",
"EN",
"UK"
],
"name": [
{
"key": "PL",
"value": "Test Fable"
}
],
"title": ""
},
null,
"PLN"
]
]

File diff suppressed because it is too large Load Diff

BIN
test/files/issue23.parquet Normal file

Binary file not shown.