From 17d90c33ef8e2938ed77bc65f7fa4a9ee1497802 Mon Sep 17 00:00:00 2001 From: mwish Date: Thu, 2 Jan 2025 19:23:10 +0800 Subject: [PATCH 1/2] fix asan schema problem --- cpp/src/parquet/arrow/arrow_schema_test.cc | 21 +++++++++++++++++++++ cpp/src/parquet/arrow/schema.cc | 11 ++++++++--- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc index a6e04e54259c9..f4fd86389e9f4 100644 --- a/cpp/src/parquet/arrow/arrow_schema_test.cc +++ b/cpp/src/parquet/arrow/arrow_schema_test.cc @@ -832,6 +832,27 @@ TEST_F(TestConvertParquetSchema, IllegalParquetNestedSchema) { Invalid, testing::HasSubstr("LIST-annotated groups must not be repeated."), ConvertSchema(parquet_fields)); } + // List>: outer list is two-level encoding, inner list is empty. + // + // optional group my_list (LIST) { + // repeated group array (LIST) { + // repeated group list { + // } + // } + // } + { + auto list = GroupNode::Make("list", Repetition::REPEATED, {}); + auto array = + GroupNode::Make("array", Repetition::REPEATED, {list}, ConvertedType::LIST); + std::vector parquet_fields; + parquet_fields.push_back( + GroupNode::Make("my_list", Repetition::OPTIONAL, {array}, ConvertedType::LIST)); + + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, + testing::HasSubstr("LIST-annotated groups must have at least one child."), + ConvertSchema(parquet_fields)); + } } Status ArrowSchemaToParquetMetadata(std::shared_ptr<::arrow::Schema>& arrow_schema, diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index c19e2b9e48bb3..d94c73452c44d 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -676,9 +676,14 @@ Status ListToSchemaField(const GroupNode& group, LevelInfo current_levels, return Status::Invalid("Group with one repeated child must be LIST-annotated."); } // LIST-annotated group with three-level encoding cannot be repeated. - if (repeated_field->is_group() && - !static_cast(*repeated_field).field(0)->is_repeated()) { - return Status::Invalid("LIST-annotated groups must not be repeated."); + if (repeated_field->is_group()) { + auto& repeated_group_field = static_cast(*repeated_field); + if (repeated_group_field.field_count() == 0) { + return Status::Invalid("LIST-annotated groups must have at least one child."); + } + if (!repeated_group_field.field(0)->is_repeated()) { + return Status::Invalid("LIST-annotated groups must not be repeated."); + } } RETURN_NOT_OK( NodeToSchemaField(*repeated_field, current_levels, ctx, out, child_field)); From e0ab9c6f7b2d4a5c194d5c97f63455f72f1db131 Mon Sep 17 00:00:00 2001 From: mwish <1506118561@qq.com> Date: Fri, 3 Jan 2025 14:04:00 +0800 Subject: [PATCH 2/2] Update cpp/src/parquet/arrow/arrow_schema_test.cc Co-authored-by: Gang Wu --- cpp/src/parquet/arrow/arrow_schema_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc index f4fd86389e9f4..535efa0c8e5de 100644 --- a/cpp/src/parquet/arrow/arrow_schema_test.cc +++ b/cpp/src/parquet/arrow/arrow_schema_test.cc @@ -832,7 +832,7 @@ TEST_F(TestConvertParquetSchema, IllegalParquetNestedSchema) { Invalid, testing::HasSubstr("LIST-annotated groups must not be repeated."), ConvertSchema(parquet_fields)); } - // List>: outer list is two-level encoding, inner list is empty. + // List>: outer list is two-level encoding, inner list is empty. // // optional group my_list (LIST) { // repeated group array (LIST) {