Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arrow, Parquet, Spark 3.5, Flink 1.20: Avoid deprecated method #11874

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.Dictionary;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType;

/**
Expand Down Expand Up @@ -225,7 +225,9 @@ private ArrowVectorAccessor<DecimalT, Utf8StringT, ArrayT, ChildVectorT> getPlai
}

private static boolean isDecimal(PrimitiveType primitive) {
return primitive != null && OriginalType.DECIMAL.equals(primitive.getOriginalType());
return primitive != null
&& primitive.getLogicalTypeAnnotation()
instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;
}

private static class BooleanAccessor<
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
import org.apache.parquet.column.page.PageReadStore;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.ColumnPath;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType;

/**
Expand Down Expand Up @@ -226,7 +226,8 @@ private static Types.NestedField getPhysicalType(
PrimitiveType primitive = desc.getPrimitiveType();
PrimitiveType.PrimitiveTypeName typeName = primitive.getPrimitiveTypeName();
Types.NestedField physicalType = logicalType;
if (OriginalType.DECIMAL.equals(primitive.getOriginalType())) {
if (primitive.getLogicalTypeAnnotation()
instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) {
org.apache.iceberg.types.Type type;
if (PrimitiveType.PrimitiveTypeName.INT64.equals(typeName)) {
// Use BigIntVector for long backed decimal
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;

Expand All @@ -51,106 +51,100 @@ public static <T> T visit(
} else {
// if not a primitive, the typeId must be a group
GroupType group = type.asGroupType();
OriginalType annotation = group.getOriginalType();
if (annotation != null) {
switch (annotation) {
case LIST:
Preconditions.checkArgument(
!group.isRepetition(Type.Repetition.REPEATED),
"Invalid list: top-level group is repeated: %s",
group);
Preconditions.checkArgument(
group.getFieldCount() == 1,
"Invalid list: does not contain single repeated field: %s",
group);

GroupType repeatedElement = group.getFields().get(0).asGroupType();
Preconditions.checkArgument(
repeatedElement.isRepetition(Type.Repetition.REPEATED),
"Invalid list: inner group is not repeated");
Preconditions.checkArgument(
repeatedElement.getFieldCount() <= 1,
"Invalid list: repeated group is not a single field: %s",
group);

Preconditions.checkArgument(
sType instanceof ArrayType, "Invalid list: %s is not an array", sType);
ArrayType array = (ArrayType) sType;
RowType.RowField element =
new RowField(
"element", array.getElementType(), "element of " + array.asSummaryString());

visitor.fieldNames.push(repeatedElement.getName());
try {
T elementResult = null;
if (repeatedElement.getFieldCount() > 0) {
elementResult = visitField(element, repeatedElement.getType(0), visitor);
}

return visitor.list(array, group, elementResult);

} finally {
visitor.fieldNames.pop();
}

case MAP:
Preconditions.checkArgument(
!group.isRepetition(Type.Repetition.REPEATED),
"Invalid map: top-level group is repeated: %s",
group);
Preconditions.checkArgument(
group.getFieldCount() == 1,
"Invalid map: does not contain single repeated field: %s",
group);

GroupType repeatedKeyValue = group.getType(0).asGroupType();
Preconditions.checkArgument(
repeatedKeyValue.isRepetition(Type.Repetition.REPEATED),
"Invalid map: inner group is not repeated");
Preconditions.checkArgument(
repeatedKeyValue.getFieldCount() <= 2,
"Invalid map: repeated group does not have 2 fields");

Preconditions.checkArgument(
sType instanceof MapType, "Invalid map: %s is not a map", sType);
MapType map = (MapType) sType;
RowField keyField =
new RowField("key", map.getKeyType(), "key of " + map.asSummaryString());
RowField valueField =
new RowField("value", map.getValueType(), "value of " + map.asSummaryString());

visitor.fieldNames.push(repeatedKeyValue.getName());
try {
T keyResult = null;
T valueResult = null;
switch (repeatedKeyValue.getFieldCount()) {
case 2:
// if there are 2 fields, both key and value are projected
keyResult = visitField(keyField, repeatedKeyValue.getType(0), visitor);
valueResult = visitField(valueField, repeatedKeyValue.getType(1), visitor);
break;
case 1:
// if there is just one, use the name to determine what it is
Type keyOrValue = repeatedKeyValue.getType(0);
if (keyOrValue.getName().equalsIgnoreCase("key")) {
keyResult = visitField(keyField, keyOrValue, visitor);
// value result remains null
} else {
valueResult = visitField(valueField, keyOrValue, visitor);
// key result remains null
}
break;
default:
// both results will remain null
LogicalTypeAnnotation annotation = group.getLogicalTypeAnnotation();
if (LogicalTypeAnnotation.listType().equals(annotation)) {
Preconditions.checkArgument(
!group.isRepetition(Type.Repetition.REPEATED),
"Invalid list: top-level group is repeated: %s",
group);
Preconditions.checkArgument(
group.getFieldCount() == 1,
"Invalid list: does not contain single repeated field: %s",
group);

GroupType repeatedElement = group.getFields().get(0).asGroupType();
Preconditions.checkArgument(
repeatedElement.isRepetition(Type.Repetition.REPEATED),
"Invalid list: inner group is not repeated");
Preconditions.checkArgument(
repeatedElement.getFieldCount() <= 1,
"Invalid list: repeated group is not a single field: %s",
group);

Preconditions.checkArgument(
sType instanceof ArrayType, "Invalid list: %s is not an array", sType);
ArrayType array = (ArrayType) sType;
RowField element =
new RowField(
"element", array.getElementType(), "element of " + array.asSummaryString());

visitor.fieldNames.push(repeatedElement.getName());
try {
T elementResult = null;
if (repeatedElement.getFieldCount() > 0) {
elementResult = visitField(element, repeatedElement.getType(0), visitor);
}

return visitor.list(array, group, elementResult);

} finally {
visitor.fieldNames.pop();
}
} else if (LogicalTypeAnnotation.mapType().equals(annotation)) {
Preconditions.checkArgument(
!group.isRepetition(Type.Repetition.REPEATED),
"Invalid map: top-level group is repeated: %s",
group);
Preconditions.checkArgument(
group.getFieldCount() == 1,
"Invalid map: does not contain single repeated field: %s",
group);

GroupType repeatedKeyValue = group.getType(0).asGroupType();
Preconditions.checkArgument(
repeatedKeyValue.isRepetition(Type.Repetition.REPEATED),
"Invalid map: inner group is not repeated");
Preconditions.checkArgument(
repeatedKeyValue.getFieldCount() <= 2,
"Invalid map: repeated group does not have 2 fields");

Preconditions.checkArgument(
sType instanceof MapType, "Invalid map: %s is not a map", sType);
MapType map = (MapType) sType;
RowField keyField =
new RowField("key", map.getKeyType(), "key of " + map.asSummaryString());
RowField valueField =
new RowField("value", map.getValueType(), "value of " + map.asSummaryString());

visitor.fieldNames.push(repeatedKeyValue.getName());
try {
T keyResult = null;
T valueResult = null;
switch (repeatedKeyValue.getFieldCount()) {
case 2:
// if there are 2 fields, both key and value are projected
keyResult = visitField(keyField, repeatedKeyValue.getType(0), visitor);
valueResult = visitField(valueField, repeatedKeyValue.getType(1), visitor);
break;
case 1:
// if there is just one, use the name to determine what it is
Type keyOrValue = repeatedKeyValue.getType(0);
if (keyOrValue.getName().equalsIgnoreCase("key")) {
keyResult = visitField(keyField, keyOrValue, visitor);
// value result remains null
} else {
valueResult = visitField(valueField, keyOrValue, visitor);
// key result remains null
}
break;
default:
// both results will remain null
}

return visitor.map(map, group, keyResult, valueResult);

} finally {
visitor.fieldNames.pop();
}
return visitor.map(map, group, keyResult, valueResult);

default:
} finally {
visitor.fieldNames.pop();
}
}
Preconditions.checkArgument(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;

Expand All @@ -41,17 +41,11 @@ public static <T> T visit(Type type, ParquetTypeVisitor<T> visitor) {
} else {
// if not a primitive, the typeId must be a group
GroupType group = type.asGroupType();
OriginalType annotation = group.getOriginalType();
if (annotation != null) {
switch (annotation) {
case LIST:
return visitList(group, visitor);

case MAP:
return visitMap(group, visitor);

default:
}
LogicalTypeAnnotation annotation = group.getLogicalTypeAnnotation();
if (LogicalTypeAnnotation.listType().equals(annotation)) {
return visitList(group, visitor);
} else if (LogicalTypeAnnotation.mapType().equals(annotation)) {
return visitMap(group, visitor);
}

return visitor.struct(group, visitFields(group, visitor));
Expand Down
Loading