Skip to content

Commit

Permalink
Extended prefix maps usage updates
Browse files Browse the repository at this point in the history
- Bugfix: clean_prefix_map(): Was ignoring prefix aliases. This caused an error, because a mismatch was perceived between the prefixes used in the mapping set and those in the prefix_map.
- Bugfix: get_metadata_and_prefix_map(): Was not utilizing extended prefix maps. This manifested in an issue where prefix aliases were not incorporated. This meant that (a) if we tried to fix it by removing the alias from the plain prefix_map, CURIEs using that alias could not be resolved, and (b) if we included the alias in the plain prefix_map, there would be a duplicate URI prefix, which would result in an error.
- Add: convert_plain_prefix_map_to_extended()
  • Loading branch information
joeflack4 committed Jan 16, 2024
1 parent f40a1c8 commit 764dc1e
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 2 deletions.
21 changes: 20 additions & 1 deletion src/sssom/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,23 @@ def split_file(input_path: str, output_directory: Union[str, Path]) -> None:
write_tables(splitted, output_directory)


def convert_plain_prefix_map_to_extended(prefix_map: Optional[dict]) -> list:
    """Convert a standard key/val prefix map to extended prefix map format.

    Entries are grouped by URI prefix. The first prefix seen for a given URI
    prefix becomes the record's ``prefix``; every later prefix that maps to
    the same URI prefix is appended to that record's ``prefix_synonyms``.
    This lets a plain map that contains prefix aliases be expressed without
    repeating a URI prefix across records.

    :param prefix_map: Mapping of prefix to URI prefix. ``None`` or an empty
        mapping yields an empty list.
    :return: A list of records (in first-seen order of URI prefix), each a
        dict with the keys ``prefix``, ``prefix_synonyms``, ``uri_prefix``
        and ``uri_prefix_synonyms``. ``uri_prefix_synonyms`` is always empty
        because a plain prefix map cannot express URI prefix aliases.
    """
    # Tolerate a missing/null map so callers do not fail on ``.items()``.
    if not prefix_map:
        return []

    by_uri_prefix = {}
    for prefix, uri_prefix in prefix_map.items():
        record = by_uri_prefix.get(uri_prefix)
        if record is not None:
            # URI prefix already claimed: this prefix is an alias of the first.
            record["prefix_synonyms"].append(prefix)
            continue
        by_uri_prefix[uri_prefix] = {
            "prefix": prefix,
            "prefix_synonyms": [],
            "uri_prefix": uri_prefix,
            "uri_prefix_synonyms": [],
        }
    return list(by_uri_prefix.values())


def get_metadata_and_prefix_map(
metadata_path: Union[None, str, Path] = None, *, prefix_map_mode: Optional[MergeMode] = None
) -> Tuple[Converter, MetadataType]:
Expand All @@ -147,7 +164,9 @@ def get_metadata_and_prefix_map(
metadata = yaml.safe_load(file)

metadata = dict(ChainMap(metadata, get_default_metadata()))
converter = Converter.from_prefix_map(metadata.pop(CURIE_MAP, {}))
prefix_map = metadata.pop(CURIE_MAP, {})
epm = convert_plain_prefix_map_to_extended(prefix_map)
converter = Converter.from_extended_prefix_map(epm)
converter = _merge_converter(converter, prefix_map_mode=prefix_map_mode)
return converter, metadata

Expand Down
10 changes: 9 additions & 1 deletion src/sssom/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,15 @@ def clean_prefix_map(self, strict: bool = True) -> None:
if self.metadata:
prefixes_in_table.update(get_prefixes_used_in_metadata(self.metadata))

missing_prefixes = prefixes_in_table - self.converter.get_prefixes()
prefixes = {record.prefix for record in self.converter.records}
aliases = {
p
for p in itt.chain(*[record.prefix_synonyms for record in self.converter.records])
if p
}
converter_prefixes = prefixes | aliases
missing_prefixes = prefixes_in_table - converter_prefixes

if missing_prefixes and strict:
raise ValueError(
f"{missing_prefixes} are used in the SSSOM mapping set but it does not exist in the prefix map"
Expand Down

0 comments on commit 764dc1e

Please sign in to comment.