Skip to content

Commit

Permalink
Add function for handling a bad prefix map
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt committed Jan 15, 2024
1 parent 094ad63 commit 920b077
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 0 deletions.
4 changes: 4 additions & 0 deletions docs/source/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ This function also accepts a string with a HTTP, HTTPS, or FTP path to a remote
structure for situations when there can be CURIE synonyms or even URI prefix synonyms is
the *extended prefix map* (see below).

If you're not in a position where you can fix data issues upstream, you can try using
:func:`curies.clean_prefix_map` to extract a canonical extended prefix map from a
non-bijective prefix map.

Loading Extended Prefix Maps
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Extended prefix maps (EPMs) address the issues with prefix maps by including explicit
Expand Down
12 changes: 12 additions & 0 deletions src/curies/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2191,3 +2191,15 @@ def _get_shacl_line(prefix: str, uri_prefix: str, pattern: Optional[str] = None)
pattern = pattern.replace("\\", "\\\\")
line += f'; sh:pattern "{pattern}"'
return line + " ]"


def clean_prefix_map(prefix_map: Mapping[str, str]) -> List[Record]:
    """Collapse a non-bijective prefix map into one record per URI prefix.

    :param prefix_map: A mapping from CURIE prefixes to URI prefixes in which
        several CURIE prefixes may point to the same URI prefix.
    :returns: A list of records ordered by URI prefix. In each record, the
        lexicographically first CURIE prefix becomes the primary prefix and the
        remaining CURIE prefixes (also sorted) become its prefix synonyms.
    """
    # Invert the map: collect every CURIE prefix that points at each URI prefix.
    prefixes_by_uri = {}
    for curie_prefix, uri_prefix in prefix_map.items():
        prefixes_by_uri.setdefault(uri_prefix, []).append(curie_prefix)

    records = []
    for uri_prefix in sorted(prefixes_by_uri):
        # Sorting makes the choice of primary prefix deterministic.
        primary, *synonyms = sorted(prefixes_by_uri[uri_prefix])
        records.append(
            Record(prefix=primary, prefix_synonyms=synonyms, uri_prefix=uri_prefix)
        )
    return records
12 changes: 12 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
ReferenceTuple,
URIStandardizationError,
chain,
clean_prefix_map,
)
from curies.sources import (
BIOREGISTRY_CONTEXTS,
Expand Down Expand Up @@ -860,3 +861,14 @@ def test_version_type(self):
"""
version = get_version()
self.assertIsInstance(version, str)


class TestUtils(unittest.TestCase):
    """Test utility functions."""

    def test_clean(self):
        """Check that two prefixes sharing a URI prefix collapse into one record."""
        shared_uri_prefix = "https://example.com/"
        records = clean_prefix_map({"a": shared_uri_prefix, "b": shared_uri_prefix})
        # Both CURIE prefixes point to the same URI prefix, so one record remains,
        # and the alphabetically first prefix is its primary prefix.
        self.assertEqual(1, len(records))
        self.assertEqual("a", records[0].prefix)

0 comments on commit 920b077

Please sign in to comment.