Skip to content

Commit

Permalink
Merge pull request #8 from INCATools/bracket-issue
Browse files Browse the repository at this point in the history
Bracket issue
  • Loading branch information
hrshdhgd authored Aug 29, 2022
2 parents 4e2f1f2 + da8febe commit e6d1f9f
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 14 deletions.
4 changes: 3 additions & 1 deletion src/kgcl_schema/grammar/kgcl.lark
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,10 @@ INNER_SINGLE_QUOTE_LITERAL: /((?!\').)*/
INNER_TRIPLE_SINGLE_QUOTE_LITERAL: /((?!\'\'\').)*/

ID : "<" INNER_ID ">"
| INNER_ID

INNER_ID: /\S+/

INNER_ID: /((?!>).)*/
STRING: /.+/

_WS: /[ \t\f\r\n]/+
Expand Down
34 changes: 21 additions & 13 deletions src/kgcl_schema/grammar/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@ def parse_statement(input: str) -> Change:
Return an instantiated dataclass object from model.kgcl_schema.
"""
regex = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
uri_list = re.findall(regex, input)
if uri_list:
# curie = curie_from_iri(uri[0].replace("<", "").replace(">",""))
for _, uri in enumerate(uri_list):
pref, i = parse_iri(uri)
pref = get_preferred_prefix(pref)
curie = curie_to_str(pref, i)
input = input.replace(uri, curie)
# regex = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
# uri_list = re.findall(regex, input)
# if uri_list:
# # curie = curie_from_iri(uri[0].replace("<", "").replace(">",""))
# for _, uri in enumerate(uri_list):
# pref, i = parse_iri(uri)
# pref = get_preferred_prefix(pref)
# curie = curie_to_str(pref, i)
# input = input.replace(uri, curie)

tree = kgcl_parser.parse(input)
id = "kgcl_change_id_" + str(next(id_gen))
Expand Down Expand Up @@ -181,12 +181,12 @@ def parse_create(tree, id):
language_token = extract(tree, "language")

entity, representation = get_entity_representation(term_id_token)

return NodeCreation(
id=id,
about_node=entity,
about_node_representation=representation,
node_id=term_id_token,
node_id=entity, # was term_id_token
name=label_token,
language=language_token,
)
Expand Down Expand Up @@ -517,7 +517,7 @@ def get_entity_representation(entity):
first_character = entity[0]
last_character = entity[-1:]
if first_character == "<" and last_character == ">":
return entity, "uri" # not removing brackets (TODO why?)
return contract_uri(entity.replace("<", "").replace(">","")), "curie" # removing brackets
if first_character == "'" and last_character == "'" and entity[1] != "'":
return entity[1:-1], "label"
if first_character == '"' and last_character == '"':
Expand All @@ -528,9 +528,17 @@ def get_entity_representation(entity):
return entity[3:-3], "literal"

# TODO: use predefined set of prefixes to identify CURIEs
return entity, "curie"
return contract_uri(str(entity)), "curie"
# return entity, "error"

def contract_uri(uri_or_curie: str):
    """Contract an HTTP(S) IRI to a CURIE; pass any other string through.

    :param uri_or_curie: Either an IRI or a string that is not one
        (e.g. something that is already a CURIE).
    :return: If the input starts with ``http://`` or ``https://``, the
        result of ``curie_to_str`` applied to the preferred form of the
        prefix and the local identifier obtained from ``parse_iri``.
        Otherwise the input string itself, unchanged.
    """
    # str.startswith accepts a tuple, so both schemes are checked in one call.
    if not uri_or_curie.startswith(("http://", "https://")):
        return uri_or_curie
    prefix, local_id = parse_iri(uri_or_curie)
    return curie_to_str(get_preferred_prefix(prefix), local_id)

@click.command()
@click.option("--output",
Expand Down
12 changes: 12 additions & 0 deletions tests/cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,18 @@
about_node_representation='curie'),
None
),
(
f"create node {NEW_TERM_URI} 'foo'",
# TODO: diff not working here:
#f"create node {NEW_TERM_URI} 'foo'",
TODO_TOKEN,
NodeCreation(id=UID,
node_id=NEW_TERM, ## TODO: remove this
about_node=NEW_TERM,
name="'foo'",
about_node_representation='curie'),
None
),
(
f"create edge {NUCLEUS} {PART_OF} {RESPONSE_TO_UV}",
f"create edge {NUCLEUS_URI} {PART_OF_URI} {RESPONSE_TO_UV_URI}",
Expand Down

0 comments on commit e6d1f9f

Please sign in to comment.