From 384652c46ddf14bff200581c045775f656814974 Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Mon, 25 Jul 2022 19:27:42 -0400 Subject: [PATCH] Feature: Basic slurp pipeline - Update: Basic pseudo code in Python updated - Update: Makefile: Updating formatting. --- src/ontology/mondo-ingest.Makefile | 10 +++- src/scripts/migrate.py | 77 ++++++++++++++++++------------ 2 files changed, 55 insertions(+), 32 deletions(-) diff --git a/src/ontology/mondo-ingest.Makefile b/src/ontology/mondo-ingest.Makefile index 223d79530..1c0c1bd02 100644 --- a/src/ontology/mondo-ingest.Makefile +++ b/src/ontology/mondo-ingest.Makefile @@ -267,10 +267,16 @@ signature_reports: $(ALL_MIRROR_SIGNTAURE_REPORTS) $(ALL_COMPONENT_SIGNTAURE_REP slurp/: mkdir -p $@ +# Feel free to change the signature. Min ID is the next available Mondo ID. slurp/%.tsv: components/%.owl tmp/mondo.sssom.tsv reports/mirror-signature-mondo.tsv | slurp/ - python $(SCRIPTSDIR)/migrate.py -i $< --mapping-file tmp/mondo.sssom.tsv --min-id 123000 --mondo-terms reports/mirror-signature-mondo.tsv --output $@ - # Feel free to change the signature. Min ID is the next available Mondo ID. + python $(SCRIPTSDIR)/migrate.py \ + -i $< \ + --mapping-file tmp/mondo.sssom.tsv \ + --min-id 123000 \ + --mondo-terms reports/mirror-signature-mondo.tsv \ + --output $@ slurp-%: slurp/%.tsv +# TODO: add more ontologies, e.g.: doid, icd10cm, icd10who, ncit, ordo slurp: slurp-omim \ No newline at end of file diff --git a/src/scripts/migrate.py b/src/scripts/migrate.py index 697845dd8..97368979f 100644 --- a/src/scripts/migrate.py +++ b/src/scripts/migrate.py @@ -1,37 +1,54 @@ -# Migration pipeline +"""Migration pipeline #### THIS IS PSEUDO CODE NOT PYTHON OR ANYTHING #### THIS IS PSEUDO CODE NOT PYTHON OR ANYTHING #### THIS IS PSEUDO CODE NOT PYTHON OR ANYTHING #### THIS IS PSEUDO CODE NOT PYTHON OR ANYTHING +TODOs: + - add CLI: look to makefile for what to include +""" +import oakliblib +import pandas + + #Inputs: -source_ontology #e.g. 
omim -sssom_map # e.g. mondo.sssom.tsv -min_id -termlist_mondo - -#Outputs: -data = [] - -for t in source_ontology: - if t not in sssom_map['object_id']: - parents = [] - migrate = True - for p in oak.get_direct_parents(t): - if p not in sssom_map['object_id']: - migrate = False - break - elif sssom_map[sssom_map['object_id']==p]['predicate_id'] = 'skos:exactMatch' \ - or sssom_map[sssom_map['object_id']==p]['predicate_id'] = 'skos:narrowMatch': - # In other words, if the parent is mapped, and the mapping is either exact or narrower - parents.append(sssom_map[sssom_map['object_id']==p]['subject_id']) - else: - # Its fine, just continue looking for other parents in this case - if migrate and parents: - next_mondo_id = determine_next_available_mondo_id(min_id, termlist_mondo) # satrting from min_id, then counting up and checking if it does not already exist. - label = oak.get_label(t) - definition = oak.get_definition(t) - data.append({'mondo_id':next_mondo_id, 'xref': t, 'label': label, 'definition': definition}) - -pandas.DataFrame(data).to_csv(fn, sep="\t") \ No newline at end of file +source_ontology = '' #e.g. omim +sssom_map = '' # e.g. mondo.sssom.tsv +min_id = '' +termlist_mondo = '' + + +def run(source_ontology = '', sssom_map = '', min_id = '', termlist_mondo = ''): + """source_ontology = '' #e.g. omim + sssom_map = '' # e.g. 
mondo.sssom.tsv + min_id = '' + termlist_mondo = ''""" + #Outputs: + data = [] + + for t in source_ontology: + if t not in sssom_map['object_id']: + parents = [] + migrate = True + for p in oaklib.get_direct_parents(t): + if p not in sssom_map['object_id']: + migrate = False + break + elif sssom_map[sssom_map['object_id']==p]['predicate_id'] = 'skos:exactMatch' \ + or sssom_map[sssom_map['object_id']==p]['predicate_id'] = 'skos:narrowMatch': + # In other words, if the parent is mapped, and the mapping is either exact or narrower + parents.append(sssom_map[sssom_map['object_id']==p]['subject_id']) + else: + # It's fine, just continue looking for other parents in this case + if migrate and parents: + next_mondo_id = determine_next_available_mondo_id(min_id, termlist_mondo) # starting from min_id, then counting up and checking that the ID does not already exist. + label = oaklib.get_label(t) + definition = oaklib.get_definition(t) + data.append({'mondo_id':next_mondo_id, 'xref': t, 'label': label, 'definition': definition}) + + pandas.DataFrame(data).to_csv(fn, sep="\t") + + +if __name__ == '__main__': + run()