Skip to content

Commit

Permalink
CL annotation patch (#66)
Browse files (browse the repository at this point in the history)
* Refactored adding cell_type nodes and consists_of relations

* Refactored seed_list to seed_dict

* Updated test cases

* Updated version to 0.1.12

* Updated poetry.lock
  • Loading branch information
ubyndr authored Apr 16, 2024
1 parent ef2463e commit 0a4765c
Show file tree
Hide file tree
Showing 7 changed files with 678 additions and 1,021 deletions.
4 changes: 3 additions & 1 deletion pandasaurus_cxg/anndata_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,9 @@ def _enrich_co_annotation(enricher: AnndataEnricher):
enriched_df = enricher.enricher.enriched_df
if enriched_df.empty:
return enriched_df
return enriched_df[enriched_df["o"].isin(enricher.seed_list)][["s_label", "o_label"]]
return enriched_df[enriched_df["o"].isin(list(enricher.seed_dict.keys()))][
["s_label", "o_label"]
]

def _filter_data_and_drop_duplicates(self, field_name_1, field_name_2, disease):
# Filter the data based on the disease condition
Expand Down
10 changes: 7 additions & 3 deletions pandasaurus_cxg/anndata_enricher.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,12 @@ def __init__(
ontology_list_for_slims = ["Cell Ontology"]
# TODO Do we need to keep whole anndata? Would it be enough to keep the obs only?
self.anndata = anndata
self.seed_list = self.anndata.obs[cell_type_field].unique().tolist()
self.enricher = Query(self.seed_list)
self.seed_dict = dict(
self.anndata.obs.drop_duplicates(subset=[cell_type_field, "cell_type"])[
[cell_type_field, "cell_type"]
].values
)
self.enricher = Query(list(self.seed_dict.keys()))
try:
unique_context = self.anndata.obs[
[context_field, context_field_label]
Expand Down Expand Up @@ -226,7 +230,7 @@ def set_enricher_property_list(self, property_list: List[str]):
Args:
property_list (List[str]): The list of properties to include in the enrichment analysis.
"""
self.enricher = Query(self.seed_list, property_list)
self.enricher = Query(list(self.seed_dict.keys()), property_list)

def validate_slim_list(self, slim_list):
"""Check if any slim term in the given list is invalid.
Expand Down
43 changes: 14 additions & 29 deletions pandasaurus_cxg/graph_generator/graph_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,38 +147,11 @@ def generate_rdf_graph(self):
# transitive reduction step
self.graph = graphgen.apply_transitive_reduction(self.graph, [subcluster.toPython()])

def enrich_rdf_graph(self):
"""
Enrich RDF graph with enriched DataFrame from AnndataEnricher
Returns:
"""
if self.ea.enricher_manager.enricher.enriched_df.empty:
# TODO or we can just call simple_enrichment method
enrichment_methods = [i for i in dir(AnndataEnricher) if "_enrichment" in i]
enrichment_methods.sort()
raise MissingEnrichmentProcess(enrichment_methods)
cell_type_dict = (
pd.concat(
[
self.ea.enricher_manager.enricher.enriched_df[["s", "s_label"]],
self.ea.enricher_manager.enricher.enriched_df[["o", "o_label"]].rename(
columns={"o": "s", "o_label": "s_label"}
),
],
axis=0,
ignore_index=True,
)
.drop_duplicates()
.set_index("s")["s_label"]
.to_dict()
)
# add cell_type nodes and consists_of relations
cl_namespace = Namespace("http://purl.obolibrary.org/obo/CL_")
consist_of = URIRef(CONSIST_OF.get("iri"))
self.graph.add((consist_of, RDFS.label, Literal(CONSIST_OF.get("label"))))
for curie, label in cell_type_dict.items():
for curie, label in self.ea.enricher_manager.seed_dict.items():
resource = cl_namespace[curie.split(":")[-1]]
self.graph.add((resource, RDFS.label, Literal(label)))
self.graph.add((resource, RDF.type, OWL.Class))
Expand All @@ -192,7 +165,19 @@ def enrich_rdf_graph(self):
# Add the restriction
self.graph.add((s, RDF.type, class_expression_bnode))

# add enrichment graph
def enrich_rdf_graph(self):
    """
    Add the enrichment graph held by the enricher to ``self.graph``.

    The enricher is read from ``self.ea.enricher_manager.enricher``; its ``graph``
    is combined into ``self.graph`` with ``+=``.

    Raises:
        MissingEnrichmentProcess: If the enricher's ``enriched_df`` is empty. The
            exception receives the sorted names of the ``AnndataEnricher``
            attributes whose name contains ``"_enrichment"``.
    """
    enricher = self.ea.enricher_manager.enricher
    if enricher.enriched_df.empty:
        # TODO or we can just call simple_enrichment method
        raise MissingEnrichmentProcess(
            sorted(name for name in dir(AnndataEnricher) if "_enrichment" in name)
        )
    # add enrichment graph, subClassOf relations
    self.graph += enricher.graph

def save_rdf_graph(
Expand Down
Loading

0 comments on commit 0a4765c

Please sign in to comment.