Skip to content

Commit

Permalink
Add number of cells to each cell set node (#91)
Browse files Browse the repository at this point in the history
* Update version to 0.2.2

* Refactor co_annotation_report by introducing _generate_co_annotation_dataframe

* Add cell_count to cell sets

* Update test_graph_generator.py
  • Loading branch information
ubyndr authored Nov 28, 2024
1 parent 01cd36d commit b3cf832
Show file tree
Hide file tree
Showing 5 changed files with 1,159 additions and 661 deletions.
49 changes: 46 additions & 3 deletions pandasaurus_cxg/anndata_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,9 @@ def from_file_path(file_path: str, author_cell_type_list: Optional[List[str]] =
"""
return AnndataAnalyzer(AnndataLoader.load_from_file(file_path), author_cell_type_list)

def co_annotation_report(self, disease: Optional[str] = None, enrich: bool = False):
def co_annotation_report(
self, disease: Optional[str] = None, enrich: bool = False
) -> pd.DataFrame:
"""
Generates a co-annotation report based on the provided schema.
Expand All @@ -103,7 +105,23 @@ def co_annotation_report(self, disease: Optional[str] = None, enrich: bool = Fal
Returns:
pd.DataFrame: The co-annotation report.
"""
# Call the core method to generate the full DataFrame
full_df = self._generate_co_annotation_dataframe(disease, enrich)

# Return only the first 5 columns
return full_df.iloc[:, :5]

def _generate_co_annotation_dataframe(self, disease: Optional[str] = None, enrich: bool = False):
"""
Core method to generate a full co-annotation dataframe.
Args:
disease (Optional[str]): A valid disease CURIE used to filter the rows.
enrich (bool): Whether to enable enrichment in the co-annotation report.
Returns:
pd.DataFrame: The complete co-annotation dataframe with all columns.
"""
# TODO: needs refactoring to decide which enrichment method to use. Or would it be better to accept
# enriched_df as a parameter, so users get to decide?
Expand Down Expand Up @@ -136,6 +154,16 @@ def co_annotation_report(self, disease: Optional[str] = None, enrich: bool = Fal
).reset_index(drop=True)

AnndataAnalyzer._assign_predicate_column(co_oc, field_name_1, field_name_2)
# Calculate cell counts for `field_name_1`
field_1_counts = (
self._anndata.obs.groupby(field_name_1, observed=False).size().to_dict()
)
co_oc[f"{field_name_1}_cell_count"] = co_oc[field_name_1].map(field_1_counts)
# Calculate cell counts for `field_name_2`
field_2_counts = (
self._anndata.obs.groupby(field_name_2, observed=False).size().to_dict()
)
co_oc[f"{field_name_2}_cell_count"] = co_oc[field_name_2].map(field_2_counts)
temp_result.extend(co_oc.to_dict(orient="records"))

result = [
Expand All @@ -144,8 +172,23 @@ def co_annotation_report(self, disease: Optional[str] = None, enrich: bool = Fal
]
# unique_result = AnndataAnalyzer._remove_duplicates(result)
self.report_df = pd.DataFrame(
[inner_list[:2] + inner_list[5:6] + inner_list[2:4] for inner_list in result],
columns=["field_name1", "value1", "predicate", "field_name2", "value2"],
[
inner_list[:2]
+ inner_list[5:6]
+ inner_list[2:4]
+ inner_list[7:8]
+ inner_list[9:10]
for inner_list in result
],
columns=[
"field_name1",
"value1",
"predicate",
"field_name2",
"value2",
"field_name1_cell_count",
"field_name2_cell_count",
],
)
return self.report_df

Expand Down
2 changes: 1 addition & 1 deletion pandasaurus_cxg/graph_generator/graph_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def generate_rdf_graph(self, merge: bool = False):
temp_dict = {}
for inner_list in inner_dict.values.tolist():
# Initialize the base dictionary based on the current list
inner_dict_uuid = {inner_list[0]: inner_list[1]}
inner_dict_uuid = {inner_list[0]: inner_list[1], "cell_count": inner_list[5]}

# Update dictionary based on specific conditions
if inner_list[2] == "subcluster_of":
Expand Down
Loading

0 comments on commit b3cf832

Please sign in to comment.