From 92f68348a96db15ee633185e6b00bd3b54e73ec9 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Fri, 13 Dec 2024 16:31:10 +0100 Subject: [PATCH] Encode `sym_id` as integers --- src/biotite/structure/io/pdb/file.py | 14 +++++++++++--- src/biotite/structure/io/pdbx/convert.py | 7 +++---- tests/structure/io/test_pdb.py | 3 --- tests/structure/io/test_pdbx.py | 16 ++++++++-------- 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/src/biotite/structure/io/pdb/file.py b/src/biotite/structure/io/pdb/file.py index 2235cb1fa..4ae2bb516 100644 --- a/src/biotite/structure/io/pdb/file.py +++ b/src/biotite/structure/io/pdb/file.py @@ -855,8 +855,12 @@ def get_assembly( affected_chain_ids = [] transform_start = None for j, line in enumerate(assembly_lines[start:stop]): - if line.startswith("APPLY THE FOLLOWING TO CHAINS:") or line.startswith( - " AND CHAINS:" + if any( + line.startswith(chain_signal_string) + for chain_signal_string in [ + "APPLY THE FOLLOWING TO CHAINS:", + " AND CHAINS:", + ] ): affected_chain_ids += [ chain_id.strip() for chain_id in line[30:].split(",") @@ -1150,7 +1154,11 @@ def _apply_transformations(structure, rotations, translations): coord += translation assembly_coord[i] = coord - return repeat(structure, assembly_coord) + assembly = repeat(structure, assembly_coord) + assembly.set_annotation( + "sym_id", np.repeat(np.arange(len(rotations)), structure.array_length()) + ) + return assembly def _check_pdb_compatibility(array, hybrid36): diff --git a/src/biotite/structure/io/pdbx/convert.py b/src/biotite/structure/io/pdbx/convert.py index dfb18ad96..5367c74bd 100644 --- a/src/biotite/structure/io/pdbx/convert.py +++ b/src/biotite/structure/io/pdbx/convert.py @@ -1649,7 +1649,6 @@ def _apply_transformations(structure, transformation_dict, operations): """ # Additional first dimesion for 'structure.repeat()' assembly_coord = np.zeros((len(operations),) + structure.coord.shape) - sym_ids = [] # Apply corresponding transformation for each copy in the assembly for i, operation in enumerate(operations): coord = structure.coord @@ -1661,12 +1660,12 @@ def _apply_transformations(structure, transformation_dict, operations): coord = matrix_rotate(coord, rotation_matrix) # Translate coord += translation_vector - - sym_ids.append("-".join(list(operation))) assembly_coord[i] = coord assembly = repeat(structure, assembly_coord) - assembly.set_annotation("sym_id", np.repeat(sym_ids, structure.array_length())) + assembly.set_annotation( + "sym_id", np.repeat(np.arange(len(operations)), structure.array_length()) + ) return assembly diff --git a/tests/structure/io/test_pdb.py b/tests/structure/io/test_pdb.py index 81b4f8b08..3b1ef07a6 100644 --- a/tests/structure/io/test_pdb.py +++ b/tests/structure/io/test_pdb.py @@ -158,9 +158,6 @@ def test_pdbx_consistency_assembly(path, model): ref_assembly = pdbx.get_assembly(pdbx_file, model=model) for category in ref_assembly.get_annotation_categories(): - if category == "sym_id": - # Symmetry ID annotation is currently not returned for PDB files - continue assert ( test_assembly.get_annotation(category).tolist() == ref_assembly.get_annotation(category).tolist() diff --git a/tests/structure/io/test_pdbx.py b/tests/structure/io/test_pdbx.py index eb8112891..2b80ecfda 100644 --- a/tests/structure/io/test_pdbx.py +++ b/tests/structure/io/test_pdbx.py @@ -479,19 +479,19 @@ def test_assembly_chain_count(format, pdb_id, model): @pytest.mark.parametrize( - "pdb_id, assembly_id, ref_sym_ids", + "pdb_id, assembly_id, symmetric_unit_count", [ # Single operation - ("5zng", "1", np.array([1]).astype(str)), + ("5zng", "1", 1), # Multiple operations with continuous operation IDs - ("1f2n", "1", np.arange(1, 60 + 1).astype(str)), + ("1f2n", "1", 60), # Multiple operations with discontinuous operation IDs - ("1f2n", "4", np.array([1, 2, 6, 10, 23, 24]).astype(str)), + ("1f2n", "4", 6), # Multiple combined operations - ("1f2n", "6", np.char.add(np.arange(1, 60 + 1).astype(str), "-X0")), + ("1f2n", "6", 60), ], ) -def test_assembly_sym_id(pdb_id, assembly_id, ref_sym_ids): +def test_assembly_sym_id(pdb_id, assembly_id, symmetric_unit_count): """ Check if the :func:`get_assembly()` function returns the correct symmetry ID annotation for a known example. @@ -500,11 +500,11 @@ def test_assembly_sym_id(pdb_id, assembly_id, ref_sym_ids): assembly = pdbx.get_assembly(pdbx_file, assembly_id=assembly_id) # 'unique_indices' contains the FIRST occurence of each unique value unique_sym_ids, unique_indices = np.unique(assembly.sym_id, return_index=True) - # Sort by first occurrence instead of alphabetically + # Sort by first occurrence order = np.argsort(unique_indices) unique_sym_ids = unique_sym_ids[order] unique_indices = unique_indices[order] - assert unique_sym_ids.tolist() == ref_sym_ids.tolist() + assert unique_sym_ids.tolist() == list(range(symmetric_unit_count)) # Every asymmetric unit should have the same length, # as each operation is applied to all atoms in the asymmetric unit asym_lengths = np.diff(np.append(unique_indices, assembly.array_length()))