Skip to content

Commit

Permalink
Encode sym_id as integers
Browse files Browse the repository at this point in the history
  • Loading branch information
padix-key committed Dec 20, 2024
1 parent fda7181 commit 92f6834
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 18 deletions.
14 changes: 11 additions & 3 deletions src/biotite/structure/io/pdb/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -855,8 +855,12 @@ def get_assembly(
affected_chain_ids = []
transform_start = None
for j, line in enumerate(assembly_lines[start:stop]):
if line.startswith("APPLY THE FOLLOWING TO CHAINS:") or line.startswith(
" AND CHAINS:"
if any(
line.startswith(chain_signal_string)
for chain_signal_string in [
"APPLY THE FOLLOWING TO CHAINS:",
" AND CHAINS:",
]
):
affected_chain_ids += [
chain_id.strip() for chain_id in line[30:].split(",")
Expand Down Expand Up @@ -1150,7 +1154,11 @@ def _apply_transformations(structure, rotations, translations):
coord += translation
assembly_coord[i] = coord

return repeat(structure, assembly_coord)
assembly = repeat(structure, assembly_coord)
assembly.set_annotation(
"sym_id", np.repeat(np.arange(len(rotations)), structure.array_length())
)
return assembly


def _check_pdb_compatibility(array, hybrid36):
Expand Down
7 changes: 3 additions & 4 deletions src/biotite/structure/io/pdbx/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -1649,7 +1649,6 @@ def _apply_transformations(structure, transformation_dict, operations):
"""
# Additional first dimension for 'structure.repeat()'
assembly_coord = np.zeros((len(operations),) + structure.coord.shape)
sym_ids = []
# Apply corresponding transformation for each copy in the assembly
for i, operation in enumerate(operations):
coord = structure.coord
Expand All @@ -1661,12 +1660,12 @@ def _apply_transformations(structure, transformation_dict, operations):
coord = matrix_rotate(coord, rotation_matrix)
# Translate
coord += translation_vector

sym_ids.append("-".join(list(operation)))
assembly_coord[i] = coord

assembly = repeat(structure, assembly_coord)
assembly.set_annotation("sym_id", np.repeat(sym_ids, structure.array_length()))
assembly.set_annotation(
"sym_id", np.repeat(np.arange(len(operations)), structure.array_length())
)
return assembly


Expand Down
3 changes: 0 additions & 3 deletions tests/structure/io/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,6 @@ def test_pdbx_consistency_assembly(path, model):
ref_assembly = pdbx.get_assembly(pdbx_file, model=model)

for category in ref_assembly.get_annotation_categories():
if category == "sym_id":
# Symmetry ID annotation is currently not returned for PDB files
continue
assert (
test_assembly.get_annotation(category).tolist()
== ref_assembly.get_annotation(category).tolist()
Expand Down
16 changes: 8 additions & 8 deletions tests/structure/io/test_pdbx.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,19 +479,19 @@ def test_assembly_chain_count(format, pdb_id, model):


@pytest.mark.parametrize(
"pdb_id, assembly_id, ref_sym_ids",
"pdb_id, assembly_id, symmetric_unit_count",
[
# Single operation
("5zng", "1", np.array([1]).astype(str)),
("5zng", "1", 1),
# Multiple operations with continuous operation IDs
("1f2n", "1", np.arange(1, 60 + 1).astype(str)),
("1f2n", "1", 60),
# Multiple operations with discontinuous operation IDs
("1f2n", "4", np.array([1, 2, 6, 10, 23, 24]).astype(str)),
("1f2n", "4", 6),
# Multiple combined operations
("1f2n", "6", np.char.add(np.arange(1, 60 + 1).astype(str), "-X0")),
("1f2n", "6", 60),
],
)
def test_assembly_sym_id(pdb_id, assembly_id, ref_sym_ids):
def test_assembly_sym_id(pdb_id, assembly_id, symmetric_unit_count):
"""
Check if the :func:`get_assembly()` function returns the correct
symmetry ID annotation for a known example.
Expand All @@ -500,11 +500,11 @@ def test_assembly_sym_id(pdb_id, assembly_id, ref_sym_ids):
assembly = pdbx.get_assembly(pdbx_file, assembly_id=assembly_id)
# 'unique_indices' contains the FIRST occurrence of each unique value
unique_sym_ids, unique_indices = np.unique(assembly.sym_id, return_index=True)
# Sort by first occurrence instead of alphabetically
# Sort by first occurrence
order = np.argsort(unique_indices)
unique_sym_ids = unique_sym_ids[order]
unique_indices = unique_indices[order]
assert unique_sym_ids.tolist() == ref_sym_ids.tolist()
assert unique_sym_ids.tolist() == list(range(symmetric_unit_count))
# Every asymmetric unit should have the same length,
# as each operation is applied to all atoms in the asymmetric unit
asym_lengths = np.diff(np.append(unique_indices, assembly.array_length()))
Expand Down

0 comments on commit 92f6834

Please sign in to comment.