Skip to content

Commit

Permalink
Merge pull request #700 from alex-hh/disambiguate-assembly-chains
Browse files Browse the repository at this point in the history
disambiguate assembly chains by appending assembly id
  • Loading branch information
padix-key authored Dec 20, 2024
2 parents 8155123 + 92f6834 commit 97ea5be
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 10 deletions.
7 changes: 4 additions & 3 deletions src/biotite/structure/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,15 @@
The following annotation categories are optionally used by some
functions:
========= =========== ================= ============================
========= =========== ================= =========================================
Category Type Examples Description
========= =========== ================= ============================
========= =========== ================= =========================================
atom_id int 1,2,3, ... Atom serial number
b_factor float 0.9, 12.3, ... Temperature factor
occupancy float .1, .3, .9, ... Occupancy
charge int -2,-1,0,1,2, ... Electric charge of the atom
========= =========== ================= ============================
sym_id int 0,1,2, ... Symmetry ID for assemblies/symmetry mates
========= =========== ================= =========================================
For each type, the attributes can be accessed directly.
Both :class:`AtomArray` and :class:`AtomArrayStack` support
Expand Down
2 changes: 2 additions & 0 deletions src/biotite/structure/io/pdb/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,8 @@ def get_assembly(
assembly : AtomArray or AtomArrayStack
The assembly.
The return type depends on the `model` parameter.
Contains the `sym_id` annotation, which enumerates the copies of the asymmetric
unit in the assembly.
Examples
--------
Expand Down
16 changes: 13 additions & 3 deletions src/biotite/structure/io/pdb/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,6 +790,8 @@ def get_assembly(
assembly : AtomArray or AtomArrayStack
The assembly.
The return type depends on the `model` parameter.
Contains the `sym_id` annotation, which enumerates the copies of the
asymmetric unit in the assembly.
Examples
--------
Expand Down Expand Up @@ -853,8 +855,12 @@ def get_assembly(
affected_chain_ids = []
transform_start = None
for j, line in enumerate(assembly_lines[start:stop]):
if line.startswith("APPLY THE FOLLOWING TO CHAINS:") or line.startswith(
" AND CHAINS:"
if any(
line.startswith(chain_signal_string)
for chain_signal_string in [
"APPLY THE FOLLOWING TO CHAINS:",
" AND CHAINS:",
]
):
affected_chain_ids += [
chain_id.strip() for chain_id in line[30:].split(",")
Expand Down Expand Up @@ -1148,7 +1154,11 @@ def _apply_transformations(structure, rotations, translations):
coord += translation
assembly_coord[i] = coord

return repeat(structure, assembly_coord)
assembly = repeat(structure, assembly_coord)
assembly.set_annotation(
"sym_id", np.repeat(np.arange(len(rotations)), structure.array_length())
)
return assembly


def _check_pdb_compatibility(array, hybrid36):
Expand Down
12 changes: 9 additions & 3 deletions src/biotite/structure/io/pdbx/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -1557,7 +1557,10 @@ def get_assembly(
Returns
-------
assembly : AtomArray or AtomArrayStack
The assembly. The return type depends on the `model` parameter.
The assembly.
The return type depends on the `model` parameter.
Contains the `sym_id` annotation, which enumerates the copies of the asymmetric
unit in the assembly.
Examples
--------
Expand Down Expand Up @@ -1646,7 +1649,6 @@ def _apply_transformations(structure, transformation_dict, operations):
"""
# Additional first dimension for 'structure.repeat()'
assembly_coord = np.zeros((len(operations),) + structure.coord.shape)

# Apply corresponding transformation for each copy in the assembly
for i, operation in enumerate(operations):
coord = structure.coord
Expand All @@ -1660,7 +1662,11 @@ def _apply_transformations(structure, transformation_dict, operations):
coord += translation_vector
assembly_coord[i] = coord

return repeat(structure, assembly_coord)
assembly = repeat(structure, assembly_coord)
assembly.set_annotation(
"sym_id", np.repeat(np.arange(len(operations)), structure.array_length())
)
return assembly


def _get_transformations(struct_oper):
Expand Down
35 changes: 34 additions & 1 deletion tests/structure/io/test_pdbx.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ def test_list_assemblies(format):
"format, pdb_id, model",
itertools.product(["cif", "bcif"], ["1f2n", "5zng"], [None, 1, -1]),
)
def test_get_assembly(format, pdb_id, model):
def test_assembly_chain_count(format, pdb_id, model):
"""
Test whether the :func:`get_assembly()` function produces the same
number of peptide chains as the
Expand Down Expand Up @@ -478,6 +478,39 @@ def test_get_assembly(format, pdb_id, model):
assert assembly.array_length() % monomer_atom_count == 0


@pytest.mark.parametrize(
    "pdb_id, assembly_id, symmetric_unit_count",
    [
        # Single operation
        ("5zng", "1", 1),
        # Multiple operations with continuous operation IDs
        ("1f2n", "1", 60),
        # Multiple operations with discontinuous operation IDs
        ("1f2n", "4", 6),
        # Multiple combined operations
        ("1f2n", "6", 60),
    ],
)
def test_assembly_sym_id(pdb_id, assembly_id, symmetric_unit_count):
    """
    Verify the ``sym_id`` annotation of an assembly built by
    :func:`get_assembly()` for known examples:
    the distinct IDs, taken in order of first appearance, must be
    ``0, 1, ..., symmetric_unit_count - 1`` and the spans between
    consecutive first appearances must all have the same length.
    """
    bcif_path = join(data_dir("structure"), f"{pdb_id}.bcif")
    assembly = pdbx.get_assembly(
        pdbx.BinaryCIFFile.read(bcif_path), assembly_id=assembly_id
    )

    # 'first_positions' holds the index of the FIRST occurrence
    # of each distinct symmetry ID
    sym_ids, first_positions = np.unique(assembly.sym_id, return_index=True)
    # 'np.unique()' sorts by value -> reorder by first occurrence instead
    by_appearance = np.argsort(first_positions)
    sym_ids = sym_ids[by_appearance]
    first_positions = first_positions[by_appearance]
    assert sym_ids.tolist() == list(range(symmetric_unit_count))

    # Each operation is applied to all atoms of the asymmetric unit,
    # hence every copy is expected to span the same number of atoms;
    # the total atom count closes the last span
    boundaries = np.append(first_positions, assembly.array_length())
    copy_lengths = np.diff(boundaries)
    assert np.all(copy_lengths == copy_lengths[0])


@pytest.mark.parametrize(
"path, use_ideal_coord",
itertools.product(
Expand Down

0 comments on commit 97ea5be

Please sign in to comment.