Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove MMTF usage #544

Merged
merged 4 commits into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions doc/examples/scripts/structure/adjacency_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
In this example we create an adjacency matrix of the CA atoms in the
lysozyme crystal structure (PDB: 1AKI).
The values in the adjacency matrix ``m`` are
``m[i,j] = 1 if distance(i,j) <= threshold else 0``.
``m[i,j] = 1 if distance(i,j) <= threshold else 0``.
"""

# Code source: Patrick Kunzmann
Expand All @@ -16,12 +16,11 @@
import biotite.structure as struc
import biotite.structure.io as strucio
import biotite.database.rcsb as rcsb
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap


file_name = rcsb.fetch("1aki", "mmtf", gettempdir())
file_name = rcsb.fetch("1aki", "bcif", gettempdir())
array = strucio.load_structure(file_name)
# We only consider CA atoms
ca = array[array.atom_name == "CA"]
Expand Down
6 changes: 3 additions & 3 deletions doc/examples/scripts/structure/contact_sites.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import numpy as np
import biotite.structure as struc
import biotite.structure.io.mmtf as mmtf
import biotite.structure.io.pdbx as pdbx
import biotite.database.rcsb as rcsb


Expand All @@ -25,8 +25,8 @@


# Fetch and load structure
mmtf_file = mmtf.MMTFFile.read(rcsb.fetch("2or1", "mmtf"))
structure = mmtf.get_structure(mmtf_file, model=1)
pdbx_file = pdbx.BinaryCIFFile.read(rcsb.fetch("2or1", "bcif"))
structure = pdbx.get_structure(pdbx_file, model=1)


# Separate structure into the DNA and the two identical protein chains
Expand Down
2 changes: 1 addition & 1 deletion doc/examples/scripts/structure/diameter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import biotite.database.rcsb as rcsb

def get_diameter(pdb_id):
file_name = rcsb.fetch(pdb_id, "mmtf", gettempdir())
file_name = rcsb.fetch(pdb_id, "bcif", gettempdir())
atom_array = strucio.load_structure(file_name)
# Remove all non-amino acids
atom_array = atom_array[struc.filter_amino_acids(atom_array)]
Expand Down
21 changes: 11 additions & 10 deletions doc/examples/scripts/structure/disulfide_bonds.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@
# Code source: Patrick Kunzmann
# License: BSD 3 clause

import io
from tempfile import gettempdir
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import biotite.sequence as seq
import biotite.structure as struc
import biotite.structure.io as strucio
import biotite.structure.io.mmtf as mmtf
import biotite.structure.io.pdbx as pdbx
import biotite.database.rcsb as rcsb


Expand Down Expand Up @@ -96,18 +96,18 @@ def detect_disulfide_bonds(structure, distance=2.05, distance_tol=0.05,
# (PDB: `2IT7 <http://www.rcsb.org/structure/2IT7>`_).
# This motif is famous for its three characteristic disulfide bridges
# forming a 'knot'.
# However, the loaded MMTF file already has information about the
# covalent bonds - including the disulfide bridges.
# However, the loaded PDBx file already has information about
# the disulfide bridges.
# To have a proper test case, all disulfide bonds are removed from the
# structure and we pretend that the structure never had information
# about the disulfide bonds.
# For later verification that the implemented function wroks correctly,
# For later verification that the implemented function works correctly,
# the disulfide bonds, that are removed, are printed out.

mmtf_file = mmtf.MMTFFile.read(
rcsb.fetch("2IT7", "mmtf", gettempdir())
pdbx_file = pdbx.BinaryCIFFile.read(
rcsb.fetch("2IT7", "bcif", gettempdir())
)
knottin = mmtf.get_structure(mmtf_file, include_bonds=True, model=1)
knottin = pdbx.get_structure(pdbx_file, include_bonds=True, model=1)
sulfide_indices = np.where(
(knottin.res_name == "CYS") & (knottin.atom_name == "SG")
)[0]
Expand Down Expand Up @@ -175,8 +175,9 @@ def detect_disulfide_bonds(structure, distance=2.05, distance_tol=0.05,
knottin.bonds.add_bond(sg1_index, sg2_index, struc.BondType.SINGLE)
# The structure with added disulfide bonds
# could now be written back into a structure file
# At the moment, Biotite only supports bond input/outpout in MMTF files
#
# strucio.save_structure("structure_with_disulfide_bonds.mmtf", knottin)
out_file = pdbx.BinaryCIFFile()
pdbx.set_structure(out_file, knottin)
out_file.write(io.BytesIO())

plt.show()
9 changes: 4 additions & 5 deletions doc/examples/scripts/structure/docking.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,17 @@
from scipy.stats import spearmanr
import biotite.structure as struc
import biotite.structure.info as info
import biotite.structure.io.mmtf as mmtf
import biotite.structure.graphics as graphics
import biotite.structure.io.pdbx as pdbx
import biotite.database.rcsb as rcsb
import biotite.application.autodock as autodock


# Get the receptor structure
# and the original 'correct' conformation of the ligand
mmtf_file = mmtf.MMTFFile.read(rcsb.fetch("2RTG", "mmtf"))
structure = mmtf.get_structure(
pdbx_file = pdbx.BinaryCIFFile.read(rcsb.fetch("2RTG", "bcif"))
structure = pdbx.get_structure(
# Include formal charge for accurate partial charge calculation
mmtf_file, model=1, include_bonds=True, extra_fields=["charge"]
pdbx_file, model=1, include_bonds=True, extra_fields=["charge"]
)
# The asymmetric unit describes a streptavidin homodimer
# However, we are only interested in a single monomer
Expand Down
2 changes: 1 addition & 1 deletion doc/examples/scripts/structure/domain_hbonds.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import biotite.database.rcsb as rcsb


file_name = rcsb.fetch("2KB1", "mmtf", gettempdir())
file_name = rcsb.fetch("2KB1", "bcif", gettempdir())
stack = strucio.load_structure(file_name)
# Four identical chains, consider only chain A
chain_a = stack[:, stack.chain_id == "A"]
Expand Down
7 changes: 3 additions & 4 deletions doc/examples/scripts/structure/gap_bars.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
# License: BSD 3 clause

from tempfile import gettempdir
import biotite.structure as struc
import biotite.structure.io as strucio
import biotite.database.rcsb as rcsb
import matplotlib.pyplot as plt
Expand All @@ -25,7 +24,7 @@

def plot_gaps(pdb_id, chain_id, ax):
# Download and parse structure file
path = rcsb.fetch(pdb_id, "mmtf", gettempdir())
path = rcsb.fetch(pdb_id, "bcif", gettempdir())
atom_array = strucio.load_structure(path)
# Consider only one chain
atom_array = atom_array[atom_array.chain_id == chain_id]
Expand All @@ -43,7 +42,7 @@ def plot_gaps(pdb_id, chain_id, ax):
else:
# existing
states[i] = 2

# Find the intervals for each state
state_intervals = []
curr_state = None
Expand All @@ -58,7 +57,7 @@ def plot_gaps(pdb_id, chain_id, ax):
curr_start = i
curr_state = states[i]
state_intervals.append((curr_start, i, curr_state))

# Draw the state intervals as colored rectangles
for interval in state_intervals:
start = interval[0]
Expand Down
41 changes: 20 additions & 21 deletions doc/examples/scripts/structure/glycan_visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@
from networkx.drawing.nx_pydot import graphviz_layout
import biotite.sequence as seq
import biotite.structure as struc
import biotite.structure.info as info
import biotite.structure.io.mmtf as mmtf
import biotite.structure.io.pdbx as pdbx
import biotite.database.rcsb as rcsb


Expand Down Expand Up @@ -113,60 +112,60 @@
"All": ("o", "purple"),
"Tal": ("o", "lightsteelblue"),
"Ido": ("o", "chocolate"),

"GlcNAc": ("s", "royalblue"),
"ManNAc": ("s", "forestgreen"),
"GalNAc": ("s", "gold"),
"GulNAc": ("s", "darkorange"),
"AllNAc": ("s", "purple"),
"IdoNAc": ("s", "chocolate"),

"GlcN": ("1", "royalblue"),
"ManN": ("1", "forestgreen"),
"GalN": ("1", "gold"),

"GlcA": ("v", "royalblue"),
"ManA": ("v", "forestgreen"),
"GalA": ("v", "gold"),
"GulA": ("v", "darkorange"),
"TalA": ("v", "lightsteelblue"),
"IdoA": ("v", "chocolate"),

"Qui": ("^", "royalblue"),
"Rha": ("^", "forestgreen"),
"6dGul": ("^", "darkorange"),
"Fuc": ("^", "crimson"),

"QuiNAc": ("P", "royalblue"),
"FucNAc": ("P", "crimson"),

"Oli": ("X", "royalblue"),
"Tyv": ("X", "forestgreen"),
"Abe": ("X", "darkorange"),
"Par": ("X", "pink"),
"Dig": ("X", "purple"),

"Ara": ("*", "forestgreen"),
"Lyx": ("*", "gold"),
"Xyl": ("*", "darkorange"),
"Rib": ("*", "pink"),

"Kdn": ("D", "forestgreen"),
"Neu5Ac": ("D", "mediumvioletred"),
"Neu5Gc": ("D", "turquoise"),

"LDManHep": ("H", "forestgreen"),
"Kdo": ("H", "gold"),
"DDManHep": ("H", "pink"),
"MurNAc": ("H", "purple"),
"Mur": ("H", "chocolate"),

"Api": ("p", "royalblue"),
"Fru": ("p", "forestgreen"),
"Tag": ("p", "gold"),
"Sor": ("p", "darkorange"),
"Psi": ("p", "pink"),

# Default representation
None: ("h", "black")
}
Expand All @@ -183,8 +182,8 @@

PDB_ID = "4CUO"

mmtf_file = mmtf.MMTFFile.read(rcsb.fetch(PDB_ID, "mmtf"))
structure = mmtf.get_structure(mmtf_file, model=1, include_bonds=True)
pdbx_file = pdbx.BinaryCIFFile.read(rcsb.fetch(PDB_ID, "bcif"))
structure = pdbx.get_structure(pdbx_file, model=1, include_bonds=True)

# Create masks identifying whether an atom is part of a glycan...
is_glycan = np.isin(structure.res_name, list(SACCHARIDE_NAMES.keys()))
Expand Down Expand Up @@ -239,7 +238,7 @@
# So far, so good. We can already see glycans (red) on the long peptide
# chain (blue).
# The surrounding single nodes belong to water, ions etc.
# In the final plot only the glycans should be highlighted.
# In the final plot only the glycans should be highlighted.
# For this purpose the edges between all non-saccharides will be
# removed.
# The remaining subgraphs are either single nodes,
Expand Down Expand Up @@ -299,7 +298,7 @@
[(min(atom_i, atom_j), max(atom_i, atom_j))
for atom_i, atom_j in glycan_graph.edges()]
)

# The 'root' is the amino acid
root = [
atom_i for atom_i in glycan_graph.nodes() if is_amino_acid[atom_i]
Expand Down Expand Up @@ -342,18 +341,18 @@
pos_array[:,0] += structure.res_id[root]
# Convert array back to dictionary
pos = {node: tuple(coord) for node, coord in zip(nodes, pos_array)}

nx.draw_networkx_edges(
glycan_graph, pos, ax=ax,
arrows=False, node_size=0, width=LINE_WIDTH
)

# Draw each node individually
for atom_i in glycan_graph.nodes():
# Only plot glycans, not amino acids
if not is_glycan[atom_i]:
continue

# Now the above data sets come into play
common_name = SACCHARIDE_NAMES.get(structure.res_name[atom_i])
shape, color = SACCHARIDE_REPRESENTATION[common_name]
Expand Down Expand Up @@ -388,7 +387,7 @@
# Set the end of the axis to the last amino acid
ax.set_xlim(1, np.max(structure.res_id[is_amino_acid]))
ax.set_ylim(0, 7)
ax.set_title(mmtf_file["title"])
ax.set_title(pdbx_file.block["struct"]["title"].as_item())
fig.tight_layout()

# sphinx_gallery_thumbnail_number = 2
Expand Down
68 changes: 0 additions & 68 deletions doc/examples/scripts/structure/mmtf_archive_noexec.py

This file was deleted.

Loading
Loading