Source code for binana.load_ligand_receptor

# This file is part of BINANA, released under the Apache 2.0 License. See
# LICENSE.md or go to https://opensource.org/licenses/Apache-2.0 for full
# details. Copyright 2021 Jacob D. Durrant.

"""This module contains functions for loading ligands and receptors. Note that
while BINANA can process PDB files, the PDB format lacks some information
required for a full BINANA analysis. The PDBQT format is recommended."""

import binana
from math import sqrt, pi
from binana._structure.point import Point as _Point
from binana._structure.mol import Mol as _Mol
from binana._utils import _math_functions

_ligand_receptor_dists_cache = {}
_ligand_receptor_aromatic_dists = None

# Contains all pi-pi interactions (of all types).
# pi_interactions = None


def from_texts(ligand_text, receptor_text, max_cutoff=None):
    """Builds ligand and receptor molecules from PDBQT- or PDB-formatted
    strings (text). PDBQT recommended.

    Args:
        ligand_text (str): The ligand text to load. Preferably PDBQT
            formatted, though BINANA can perform most analyses on PDB files
            as well.
        receptor_text (str): The receptor text to load. Preferably PDBQT
            formatted, though BINANA can perform most analyses on PDB files
            as well.
        max_cutoff (float, optional): If specified, will only load receptor
            atoms that fall within a cube extending this many angstroms
            beyond the ligand in the x, y, and z directions. Can dramatically
            speed calculations on large proteins, if an appropriate
            max_cutoff is known beforehand. On the other hand, may prevent
            proper assignment of secondary structure. Defaults to None,
            meaning load all receptor atoms.

    Returns:
        tuple: Two binana._structure.mol.Mol objects, for the ligand and
        receptor, respectively.
    """

    lig_mol = _Mol()
    lig_mol.load_pdb_from_text(ligand_text)

    rec_mol = _Mol()
    if max_cutoff is not None:
        # Pad the ligand's extents by max_cutoff along every axis; these
        # bounds are handed to the receptor loader.
        x_lo = lig_mol.min_x - max_cutoff
        x_hi = lig_mol.max_x + max_cutoff
        y_lo = lig_mol.min_y - max_cutoff
        y_hi = lig_mol.max_y + max_cutoff
        z_lo = lig_mol.min_z - max_cutoff
        z_hi = lig_mol.max_z + max_cutoff

        # The second positional argument is deliberately passed as None.
        rec_mol.load_pdb_from_text(
            receptor_text, None, x_lo, x_hi, y_lo, y_hi, z_lo, z_hi
        )
    else:
        # No cutoff given: load the full receptor (all atoms).
        rec_mol.load_pdb_from_text(receptor_text)

    rec_mol.assign_secondary_structure()

    # Reset the module-level caches now that new molecules are loaded.
    _clear_cache()

    return lig_mol, rec_mol


def from_files(ligand_filename, receptor_filename, max_cutoff=None):
    """Builds ligand and receptor molecules from PDBQT or PDB files. PDBQT
    recommended.

    Args:
        ligand_filename (str): The ligand filename to load. Preferably PDBQT
            formatted, though BINANA can perform most analyses on PDB files
            as well.
        receptor_filename (str): The receptor filename to load. Preferably
            PDBQT formatted, though BINANA can perform most analyses on PDB
            files as well.
        max_cutoff (float, optional): If specified, will only load receptor
            atoms that fall within a cube extending this many angstroms
            beyond the ligand in the x, y, and z directions. Can dramatically
            speed calculations on large proteins, if an appropriate
            max_cutoff is known beforehand. On the other hand, may prevent
            proper assignment of secondary structure. Defaults to None,
            meaning load all receptor atoms.

    Returns:
        tuple: Two binana._structure.mol.Mol objects, for the ligand and
        receptor, respectively.
    """

    lig_mol = _Mol()
    lig_mol.load_pdb_file(ligand_filename)

    rec_mol = _Mol()
    if max_cutoff is not None:
        # Pad the ligand's extents by max_cutoff along every axis; these
        # bounds are handed to the receptor loader.
        x_lo = lig_mol.min_x - max_cutoff
        x_hi = lig_mol.max_x + max_cutoff
        y_lo = lig_mol.min_y - max_cutoff
        y_hi = lig_mol.max_y + max_cutoff
        z_lo = lig_mol.min_z - max_cutoff
        z_hi = lig_mol.max_z + max_cutoff

        rec_mol.load_pdb_file(
            receptor_filename, x_lo, x_hi, y_lo, y_hi, z_lo, z_hi
        )
    else:
        # No cutoff given: load the full receptor (all atoms).
        rec_mol.load_pdb_file(receptor_filename)

    rec_mol.assign_secondary_structure()

    # Reset the module-level caches now that new molecules are loaded.
    _clear_cache()

    return lig_mol, rec_mol


# Records which (ligand, receptor, cutoff) combination produced the list
# currently stored in _ligand_receptor_aromatic_dists. None means that no
# cached result is available.
_ligand_receptor_aromatic_dists_key = None


def _clear_cache():
    """Resets the module-level caches.

    from_texts() and from_files() call this after loading, so results computed
    for previously loaded molecules are not reused.
    """

    global _ligand_receptor_dists_cache
    global _ligand_receptor_aromatic_dists
    global _ligand_receptor_aromatic_dists_key

    _ligand_receptor_dists_cache = {}
    _ligand_receptor_aromatic_dists = None
    _ligand_receptor_aromatic_dists_key = None


def _get_coor_mol_dists(atom, coor, mol_all_atoms, max_dist_sqr, dist_inf_list):
    """Gets the distances between an atom/coordinate and the atoms of a
    molecule.

    Matches are appended to dist_inf_list in place; nothing is returned.

    Args:
        atom: The atom. If mol is the receptor, atom is a ligand atom, and
            vice versa.
        coor: The corresponding coordinate (an object with x, y, and z
            attributes). Should be the same as atom.coordinates; it is passed
            separately because not retrieving the value every time is thought
            to be faster.
        mol_all_atoms (list): All the atoms in the molecule (e.g., receptor).
        max_dist_sqr (float): The square of the maximum distance to consider.
        dist_inf_list (list): A list to store the information about the
            distances. One (atom, mol_atom, dist) tuple is appended for each
            molecule atom that is no farther away than the maximum distance.
    """

    # The reference coordinate does not change inside the loop, so read its
    # components once.
    coor_x = coor.x
    coor_y = coor.y
    coor_z = coor.z

    # NOTE: A per-pair distance cache was tried here. In benchmarks it slowed
    # things down quite a bit (especially in JavaScript, but even in Python),
    # so distances are always recomputed.
    for mol_atom in mol_all_atoms:
        mol_coor = mol_atom.coordinates

        # Plain multiplication is used because benchmarks suggest it is faster
        # than things like math.pow, math.fabs, etc. The running sum is checked
        # after each axis so most distant atoms are rejected early.
        delta_x = mol_coor.x - coor_x
        summed = delta_x * delta_x
        if summed > max_dist_sqr:
            continue

        delta_y = mol_coor.y - coor_y
        summed += delta_y * delta_y
        if summed > max_dist_sqr:
            continue

        delta_z = mol_coor.z - coor_z
        summed += delta_z * delta_z
        if summed > max_dist_sqr:
            continue

        dist_inf_list.append((atom, mol_atom, sqrt(summed)))


def _get_ligand_receptor_dists(ligand, receptor, max_dist, elements=None):
    """Gets all ligand-atom/receptor-atom pairs that are within a given
    distance of each other.

    Args:
        ligand: The ligand. Its all_atoms attribute must be a dictionary whose
            values are the atoms.
        receptor: The receptor. Its all_atoms attribute must be a dictionary
            whose values are the atoms.
        max_dist (float): The maximum distance to consider. Pairs exactly at
            this distance are included.
        elements (optional): If specified, only atoms (of both the ligand and
            the receptor) whose element is contained in this collection are
            considered. Defaults to None, meaning consider all atoms.

    Returns:
        list: A list of (ligand_atom, receptor_atom, dist) tuples.
    """

    # Get all the atoms.
    ligand_all_atoms_dict = ligand.all_atoms
    receptor_all_atoms_dict = receptor.all_atoms
    ligand_all_atoms = [
        ligand_all_atoms_dict[i] for i in ligand_all_atoms_dict.keys()
    ]
    receptor_all_atoms = [
        receptor_all_atoms_dict[i] for i in receptor_all_atoms_dict.keys()
    ]

    # Filter the atoms by element if needed.
    if elements is not None:
        ligand_all_atoms = [a for a in ligand_all_atoms if a.element in elements]
        receptor_all_atoms = [
            a for a in receptor_all_atoms if a.element in elements
        ]

    # Compare squared distances so that sqrt is only needed for actual hits.
    max_dist_sqr = max_dist * max_dist

    ligand_receptor_dists = []
    for ligand_atom in ligand_all_atoms:
        _get_coor_mol_dists(
            ligand_atom,
            ligand_atom.coordinates,
            receptor_all_atoms,
            max_dist_sqr,
            ligand_receptor_dists,
        )

    return ligand_receptor_dists


def _get_ligand_receptor_aromatic_dists(ligand, receptor, pi_pi_general_dist_cutoff):
    """Gets the ligand/receptor aromatic-ring pairs whose centers are closer
    than a cutoff, together with the angle between the two ring planes.

    The result is cached at the module level. The cached list is reused only
    when this function is called again with the same ligand object, the same
    receptor object, and an equal cutoff; any other call recomputes the list
    (and replaces the cache) rather than returning results that belong to
    different arguments.

    Args:
        ligand: The ligand. Its aromatic_rings attribute is iterated; each
            ring must provide center (with a dist_to method) and plane_coeff.
        receptor: The receptor, with the same requirements as the ligand.
        pi_pi_general_dist_cutoff (float): Ring pairs are kept only if the
            distance between their centers is strictly less than this value.

    Returns:
        list: A list of (ligand_ring, receptor_ring, dist, angle) tuples, where
        angle is the value from _math_functions.angle_between_points
        multiplied by 180 / pi (i.e., degrees, assuming that function returns
        radians). The returned list is the cached object itself, so callers
        should not modify it.
    """

    global _ligand_receptor_aromatic_dists
    global _ligand_receptor_aromatic_dists_key

    # Get it from the cache, but only if it was computed for these exact
    # arguments.
    key = _ligand_receptor_aromatic_dists_key
    if (
        _ligand_receptor_aromatic_dists is not None
        and key is not None
        and key[0] is ligand
        and key[1] is receptor
        and key[2] == pi_pi_general_dist_cutoff
    ):
        return _ligand_receptor_aromatic_dists

    aromatic_dists = []

    for ligand_aromatic in ligand.aromatic_rings:
        for receptor_aromatic in receptor.aromatic_rings:
            dist = ligand_aromatic.center.dist_to(receptor_aromatic.center)
            if dist < pi_pi_general_dist_cutoff:
                # So there could be some pi-pi interactions. Record the angle
                # between the two ring planes so callers can check, e.g.,
                # whether the rings are roughly parallel (stacking).
                ligand_aromatic_norm_vector = _Point(
                    ligand_aromatic.plane_coeff[0],
                    ligand_aromatic.plane_coeff[1],
                    ligand_aromatic.plane_coeff[2],
                )

                receptor_aromatic_norm_vector = _Point(
                    receptor_aromatic.plane_coeff[0],
                    receptor_aromatic.plane_coeff[1],
                    receptor_aromatic.plane_coeff[2],
                )

                angle_between_planes = (
                    _math_functions.angle_between_points(
                        ligand_aromatic_norm_vector, receptor_aromatic_norm_vector
                    )
                    * 180.0
                    / pi
                )

                aromatic_dists.append(
                    (
                        ligand_aromatic,
                        receptor_aromatic,
                        dist,
                        angle_between_planes,
                    )
                )

    # Store the result along with the arguments that produced it.
    _ligand_receptor_aromatic_dists = aromatic_dists
    _ligand_receptor_aromatic_dists_key = (
        ligand,
        receptor,
        pi_pi_general_dist_cutoff,
    )

    return aromatic_dists