Source code for binding_sites.absl

"""

Binding site location algorithm
===============================

General procedure is to:

 * use the most central point in the molecule to first place the guest
 * align the guest with nearby probability peaks
 * report the outcome as site + orientation sets

"""

import operator
from logging import debug

from numpy import dot
from numpy.linalg import norm


[docs]def min_vect(c_coa, f_coa, c_cob, f_cob_in, box): """Calculate the closest distance assuming fractional, in-cell coords.""" f_cob = f_cob_in[:] fdx = f_coa[0] - f_cob[0] if fdx < -0.5: f_cob[0] -= 1 elif fdx > 0.5: f_cob[0] += 1 fdy = f_coa[1] - f_cob[1] if fdy < -0.5: f_cob[1] -= 1 elif fdy > 0.5: f_cob[1] += 1 fdz = f_coa[2] - f_cob[2] if fdz < -0.5: f_cob[2] -= 1 elif fdz > 0.5: f_cob[2] += 1 if f_cob == f_cob_in: # if nothing has changed, use initial values return direction3d(c_coa, c_cob) else: new_b = [f_cob[0]*box[0][0] + f_cob[1]*box[1][0] + f_cob[2]*box[2][0], f_cob[0]*box[0][1] + f_cob[1]*box[1][1] + f_cob[2]*box[2][1], f_cob[0]*box[0][2] + f_cob[1]*box[1][2] + f_cob[2]*box[2][2]] return direction3d(c_coa, new_b)
[docs]def direction3d(source, target): """Return the vector connecting two 3d points.""" return [target[0] - source[0], target[1] - source[1], target[2] - source[2]]
[docs]def vecdist3(coord1, coord2): """Calculate vector between two 3d points.""" vec = [coord2[0] - coord1[0], coord2[1] - coord1[1], coord2[2] - coord1[2]] return (vec[0]*vec[0] + vec[1]*vec[1] + vec[2]*vec[2])**0.5
[docs]def calculate_binding_sites(guest, tp_point, cell): """ Return the sets of points in the maxima that identify binding sites. Each set minimally contains a tuple (index of first atom, position of second atom), plus tuples of the index and vectors to a second and third atom. Parameters ---------- guest : Guest The guest instance containing the TABASCO maxima. tp_point : tuple The identifier for the state point to calculate the binding sites for. cell : Cell The unit cell of the system. Returns ------- binding_sites : list A list of lists, each containing: (index_0, position_0, magnitude_0), (index_1, vector_0_1), # [optional] (index_2, vector_0_2) # [optional] """ guest_locations = guest.guest_locations[tp_point] # Generate fractional coordinates for all guest sites. for pdist in guest_locations: guest_locations[pdist] = [ (loc[0], loc[1], dot(cell.inverse, loc[0]).tolist()) for loc in guest_locations[pdist]] # Work out which atoms to use # Only use atom positions for site location guest_atom_distances = [] # Some guests previously only specified the COM probability # This is not supported, but fix by saying that COM is the # only atom if len(guest.atoms) == 1 and guest.probability == [(0, )]: guest.probability = [(1, )] guest_locations[(1, )] = guest_locations[(0,)] for idx, atom in enumerate(guest.atoms): distance = vecdist3(guest.com, atom.pos) for probability in guest.probability: # idx for atoms starts from 1; don't lose track! if (idx + 1) in probability: guest_atom_distances.append((distance, idx, probability)) # from COM outwards # (distance, atom_idx, prob_key) guest_atom_distances.sort() if len(guest_atom_distances) == 1: # There is only one site, use it debug("One atom") distance_0_1 = None distance_0_2 = None distance_1_2 = None linear_guest = True reversible_guest = True elif len(guest_atom_distances) == 2: # Two points to align to, use both debug("Two atoms") distance_0_1 = vecdist3(guest.atoms[guest_atom_distances[0][1]].pos, guest.atoms[guest_atom_distances[1][1]].pos) distance_0_2 = None distance_1_2 = None linear_guest = True reversible_guest = guest.is_reversible(guest_atom_distances[0][1], guest_atom_distances[1][1]) else: debug("More than two atoms, take the closest three") distance_0_1 = vecdist3(guest.atoms[guest_atom_distances[0][1]].pos, guest.atoms[guest_atom_distances[1][1]].pos) distance_0_2 = vecdist3(guest.atoms[guest_atom_distances[0][1]].pos, guest.atoms[guest_atom_distances[2][1]].pos) distance_1_2 = vecdist3(guest.atoms[guest_atom_distances[1][1]].pos, guest.atoms[guest_atom_distances[2][1]].pos) linear_guest = guest.is_linear(guest_atom_distances[0][1], guest_atom_distances[1][1], guest_atom_distances[2][1]) reversible_guest = guest.is_reversible(guest_atom_distances[0][1], guest_atom_distances[1][1], guest_atom_distances[2][1]) # Eugene's tolerance was 0.2 # Using 0.3 fits the width of the neighbourhood overlap_tol = 0.30 binding_sites = [] # Atom closest to the COM origin_key = guest_atom_distances[0][2] # keep a list of sets of atoms that are already included so we don't # use them twice reversible_sets = [] for origin_atom in sorted(guest_locations[origin_key], key=operator.itemgetter(1), reverse=True): if distance_0_1 is None: # add the un-oriented guest binding_sites.append([(guest_atom_distances[0][1], origin_atom[0], origin_atom[1])]) continue # We have more than one atom to align align_key = guest_atom_distances[1][2] align_closest = (999.9, None) align_found = False for align_atom in sorted(guest_locations[align_key], key=operator.itemgetter(1), reverse=True): if align_atom == origin_atom: continue # don't forget periodic boundaries! vector_0_1 = min_vect(origin_atom[0], origin_atom[2], align_atom[0], align_atom[2], cell.cell) separation_0_1 = norm(vector_0_1) align_overlap = abs(separation_0_1 - distance_0_1) if align_overlap < align_closest[0]: align_closest = (align_overlap, align_atom) if align_overlap < overlap_tol: # We fit the alignment so it's good align_found = True if distance_0_2 is None: # found to points to align a two atom guest. Use them! if reversible_guest: this_set = sorted([origin_atom, align_atom]) if this_set in reversible_sets: # already have this the other way round # (from the lower occupancy pair) continue else: # first occurrance of this set, use it binding_sites.append([ (guest_atom_distances[0][1], origin_atom[0], origin_atom[1]), (guest_atom_distances[1][1], vector_0_1)]) # make sure it's not used again reversible_sets.append(this_set) continue else: # add the guest in this orientation, it's unique binding_sites.append([ (guest_atom_distances[0][1], origin_atom[0], origin_atom[1]), (guest_atom_distances[1][1], vector_0_1)]) continue # we can try and fit all three orient_key = guest_atom_distances[1][2] orient_closest = (999.9, None) found_site = False for orient_atom in sorted(guest_locations[orient_key], key=operator.itemgetter(1), reverse=True): # need all the separations vector_0_2 = min_vect(origin_atom[0], origin_atom[2], orient_atom[0], orient_atom[2], cell.cell) separation_0_2 = norm(vector_0_2) vector_1_2 = min_vect(align_atom[0], align_atom[2], orient_atom[0], orient_atom[2], cell.cell) separation_1_2 = norm(vector_1_2) overlap_0_2 = abs(separation_0_2 - distance_0_2) overlap_1_2 = abs(separation_1_2 - distance_1_2) if overlap_0_2 + 0.5*overlap_1_2 < orient_closest[0]: orient_closest = (overlap_0_2 + 0.5*overlap_1_2, orient_atom) if overlap_0_2 < overlap_tol and overlap_1_2 < 2*overlap_tol: # Found three points to align to! Binding site here found_site = True # Check we are not duplicating guests that have symmetry if reversible_guest: this_set = sorted([origin_atom, align_atom, orient_atom]) if this_set in reversible_sets: # We already have this one, don't add again continue elif linear_guest: # just add the two sites binding_sites.append([ (guest_atom_distances[0][1], origin_atom[0], origin_atom[1]), (guest_atom_distances[1][1], vector_0_1)]) # these atom are now taken reversible_sets.append(this_set) else: binding_sites.append([ (guest_atom_distances[0][1], origin_atom[0], origin_atom[1]), (guest_atom_distances[1][1], vector_0_1), (guest_atom_distances[1][1], vector_0_2)]) found_site = True elif linear_guest: # just add the two sites binding_sites.append([ (guest_atom_distances[0][1], origin_atom[0], origin_atom[1]), (guest_atom_distances[1][1], vector_0_1)]) else: # Add all three sites binding_sites.append([ (guest_atom_distances[0][1], origin_atom[0], origin_atom[1]), (guest_atom_distances[1][1], vector_0_1), (guest_atom_distances[1][1], vector_0_2)]) if not found_site: if linear_guest: # don't care about reversible guests since no third # site is found, but we can still make the guest with # two sites alone # just add the two sites binding_sites.append([ (guest_atom_distances[0][1], origin_atom[0], origin_atom[1]), (guest_atom_distances[1][1], vector_0_1)]) #TODO(tdaff): nothing within overlap, use closest (3 sites) else: if distance_0_2 is None and align_closest[0] > distance_0_1: # Very isolated atom, not within 2 distances of any others # treat as isolated point atom and still make a guest binding_sites.append([(guest_atom_distances[0][1], origin_atom[0], origin_atom[1])]) #TODO(tdaff): nothing within overlap, use closest return binding_sites