Source code for shepherd_score.evaluations.evaluate.evals

"""
Evaluation pipeline classes for generated molecules.
"""

import sys
import os
from typing import Tuple, Optional
from pathlib import Path
from copy import deepcopy
from importlib.metadata import distributions

import numpy as np
import pandas as pd
from rdkit import Chem

if any(d.metadata["Name"] == 'rdkit' for d in distributions()):
    from rdkit.Contrib.SA_Score import sascorer # type: ignore
else:
    sys.path.append(os.path.join(os.environ['CONDA_PREFIX'],'share','RDKit','Contrib'))
    from SA_Score import sascorer # type: ignore

from rdkit.Chem import QED, Crippen, Lipinski, rdFingerprintGenerator
from rdkit.Chem.rdMolAlign import GetBestRMS, AlignMol

from shepherd_score.evaluations.utils.convert_data import extract_mol_from_xyz_block, get_mol_from_atom_pos

from shepherd_score.score.constants import ALPHA, LAM_SCALING
from shepherd_score.score.constants import P_TYPES

from shepherd_score.conformer_generation import optimize_conformer_with_xtb_from_xyz_block, single_point_xtb_from_xyz

from shepherd_score.container import Molecule, MoleculePair
from shepherd_score.score.gaussian_overlap_np import get_overlap_np
from shepherd_score.score.electrostatic_scoring_np import get_overlap_esp_np
from shepherd_score.score.pharmacophore_scoring_np import get_overlap_pharm_np

RNG = np.random.default_rng()
morgan_fp_gen = rdFingerprintGenerator.GetMorganGenerator(radius=3, includeChirality=True)

TMPDIR = Path('./')
if 'TMPDIR' in os.environ:
    TMPDIR = Path(os.environ['TMPDIR'])


def _clean_dummy_atom_arrays(
    atomic_numbers: np.ndarray, positions: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray]:
    """
    Clean dummy atoms from the molecule.
    """
    non_dummy_inds = np.where(atomic_numbers != 0)[0]
    return atomic_numbers[non_dummy_inds], positions[non_dummy_inds]


def _clean_dummy_pharm_arrays(
    pharm_types: np.ndarray, pharm_ancs: np.ndarray, pharm_vecs: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Clean dummy pharmacophores from the molecule.
    """
    non_dummy_inds = np.where(pharm_types != P_TYPES.index('Dummy'))[0]
    return pharm_types[non_dummy_inds], pharm_ancs[non_dummy_inds], pharm_vecs[non_dummy_inds]


[docs] class ConfEval: """ Generated conformer evaluation pipeline """
[docs] def __init__(self, atoms: np.ndarray, positions: np.ndarray, solvent: Optional[str] = None, num_processes: int = 1): """ Base class for evaluation of a single generated conformer. Checks validity with RDKit pipeline and xTB single point calculation and optimization. Calculates 2D graph properties for valid molecules. Automatically aligns relaxed structure to the original structure via rdkit RMS. Arguments --------- atoms : np.ndarray (N,) of atomic numbers of the generated molecule or (N,M) one-hot encoding. positions : np.ndarray (N,3) of coordinates for the generated molecule's atoms. solvent : str solvent type for xtb relaxation num_processes : int (default = 1) number of processors to use for xtb relaxation and RDKit RMSD alignment. """ self.xyz_block = None self.mol = None self.smiles = None self.molblock = None self.energy = None self.partial_charges = None self.solvent = solvent self.charge = 0 self.xyz_block_post_opt = None self.mol_post_opt = None self.smiles_post_opt = None self.molblock_post_opt = None self.energy_post_opt = None self.partial_charges_post_opt = None self.is_valid = False self.is_valid_post_opt = False self.is_graph_consistent = False # 2D graph features self.SA_score = None self.QED = None self.logP = None self.fsp3 = None self.morgan_fp = None self.SA_score_post_opt = None self.QED_post_opt = None self.logP_post_opt = None self.fsp3_post_opt = None self.morgan_fp_post_opt = None # Consistency in 3D self.strain_energy = None self.rmsd = None # 1. Converts coords + atom_ids -> xyz block atoms, positions = _clean_dummy_atom_arrays(atoms, positions) # 2. Get mol from xyz block self.mol, self.charge, self.xyz_block = get_mol_from_atom_pos(atoms=atoms, positions=positions) # 3. Get xtb energy and charges of initial conformation try: self.energy, self.partial_charges = single_point_xtb_from_xyz(xyz_block=self.xyz_block, solvent=self.solvent, charge=self.charge, num_cores=num_processes, temp_dir=TMPDIR) self.partial_charges = np.array(self.partial_charges) except Exception: pass self.is_valid = self.mol is not None and self.partial_charges is not None if self.is_valid: self.smiles = Chem.MolToSmiles(Chem.RemoveHs(self.mol)) self.molblock = Chem.MolToMolBlock(self.mol) # 4. Relax structure with xtb try: xtb_out = optimize_conformer_with_xtb_from_xyz_block(self.xyz_block, solvent=self.solvent, num_cores=num_processes, charge=self.charge, temp_dir=TMPDIR) self.xyz_block_post_opt, self.energy_post_opt, self.partial_charges_post_opt = xtb_out self.partial_charges_post_opt = np.array(self.partial_charges_post_opt) # 5. Check if relaxed_structure is valid self.mol_post_opt = extract_mol_from_xyz_block(xyz_block=self.xyz_block_post_opt, charge=self.charge) except Exception: pass self.is_valid_post_opt = self.mol_post_opt is not None and self.partial_charges_post_opt is not None # 6. Check if 2D molecular graphs are consistent if self.is_valid and self.is_valid_post_opt: self.is_graph_consistent = Chem.MolToSmiles(self.mol) == Chem.MolToSmiles(self.mol_post_opt) # Align post-opt mol with RMSD mol_atom_ids = list(range(self.mol.GetNumAtoms())) mol_post_opt_atom_ids = list(range(self.mol_post_opt.GetNumAtoms())) AlignMol(prbMol=self.mol_post_opt, refMol=self.mol, atomMap=[i for i in zip(mol_post_opt_atom_ids, mol_atom_ids)]) if self.is_valid_post_opt: self.smiles_post_opt = Chem.MolToSmiles(Chem.RemoveHs(self.mol_post_opt)) self.molblock_post_opt = Chem.MolToMolBlock(self.mol_post_opt) # 7. Calculate strain energy with relaxed structure if self.energy is not None and self.energy_post_opt is not None: self.strain_energy = self.energy - self.energy_post_opt # 8. Calculate RMSD from relaxed structure if self.is_graph_consistent: mol_copy = deepcopy(Chem.RemoveHs(self.mol)) mol_post_opt_copy = deepcopy(Chem.RemoveHs(self.mol_post_opt)) self.rmsd = GetBestRMS(prbMol=mol_copy, refMol=mol_post_opt_copy, numThreads=num_processes) # 9. 2D graph properties if self.is_valid: self.SA_score = sascorer.calculateScore(Chem.RemoveHs(self.mol)) self.QED = QED.qed(self.mol) self.logP = Crippen.MolLogP(self.mol) self.fsp3 = Lipinski.FractionCSP3(self.mol) self.morgan_fp = morgan_fp_gen.GetFingerprint(mol=Chem.RemoveHs(self.mol)) # 10. 2D graph properties post optimization if self.is_valid_post_opt: self.SA_score_post_opt = sascorer.calculateScore(Chem.RemoveHs(self.mol_post_opt)) self.QED_post_opt = QED.qed(self.mol_post_opt) self.logP_post_opt = Crippen.MolLogP(self.mol_post_opt) self.fsp3_post_opt = Lipinski.FractionCSP3(self.mol_post_opt) self.morgan_fp_post_opt = morgan_fp_gen.GetFingerprint(mol=Chem.RemoveHs(self.mol_post_opt))
[docs] def to_pandas(self): """ Convert the stored attributes to a pd.Series (for global attributes). Arguments --------- self Returns ------- pd.Series : holds attributes in an easy to visualize way """ global_attrs = {} # Global attributes for key, value in self.__dict__.items(): global_attrs[key] = value series_global = pd.Series(global_attrs) return series_global
[docs] class ConsistencyEval(ConfEval): """ Evaluation of the consistency between jointly generated molecules' features. Consistency in terms of similarity scores. """
[docs] def __init__(self, atoms: np.ndarray, positions: np.ndarray, surf_points: Optional[np.ndarray] = None, surf_esp: Optional[np.ndarray] = None, pharm_feats: Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]] = None, pharm_multi_vector: Optional[bool] = None, solvent: Optional[str] = None, probe_radius: float = 1.2, num_processes: int = 1): """ Consistency evaluation class for jointly generated molecule and features. Uses 3D similarity scoring functions. Inherits from ConfEval so that it can first run a conformer evaluation on the generated molecule. Must supply ``atoms`` and ``positions`` AND at least one of the features necessary for similarity scoring. Notes ----- Important assumptions: - Gaussian width parameter (alpha) for surface similarity was fitted to a probe radius of 1.2 A. - ESP weighting parameter (lam) for electrostatic similarity is set to 0.3 which was tested for the above assumption. Parameters ---------- atoms : np.ndarray Array of shape (N,) of atomic numbers of the generated molecule or (N, M) one-hot encoding. positions : np.ndarray Array of shape (N, 3) of coordinates for the generated molecule's atoms. surf_points : np.ndarray, optional Array of shape (M, 3) of generated surface point cloud. surf_esp : np.ndarray, optional Array of shape (M,) of generated electrostatic potential on surface. pharm_feats : tuple, optional Tuple of (pharm_types, pharm_ancs, pharm_vecs) where pharm_types is (P,) type of pharmacophore defined by shepherd_score.score.constants.P_TYPES, pharm_ancs is (P, 3) anchor positions, and pharm_vecs is (P, 3) unit vectors relative to anchor. pharm_multi_vector : bool, optional Use multiple vectors to represent Aro/HBA/HBD or single. solvent : str, optional Solvent type for xTB relaxation. probe_radius : float, optional Radius of probe atom used to generate solvent accessible surface. Default is 1.2 (vdW radius of hydrogen). num_processes : int, optional Number of processors to use for xTB relaxation. Default is 1. """ if not (isinstance(atoms, np.ndarray) or isinstance(positions, np.ndarray)): raise ValueError(f"Must provide `atoms` and `positions` as np.ndarrays. Instead {type(atoms)} and {type(positions)} were given.") super().__init__(atoms=atoms, positions=positions, solvent=solvent, num_processes=num_processes) self.molec = None self.probe_radius = probe_radius self.molec_regen = None self.molec_post_opt = None self.sim_surf_consistent = None self.sim_esp_consistent = None self.sim_pharm_consistent = None self.sim_surf_consistent_relax = None self.sim_esp_consistent_relax = None self.sim_pharm_consistent_relax = None self.sim_surf_consistent_relax_optimal = None self.sim_esp_consistent_relax_optimal = None self.sim_pharm_consistent_relax_optimal = None if pharm_feats is not None: pharm_types, pharm_ancs, pharm_vecs = pharm_feats num_pharms = len(pharm_types) if pharm_ancs.shape != (num_pharms, 3) or pharm_vecs.shape != (num_pharms, 3): raise ValueError( f'Provided pharmacophore features do not match dimensions: pharm_types {pharm_types.shape}, pharm_ancs {pharm_ancs.shape}, pharm_vecs {pharm_vecs.shape}' ) pharm_types, pharm_ancs, pharm_vecs = _clean_dummy_pharm_arrays(pharm_types, pharm_ancs, pharm_vecs) else: pharm_types, pharm_ancs, pharm_vecs = None, None, None has_pharm_features = (isinstance(pharm_types, np.ndarray) and isinstance(pharm_ancs, np.ndarray) and isinstance(pharm_vecs, np.ndarray)) if has_pharm_features and pharm_multi_vector is None: print('WARNING: Generated pharmacophore features provided, but `pharm_multi_vector` is None.') print(' Pharmacophore similarity not computed.') if not isinstance(surf_points, np.ndarray) and not isinstance(surf_esp, np.ndarray) and not has_pharm_features: raise ValueError('Must provide at least one of the generated representations: surface, electrostatics, or pharmacophores.') # Scoring parameters self.num_surf_points = len(surf_points) if surf_points is not None else None # Assumes no radius scaling with probe_radius=1.2 self.alpha = ALPHA(self.num_surf_points) if self.num_surf_points is not None else None self.lam = 0.3 # Optimal lambda for probe_radius=1.2 -> ONLY TO BE USED FOR ESP ALIGNMENT self.lam_scaled = self.lam * LAM_SCALING # -> ONLY TO BE USED FOR get_overlap_esp* # Self-consistency of features for generated molecule if self.is_valid: # Generate a Molecule object with generated features self.molec = Molecule( self.mol, partial_charges=np.array(self.partial_charges), surface_points=surf_points, electrostatics=surf_esp, pharm_types=pharm_types, pharm_ancs=pharm_ancs, pharm_vecs=pharm_vecs, probe_radius=self.probe_radius ) # Generate a Molecule object with regenerated features self.molec_regen = Molecule( self.mol, num_surf_points = self.num_surf_points, probe_radius=self.probe_radius, partial_charges = np.array(self.partial_charges), pharm_multi_vector = pharm_multi_vector if has_pharm_features else None ) if self.molec.surf_pos is not None: self.sim_surf_consistent = get_overlap_np( self.molec.surf_pos, self.molec_regen.surf_pos, alpha=self.alpha ) if self.molec.surf_esp is not None and self.molec.surf_pos is not None: self.sim_esp_consistent = get_overlap_esp_np( self.molec.surf_pos, self.molec_regen.surf_pos, self.molec.surf_esp, self.molec_regen.surf_esp, alpha=self.alpha, lam=self.lam_scaled ) if has_pharm_features and pharm_multi_vector is not None: self.sim_pharm_consistent = get_overlap_pharm_np( self.molec.pharm_types, self.molec_regen.pharm_types, self.molec.pharm_ancs, self.molec_regen.pharm_ancs, self.molec.pharm_vecs, self.molec_regen.pharm_vecs, similarity='tanimoto', extended_points=False, only_extended=False ) # Consistency between generated molecule and relaxed structure and features if self.is_valid and self.is_valid_post_opt: # Generate a Molecule object of relaxed structure self.molec_post_opt = Molecule( self.mol_post_opt, num_surf_points = self.num_surf_points, probe_radius=self.probe_radius, partial_charges = np.array(self.partial_charges_post_opt), pharm_multi_vector = pharm_multi_vector if has_pharm_features else None ) # Score only since we already align w.r.t. RMS of the generated atomic point cloud if self.molec_post_opt.surf_pos is not None: self.sim_surf_consistent_relax = get_overlap_np( self.molec.surf_pos, self.molec_post_opt.surf_pos, alpha=self.alpha ) if self.molec_post_opt.surf_pos is not None and self.molec_post_opt.surf_esp is not None: self.sim_esp_consistent_relax = get_overlap_esp_np( self.molec.surf_pos, self.molec_post_opt.surf_pos, self.molec.surf_esp, self.molec_post_opt.surf_esp, alpha=self.alpha, lam=self.lam_scaled ) if isinstance(pharm_multi_vector, bool) and self.molec_post_opt.pharm_ancs is not None and self.molec.pharm_ancs is not None: self.sim_pharm_consistent_relax = get_overlap_pharm_np( self.molec.pharm_types, self.molec_post_opt.pharm_types, self.molec.pharm_ancs, self.molec_post_opt.pharm_ancs, self.molec.pharm_vecs, self.molec_post_opt.pharm_vecs, similarity='tanimoto', extended_points=False, only_extended=False ) # Alignment with scoring functions mp_ref_and_relaxed = MoleculePair(self.molec, self.molec_post_opt, num_surf_points=self.num_surf_points, do_center=False) if self.molec_post_opt.surf_pos is not None: self.sim_surf_consistent_relax_optimal = self._align_with_surface(mp_ref_and_relaxed=mp_ref_and_relaxed) if self.molec_post_opt.surf_pos is not None and self.molec_post_opt.surf_esp is not None: self.sim_esp_consistent_relax_optimal = self._align_with_esp(mp_ref_and_relaxed=mp_ref_and_relaxed) if isinstance(pharm_multi_vector, bool) and self.molec_post_opt.pharm_ancs is not None and self.molec.pharm_ancs is not None: self.sim_pharm_consistent_relax_optimal = self._align_with_pharm(mp_ref_and_relaxed=mp_ref_and_relaxed)
def _align_with_surface(self, mp_ref_and_relaxed: MoleculePair) -> float: """ Align relaxed molecule to reference/target molecule with surface. Returns ------- float : Surface similarity score of optimally aligned molecule. """ _ = mp_ref_and_relaxed.align_with_surf( self.alpha, num_repeats=1, trans_init=False, use_jax=False ) surf_similarity = mp_ref_and_relaxed.sim_aligned_surf return float(surf_similarity) def _align_with_esp(self, mp_ref_and_relaxed: MoleculePair) -> float: """ Align relaxed molecule to reference/target molecule with ESP Returns ------- float : ESP similarity score of optimally aligned molecule. """ _ = mp_ref_and_relaxed.align_with_esp( self.alpha, lam=self.lam, num_repeats=1, trans_init=False, use_jax=False ) esp_similarity = mp_ref_and_relaxed.sim_aligned_esp return float(esp_similarity) def _align_with_pharm(self, mp_ref_and_relaxed: MoleculePair) -> float: """ Align relaxed molecule to reference/target molecule with pharmacophores. Stores aligned fit anchors and vectors on ``self._aligned_pharm_ancs`` and ``self._aligned_pharm_vecs`` for downstream subset scoring. Returns ------- float : Pharmacophore similarity score of optimally aligned molecule. """ aligned_fit_anchors, aligned_fit_vectors = mp_ref_and_relaxed.align_with_pharm( similarity='tanimoto', extended_points=False, only_extended=False, num_repeats=1, trans_init=False, use_jax=False ) self._aligned_pharm_ancs = aligned_fit_anchors self._aligned_pharm_vecs = aligned_fit_vectors pharm_similarity = mp_ref_and_relaxed.sim_aligned_pharm return float(pharm_similarity)
[docs] class ConditionalEval(ConfEval): """ Evaluation of conditionally generated molecules' quality and similarity. """
[docs] def __init__(self, ref_molec: Molecule, atoms: np.ndarray, positions: np.ndarray, condition: str, num_surf_points: int = 400, pharm_multi_vector: Optional[bool] = None, priority_pharm_indices: Optional[list] = None, solvent: Optional[str] = None, num_processes: int = 1, ): """ Evaluation pipeline for conditionally-generated molecules. Inherits from ConfEval so that it can first run a conformer evaluation on the generated molecule. Notes ----- Important assumptions: - Gaussian width parameter (alpha) for surface similarity assumes a probe radius of 1.2A. - ESP weighting parameter (lam) for electrostatic similarity is set to 0.3 which was tested for the above assumption. Parameters ---------- ref_molec : Molecule Molecule object of reference/target molecule. Must contain the representation that was used for conditioning. atoms : np.ndarray Array of shape (N,) of atomic numbers of the generated molecule or (N, M) one-hot encoding. positions : np.ndarray Array of shape (N, 3) of coordinates for the generated molecule's atoms. condition : str Condition that the molecule was conditioned on. One of 'surface', 'esp', 'pharm', or 'all'. Used for alignment. Choose 'esp' or 'all' if you want to compute ESP-aligned scores for other profiles. num_surf_points : int, optional Number of surface points to sample for similarity scoring. Default is 400. pharm_multi_vector : bool, optional Use multiple vectors to represent Aro/HBA/HBD or single. priority_pharm_indices : list of int, optional Indices (into ``ref_molec`` pharmacophore arrays) of "priority" pharmacophores. When provided, two additional Tversky (``'tversky_ref'``) scores are computed after the full-set pharm alignment: one for the priority subset and one for the non-priority complement subset, each scored against the full pharmacophore set of the aligned generated molecule. Requires ``condition`` to be ``'pharm'`` or ``'all'`` and ``pharm_multi_vector`` to be a bool. Must satisfy ``0 < len(priority_pharm_indices) < N_pharm``. solvent : str, optional Solvent type for xTB relaxation. num_processes : int, optional Number of processors to use for xTB relaxation. Default is 1. """ condition = condition.lower() self.condition = None if 'surf' in condition or 'shape' in condition: self.condition = 'surface' elif 'esp' in condition or 'electrostatic' in condition: self.condition = 'esp' elif 'pharm' in condition: self.condition = 'pharm' elif condition == 'all': self.condition = 'all' else: raise ValueError(f'`condition` must contain one of the following: "surf", "esp", "pharm", or "all". Instead, {condition} was given.') super().__init__(atoms=atoms, positions=positions, solvent=solvent, num_processes=num_processes) self.sim_surf_target = None self.sim_esp_target = None self.sim_pharm_target = None self.sim_surf_target_relax = None self.sim_esp_target_relax = None self.sim_pharm_target_relax = None self.sim_surf_target_relax_optimal = None self.sim_esp_target_relax_optimal = None self.sim_pharm_target_relax_optimal = None self.sim_surf_target_relax_esp_aligned = None self.sim_pharm_target_relax_esp_aligned = None self.sim_pharm_priority_target_relax_optimal = None self.sim_pharm_nonpriority_target_relax_optimal = None self.priority_pharm_indices = priority_pharm_indices # Scoring parameters self.num_surf_points = num_surf_points self.alpha = ALPHA(self.num_surf_points) # Fitted to probe_radius=1.2 self.lam = 0.3 # Optimal lambda for probe_radius=1.2 -> ONLY TO BE USED FOR ESP ALIGNMENT self.lam_scaled = self.lam * LAM_SCALING # -> ONLY TO BE USED FOR get_overlap_esp* self.ref_molec = ref_molec # Reference Molecule object self.molec_regen = None self.molec_post_opt = None if self.is_valid: # Generate a Molecule object with regenerated features self.molec_regen = Molecule( self.mol, num_surf_points = self.num_surf_points, partial_charges = np.array(self.partial_charges), pharm_multi_vector = pharm_multi_vector, probe_radius=self.ref_molec.probe_radius ) if self.molec_regen.surf_pos is not None: self.sim_surf_target = get_overlap_np( self.molec_regen.surf_pos, self.ref_molec.surf_pos, alpha=self.alpha ) if self.molec_regen.surf_esp is not None and self.molec_regen.surf_pos is not None: self.sim_esp_target = get_overlap_esp_np( self.molec_regen.surf_pos, self.ref_molec.surf_pos, self.molec_regen.surf_esp, self.ref_molec.surf_esp, alpha=self.alpha, lam=self.lam_scaled ) if pharm_multi_vector is not None and self.ref_molec.pharm_ancs is not None: self.sim_pharm_target = get_overlap_pharm_np( self.molec_regen.pharm_types, self.ref_molec.pharm_types, self.molec_regen.pharm_ancs, self.ref_molec.pharm_ancs, self.molec_regen.pharm_vecs, self.ref_molec.pharm_vecs, similarity='tanimoto', extended_points=False, only_extended=False ) # Similarity between relaxed structure and target molecule -> first align if self.is_valid_post_opt: # Generate a Molecule object of relaxed structure self.molec_post_opt = Molecule( self.mol_post_opt, num_surf_points = self.num_surf_points, partial_charges = np.array(self.partial_charges_post_opt), pharm_multi_vector = pharm_multi_vector, probe_radius=self.ref_molec.probe_radius ) # Score based on RMS alignment if self.molec_post_opt.surf_pos is not None: self.sim_surf_target_relax = get_overlap_np( self.molec_post_opt.surf_pos, self.ref_molec.surf_pos, alpha=self.alpha ) if self.molec_post_opt.surf_esp is not None and self.molec_post_opt.surf_pos is not None: self.sim_esp_target_relax = get_overlap_esp_np( self.molec_post_opt.surf_pos, self.ref_molec.surf_pos, self.molec_post_opt.surf_esp, self.ref_molec.surf_esp, alpha=self.alpha, lam=self.lam_scaled ) if pharm_multi_vector is not None and self.ref_molec.pharm_ancs is not None: self.sim_pharm_target_relax = get_overlap_pharm_np( self.molec_post_opt.pharm_types, self.ref_molec.pharm_types, self.molec_post_opt.pharm_ancs, self.ref_molec.pharm_ancs, self.molec_post_opt.pharm_vecs, self.ref_molec.pharm_vecs, similarity='tanimoto', extended_points=False, only_extended=False ) # Align and score w.r.t. specified condition mp_ref_and_relaxed = MoleculePair(self.ref_molec, self.molec_post_opt, num_surf_points=self.num_surf_points, do_center=False) if (self.condition == 'surface' or self.condition == 'all') and self.molec_post_opt.surf_pos is not None: self.sim_surf_target_relax_optimal = self._align_with_surface(mp_ref_and_relaxed=mp_ref_and_relaxed) if (self.condition == 'esp' or self.condition == 'all') and self.molec_post_opt.surf_pos is not None and self.molec_post_opt.surf_esp is not None: self.sim_esp_target_relax_optimal = self._align_with_esp(mp_ref_and_relaxed=mp_ref_and_relaxed) if (self.condition == 'pharm' or self.condition == 'all') and isinstance(pharm_multi_vector, bool): self.sim_pharm_target_relax_optimal = self._align_with_pharm(mp_ref_and_relaxed=mp_ref_and_relaxed) # Priority subset Tversky scoring using the alignment from the full-set pharm alignment above if (self.priority_pharm_indices is not None and hasattr(self, '_aligned_pharm_ancs') and self.ref_molec.pharm_ancs is not None): n_pharm = len(self.ref_molec.pharm_types) nonpriority_indices = sorted(set(range(n_pharm)) - set(self.priority_pharm_indices)) priority_idx = self.priority_pharm_indices self.sim_pharm_priority_target_relax_optimal = float(get_overlap_pharm_np( ptype_1=self.ref_molec.pharm_types[priority_idx], ptype_2=self.molec_post_opt.pharm_types, anchors_1=self.ref_molec.pharm_ancs[priority_idx], anchors_2=self._aligned_pharm_ancs, vectors_1=self.ref_molec.pharm_vecs[priority_idx], vectors_2=self._aligned_pharm_vecs, similarity='tversky_ref', extended_points=False, only_extended=False )) if nonpriority_indices: self.sim_pharm_nonpriority_target_relax_optimal = float(get_overlap_pharm_np( ptype_1=self.ref_molec.pharm_types[nonpriority_indices], ptype_2=self.molec_post_opt.pharm_types, anchors_1=self.ref_molec.pharm_ancs[nonpriority_indices], anchors_2=self._aligned_pharm_ancs, vectors_1=self.ref_molec.pharm_vecs[nonpriority_indices], vectors_2=self._aligned_pharm_vecs, similarity='tversky_ref', extended_points=False, only_extended=False )) # Compute ESP-aligned surf and pharmacophore similarity scores if mp_ref_and_relaxed.transform_esp is not None and self.condition in ('esp', 'all'): molec_post_opt_esp_aligned = mp_ref_and_relaxed.get_transformed_molecule(mp_ref_and_relaxed.transform_esp) esp_aligned_molec_pair = MoleculePair(ref_mol=self.ref_molec, fit_mol=molec_post_opt_esp_aligned, num_surf_points=self.ref_molec.num_surf_points, do_center=False) self.sim_surf_target_relax_esp_aligned = esp_aligned_molec_pair.score_with_surf( alpha=self.alpha, use='np' ) if isinstance(pharm_multi_vector, bool): self.sim_pharm_target_relax_esp_aligned = esp_aligned_molec_pair.score_with_pharm( similarity='tanimoto', extended_points=False, only_extended=False, use='np' )
def _align_with_surface(self, mp_ref_and_relaxed: MoleculePair) -> float: """ Align relaxed molecule to reference/target molecule with surface. Returns ------- float : Surface similarity score of optimally aligned molecule. """ _ = mp_ref_and_relaxed.align_with_surf( self.alpha, num_repeats=1, trans_init=False, use_jax=False ) surf_similarity = mp_ref_and_relaxed.sim_aligned_surf return float(surf_similarity) def _align_with_esp(self, mp_ref_and_relaxed: MoleculePair) -> float: """ Align relaxed molecule to reference/target molecule with ESP Returns ------- float : ESP similarity score of optimally aligned molecule. """ _ = mp_ref_and_relaxed.align_with_esp( self.alpha, lam=self.lam, num_repeats=1, trans_init=False, use_jax=False ) esp_similarity = mp_ref_and_relaxed.sim_aligned_esp return float(esp_similarity) def _align_with_pharm(self, mp_ref_and_relaxed: MoleculePair) -> float: """ Align relaxed molecule to reference/target molecule with pharmacophores. Stores aligned fit anchors and vectors on ``self._aligned_pharm_ancs`` and ``self._aligned_pharm_vecs`` for downstream subset scoring. Returns ------- float : Pharmacophore similarity score of optimally aligned molecule. """ aligned_fit_anchors, aligned_fit_vectors = mp_ref_and_relaxed.align_with_pharm( similarity='tanimoto', extended_points=False, only_extended=False, num_repeats=1, trans_init=False, use_jax=False ) self._aligned_pharm_ancs = aligned_fit_anchors self._aligned_pharm_vecs = aligned_fit_vectors pharm_similarity = mp_ref_and_relaxed.sim_aligned_pharm return float(pharm_similarity)