Source code for shepherd_score.evaluations.evaluate.evals

"""
Evaluation pipeline classes for generated molecules.
"""

import sys
import os
from typing import Tuple, Optional
from pathlib import Path
from copy import deepcopy
from importlib.metadata import distributions

import numpy as np
import pandas as pd
from rdkit import Chem

if any(d.metadata["Name"] == 'rdkit' for d in distributions()):
    from rdkit.Contrib.SA_Score import sascorer # type: ignore
else:
    sys.path.append(os.path.join(os.environ['CONDA_PREFIX'],'share','RDKit','Contrib'))
    from SA_Score import sascorer # type: ignore

from rdkit.Chem import QED, Crippen, Lipinski, rdFingerprintGenerator
from rdkit.Chem.rdMolAlign import GetBestRMS, AlignMol

from shepherd_score.evaluations.utils.convert_data import extract_mol_from_xyz_block, get_mol_from_atom_pos

from shepherd_score.score.constants import ALPHA, LAM_SCALING
from shepherd_score.score.constants import P_TYPES

from shepherd_score.conformer_generation import optimize_conformer_with_xtb_from_xyz_block, single_point_xtb_from_xyz

from shepherd_score.container import Molecule, MoleculePair
from shepherd_score.score.gaussian_overlap_np import get_overlap_np
from shepherd_score.score.electrostatic_scoring_np import get_overlap_esp_np
from shepherd_score.score.pharmacophore_scoring_np import get_overlap_pharm_np

RNG = np.random.default_rng()
morgan_fp_gen = rdFingerprintGenerator.GetMorganGenerator(radius=3, includeChirality=True)

TMPDIR = Path('./')
if 'TMPDIR' in os.environ:
    TMPDIR = Path(os.environ['TMPDIR'])


def _clean_dummy_atom_arrays(
    atomic_numbers: np.ndarray, positions: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray]:
    """
    Clean dummy atoms from the molecule.
    """
    non_dummy_inds = np.where(atomic_numbers != 0)[0]
    return atomic_numbers[non_dummy_inds], positions[non_dummy_inds]


def _clean_dummy_pharm_arrays(
    pharm_types: np.ndarray, pharm_ancs: np.ndarray, pharm_vecs: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Clean dummy pharmacophores from the molecule.
    """
    non_dummy_inds = np.where(pharm_types != P_TYPES.index('Dummy'))[0]
    return pharm_types[non_dummy_inds], pharm_ancs[non_dummy_inds], pharm_vecs[non_dummy_inds]



[docs]
class ConfEval:
    """ Generated conformer evaluation pipeline """


[docs]
    def __init__(self,
                 atoms: np.ndarray,
                 positions: np.ndarray,
                 solvent: Optional[str] = None,
                 num_processes: int = 1,
                 timeout_minutes: Optional[float] = None):
        """
        Base class for evaluation of a single generated conformer.

        Checks validity with RDKit pipeline and xTB single point calculation and optimization.
        Calculates 2D graph properties for valid molecules.

        Automatically aligns relaxed structure to the original structure via rdkit RMS.

        Arguments
        ---------
        atoms : np.ndarray (N,) of atomic numbers of the generated molecule or (N,M) one-hot
            encoding.
        positions : np.ndarray (N,3) of coordinates for the generated molecule's atoms.
        solvent : str solvent type for xtb relaxation
        num_processes : int (default = 1) number of processors to use for xtb relaxation and RDKit
            RMSD alignment.
        timeout_minutes : float, optional
            Per-xtb-call timeout in minutes. If exceeded, the xtb subprocess is killed and the
            corresponding step (single point or relaxation) is treated as failed. Default is
            ``None`` (no timeout).
        """
        xtb_timeout = timeout_minutes * 60 if timeout_minutes is not None else None
        self.xyz_block = None
        self.mol = None
        self.smiles = None
        self.molblock = None
        self.energy = None
        self.partial_charges = None

        self.solvent = solvent
        self.charge = 0

        self.xyz_block_post_opt = None
        self.mol_post_opt = None
        self.smiles_post_opt = None
        self.molblock_post_opt = None
        self.energy_post_opt = None
        self.partial_charges_post_opt = None

        self.is_valid = False
        self.is_valid_post_opt = False
        self.is_graph_consistent = False

        # 2D graph features
        self.SA_score = None
        self.QED = None
        self.logP = None
        self.fsp3 = None
        self.morgan_fp = None

        self.SA_score_post_opt = None
        self.QED_post_opt = None
        self.logP_post_opt = None
        self.fsp3_post_opt = None
        self.morgan_fp_post_opt = None

        # Consistency in 3D
        self.strain_energy = None
        self.rmsd = None

        # 1. Converts coords + atom_ids -> xyz block
        atoms, positions = _clean_dummy_atom_arrays(atoms, positions)
        # 2. Get mol from xyz block
        self.mol, self.charge, self.xyz_block = get_mol_from_atom_pos(atoms=atoms, positions=positions)

        # 3. Get xtb energy and charges of initial conformation
        try:
            self.energy, self.partial_charges = single_point_xtb_from_xyz(xyz_block=self.xyz_block,
                                                                          solvent=self.solvent,
                                                                          charge=self.charge,
                                                                          num_cores=num_processes,
                                                                          temp_dir=TMPDIR,
                                                                          timeout=xtb_timeout)
            self.partial_charges = np.array(self.partial_charges)
        except Exception:
            pass
        self.is_valid = self.mol is not None and self.partial_charges is not None
        if self.is_valid:
            self.smiles = Chem.MolToSmiles(Chem.RemoveHs(self.mol))
            self.molblock = Chem.MolToMolBlock(self.mol)

        # 4. Relax structure with xtb
        try:
            xtb_out = optimize_conformer_with_xtb_from_xyz_block(self.xyz_block,
                                                                solvent=self.solvent,
                                                                num_cores=num_processes,
                                                                charge=self.charge,
                                                                temp_dir=TMPDIR,
                                                                timeout=xtb_timeout)
            self.xyz_block_post_opt, self.energy_post_opt, self.partial_charges_post_opt = xtb_out
            self.partial_charges_post_opt = np.array(self.partial_charges_post_opt)

            # 5. Check if relaxed_structure is valid
            self.mol_post_opt = extract_mol_from_xyz_block(xyz_block=self.xyz_block_post_opt,
                                                           charge=self.charge)
        except Exception:
            pass

        self.is_valid_post_opt = self.mol_post_opt is not None and self.partial_charges_post_opt is not None

        # 6. Check if 2D molecular graphs are consistent
        if self.is_valid and self.is_valid_post_opt:
            self.is_graph_consistent = Chem.MolToSmiles(self.mol) == Chem.MolToSmiles(self.mol_post_opt)
            # Align post-opt mol with RMSD
            mol_atom_ids = list(range(self.mol.GetNumAtoms()))
            mol_post_opt_atom_ids = list(range(self.mol_post_opt.GetNumAtoms()))
            AlignMol(prbMol=self.mol_post_opt, refMol=self.mol, atomMap=[i for i in zip(mol_post_opt_atom_ids, mol_atom_ids)])

        if self.is_valid_post_opt:
            self.smiles_post_opt = Chem.MolToSmiles(Chem.RemoveHs(self.mol_post_opt))
            self.molblock_post_opt = Chem.MolToMolBlock(self.mol_post_opt)

        # 7. Calculate strain energy with relaxed structure
        if self.energy is not None and self.energy_post_opt is not None:
            self.strain_energy = self.energy - self.energy_post_opt

        # 8. Calculate RMSD from relaxed structure
        if self.is_graph_consistent:
            mol_copy = deepcopy(Chem.RemoveHs(self.mol))
            mol_post_opt_copy = deepcopy(Chem.RemoveHs(self.mol_post_opt))
            self.rmsd = GetBestRMS(prbMol=mol_copy, refMol=mol_post_opt_copy, numThreads=num_processes)

        # 9. 2D graph properties
        if self.is_valid:
            self.SA_score = sascorer.calculateScore(Chem.RemoveHs(self.mol))
            self.QED = QED.qed(self.mol)
            self.logP = Crippen.MolLogP(self.mol)
            self.fsp3 = Lipinski.FractionCSP3(self.mol)
            self.morgan_fp = morgan_fp_gen.GetFingerprint(mol=Chem.RemoveHs(self.mol))

        # 10. 2D graph properties post optimization
        if self.is_valid_post_opt:
            self.SA_score_post_opt = sascorer.calculateScore(Chem.RemoveHs(self.mol_post_opt))
            self.QED_post_opt = QED.qed(self.mol_post_opt)
            self.logP_post_opt = Crippen.MolLogP(self.mol_post_opt)
            self.fsp3_post_opt = Lipinski.FractionCSP3(self.mol_post_opt)
            self.morgan_fp_post_opt = morgan_fp_gen.GetFingerprint(mol=Chem.RemoveHs(self.mol_post_opt))




[docs]
    def to_pandas(self):
        """
        Convert the stored attributes to a pd.Series (for global attributes).

        Arguments
        ---------
        self

        Returns
        -------
        pd.Series : holds attributes in an easy to visualize way
        """
        global_attrs = {} # Global attributes

        for key, value in self.__dict__.items():
            global_attrs[key] = value

        series_global = pd.Series(global_attrs)

        return series_global





[docs]
class ConsistencyEval(ConfEval):
    """
    Evaluation of the consistency between jointly generated molecules' features.
    Consistency in terms of similarity scores.
    """

[docs]
    def __init__(self,
                 atoms: np.ndarray,
                 positions: np.ndarray,
                 surf_points: Optional[np.ndarray] = None,
                 surf_esp: Optional[np.ndarray] = None,
                 pharm_feats: Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]] = None,
                 pharm_multi_vector: Optional[bool] = None,
                 solvent: Optional[str] = None,
                 probe_radius: float = 1.2,
                 num_processes: int = 1,
                 timeout_minutes: Optional[float] = None):
        """
        Consistency evaluation class for jointly generated molecule and features.

        Uses 3D similarity scoring functions. Inherits from ConfEval so that it
        can first run a conformer evaluation on the generated molecule.

        Must supply ``atoms`` and ``positions`` AND at least one of the features necessary for
        similarity scoring.

        Notes
        -----
        Important assumptions:

        - Gaussian width parameter (alpha) for surface similarity was fitted to a probe
          radius of 1.2 A.
        - ESP weighting parameter (lam) for electrostatic similarity is set to 0.3 which
          was tested for the above assumption.

        Parameters
        ----------
        atoms : np.ndarray
            Array of shape (N,) of atomic numbers of the generated molecule or (N, M)
            one-hot encoding.
        positions : np.ndarray
            Array of shape (N, 3) of coordinates for the generated molecule's atoms.
        surf_points : np.ndarray, optional
            Array of shape (M, 3) of generated surface point cloud.
        surf_esp : np.ndarray, optional
            Array of shape (M,) of generated electrostatic potential on surface.
        pharm_feats : tuple, optional
            Tuple of (pharm_types, pharm_ancs, pharm_vecs) where pharm_types is (P,)
            type of pharmacophore defined by shepherd_score.score.constants.P_TYPES,
            pharm_ancs is (P, 3) anchor positions, and pharm_vecs is (P, 3) unit vectors
            relative to anchor.
        pharm_multi_vector : bool, optional
            Use multiple vectors to represent Aro/HBA/HBD or single.
        solvent : str, optional
            Solvent type for xTB relaxation.
        probe_radius : float, optional
            Radius of probe atom used to generate solvent accessible surface.
            Default is 1.2 (vdW radius of hydrogen).
        num_processes : int, optional
            Number of processors to use for xTB relaxation. Default is 1.
        timeout_minutes : float, optional
            Per-xtb-call timeout in minutes. If exceeded, the xtb subprocess is killed and the
            corresponding step (single point or relaxation) is treated as failed. Default is
            ``None`` (no timeout).
        """
        if not (isinstance(atoms, np.ndarray) or isinstance(positions, np.ndarray)):
            raise ValueError(f"Must provide `atoms` and `positions` as np.ndarrays. Instead {type(atoms)} and {type(positions)} were given.")

        super().__init__(atoms=atoms, positions=positions, solvent=solvent, num_processes=num_processes,
                         timeout_minutes=timeout_minutes)

        self.molec = None
        self.probe_radius = probe_radius
        self.molec_regen = None
        self.molec_post_opt = None

        self.sim_surf_consistent = None
        self.sim_esp_consistent = None
        self.sim_pharm_consistent = None

        self.sim_surf_consistent_relax = None
        self.sim_esp_consistent_relax = None
        self.sim_pharm_consistent_relax = None

        self.sim_surf_consistent_relax_optimal = None
        self.sim_esp_consistent_relax_optimal = None
        self.sim_pharm_consistent_relax_optimal = None

        if pharm_feats is not None:
            pharm_types, pharm_ancs, pharm_vecs = pharm_feats
            num_pharms = len(pharm_types)

            if pharm_ancs.shape != (num_pharms, 3) or pharm_vecs.shape != (num_pharms, 3):
                raise ValueError(
                    f'Provided pharmacophore features do not match dimensions: pharm_types {pharm_types.shape}, pharm_ancs {pharm_ancs.shape}, pharm_vecs {pharm_vecs.shape}'
                )
            pharm_types, pharm_ancs, pharm_vecs = _clean_dummy_pharm_arrays(pharm_types, pharm_ancs, pharm_vecs)
        else:
            pharm_types, pharm_ancs, pharm_vecs = None, None, None

        has_pharm_features = (isinstance(pharm_types, np.ndarray)
                              and isinstance(pharm_ancs, np.ndarray)
                              and isinstance(pharm_vecs, np.ndarray))
        if has_pharm_features and pharm_multi_vector is None:
            print('WARNING: Generated pharmacophore features provided, but `pharm_multi_vector` is None.')
            print('         Pharmacophore similarity not computed.')
        if not isinstance(surf_points, np.ndarray) and not isinstance(surf_esp, np.ndarray) and not has_pharm_features:
            raise ValueError('Must provide at least one of the generated representations: surface, electrostatics, or pharmacophores.')

        # Scoring parameters
        self.num_surf_points = len(surf_points) if surf_points is not None else None
        # Assumes no radius scaling with probe_radius=1.2
        self.alpha = ALPHA(self.num_surf_points) if self.num_surf_points is not None else None
        self.lam = 0.3 # Optimal lambda for probe_radius=1.2 -> ONLY TO BE USED FOR ESP ALIGNMENT
        self.lam_scaled = self.lam * LAM_SCALING # -> ONLY TO BE USED FOR get_overlap_esp*

        # Self-consistency of features for generated molecule
        if self.is_valid:
            # Generate a Molecule object with generated features
            self.molec = Molecule(
                self.mol,
                partial_charges=np.array(self.partial_charges),
                surface_points=surf_points,
                electrostatics=surf_esp,
                pharm_types=pharm_types,
                pharm_ancs=pharm_ancs,
                pharm_vecs=pharm_vecs,
                probe_radius=self.probe_radius
            )
            # Generate a Molecule object with regenerated features
            self.molec_regen = Molecule(
                self.mol,
                num_surf_points = self.num_surf_points,
                probe_radius=self.probe_radius,
                partial_charges = np.array(self.partial_charges),
                pharm_multi_vector = pharm_multi_vector if has_pharm_features else None
            )

            if self.molec.surf_pos is not None:
                self.sim_surf_consistent = get_overlap_np(
                    self.molec.surf_pos,
                    self.molec_regen.surf_pos,
                    alpha=self.alpha
                )

            if self.molec.surf_esp is not None and self.molec.surf_pos is not None:
                self.sim_esp_consistent = get_overlap_esp_np(
                    self.molec.surf_pos, self.molec_regen.surf_pos,
                    self.molec.surf_esp, self.molec_regen.surf_esp,
                    alpha=self.alpha,
                    lam=self.lam_scaled
                )

            if has_pharm_features and pharm_multi_vector is not None:
                self.sim_pharm_consistent = get_overlap_pharm_np(
                    self.molec.pharm_types,
                    self.molec_regen.pharm_types,
                    self.molec.pharm_ancs,
                    self.molec_regen.pharm_ancs,
                    self.molec.pharm_vecs,
                    self.molec_regen.pharm_vecs,
                    similarity='tanimoto',
                    extended_points=False,
                    only_extended=False
                )

        # Consistency between generated molecule and relaxed structure and features
        if self.is_valid and self.is_valid_post_opt:
            # Generate a Molecule object of relaxed structure
            self.molec_post_opt = Molecule(
                self.mol_post_opt,
                num_surf_points = self.num_surf_points,
                probe_radius=self.probe_radius,
                partial_charges = np.array(self.partial_charges_post_opt),
                pharm_multi_vector = pharm_multi_vector if has_pharm_features else None
            )

            # Score only since we already align w.r.t. RMS of the generated atomic point cloud
            if self.molec_post_opt.surf_pos is not None:
                self.sim_surf_consistent_relax = get_overlap_np(
                    self.molec.surf_pos,
                    self.molec_post_opt.surf_pos,
                    alpha=self.alpha
                )
            if self.molec_post_opt.surf_pos is not None and self.molec_post_opt.surf_esp is not None:
                self.sim_esp_consistent_relax = get_overlap_esp_np(
                    self.molec.surf_pos, self.molec_post_opt.surf_pos,
                    self.molec.surf_esp, self.molec_post_opt.surf_esp,
                    alpha=self.alpha,
                    lam=self.lam_scaled
                )
            if isinstance(pharm_multi_vector, bool) and self.molec_post_opt.pharm_ancs is not None and self.molec.pharm_ancs is not None:
                self.sim_pharm_consistent_relax = get_overlap_pharm_np(
                    self.molec.pharm_types,
                    self.molec_post_opt.pharm_types,
                    self.molec.pharm_ancs,
                    self.molec_post_opt.pharm_ancs,
                    self.molec.pharm_vecs,
                    self.molec_post_opt.pharm_vecs,
                    similarity='tanimoto',
                    extended_points=False,
                    only_extended=False
                )

            # Alignment with scoring functions
            mp_ref_and_relaxed = MoleculePair(self.molec,
                                              self.molec_post_opt,
                                              num_surf_points=self.num_surf_points,
                                              do_center=False)
            if self.molec_post_opt.surf_pos is not None:
                self.sim_surf_consistent_relax_optimal = self._align_with_surface(mp_ref_and_relaxed=mp_ref_and_relaxed)
            if self.molec_post_opt.surf_pos is not None and self.molec_post_opt.surf_esp is not None:
                self.sim_esp_consistent_relax_optimal = self._align_with_esp(mp_ref_and_relaxed=mp_ref_and_relaxed)
            if isinstance(pharm_multi_vector, bool) and self.molec_post_opt.pharm_ancs is not None and self.molec.pharm_ancs is not None:
                self.sim_pharm_consistent_relax_optimal = self._align_with_pharm(mp_ref_and_relaxed=mp_ref_and_relaxed)



    def _align_with_surface(self, mp_ref_and_relaxed: MoleculePair) -> float:
        """
        Align relaxed molecule to reference/target molecule with surface.

        Returns
        -------
        float : Surface similarity score of optimally aligned molecule.
        """
        _ = mp_ref_and_relaxed.align_with_surf(
            self.alpha,
            num_repeats=1,
            trans_init=False,
            use_jax=False
        )
        surf_similarity = mp_ref_and_relaxed.sim_aligned_surf
        return float(surf_similarity)


    def _align_with_esp(self, mp_ref_and_relaxed: MoleculePair) -> float:
        """
        Align relaxed molecule to reference/target molecule with ESP

        Returns
        -------
        float : ESP similarity score of optimally aligned molecule.
        """
        _ = mp_ref_and_relaxed.align_with_esp(
            self.alpha,
            lam=self.lam,
            num_repeats=1,
            trans_init=False,
            use_jax=False
        )
        esp_similarity = mp_ref_and_relaxed.sim_aligned_esp
        return float(esp_similarity)


    def _align_with_pharm(self, mp_ref_and_relaxed: MoleculePair) -> float:
        """
        Align relaxed molecule to reference/target molecule with pharmacophores.

        Stores aligned fit anchors and vectors on ``self._aligned_pharm_ancs``
        and ``self._aligned_pharm_vecs`` for downstream subset scoring.

        Returns
        -------
        float : Pharmacophore similarity score of optimally aligned molecule.
        """
        aligned_fit_anchors, aligned_fit_vectors = mp_ref_and_relaxed.align_with_pharm(
            similarity='tanimoto',
            extended_points=False,
            only_extended=False,
            num_repeats=1,
            trans_init=False,
            use_jax=False
        )
        self._aligned_pharm_ancs = aligned_fit_anchors
        self._aligned_pharm_vecs = aligned_fit_vectors
        pharm_similarity = mp_ref_and_relaxed.sim_aligned_pharm
        return float(pharm_similarity)




[docs]
class ConditionalEval(ConfEval):
    """ Evaluation of conditionally generated molecules' quality and similarity. """


[docs]
    def __init__(self,
                 ref_molec: Molecule,
                 atoms: np.ndarray,
                 positions: np.ndarray,
                 condition: str,
                 num_surf_points: int = 400,
                 pharm_multi_vector: Optional[bool] = None,
                 priority_pharm_indices: Optional[list] = None,
                 solvent: Optional[str] = None,
                 num_processes: int = 1,
                 timeout_minutes: Optional[float] = None,
                 ):
        """
        Evaluation pipeline for conditionally-generated molecules.

        Inherits from ConfEval so that it can first run a conformer evaluation on the
        generated molecule.

        Notes
        -----
        Important assumptions:

        - Gaussian width parameter (alpha) for surface similarity assumes a probe radius
          of 1.2A.
        - ESP weighting parameter (lam) for electrostatic similarity is set to 0.3 which
          was tested for the above assumption.

        Parameters
        ----------
        ref_molec : Molecule
            Molecule object of reference/target molecule. Must contain the representation
            that was used for conditioning.
        atoms : np.ndarray
            Array of shape (N,) of atomic numbers of the generated molecule or (N, M)
            one-hot encoding.
        positions : np.ndarray
            Array of shape (N, 3) of coordinates for the generated molecule's atoms.
        condition : str
            Condition that the molecule was conditioned on. One of 'surface', 'esp',
            'pharm', or 'all'. Used for alignment. Choose 'esp' or 'all' if you want
            to compute ESP-aligned scores for other profiles.
        num_surf_points : int, optional
            Number of surface points to sample for similarity scoring. Default is 400.
        pharm_multi_vector : bool, optional
            Use multiple vectors to represent Aro/HBA/HBD or single.
        priority_pharm_indices : list of int, optional
            Indices (into ``ref_molec`` pharmacophore arrays) of "priority"
            pharmacophores. When provided, two additional Tversky
            (``'tversky_ref'``) scores are computed after the full-set pharm
            alignment: one for the priority subset and one for the non-priority
            complement subset, each scored against the full pharmacophore set
            of the aligned generated molecule. Requires ``condition`` to be
            ``'pharm'`` or ``'all'`` and ``pharm_multi_vector`` to be a bool.
            Must satisfy ``0 < len(priority_pharm_indices) < N_pharm``.
        solvent : str, optional
            Solvent type for xTB relaxation.
        num_processes : int, optional
            Number of processors to use for xTB relaxation. Default is 1.
        timeout_minutes : float, optional
            Per-xtb-call timeout in minutes. If exceeded, the xtb subprocess is killed and the
            corresponding step (single point or relaxation) is treated as failed. Default is
            ``None`` (no timeout).
        """
        condition = condition.lower()
        self.condition = None
        if 'surf' in condition or 'shape' in condition:
            self.condition = 'surface'
        elif 'esp' in condition or 'electrostatic' in condition:
            self.condition = 'esp'
        elif 'pharm' in condition:
            self.condition = 'pharm'
        elif condition == 'all':
            self.condition = 'all'
        else:
            raise ValueError(f'`condition` must contain one of the following: "surf", "esp", "pharm", or "all". Instead, {condition} was given.')

        super().__init__(atoms=atoms,
                         positions=positions,
                         solvent=solvent,
                         num_processes=num_processes,
                         timeout_minutes=timeout_minutes)

        self.sim_surf_target = None
        self.sim_esp_target = None
        self.sim_pharm_target = None

        self.sim_surf_target_relax = None
        self.sim_esp_target_relax = None
        self.sim_pharm_target_relax = None

        self.sim_surf_target_relax_optimal = None
        self.sim_esp_target_relax_optimal = None
        self.sim_pharm_target_relax_optimal = None

        self.sim_surf_target_relax_esp_aligned = None
        self.sim_pharm_target_relax_esp_aligned = None

        self.sim_pharm_priority_target_relax_optimal = None
        self.sim_pharm_nonpriority_target_relax_optimal = None

        self.priority_pharm_indices = priority_pharm_indices

        # Scoring parameters
        self.num_surf_points = num_surf_points
        self.alpha = ALPHA(self.num_surf_points) # Fitted to probe_radius=1.2
        self.lam = 0.3 # Optimal lambda for probe_radius=1.2 -> ONLY TO BE USED FOR ESP ALIGNMENT
        self.lam_scaled = self.lam * LAM_SCALING # -> ONLY TO BE USED FOR get_overlap_esp*

        self.ref_molec = ref_molec # Reference Molecule object
        self.molec_regen = None
        self.molec_post_opt = None

        if self.is_valid:
            # Generate a Molecule object with regenerated features
            self.molec_regen = Molecule(
                self.mol,
                num_surf_points = self.num_surf_points,
                partial_charges = np.array(self.partial_charges),
                pharm_multi_vector = pharm_multi_vector,
                probe_radius=self.ref_molec.probe_radius
            )

            if self.molec_regen.surf_pos is not None:
                self.sim_surf_target = get_overlap_np(
                    self.molec_regen.surf_pos,
                    self.ref_molec.surf_pos,
                    alpha=self.alpha
                )

            if self.molec_regen.surf_esp is not None and self.molec_regen.surf_pos is not None:
                self.sim_esp_target = get_overlap_esp_np(
                    self.molec_regen.surf_pos, self.ref_molec.surf_pos,
                    self.molec_regen.surf_esp, self.ref_molec.surf_esp,
                    alpha=self.alpha,
                    lam=self.lam_scaled
                )

            if pharm_multi_vector is not None and self.ref_molec.pharm_ancs is not None:
                self.sim_pharm_target = get_overlap_pharm_np(
                    self.molec_regen.pharm_types,
                    self.ref_molec.pharm_types,
                    self.molec_regen.pharm_ancs,
                    self.ref_molec.pharm_ancs,
                    self.molec_regen.pharm_vecs,
                    self.ref_molec.pharm_vecs,
                    similarity='tanimoto',
                    extended_points=False,
                    only_extended=False
                )

        # Similarity between relaxed structure and target molecule -> first align
        if self.is_valid_post_opt:
            # Generate a Molecule object of relaxed structure
            self.molec_post_opt = Molecule(
                self.mol_post_opt,
                num_surf_points = self.num_surf_points,
                partial_charges = np.array(self.partial_charges_post_opt),
                pharm_multi_vector = pharm_multi_vector,
                probe_radius=self.ref_molec.probe_radius
            )

            # Score based on RMS alignment
            if self.molec_post_opt.surf_pos is not None:
                self.sim_surf_target_relax = get_overlap_np(
                    self.molec_post_opt.surf_pos,
                    self.ref_molec.surf_pos,
                    alpha=self.alpha
                )

            if self.molec_post_opt.surf_esp is not None and self.molec_post_opt.surf_pos is not None:
                self.sim_esp_target_relax = get_overlap_esp_np(
                    self.molec_post_opt.surf_pos, self.ref_molec.surf_pos,
                    self.molec_post_opt.surf_esp, self.ref_molec.surf_esp,
                    alpha=self.alpha,
                    lam=self.lam_scaled
                )

            if pharm_multi_vector is not None and self.ref_molec.pharm_ancs is not None:
                self.sim_pharm_target_relax = get_overlap_pharm_np(
                    self.molec_post_opt.pharm_types,
                    self.ref_molec.pharm_types,
                    self.molec_post_opt.pharm_ancs,
                    self.ref_molec.pharm_ancs,
                    self.molec_post_opt.pharm_vecs,
                    self.ref_molec.pharm_vecs,
                    similarity='tanimoto',
                    extended_points=False,
                    only_extended=False
                )

            # Align and score w.r.t. specified condition
            mp_ref_and_relaxed = MoleculePair(self.ref_molec,
                                              self.molec_post_opt,
                                              num_surf_points=self.num_surf_points,
                                              do_center=False)
            if (self.condition == 'surface' or self.condition == 'all') and self.molec_post_opt.surf_pos is not None:
                self.sim_surf_target_relax_optimal = self._align_with_surface(mp_ref_and_relaxed=mp_ref_and_relaxed)
            if (self.condition == 'esp' or self.condition == 'all') and self.molec_post_opt.surf_pos is not None and self.molec_post_opt.surf_esp is not None:
                self.sim_esp_target_relax_optimal = self._align_with_esp(mp_ref_and_relaxed=mp_ref_and_relaxed)
            if (self.condition == 'pharm' or self.condition == 'all') and isinstance(pharm_multi_vector, bool):
                self.sim_pharm_target_relax_optimal = self._align_with_pharm(mp_ref_and_relaxed=mp_ref_and_relaxed)

                # Priority subset Tversky scoring using the alignment from the full-set pharm alignment above
                if (self.priority_pharm_indices is not None
                        and hasattr(self, '_aligned_pharm_ancs')
                        and self.ref_molec.pharm_ancs is not None):
                    n_pharm = len(self.ref_molec.pharm_types)
                    nonpriority_indices = sorted(set(range(n_pharm)) - set(self.priority_pharm_indices))
                    priority_idx = self.priority_pharm_indices

                    self.sim_pharm_priority_target_relax_optimal = float(get_overlap_pharm_np(
                        ptype_1=self.ref_molec.pharm_types[priority_idx],
                        ptype_2=self.molec_post_opt.pharm_types,
                        anchors_1=self.ref_molec.pharm_ancs[priority_idx],
                        anchors_2=self._aligned_pharm_ancs,
                        vectors_1=self.ref_molec.pharm_vecs[priority_idx],
                        vectors_2=self._aligned_pharm_vecs,
                        similarity='tversky_ref',
                        extended_points=False,
                        only_extended=False
                    ))

                    if nonpriority_indices:
                        self.sim_pharm_nonpriority_target_relax_optimal = float(get_overlap_pharm_np(
                            ptype_1=self.ref_molec.pharm_types[nonpriority_indices],
                            ptype_2=self.molec_post_opt.pharm_types,
                            anchors_1=self.ref_molec.pharm_ancs[nonpriority_indices],
                            anchors_2=self._aligned_pharm_ancs,
                            vectors_1=self.ref_molec.pharm_vecs[nonpriority_indices],
                            vectors_2=self._aligned_pharm_vecs,
                            similarity='tversky_ref',
                            extended_points=False,
                            only_extended=False
                        ))

            # Compute ESP-aligned surf and pharmacophore similarity scores
            if mp_ref_and_relaxed.transform_esp is not None and self.condition in ('esp', 'all'):
                molec_post_opt_esp_aligned = mp_ref_and_relaxed.get_transformed_molecule(mp_ref_and_relaxed.transform_esp)
                esp_aligned_molec_pair = MoleculePair(ref_mol=self.ref_molec,
                                                      fit_mol=molec_post_opt_esp_aligned,
                                                      num_surf_points=self.ref_molec.num_surf_points,
                                                      do_center=False)
                self.sim_surf_target_relax_esp_aligned = esp_aligned_molec_pair.score_with_surf(
                    alpha=self.alpha, use='np'
                )
                if isinstance(pharm_multi_vector, bool):
                    self.sim_pharm_target_relax_esp_aligned = esp_aligned_molec_pair.score_with_pharm(
                        similarity='tanimoto',
                        extended_points=False,
                        only_extended=False,
                        use='np'
                    )



    def _align_with_surface(self, mp_ref_and_relaxed: MoleculePair) -> float:
        """
        Align relaxed molecule to reference/target molecule with surface.

        Returns
        -------
        float : Surface similarity score of optimally aligned molecule.
        """
        _ = mp_ref_and_relaxed.align_with_surf(
            self.alpha,
            num_repeats=1,
            trans_init=False,
            use_jax=False
        )

        surf_similarity = mp_ref_and_relaxed.sim_aligned_surf
        return float(surf_similarity)


    def _align_with_esp(self, mp_ref_and_relaxed: MoleculePair) -> float:
        """
        Align relaxed molecule to reference/target molecule with ESP

        Returns
        -------
        float : ESP similarity score of optimally aligned molecule.
        """
        _ = mp_ref_and_relaxed.align_with_esp(
            self.alpha,
            lam=self.lam,
            num_repeats=1,
            trans_init=False,
            use_jax=False
        )
        esp_similarity = mp_ref_and_relaxed.sim_aligned_esp
        return float(esp_similarity)


    def _align_with_pharm(self, mp_ref_and_relaxed: MoleculePair) -> float:
        """
        Align relaxed molecule to reference/target molecule with pharmacophores.

        Stores aligned fit anchors and vectors on ``self._aligned_pharm_ancs``
        and ``self._aligned_pharm_vecs`` for downstream subset scoring.

        Returns
        -------
        float : Pharmacophore similarity score of optimally aligned molecule.
        """
        aligned_fit_anchors, aligned_fit_vectors = mp_ref_and_relaxed.align_with_pharm(
            similarity='tanimoto',
            extended_points=False,
            only_extended=False,
            num_repeats=1,
            trans_init=False,
            use_jax=False
        )
        self._aligned_pharm_ancs = aligned_fit_anchors
        self._aligned_pharm_vecs = aligned_fit_vectors
        pharm_similarity = mp_ref_and_relaxed.sim_aligned_pharm
        return float(pharm_similarity)