Source code for shepherd_score.evaluations.docking.pipelines

"""
Autodock Vina Docking evaluation pipelines.

Requires:
- vina
- meeko
- openbabel (if protonating ligands)
"""
from typing import List, Optional, Dict, Literal, Tuple, Any, Union
from pathlib import Path
from tqdm import tqdm
import multiprocessing
import pickle

import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem

from shepherd_score.evaluations.utils.convert_data import get_smiles_from_atom_pos

from shepherd_score.evaluations.docking.docking import VinaSmiles
from shepherd_score.evaluations.docking.targets import docking_target_info


# Global variable to store VinaSmiles instance in each worker process
_worker_vina_smiles = None


def _init_worker_vina(
    receptor_pdbqt_file: str,
    center: Tuple[float, float, float],
    box_size: Tuple[float, float, float],
    pH: float,
    scorefunction: str,
    num_processes: int,
    verbose: int,
):
    """
    Initialize VinaSmiles instance in worker process.
    This is called once per worker when the pool is created.
    """
    global _worker_vina_smiles
    _worker_vina_smiles = VinaSmiles(
        receptor_pdbqt_file=receptor_pdbqt_file,
        center=center,
        box_size=box_size,
        pH=pH,
        scorefunction=scorefunction,
        num_processes=num_processes,
        verbose=verbose,
    )


def _eval_docking_single(
    i: int,
    smiles: str,
    exhaustiveness: int,
    n_poses: int,
    protonate: bool,
    return_best_protomer: bool,
    save_poses_path: Optional[str],
) -> Dict[str, Any]:
    """
    Evaluate a single SMILES string for docking.

    This function is designed to be called by multiprocessing workers.
    It uses the pre-initialized VinaSmiles instance from the worker.
    """
    global _worker_vina_smiles

    if smiles is None:
        return {'i': i, 'energy': np.nan, 'error': 'SMILES is None'}

    if _worker_vina_smiles is None:
        return {'i': i, 'energy': np.nan, 'error': 'VinaSmiles not initialized in worker'}

    try:
        # Reset state before each docking call to ensure clean state
        # (set_ligand_from_string should replace previous ligand, but this ensures clean state)
        _worker_vina_smiles.state = None

        energy, docked_mol = _worker_vina_smiles(
            ligand_smiles=smiles,
            output_file=save_poses_path,
            exhaustiveness=exhaustiveness,
            n_poses=n_poses,
            protonate=protonate,
            return_best_protomer=True,
        )

        # Pickle the docked mol for return
        docked_mol_pickle = None
        if docked_mol is not None:
            docked_mol_pickle = pickle.dumps(docked_mol)

        return {
            'i': i,
            'energy': float(energy),
            'error': None,
            'docked_mol': docked_mol_pickle,
        }
    except Exception as e:
        return {'i': i, 'energy': np.nan, 'error': str(e), 'docked_mol': None}


def _unpack_eval_docking_single(args):
    """Unpacker function for multiprocessing."""
    return _eval_docking_single(*args)


def _eval_relax_single(
    i: int,
    mol_pickle: bytes,
    center: Union[bool, Tuple[float, float, float]],
    max_steps: Optional[int],
    save_poses_path: Optional[str],
) -> Dict[str, Any]:
    """
    Evaluate a single mol object for relaxation.

    This function is designed to be called by multiprocessing workers.
    It uses the pre-initialized VinaSmiles instance from the worker.
    """
    global _worker_vina_smiles

    if mol_pickle is None:
        return {'i': i, 'energy': np.nan, 'relaxed_mol': None, 'error': 'mol is None'}

    if _worker_vina_smiles is None:
        return {'i': i, 'energy': np.nan, 'relaxed_mol': None, 'error': 'VinaSmiles not initialized in worker'}

    try:
        # Unpickle the mol object
        mol = pickle.loads(mol_pickle)

        if mol is None:
            return {'i': i, 'energy': np.nan, 'relaxed_mol': None, 'error': 'mol is None after unpickling'}

        # Reset state before each optimization call
        _worker_vina_smiles.state = None

        total_energy, _, optimized_mol = _worker_vina_smiles.optimize_ligand(
            ligand=mol,
            center=center,
            max_steps=max_steps,
            output_file=save_poses_path,
        )

        # Pickle the optimized mol for return
        optimized_mol_pickle = None
        if optimized_mol is not None:
            optimized_mol_pickle = pickle.dumps(optimized_mol)

        return {
            'i': i,
            'energy': float(total_energy),
            'relaxed_mol': optimized_mol_pickle,
            'error': None
        }
    except Exception as e:
        return {'i': i, 'energy': np.nan, 'relaxed_mol': None, 'error': str(e)}


def _unpack_eval_relax_single(args):
    """Unpacker function for multiprocessing."""
    return _eval_relax_single(*args)


[docs] class DockingEvalPipeline:
[docs] def __init__(self, pdb_id: str, num_processes: int = 4, docking_target_info_dict: Optional[Dict] = None, verbose: int = 0, path_to_bin: str = ''): """ Constructor for docking evaluation pipeline. Initializes VinaSmiles with receptor pdbqt. Parameters ---------- pdb_id : str PDB ID of receptor. Natively only supports: 1iep, 3eml, 3ny8, 4rlu, 4unn, 5mo4, 7l11. num_processes : int, optional Number of CPUs to use for scoring. Default is 4. docking_target_info_dict : dict, optional Dict holding minimum information needed for docking. Defaults to the built-in target info for the seven targets listed above. Custom dicts must follow the format:: {"1iep": {"center": (15.614, 53.380, 15.455), "size": (15, 15, 15), "pdbqt": "path_to_file.pdbqt"}} verbose : int, optional Level of verbosity from vina.Vina (0 is silent). Default is 0. path_to_bin : str, optional Path to environment bin containing ``mk_prepare_ligand.py``. Default is ''. """ if docking_target_info_dict is None: docking_target_info_dict = docking_target_info self.pdb_id = pdb_id self.path_to_bin = path_to_bin self.docking_target_info = docking_target_info_dict self.vina_smiles = None self.smiles = [] self.energies = [] self.buffer = {} self.num_failed = 0 self.repeats = 0 self.buffer_relaxed = {} self.relaxed_mols = [] self.relaxed_rmsd = [] if pdb_id not in list(self.docking_target_info.keys()): raise ValueError( f"Provided `pdb_id` ({pdb_id}) not supported. Please choose from: {list(self.docking_target_info.keys())}." ) path_to_receptor_pdbqt = Path(self.docking_target_info[self.pdb_id]['pdbqt']) if not path_to_receptor_pdbqt.is_file(): raise ValueError( f"Provided .pdbqt file does not exist. Please check `docking_target_info_dict`. Was given: {path_to_receptor_pdbqt}" ) pH = self.docking_target_info[self.pdb_id]['pH'] if 'pH' in self.docking_target_info[self.pdb_id] else 7.4 self.receptor_pdbqt_file = str(path_to_receptor_pdbqt) self.center = self.docking_target_info[self.pdb_id]['center'] self.box_size = self.docking_target_info[self.pdb_id]['size'] self.pH = pH self.scorefunction = 'vina' self.verbose = verbose self.vina_smiles = VinaSmiles( receptor_pdbqt_file=self.receptor_pdbqt_file, center=self.center, box_size=self.box_size, pH=self.pH, scorefunction=self.scorefunction, num_processes=num_processes, verbose=self.verbose )
[docs] def evaluate(self, smiles_ls: List[str], exhaustiveness: int = 32, n_poses: int = 1, protonate: bool = False, save_poses_dir_path: Optional[str] = None, verbose: bool = False, num_workers: int = 1, num_processes: int = 4, return_best_protomer: bool = False, *, mp_context: Literal['spawn', 'forkserver'] = 'spawn' ) -> List[float]: """ Loop through supplied list of SMILES strings, dock, and collect energies. Arguments --------- smiles_ls : List[str] list of SMILES to dock exhaustiveness : int (default = 32) Number of Monte Carlo simulations to run per pose n_poses : int (default = 1) Number of poses to save protonate : bool (default = False) Use protonation protocol save_poses_dir_path : Optional[str] (default = None) Path to directory to save docked poses. verbose : bool (default = False) show tqdm progress bar for each SMILES. num_workers : int (default = 1) number of parallel worker processes. Only recommended if `smiles_ls` is > 100 due to start-up overhead of new processes. num_processes : int (default = 4) number of processes each worker uses internally for Vina. Constraint: num_workers * num_processes <= available CPUs mp_context : Literal['spawn', 'forkserver'] context for multiprocessing. Returns ------- List of energies (affinities) in kcal/mol """ self.smiles = smiles_ls dir_path = None if save_poses_dir_path is not None: dir_path = Path(save_poses_dir_path) # Check buffer first and filter out cached SMILES energies = [] indices_to_process = [] smiles_to_process = [] for i, smiles in enumerate(smiles_ls): if smiles in self.buffer: self.repeats += 1 energies.append((i, self.buffer[smiles]['energy'])) elif smiles is None: energies.append((i, np.nan)) self.num_failed += 1 else: indices_to_process.append(i) smiles_to_process.append(smiles) available_cpus = multiprocessing.cpu_count() or 1 if num_processes > exhaustiveness: num_processes = exhaustiveness if num_workers < 1: num_workers = 1 if num_processes < 1: num_processes = 1 # Calculate max workers: num_workers * num_processes <= available_cpus max_workers_allowed = max(1, available_cpus // max(1, num_processes)) if num_workers > max_workers_allowed: num_workers = max_workers_allowed if num_workers > 1 and len(smiles_to_process) > 0: multiprocessing.set_start_method(mp_context, force=True) # Prepare inputs for workers (only task-specific parameters) inputs = [] for idx, smiles in zip(indices_to_process, smiles_to_process): save_poses_path = None if save_poses_dir_path is not None: save_poses_path = str(dir_path / f'{self.pdb_id}_docked_{idx}{"_prot" if protonate else ""}.pdbqt') inputs.append(( idx, smiles, exhaustiveness, n_poses, protonate if not return_best_protomer else True, return_best_protomer, save_poses_path, )) # Initialize VinaSmiles once per worker using initializer with multiprocessing.Pool( num_workers, initializer=_init_worker_vina, initargs=( self.receptor_pdbqt_file, self.center, self.box_size, self.pH, self.scorefunction, num_processes, self.verbose, ) ) as pool: if verbose: pbar = tqdm(total=len(smiles_to_process), desc=f'Docking {self.pdb_id}') else: pbar = None results_iter = pool.imap_unordered(_unpack_eval_docking_single, inputs, chunksize=1) pending_results = {idx: None for idx in indices_to_process} for res in results_iter: idx = res['i'] pending_results[idx] = res energy = res['energy'] energies.append((idx, energy)) # Update buffer smiles_str = smiles_ls[idx] docked_mol = None if res['docked_mol'] is not None: try: docked_mol = pickle.loads(res['docked_mol']) except Exception: docked_mol = None if smiles_str is not None: self.buffer[smiles_str] = {'energy': float(energy), 'docked_mol': docked_mol} if return_best_protomer: self.buffer[smiles_str].update({'protomer_smiles': None}) if docked_mol is not None: self.buffer[smiles_str]['protomer_smiles'] = Chem.MolToSmiles(Chem.RemoveHs(docked_mol)) if np.isnan(energy): self.num_failed += 1 if verbose and pbar is not None: pbar.update(1) if verbose and pbar is not None: pbar.close() for idx in indices_to_process: if pending_results[idx] is None: energies.append((idx, np.nan)) smiles_str = smiles_ls[idx] if smiles_str is not None: self.buffer[smiles_str] = {'energy': float(np.nan), 'docked_mol': None} if return_best_protomer: self.buffer[smiles_str].update({'protomer_smiles': None}) self.num_failed += 1 else: # Single process evaluation if len(smiles_to_process) > 0: if verbose: pbar = tqdm(enumerate(zip(indices_to_process, smiles_to_process)), desc=f'Docking {self.pdb_id}', total=len(smiles_to_process)) else: pbar = enumerate(zip(indices_to_process, smiles_to_process)) for _, (idx, smiles) in pbar: save_poses_path = None if save_poses_dir_path is not None: if return_best_protomer: save_poses_path = dir_path / f'{self.pdb_id}_docked_best_prot_{idx}.pdbqt' else: save_poses_path = dir_path / f'{self.pdb_id}_docked{"_prot" if protonate else ""}_{idx}.pdbqt' try: energy, docked_mol = self.vina_smiles( ligand_smiles=smiles, output_file=save_poses_path, exhaustiveness=exhaustiveness, n_poses=n_poses, protonate=protonate if not return_best_protomer else True, return_best_protomer=return_best_protomer, ) energies.append((idx, float(energy))) self.buffer[smiles] = {'energy': float(energy), 'docked_mol': docked_mol} if return_best_protomer: self.buffer[smiles].update({'protomer_smiles': None}) if docked_mol is not None: self.buffer[smiles]['protomer_smiles'] = Chem.MolToSmiles(Chem.RemoveHs(docked_mol)) except Exception: energies.append((idx, np.nan)) self.buffer[smiles] = {'energy': float(np.nan), 'docked_mol': None} if return_best_protomer: self.buffer[smiles].update({'protomer_smiles': None}) self.num_failed += 1 # Sort by original index and extract energies energies.sort(key=lambda x: x[0]) self.energies = np.array([e[1] for e in energies]) return [e[1] for e in energies]
[docs] def evaluate_relax(self, mol_ls: List[Chem.Mol], center: Union[bool, Tuple[float, float, float]] = False, max_steps: Optional[int] = 10000, save_poses_dir_path: Optional[str] = None, verbose: bool = False, num_workers: int = 1, *, mp_context: Literal['spawn', 'forkserver'] = 'spawn' ) -> List[float]: """ Loop through supplied list of mol objects, optimize, and collect energies. Arguments --------- mol_ls : List[Chem.Mol] list of rdkit mol objects to relax center : bool or tuple of float (default = False) If a tuple, centers to those coordinates. If True, centers the ligand to the receptor's center. If False, does not translate the ligand from its initial conformation. max_steps : int or None (default = 10000) Maximum number of steps to take in the optimization. If None, uses the default value of 10000. save_poses_dir_path : Optional[str] (default = None) Path to directory to save optimized poses. verbose : bool (default = False) show tqdm progress bar for each mol. num_workers : int (default = 1) number of parallel worker processes. mp_context : Literal['spawn', 'forkserver'] context for multiprocessing. Returns ------- List of energies (affinities) in kcal/mol """ dir_path = None if save_poses_dir_path is not None: dir_path = Path(save_poses_dir_path) # Process mol objects energies = [] relaxed_mols = [] indices_to_process = [] mols_to_process = [] for i, mol in enumerate(mol_ls): if mol is None: energies.append((i, np.nan)) relaxed_mols.append((i, None)) self.num_failed += 1 else: indices_to_process.append(i) mols_to_process.append(mol) self.buffer_relaxed[Chem.MolToSmiles(Chem.RemoveHs(mol))] = {'energy': np.nan, 'relaxed_mol': None, 'rmsd': np.nan} available_cpus = multiprocessing.cpu_count() or 1 if num_workers < 1: num_workers = 1 # Calculate max workers: num_workers * num_processes <= available_cpus max_workers_allowed = max(1, available_cpus) if num_workers > max_workers_allowed: num_workers = max_workers_allowed if num_workers > 1 and len(mols_to_process) > 0: multiprocessing.set_start_method(mp_context, force=True) # Prepare inputs for workers (pickle mol objects for multiprocessing) inputs = [] for idx, mol in zip(indices_to_process, mols_to_process): save_poses_path = None if save_poses_dir_path is not None: save_poses_path = str(dir_path / f'{self.pdb_id}_relaxed_{idx}.pdbqt') # Pickle the mol for multiprocessing mol_pickle = pickle.dumps(mol) inputs.append(( idx, mol_pickle, center, max_steps, save_poses_path, )) # Initialize VinaSmiles once per worker using initializer with multiprocessing.Pool( num_workers, initializer=_init_worker_vina, initargs=( self.receptor_pdbqt_file, self.center, self.box_size, self.pH, self.scorefunction, 1, # num processes = 1 since only one process is used for relaxation self.verbose, ) ) as pool: if verbose: pbar = tqdm(total=len(mols_to_process), desc=f'Relaxing {self.pdb_id}') else: pbar = None results_iter = pool.imap_unordered(_unpack_eval_relax_single, inputs, chunksize=1) pending_results = {idx: None for idx in indices_to_process} for res in results_iter: idx = res['i'] pending_results[idx] = res energy = res['energy'] energies.append((idx, energy)) smiles_str = Chem.MolToSmiles(Chem.RemoveHs(mols_to_process[idx])) if smiles_str is not None: self.buffer_relaxed[smiles_str]['energy'] = float(energy) self.buffer_relaxed[smiles_str]['relaxed_mol'] = mols_to_process[idx] # Unpickle the relaxed mol relaxed_mol = None if res['relaxed_mol'] is not None: try: relaxed_mol = pickle.loads(res['relaxed_mol']) except Exception: relaxed_mol = None relaxed_mols.append((idx, relaxed_mol)) smiles_str = Chem.MolToSmiles(Chem.RemoveHs(mols_to_process[idx])) if smiles_str is not None: self.buffer_relaxed[smiles_str]['energy'] = float(energy) self.buffer_relaxed[smiles_str]['relaxed_mol'] = relaxed_mol if relaxed_mol is not None else None self.buffer_relaxed[smiles_str]['rmsd'] = AllChem.CalcRMS( self.vina_smiles._center_ligand(mols_to_process[idx], self.center), relaxed_mol ) if np.isnan(energy): self.num_failed += 1 if verbose and pbar is not None: pbar.update(1) if verbose and pbar is not None: pbar.close() for idx in indices_to_process: if pending_results[idx] is None: energies.append((idx, np.nan)) relaxed_mols.append((idx, None)) self.num_failed += 1 else: # Single process evaluation if len(mols_to_process) > 0: if verbose: pbar = tqdm(enumerate(zip(indices_to_process, mols_to_process)), desc=f'Relaxing {self.pdb_id}', total=len(mols_to_process)) else: pbar = enumerate(zip(indices_to_process, mols_to_process)) for _, (idx, mol) in pbar: save_poses_path = None if save_poses_dir_path is not None: save_poses_path = dir_path / f'{self.pdb_id}_relaxed_{idx}.pdbqt' try: total_energy, _, optimized_mol = self.vina_smiles.optimize_ligand( ligand=mol, center=center, max_steps=max_steps, output_file=save_poses_path, ) energies.append((idx, float(total_energy))) relaxed_mols.append((idx, optimized_mol)) except Exception: energies.append((idx, np.nan)) relaxed_mols.append((idx, None)) self.num_failed += 1 smiles_str = Chem.MolToSmiles(Chem.RemoveHs(mol)) if smiles_str is not None: self.buffer_relaxed[smiles_str]['energy'] = float(total_energy) self.buffer_relaxed[smiles_str]['relaxed_mol'] = optimized_mol if optimized_mol is not None else None self.buffer_relaxed[smiles_str]['rmsd'] = AllChem.CalcRMS( self.vina_smiles._center_ligand(mol, self.center), optimized_mol ) # Sort by original index and extract energies and relaxed mols energies.sort(key=lambda x: x[0]) relaxed_mols.sort(key=lambda x: x[0]) return np.array([e[1] for e in energies])
[docs] def benchmark(self, exhaustiveness: int = 32, n_poses: int = 5, protonate: bool = False, save_poses_dir_path: Optional[str] = None ) -> float: """ Run benchmark with experimental ligands. Arguments --------- exhaustiveness : int (default = 32) Number of Monte Carlo simulations to run per pose n_poses : int (default = 5) Number of poses to save protonate : bool (default = False) (de-)protonate ligand with OpenBabel at pH=7.4 save_poses_dir_path : Optional[str] (default = None) Path to directory to save docked poses. Returns ------- float : Energies (affinities) in kcal/mol """ save_poses_path = None if save_poses_dir_path is not None: dir_path = Path(save_poses_dir_path) save_poses_path = dir_path / f"{self.pdb_id}_docked{'_prot' if protonate else ''}.pdbqt" best_energy, docked_ligand = self.vina_smiles( self.docking_target_info[self.pdb_id]['ligand'], output_file=str(save_poses_path), exhaustiveness=exhaustiveness, n_poses=n_poses, protonate=protonate, ) return best_energy, docked_ligand
[docs] def to_pandas(self, docked_mol_as_molblock: bool = False, sort_by_energies: bool = True, reset_index: bool = True, ) -> pd.DataFrame: """ Convert the attributes of generated smiles and the energies to a pd.DataFrame Arguments --------- docked_mol_as_molblock : bool (default = False) Whether to convert the docked mol to a molblock reset_index : bool (default = True) Whether to reset the index sort_by_energies : bool (default = True) Whether to sort the dataframe by energies Returns ------- pd.DataFrame : attributes for each evaluated sample """ df = self._to_pandas_buffer( buffer=self.buffer, docked_mol_as_molblock=docked_mol_as_molblock, sort_by_energies=sort_by_energies, reset_index=reset_index, ) return df
[docs] def to_pandas_relaxed(self, docked_mol_as_molblock: bool = False, sort_by_energies: bool = True, reset_index: bool = True, ) -> pd.DataFrame: """ Convert the attributes of relaxed mols and the energies to a pd.DataFrame Returns ------- pd.DataFrame : attributes for each relaxed sample """ df_relaxed = self._to_pandas_buffer( buffer=self.buffer_relaxed, docked_mol_as_molblock=docked_mol_as_molblock, sort_by_energies=sort_by_energies, reset_index=reset_index, ) return df_relaxed
def _to_pandas_buffer( self, buffer: Dict[str, Any], docked_mol_as_molblock: bool = False, sort_by_energies: bool = True, reset_index: bool = True, ) -> pd.DataFrame: """ Convert the buffer to a pd.DataFrame """ df = pd.DataFrame(buffer).T if docked_mol_as_molblock: df['docked_mol'] = df['docked_mol'].apply(lambda x: Chem.MolToMolBlock(x)) if not reset_index: df = df.reset_index(names='smiles') if sort_by_energies: df = df.sort_values(by='energy') if reset_index: df = df.reset_index(names='smiles') return df
[docs] def run_docking_benchmark(save_dir_path: str, pdb_id: str, num_processes: int = 4, docking_target_info_dict: Optional[Dict] = None, protonate: bool = False ) -> None: """ Run docking benchmark on experimental SMILES. Uses an exhaustiveness of 32 and saves the top-30 poses to a specified location. Parameters ---------- save_dir_path : str Path to save docked poses to. pdb_id : str PDB ID of receptor. Natively only supports: 1iep, 3eml, 3ny8, 4rlu, 4unn, 5mo4, 7l11. num_processes : int, optional Number of CPUs to use for scoring. Default is 4. docking_target_info_dict : dict, optional Dict holding minimum information needed for docking. Defaults to the built-in target info for the seven targets listed above. Custom dicts must follow the format:: {"1iep": {"center": (15.614, 53.380, 15.455), "size": (15, 15, 15), "pdbqt": "path_to_file.pdbqt", "ligand": "SMILES string of experimental ligand"}} protonate : bool, optional Whether to protonate ligands at a given pH. Requires ``"pH"`` field to be filled out in docking_target_info_dict. Default is ``False``. Returns ------- None """ dep = DockingEvalPipeline(pdb_id=pdb_id, num_processes=num_processes, docking_target_info_dict=docking_target_info_dict, verbose=0, path_to_bin='') dep.benchmark(exhaustiveness=32, n_poses=30, save_poses_dir_path=save_dir_path, protonate=protonate)
[docs] def run_docking_evaluation(atoms: List[np.ndarray], positions: List[np.ndarray], pdb_id: str, num_processes: int = 4, docking_target_info_dict: Optional[Dict] = None, exhaustiveness: int = 32, n_poses: int = 1, protonate: bool = False, save_poses_dir_path: Optional[str] = None, verbose: bool = True, num_workers: int = 1, *, mp_context: Literal['spawn', 'forkserver'] = 'spawn' ) -> DockingEvalPipeline: """ Run docking evaluation with an exhaustiveness of 32. Parameters ---------- atoms : list List of np.ndarray (N,) of atomic numbers of the generated molecule or (N, M) one-hot encoding. positions : list List of np.ndarray (N, 3) of coordinates for the generated molecule's atoms. pdb_id : str PDB ID of receptor. Natively only supports: 1iep, 3eml, 3ny8, 4rlu, 4unn, 5mo4, 7l11. num_processes : int, optional Number of CPUs to use for Autodock Vina. Default is 4. docking_target_info_dict : dict, optional Dict holding minimum information needed for docking. Defaults to the built-in target info for the seven targets listed above. Custom dicts must follow the format:: {"1iep": {"center": (15.614, 53.380, 15.455), "size": (15, 15, 15), "pdbqt": "path_to_file.pdbqt"}} exhaustiveness : int, optional Number of Monte Carlo simulations to run per pose. Default is 32. n_poses : int, optional Number of poses to save. Default is 1. protonate : bool, optional Use protonation protocol. Default is ``False``. save_poses_dir_path : str, optional Path to directory to save docked poses. Default is ``None``. verbose : bool, optional Show tqdm progress bar for each SMILES. Default is ``True``. num_workers : int, optional Number of parallel worker processes. Default is 1. mp_context : str, optional Context for multiprocessing. One of 'spawn' or 'forkserver'. Default is 'spawn'. Returns ------- DockingEvalPipeline Results are found in the ``buffer`` attribute {'smiles': energy} or in ``smiles`` and ``energies`` which preserves the order of provided atoms/positions as a list. """ docking_pipe = DockingEvalPipeline(pdb_id=pdb_id, num_processes=num_processes, docking_target_info_dict=docking_target_info_dict, verbose=0, path_to_bin='') smiles_list = [] for sample in zip(atoms, positions): smiles_list.append(get_smiles_from_atom_pos(atoms=sample[0], positions=sample[1])) docking_pipe.evaluate(smiles_list, exhaustiveness=exhaustiveness, n_poses=n_poses, protonate=protonate, save_poses_dir_path=save_poses_dir_path, verbose=verbose, num_workers=num_workers, num_processes=num_processes, mp_context=mp_context) return docking_pipe
[docs] def run_docking_evaluation_from_smiles(smiles: List[str], pdb_id: str, num_processes: int = 4, docking_target_info_dict: Optional[Dict] = None, exhaustiveness: int = 32, n_poses: int = 1, protonate: bool = False, save_poses_dir_path: Optional[str] = None, verbose: bool = True, num_workers: int = 1, *, mp_context: Literal['spawn', 'forkserver'] = 'spawn' ) -> DockingEvalPipeline: """ Run docking evaluation with an exhaustiveness of 32. Parameters ---------- smiles : list List of SMILES strings. These must be valid or ``None``. pdb_id : str PDB ID of receptor. Natively only supports: 1iep, 3eml, 3ny8, 4rlu, 4unn, 5mo4, 7l11. num_processes : int, optional Number of CPUs to use for Autodock Vina. Default is 4. docking_target_info_dict : dict, optional Dict holding minimum information needed for docking. Defaults to the built-in target info for the seven targets listed above. Custom dicts must follow the format:: {"1iep": {"center": (15.614, 53.380, 15.455), "size": (15, 15, 15), "pdbqt": "path_to_file.pdbqt"}} exhaustiveness : int, optional Number of Monte Carlo simulations to run per pose. Default is 32. n_poses : int, optional Number of poses to save. Default is 1. protonate : bool, optional Use protonation protocol. Default is ``False``. save_poses_dir_path : str, optional Path to directory to save docked poses. Default is ``None``. verbose : bool, optional Show tqdm progress bar for each SMILES. Default is ``True``. num_workers : int, optional Number of parallel worker processes. Default is 1. mp_context : str, optional Context for multiprocessing. One of 'spawn' or 'forkserver'. Default is 'spawn'. Returns ------- DockingEvalPipeline Results are found in the ``buffer`` attribute {'smiles': energy} or in ``smiles`` and ``energies`` which preserves the order of provided SMILES as a list. """ docking_pipe = DockingEvalPipeline(pdb_id=pdb_id, num_processes=num_processes, docking_target_info_dict=docking_target_info_dict, verbose=0, path_to_bin='') docking_pipe.evaluate(smiles, exhaustiveness=exhaustiveness, n_poses=n_poses, protonate=protonate, save_poses_dir_path=save_poses_dir_path, verbose=verbose, num_workers=num_workers, num_processes=num_processes, mp_context=mp_context) return docking_pipe