Source code for shepherd_score.conformer_generation

"""
Handles anything related to generating conformers with xTB or MMFF94.

Requires xtb installation with command-line access.
See https://xtb-docs.readthedocs.io/en/latest/setup.html for installation instructions.
"""
import os
from copy import deepcopy
import re
import shutil
import subprocess
import time
from pathlib import Path
from tqdm import tqdm
import uuid
from typing import Optional, List, Union
import contextlib
import multiprocessing

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Geometry import Point3D
from rdkit.Chem import rdMolAlign
from rdkit.ML.Cluster import Butina # type: ignore

TMPDIR = os.environ.get('TMPDIR', '')
if TMPDIR == '' and Path('/tmp').is_dir():
    TMPDIR = '/tmp'
TMPDIR = Path(TMPDIR)


[docs] @contextlib.contextmanager def set_thread_limits(num_threads: int): """Temporarily set threading environment variables.""" env_vars = [ 'OMP_NUM_THREADS', 'OPENBLAS_NUM_THREADS', 'MKL_NUM_THREADS', 'NUMEXPR_NUM_THREADS', 'VECLIB_MAXIMUM_THREADS', ] old_env = {var: os.environ.get(var) for var in env_vars} try: for var in env_vars: os.environ[var] = str(num_threads) yield finally: for var, val in old_env.items(): if val is None: os.environ.pop(var, None) else: os.environ[var] = val
[docs] def update_mol_coordinates(mol: Chem.Mol, coordinates) -> Chem.Mol: """ Update the coordinates of a 3D RDKit mol object with a new set of coordinates. Parameters ---------- mol : Chem.Mol RDKit mol object with 3D coordinates to be replaced. coordinates : list or array-like List/array of new [x, y, z] coordinates. Returns ------- Chem.Mol RDKit mol object with updated 3D coordinates. """ mol_new = deepcopy(mol) conf = mol_new.GetConformer() for i in range(mol_new.GetNumAtoms()): x,y,z = coordinates[i] conf.SetAtomPosition(i, Point3D(x,y,z)) return mol_new
[docs] def read_multi_xyz_file(file_dir: str): """ Read an xyz file that potentially contains multiple structures. Parameters ---------- file_dir : str Path to .xyz file. Returns ------- all_coordinates : list List of lists containing the coordinates of each structure in the xyz file. all_elements : list List of lists containing the element types of each atom in each structure. """ atom_types = [Chem.GetPeriodicTable().GetElementSymbol(i) for i in range(1, 119)] with open(file_dir, 'r') as file: lines = file.readlines() all_coordinates = [] all_elements = [] begin_coord = False coordinates = [] elements = [] for line in lines: stripped_line = (' '.join(line.split())) stripped_line_split = stripped_line.split(' ') numbers = re.findall(r"[-+]?(?:\d*\.\d+|\d+)", stripped_line) if len(stripped_line) == 0: continue if (len(numbers) == 3) & (stripped_line_split[0] in atom_types): begin_coord = True x,y,z = numbers coordinates.append([float(x),float(y),float(z)]) elements.append(stripped_line_split[0]) else: if begin_coord: all_coordinates.append(coordinates) all_elements.append(elements) elements = [] coordinates = [] begin_coord = False if begin_coord: all_coordinates.append(coordinates) all_elements.append(elements) return all_coordinates, all_elements
[docs] def embed_conformer(mol: Chem.Mol, attempts: int=50, MMFF_optimize: bool=False, random_seed=-1): """ Embed a mol object into a 3D RDKit mol object with ETKDG (and optional MMFF94). Parameters ---------- mol : Chem.Mol RDKit Mol object. attempts : int, optional Number of embedding attempts. Default is 50. MMFF_optimize : bool, optional Whether to optimize embedded conformer with MMFF94. Default is ``False``. random_seed: int, optional Seed for RDKit's EmbedMolecule. -1 means no seed, otherwise must be positive. Returns ------- Chem.Mol or None RDKit mol object with 3D coordinates, or ``None`` if embedding fails. """ try: mol = Chem.AddHs(mol) AllChem.EmbedMolecule(mol, maxAttempts = attempts, randomSeed = random_seed) if MMFF_optimize: AllChem.MMFFOptimizeMolecule(mol) mol.GetConformer() # test whether conformer generation succeeded except Exception: return None return mol
[docs] def embed_conformer_from_smiles(smiles: str, attempts: int = 50, MMFF_optimize: bool = False, random_seed: int = -1): """ Embed a SMILES into a 3D RDKit mol object with ETKDG (and optionally MMFF94). Parameters ---------- smiles : str SMILES string of molecule. attempts : int, optional Number of embedding attempts. Default is 50. MMFF_optimize : bool, optional Whether to optimize embedded conformer with MMFF94. Default is ``False``. random_seed: int, optional Seed for RDKit's EmbedMolecule. -1 means no seed, otherwise must be positive. Returns ------- Chem.Mol or None RDKit mol object with 3D coordinates, or ``None`` if embedding fails. """ try: mol = Chem.MolFromSmiles(smiles) except Exception as e: print('Error in SMILES string when embedding molecule:', e) return None mol = embed_conformer(mol, attempts, MMFF_optimize, random_seed) return mol
[docs] def conf_to_mol(mol, conf_id): """ Convert a conformer of a RDKit mol object into its own RDKit mol object. Parameters ---------- mol : Chem.Mol RDKit mol object with multiple conformers. conf_id : int ID of conformer to be converted into its own mol object. Returns ------- Chem.Mol Mol object with only 1 conformer (the selected conformer). """ conf = mol.GetConformer(conf_id) new_mol = Chem.Mol(mol) new_mol.RemoveAllConformers() new_mol.AddConformer(Chem.Conformer(conf)) return new_mol
[docs] def generate_conformer_ensemble(mol_3d: Chem.Mol, num_confs: int=100, num_threads: int = 4, threshold: float = 0.25, num_opt_steps: int = 200): """ Use ETKDG algorithm to embed multiple conformers from a given 3D conformer template. Optionally optimizes each embedded conformer with MMFF94. Parameters ---------- mol_3d : Chem.Mol RDKit mol object with 3D coordinates. num_confs : int, optional Maximum number of conformers to be embedded with ETKDG. Default is 100. num_threads : int, optional Number of processors to be used in parallel when embedding conformers. Default is 4. threshold : float, optional RMSD threshold used to eliminate redundant conformers after ETKDG embedding. Default is 0.25. num_opt_steps : int, optional Number of MMFF94 optimization steps. Default is 200. Returns ------- list List of mol objects, each containing 1 (unique) conformer. """ mol_3d = deepcopy(mol_3d) cids = AllChem.EmbedMultipleConfs( mol_3d, clearConfs=True, numConfs=num_confs, pruneRmsThresh = threshold, maxAttempts = 50, numThreads = num_threads, ) if num_opt_steps > 0: for cid in cids: AllChem.MMFFOptimizeMolecule( mol_3d, confId=cid, mmffVariant='MMFF94', maxIters = num_opt_steps, ) mols = [conf_to_mol(mol_3d, c) for c in cids] return mols
[docs] def cluster_conformers_butina(conformers, threshold: float = 0.2, num_max_conformers: int = 100): """ Cluster a list of conformers by their pairwise RMSD with Butina Clustering algorithm. Parameters ---------- conformers : list List of rdkit mol objects containing conformers of a common molecule to be clustered. threshold : float, optional Initial RMSD threshold for clustering. Default is 0.2. num_max_conformers : int, optional Maximum number of conformers in the final clustered ensemble. Default is 100. Returns ------- list List of int indices of the centroids of each cluster, to be indexed into conformers. """ dists = [] for i, conformer_i in enumerate(conformers): for j in range(i): mol_i = Chem.RemoveHs(deepcopy(conformer_i)) mol_j = Chem.RemoveHs(deepcopy(conformers[j])) dists.append(rdMolAlign.GetBestRMS(mol_i, mol_j)) clusts = Butina.ClusterData(dists, len(conformers), threshold, isDistData=True, reordering=True) if isinstance(num_max_conformers, int): while len(clusts) > num_max_conformers: threshold += 0.1 clusts = Butina.ClusterData(dists, len(conformers), threshold, isDistData=True, reordering=True) select_confs = [c[0] for c in clusts] # picking centroids of each cluster return select_confs
[docs] def optimize_conformer_with_xtb(conformer: Chem.Mol, solvent: Optional[str] = None, num_cores: int = 1, charge: int = 0, temp_dir: Union[str, Path] = TMPDIR): """ Use external calls to GFN2-XTB (command line) to optimize a conformer geometry. Parameters ---------- conformer : Chem.Mol RDKit mol object containing 3D coordinates. solvent : str, optional Implicit solvent to be used during optimization. Must be a solvent supported by XTB (https://xtb-docs.readthedocs.io/en/latest/gbsa.html). Default is ``None``. num_cores : int, optional Number of CPU cores to be used in the xTB geometry optimization. Default is 1. charge : int, optional Molecular charge. Default is 0. temp_dir : str or Path, optional Temporary directory for I/O. Default is the system temporary directory. Returns ------- tuple (xtb_mol, energy, charges) - tuple of optimized RDKit mol object, xTB energy (in Hartrees), and partial charges (in e-). """ mol = deepcopy(conformer) with set_thread_limits(num_cores): # rand = str((os.getpid() * int(time.time())) % 123456789) rand = str(uuid.uuid4()) + ''.join(str(time.time()).split('.')[1]) out_dir = Path(f'temp_xtb_opt_{rand}/') try: if temp_dir != '': if isinstance(temp_dir, str): temp_dir = Path(temp_dir) out_dir = temp_dir / out_dir out_dir.mkdir(exist_ok=True) input_file = 'input_mol.xyz' Chem.rdmolfiles.MolToXYZFile(mol, str(out_dir/input_file)) if solvent is not None: subprocess.check_call( ['xtb', input_file, '--opt', '--alpb', solvent, '--parallel', str(num_cores), '--chrg', str(charge)], cwd = out_dir, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT, ) else: subprocess.check_call( ['xtb', input_file, '--opt', '--parallel', str(num_cores), '--chrg', str(charge)], cwd = out_dir, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT, ) xtb_coords_list, xtb_elements_list = read_multi_xyz_file(out_dir/'xtbopt.xyz') xtb_coords = xtb_coords_list[0] xtb_mol = update_mol_coordinates(mol, xtb_coords) with open(out_dir/'xtbopt.xyz', 'r') as file: lines = file.readlines() for line in lines: if 'energy' in line: numbers = re.findall(r"[-+]?(?:\d*\.\d+|\d+)", line) energy = float(numbers[0]) break with open(out_dir/'charges', 'r') as file: lines = file.readlines() charges = [0]*len(lines) for i, line in enumerate(lines): charges[i] = float(line.split()[0]) xtb_mol.GetAtomWithIdx(i).SetProp('charge', str(charges[i])) xtb_mol.SetProp("energy", str(energy)) finally: if out_dir.exists(): shutil.rmtree(out_dir) return (xtb_mol, energy, charges)
[docs] def optimize_conformer_with_xtb_from_xyz_block(xyz_block: str, solvent: Optional[str] = None, num_cores: int = 1, charge: int = 0, temp_dir: Union[str, Path] = TMPDIR): """ Use external calls to GFN2-XTB (command line) to optimize coordinates from an xyz block. Parameters ---------- xyz_block : str String of an xyz block. solvent : str, optional Implicit solvent to be used during optimization. Must be a solvent supported by XTB (https://xtb-docs.readthedocs.io/en/latest/gbsa.html). Default is ``None``. num_cores : int, optional Number of CPU cores to be used in the xTB geometry optimization. Default is 1. charge : int, optional Molecular charge. Default is 0. temp_dir : str or Path, optional Temporary directory for I/O. Default is the system temporary directory. Returns ------- tuple (xtb_xyz_block, energy, charges) - tuple of optimized xyz block string, xTB energy (in Hartrees), and partial charges (in e-). """ with set_thread_limits(num_cores): # rand = str((os.getpid() * int(time.time())) % 123456789) rand = str(uuid.uuid4()) + ''.join(str(time.time()).split('.')[1]) out_dir = Path(f'temp_xtb_opt_{rand}/') try: if temp_dir != '': if isinstance(temp_dir, str): temp_dir = Path(temp_dir) out_dir = temp_dir / out_dir out_dir.mkdir(exist_ok=True) input_file = 'input_mol.xyz' with open(out_dir/input_file, 'w') as f: f.write(xyz_block) if solvent is not None: subprocess.check_call( ['xtb', input_file, '--opt', '--alpb', solvent, '--parallel', str(num_cores), '--chrg', str(charge)], cwd = out_dir, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT, ) else: subprocess.check_call( ['xtb', input_file, '--opt', '--parallel', str(num_cores), '--chrg', str(charge)], cwd = out_dir, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT, ) opt_xyz_path = out_dir/'xtbopt.xyz' if opt_xyz_path.is_file(): with open(out_dir/'xtbopt.xyz', 'r') as file: xtb_xyz_block = file.readlines() for line in xtb_xyz_block: if 'energy' in line: numbers = re.findall(r"[-+]?(?:\d*\.\d+|\d+)", line) energy = float(numbers[0]) break if 'energy' in xtb_xyz_block[1]: xtb_xyz_block[1] = '\n' # Replace energy line with blank xtb_xyz_block = ''.join(xtb_xyz_block) else: xtb_xyz_block = None energy = None with open(out_dir/'charges', 'r') as file: lines = file.readlines() charges = [0]*len(lines) for i, line in enumerate(lines): charges[i] = float(line.split()[0]) finally: if out_dir.exists(): shutil.rmtree(out_dir) return (xtb_xyz_block, energy, charges)
[docs] def charges_from_single_point_conformer_with_xtb(conformer: Chem.Mol, solvent: Optional[str] = None, num_cores: int = 1, charge: int = 0, temp_dir: Union[str, Path] = TMPDIR ): """ Compute atomic partial charges from a single point xTB calculation of a provided conformer. Uses external calls to GFN2-XTB (command line). Parameters ---------- conformer : Chem.Mol RDKit mol object containing 3D coordinates. solvent : str, optional Implicit solvent to be used during calculation. Must be a solvent supported by XTB (https://xtb-docs.readthedocs.io/en/latest/gbsa.html). Default is ``None``. num_cores : int, optional Number of CPU cores to be used in the xTB calculation. Default is 1. charge : int, optional Molecular charge. Default is 0. temp_dir : str or Path, optional Temporary directory for I/O. Default is the system temporary directory. Returns ------- list List of partial charges for each atom (in e-). """ mol = deepcopy(conformer) with set_thread_limits(num_cores): # rand = str((os.getpid() * int(time.time())) % 123456789) rand = str(uuid.uuid4()) + ''.join(str(time.time()).split('.')[1]) out_dir = Path(f'temp_xtb_opt_{rand}/') try: if temp_dir != '': if isinstance(temp_dir, str): temp_dir = Path(temp_dir) out_dir = temp_dir / out_dir out_dir.mkdir(exist_ok=True) input_file = 'input_mol.xyz' Chem.rdmolfiles.MolToXYZFile(mol, str(out_dir/input_file)) if solvent is not None: subprocess.check_call( ['xtb', input_file, '--scc', '--alpb', solvent, '--parallel', str(num_cores), '--chrg', str(charge)], cwd = out_dir, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT, ) else: subprocess.check_call( ['xtb', input_file, '--scc', '--parallel', str(num_cores), '--chrg', str(charge)], cwd = out_dir, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT, ) with open(out_dir/'charges', 'r') as file: lines = file.readlines() charges = [0]*len(lines) for i, line in enumerate(lines): charges[i] = float(line.split()[0]) finally: if out_dir.exists(): shutil.rmtree(out_dir) return charges
[docs] def single_point_xtb_from_xyz(xyz_block: str, solvent: Optional[str] = None, num_cores: int = 1, charge: int = 0, temp_dir: Union[str, Path] = TMPDIR): """ Compute energy and atomic partial charges from a single point xTB calculation. Uses external calls to GFN2-XTB (command line). Parameters ---------- xyz_block : str String of xyz block representing a molecule. solvent : str, optional Implicit solvent to be used during calculation. Must be a solvent supported by XTB (https://xtb-docs.readthedocs.io/en/latest/gbsa.html). Default is ``None``. num_cores : int, optional Number of CPU cores to be used in the xTB calculation. Default is 1. charge : int, optional Molecular charge. Default is 0. temp_dir : str or Path, optional Temporary directory for I/O. Default is the system temporary directory. Returns ------- energy : float xTB energy in Hartrees. charges : list List of partial charges for each atom (in e-). """ with set_thread_limits(num_cores): # rand = str((os.getpid() * int(time.time())) % 123456789) rand = str(uuid.uuid4()) + ''.join(str(time.time()).split('.')[1]) out_dir = Path(f'temp_xtb_opt_{rand}/') try: if temp_dir != '': if isinstance(temp_dir, str): temp_dir = Path(temp_dir) out_dir = temp_dir / out_dir out_dir.mkdir(exist_ok=True) input_file = 'input_mol.xyz' with open(out_dir/input_file, 'w') as f: f.write(xyz_block) if solvent is not None: output = subprocess.check_output( ['xtb', input_file, '--scc', '--alpb', solvent, '--parallel', str(num_cores), '--chrg', str(charge)], cwd = out_dir, text=True, encoding="utf-8", errors="replace", stderr=subprocess.STDOUT, ) else: output = subprocess.check_output( ['xtb', input_file, '--scc', '--parallel', str(num_cores), '--chrg', str(charge)], cwd = out_dir, text=True, encoding="utf-8", errors="replace", stderr=subprocess.STDOUT, ) output = output.split('\n') for line in output[::-1]: if 'TOTAL ENERGY' in line: numbers = re.findall(r"[-+]?(?:\d*\.\d+|\d+)", line) energy = float(numbers[0]) break with open(out_dir/'charges', 'r') as file: lines = file.readlines() charges = [0]*len(lines) for i, line in enumerate(lines): charges[i] = float(line.split()[0]) finally: if out_dir.exists(): shutil.rmtree(out_dir) return energy, charges
def _optimize_conformer_worker(params): """Worker function for multiprocessing - must be at module level for pickling""" return optimize_conformer_with_xtb(*params)
[docs] def optimize_conformer_ensemble_with_xtb(conformers: List[Chem.Mol], solvent: Optional[str] = None, num_processes: int = 1, num_workers: int = 1, charge: int = 0, temp_dir: Union[str, Path] = TMPDIR, verbose: bool = False): """ GFN2-XTB geometry optimization for a list of conformers. Parameters ---------- conformers : list List of RDKit Mol objects (with 3D coordinates) to be optimized. solvent : str, optional Implicit solvent supported by XTB (https://xtb-docs.readthedocs.io/en/latest/gbsa.html). Default is ``None``. num_processes : int, optional Number of CPU cores used per xTB optimization. Default is 1. num_workers : int, optional Number of parallel workers (processes) to distribute conformers across. Ensure num_workers * num_processes <= available CPUs to avoid oversubscription. Default is 1. charge : int, optional Molecular charge. RDKit will be used to compute the formal charge if len(conformers) > 1. Default is 0. temp_dir : str or Path, optional Temporary directory for I/O. Default is the system temporary directory. verbose : bool, optional Show a simple progress bar in single-process mode. Default is ``False``. Returns ------- tuple (conformers_opt, energies_opt, charges_opt) - tuple of lists containing optimized conformers, their energies, and partial charges. """ if not conformers: return [], [], [] available_cpus = os.cpu_count() or 1 total_requested_cpus = max(1, num_workers) * max(1, num_processes) if total_requested_cpus > available_cpus: raise ValueError( f"Requested num_workers * num_processes = {total_requested_cpus} exceeds available CPUs ({available_cpus})." ) # Serial path (single conformer or single worker) if len(conformers) == 1 or num_workers <= 1: conformers_opt = [] energies_opt = [] charges_opt = [] iterator = tqdm(conformers, total=len(conformers), desc='XTB opt') if verbose else conformers for conf in iterator: opt_conf, opt_energy, opt_charges = optimize_conformer_with_xtb( conformer=conf, solvent=solvent, num_cores=num_processes, charge=charge, temp_dir=temp_dir, ) conformers_opt.append(opt_conf) energies_opt.append(opt_energy) charges_opt.append(opt_charges) return conformers_opt, energies_opt, charges_opt # Parallel ctx = multiprocessing.get_context('spawn') args = [ (conf, solvent, num_processes, Chem.GetFormalCharge(conf), temp_dir) for conf in conformers ] with ctx.Pool(processes=num_workers) as pool: iterator = pool.imap(_optimize_conformer_worker, args) if verbose: iterator = tqdm(iterator, total=len(args), desc='XTB opt') results = list(iterator) conformers_opt, energies_opt, charges_opt = zip(*results) return list(conformers_opt), list(energies_opt), list(charges_opt)
[docs] def generate_opt_conformers_xtb(smiles: str, charge: int = 0, solvent: Optional[str]=None, MMFF_optimize: bool = True, num_processes: int = 1, num_workers: int = 1, temp_dir: Union[str, Path] = TMPDIR, verbose: bool = False, num_confs: int = 1000): """ Generate conformer ensemble with RDKit then relax with xTB. Parameters ---------- smiles : str SMILES string of the molecule. charge : int, optional Molecular charge. Default is 0. solvent : str, optional Implicit solvent to be used during optimization. Must be a solvent supported by XTB (https://xtb-docs.readthedocs.io/en/latest/gbsa.html). Default is ``None``. MMFF_optimize : bool, optional Optimize RDKit embedded molecules with MMFF94. Default is ``True``. num_processes : int, optional Number of CPU cores to be used in the xTB geometry optimization. Default is 1. num_workers : int, optional Number of parallel workers (processes) to distribute conformers across. Ensure num_workers * num_processes <= available CPUs to avoid oversubscription. Default is 1. temp_dir : str or Path, optional Temporary directory for I/O. Default is the system temporary directory. verbose : bool, optional Toggle tqdm progress bar. Default is ``False``. num_confs : int, optional Number of conformers to initially generate. Default is 1000. Returns ------- clustered_conformers_xtb : list List of rdkit conformers after xTB relaxation and clustering. clustered_energies_xtb : list List of energies for associated conformers. clustered_charges_xtb : list List of partial charges for associated conformers. """ available_cpus = os.cpu_count() or 1 total_requested_cpus = max(1, num_workers) * max(1, num_processes) if total_requested_cpus > available_cpus: raise ValueError( f"Requested num_workers * num_processes = {total_requested_cpus} exceeds available CPUs ({available_cpus})." ) mol_3d = embed_conformer_from_smiles(smiles, attempts = 50, MMFF_optimize = MMFF_optimize) if mol_3d is None: return None, None, None conformer_ensemble = generate_conformer_ensemble( mol_3d, num_confs=num_confs, num_threads = 4, threshold = 0.05, num_opt_steps = 50, ) if verbose: print(f'Num conformers generated: \t{len(conformer_ensemble)}') clustered_conformers_index = cluster_conformers_butina( conformer_ensemble, threshold = 0.1, num_max_conformers = None, ) clustered_conformers = [conformer_ensemble[c] for c in clustered_conformers_index] if verbose: print(f'Num conformers remaining: \t{len(clustered_conformers)}') # further optimizing each conformer with GFN2-XTB optimized_conformers_xtb, optimized_energies_xtb, optimized_charges_xtb = optimize_conformer_ensemble_with_xtb( clustered_conformers, solvent = solvent, num_processes = num_processes, num_workers = num_workers, charge=charge, temp_dir = temp_dir, verbose=verbose ) if verbose: print(f'Num optimized confomers with xtb: \t{len(optimized_conformers_xtb)}') clustered_conformers_xtb_index = cluster_conformers_butina( optimized_conformers_xtb, threshold = 0.1, num_max_conformers = None, ) clustered_conformers_xtb = [optimized_conformers_xtb[c] for c in clustered_conformers_xtb_index] clustered_charges_xtb = [optimized_charges_xtb[c] for c in clustered_conformers_xtb_index] clustered_energies_xtb = [optimized_energies_xtb[c] for c in clustered_conformers_xtb_index] if verbose: print(f'Num conformers remaining after xtb opt and cluster: \t{len(clustered_conformers_xtb)}') return clustered_conformers_xtb, clustered_energies_xtb, clustered_charges_xtb
[docs] def generate_opt_conformers(smiles: str, MMFF_optimize: bool = True, verbose: bool = False, num_confs: int = 1000): """ Generate optimal conformers with RDKit (MMFF94). Parameters ---------- smiles : str SMILES string of the molecule. MMFF_optimize : bool, optional Optimize RDKit embedded molecules with MMFF94. Default is ``True``. verbose : bool, optional Toggle tqdm progress bar. Default is ``False``. num_confs : int, optional Number of conformers to initially generate. Default is 1000. Returns ------- list List of clustered rdkit conformers after RDKit embedding and optional MMFF relaxation. """ mol_3d = embed_conformer_from_smiles(smiles, attempts = 50, MMFF_optimize = MMFF_optimize) if mol_3d is None: return None, None, None conformer_ensemble = generate_conformer_ensemble( mol_3d, num_confs=num_confs, num_threads = 4, threshold = 0.05, num_opt_steps = 50, ) if verbose: print(f'Num conformers generated: \t{len(conformer_ensemble)}') clustered_conformers_index = cluster_conformers_butina( conformer_ensemble, threshold = 0.1, num_max_conformers = None, ) clustered_conformers = [conformer_ensemble[c] for c in clustered_conformers_index] if verbose: print(f'Num conformers remaining: \t{len(clustered_conformers)}') return clustered_conformers