Source code for qmdesc.handler

"""
This module defines the ReactivityDescriptorHandler for use in Torchserve.
"""
from typing import Dict

import pkg_resources
import torch
from qmdesc.featurization import mol2graph, get_atom_fdim, get_bond_fdim
from rdkit import Chem

class ReactivityDescriptorHandler():
    '''
    Wrap the trained atom-bond qm descriptors predicting model

    Predict QM descriptors for a given SMILES string of organic compound
    containing C, H, O, N, P, S, F, Cl, Br, I, B

    Example:
        >>> from qmdesc import ReactivityDescriptorHandler
        >>> handler = ReactivityDescriptorHandler()
        >>> results = handler.predict('CCCC')
    '''

    def __init__(self):
        """
        ReactivityDescriptorHandler constructor.

        Loads the packaged ``QM_137k.pt`` checkpoint, rebuilds the
        ``MoleculeModel`` from the stored ``args`` and ``state_dict``, moves it
        to CUDA when available (CPU otherwise) and switches it to eval mode.
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model_pt_path = "QM_137k.pt"

        # Imported here rather than at module level, as in the original code.
        from qmdesc.model import MoleculeModel

        # Load model and args. The stream is closed as soon as the checkpoint
        # has been read so the file handle does not leak.
        # NOTE(review): the checkpoint stores an ``args`` object next to the
        # weights; confirm it still loads under the installed torch version's
        # ``torch.load`` defaults.
        with pkg_resources.resource_stream(__name__, model_pt_path) as stream:
            # Always deserialize onto CPU storage; the model is moved to
            # ``self.device`` explicitly below.
            state = torch.load(stream, map_location=lambda storage, loc: storage)
        args, loaded_state_dict = state['args'], state['state_dict']

        atom_fdim = get_atom_fdim()
        bond_fdim = get_bond_fdim() + atom_fdim

        self.model = MoleculeModel(args, atom_fdim, bond_fdim)
        self.model.load_state_dict(loaded_state_dict)
        self.model.to(self.device)
        self.model.eval()

        # Attribute name (including its spelling) is kept unchanged so that
        # external code reading it keeps working.
        self.initalized = True

    def _preprocess(self, smiles: str):
        """
        Preprocess SMILES

        :param smiles: SMILES input forwarded to ``mol2graph``
            (``predict`` passes a one-element list holding the SMILES string)
        :return: the molecular graph components, with the tensors moved to
            ``self.device``; ``a_scope`` and ``b_scope`` are returned as-is
        """
        mol_graph = mol2graph(smiles)
        f_atoms, f_bonds, a2b, b2a, b2revb, a_scope, b_scope, b2br, bond_types = mol_graph.get_components()

        f_atoms, f_bonds, a2b, b2a, b2revb, b2br, bond_types = \
            f_atoms.to(self.device), f_bonds.to(self.device), a2b.to(self.device), b2a.to(self.device), \
            b2revb.to(self.device), b2br.to(self.device), bond_types.to(self.device)

        return f_atoms, f_bonds, a2b, b2a, b2revb, a_scope, b_scope, b2br, bond_types

    def _inference(self, data):
        """
        model prediction

        :param data: molecular graph components as returned by ``_preprocess``
        :return: The output of the model
        """
        descs = self.model(data)

        return descs

    def _postprocess(self, inference_output) -> Dict:
        """
        Postprocess results

        :param inference_output: dict with the input ``'smiles'`` and the model
            output ``'descs'`` (six tensors: partial charge, two Fukui-type
            descriptors, NMR, bond order, bond distance -- in that order)
        :return: dict mapping descriptor names to flattened numpy arrays, plus
            the original ``'smiles'`` entry
        """
        smiles = inference_output['smiles']
        descs = inference_output['descs']

        # Detach from the graph and bring everything back to host memory.
        descs = [x.data.cpu().numpy() for x in descs]

        partial_charge, partial_neu, partial_elec, NMR, bond_order, bond_distance = descs

        results = {'smiles': smiles, 'partial_charge': partial_charge.flatten(),
                   'fukui_neu': partial_neu.flatten(), 'fukui_elec': partial_elec.flatten(),
                   'NMR': NMR.flatten(), 'bond_order': bond_order.flatten(),
                   'bond_length': bond_distance.flatten()}

        return results

    @staticmethod
    def _sdf_title(sdf: str) -> str:
        """
        Return the molecule title for an output file name: *sdf* without its
        trailing ``'.sdf'`` extension.

        ``str.strip('.sdf')`` must not be used for this: it removes any of the
        characters ``.``, ``s``, ``d``, ``f`` from both ends of the string
        (``'sdf_file.sdf'`` would become ``'_file'``).
        """
        suffix = '.sdf'
        if sdf.endswith(suffix):
            return sdf[:-len(suffix)]
        return sdf

    def predict(self, smiles: str, sdf: str = None) -> Dict:
        """
        Wrap the preprocess, inference, and postprocess

        :param smiles: Input SMILES string
        :param sdf: Output .sdf file; when given, the descriptors are also
            written to it as molecule properties. A name that does not end
            with ``'.sdf'`` is rejected with a printed message and nothing is
            written.
        :return: A dictionary containing the prediction result
        """
        outputs = self._inference(self._preprocess([smiles]))
        postprocess_inputs = {'smiles': smiles, 'descs': outputs}
        results = self._postprocess(postprocess_inputs)

        if sdf is not None:
            if not sdf.endswith('.sdf'):
                print('must provide a sdf name end up with \'.sdf\'')
                return results

            m = Chem.MolFromSmiles(smiles)
            m = Chem.AddHs(m)

            # Store every result as an upper-cased SD property; array-valued
            # descriptors are serialized as comma-separated strings.
            for p in results:
                p_upper = p.upper()
                if p == 'smiles':
                    m.SetProp(p_upper, results[p])
                else:
                    m.SetProp(p_upper, ','.join(str(x) for x in results[p]))

            m.SetProp('_Name', self._sdf_title(sdf))

            writer = Chem.SDWriter(sdf)
            try:
                writer.write(m)
            finally:
                # Closing flushes the record to disk and releases the file.
                writer.close()

        return results
def qmdesc() -> None:
    """
    Console-script entry point for the :code:`qmdesc` command.

    Reads one positional SMILES string and an optional ``--sdf`` output path
    (default ``qmdesc.sdf``) from the command line, then runs the descriptor
    prediction and lets the handler write the result to that file.

    Example:
        $ qmdesc CCCC --sdf CCCC.sdf
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('smiles', type=str, help='Input smiles string')
    cli.add_argument(
        '--sdf',
        default='qmdesc.sdf',
        type=str,
        help='output sdf saving the qm descriptors',
    )
    options = cli.parse_args()

    # The returned dictionary is not used here; the SDF file written by
    # ``predict`` is the command's output.
    handler = ReactivityDescriptorHandler()
    handler.predict(options.smiles, sdf=options.sdf)