Hi all,
I’m trying to use the dgl-lifesci example on this page to train a molecule-generation model on my own data. My environment is:
pytorch version: 1.9.0
rdkit version: 2018.09.3.0
Running the command below
%run train.py -d none -o random -tf smiles.csv -vf val.csv
leads to the following error message:
Prepare logging directory...
Created directory ./training_results/none_random_2021-09-20_14-23-53
Saved settings to ./training_results/none_random_2021-09-20_14-23-53/settings.txt
Configure for new dataset none...
Processing smiles 1/96
---------------------------------------------------------------------------
ArgumentError Traceback (most recent call last)
~/dgllife/train.py in <module>
178
179 args = parser.parse_args()
--> 180 args = setup(args, train=True)
181
182 if args['num_processes'] == 1:
~/dgllife/utils.py in setup(args, train)
183
184 if train:
--> 185 setup_dataset(args)
186 args['checkpoint_dir'] = os.path.join(log_dir, 'checkpoint.pth')
187 pprint(args)
~/dgllife/utils.py in setup_dataset(args)
159 else:
160 print('Configure for new dataset {}...'.format(args['dataset']))
--> 161 configure_new_dataset(args['dataset'], args['train_file'], args['val_file'])
162
163 def setup(args, train=True):
~/dgllife/utils.py in configure_new_dataset(dataset, train_file, val_file)
670 path_to_atom_and_bond_types = '_'.join([dataset, 'atom_and_bond_types.pkl'])
671 if not os.path.exists(path_to_atom_and_bond_types):
--> 672 atom_types, bond_types = get_atom_and_bond_types(all_smiles)
673 with open(path_to_atom_and_bond_types, 'wb') as f:
674 pickle.dump({'atom_types': atom_types, 'bond_types': bond_types}, f)
~/dgllife/utils.py in get_atom_and_bond_types(smiles, log)
454 print('Processing smiles {:d}/{:d}'.format(i + 1, n_smiles))
455
--> 456 mol = smiles_to_standard_mol(s)
457 if mol is None:
458 continue
~/dgllife/utils.py in smiles_to_standard_mol(s)
411 """
412 mol = Chem.MolFromSmiles(s)
--> 413 return standardize_mol(mol)
414
415 def mol_to_standard_smile(mol):
~/dgllife/utils.py in standardize_mol(mol)
393 """
394 reactions = initialize_neuralization_reactions()
--> 395 Chem.Kekulize(mol, clearAromaticFlags=True)
396 mol = neutralize_charges(mol, reactions)
397 return mol
ArgumentError: Python argument types in
rdkit.Chem.rdmolops.Kekulize(NoneType)
did not match C++ signature:
Kekulize(RDKit::ROMol {lvalue} mol, bool clearAromaticFlags=False)
Am I doing something wrong or leaving out some important information?