Create a fingerprint database file
Use the FPSim2.io.create_db_file function to create the fingerprint database file needed to run the searches.
Warning
FPSim2 only supports integer molecule ids.
The fingerprints are calculated with RDKit. Several fingerprint types are available (see the FPSim2 documentation for the full list); the examples below use Morgan.
From the command line
Supports .smi files as input and runs in parallel.
# Build fp_db.h5 from smiles_file.smi using Morgan fingerprints, with 32 processes.
fpsim2-create-db smiles_file.smi fp_db.h5 --fp_type Morgan --fp_params '{"radius": 2, "fpSize": 256}' --processes 32
As a Python library
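Does not run in parallel. The examples below build the database from an .sdf file, a .smi file, a Python list, and a SQLAlchemy result proxy, in that order.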
from FPSim2.io import create_db_file

# Build the fingerprint database from an SDF file.
create_db_file(
    mols_source='sdf_file.sdf',
    filename='fp_db.h5',
    mol_format=None,  # not required
    fp_type='Morgan',
    fp_params={'radius': 2, 'fpSize': 2048},
    # Presumably the SDF property holding each molecule's id (ids must be
    # integers) -- confirm against your file.
    mol_id_prop='mol_id',
)
from FPSim2.io import create_db_file

# Build the fingerprint database from a .smi file.
create_db_file(
    mols_source='smiles_file.smi',
    filename='fp_db.h5',
    mol_format=None,  # not required
    fp_type='Morgan',
    fp_params={'radius': 2, 'fpSize': 2048},
)
from FPSim2.io import create_db_file

# Build the fingerprint database from an in-memory list.
# Each entry pairs a SMILES string with its integer molecule id.
mols = [
    ['CC', 1],
    ['CCC', 2],
    ['CCCC', 3],
]

create_db_file(
    mols_source=mols,
    filename='fp_db.h5',
    mol_format='smiles',  # required
    fp_type='Morgan',
    fp_params={'radius': 2, 'fpSize': 2048},
)
From a database query, using a SQLAlchemy result proxy as an example:
from FPSim2.io import create_db_file
from sqlalchemy import create_engine, text

# Build the fingerprint database from the rows of a SQL query.
engine = create_engine('sqlite:///test/test.db')
with engine.connect() as conn:
    # Each row yields the molfile first and the molecule id second.
    sql_query = text("select molfile, mol_id from structure")
    res_prox = conn.execute(sql_query)

    # Stay inside the `with` block: the result is read lazily, so the
    # connection must still be open while create_db_file iterates over it.
    create_db_file(
        mols_source=res_prox,
        filename='fp_db.h5',
        mol_format='molfile',  # required
        fp_type='Morgan',
        fp_params={'radius': 2, 'fpSize': 2048},
    )