# Source code for add_dti_annotations

"""
Add DTI (Drug-Target Interaction) Annotations to the dataset.
"""

from dataset import Dataset


########### DTI (Drug-Target Interaction) Annotations ###########
def add_dti_annotations(
    dataset: Dataset,
) -> None:
    """
    Assign a DTI (drug-target interaction) annotation to every compound-target pair.

    The assignment is based on three questions:

    - Is the compound-target pair in the drug_mechanisms table?
      = Is it a known relevant compound-target interaction?
    - What is the max_phase of the compound?
      = Is it a drug / clinical compound?
    - Is the target in the drug_mechanisms table?
      = Is it a therapeutic target?

    The assignments are based on the following table:

    +------------+----------+-----------+-----------+---------------------------------------------+
    |in DM table?|max_phase?|th. target?|DTI        |explanation                                  |
    +============+==========+===========+===========+=============================================+
    | yes        | 4        | --        | D_DT [#]_ | drug - drug target                          |
    +------------+----------+-----------+-----------+---------------------------------------------+
    | yes        | 3        | --        | C3_DT     | clinical candidate in phase 3 - drug target |
    +------------+----------+-----------+-----------+---------------------------------------------+
    | yes        | 2        | --        | C2_DT     | clinical candidate in phase 2 - drug target |
    +------------+----------+-----------+-----------+---------------------------------------------+
    | yes        | 1        | --        | C1_DT     | clinical candidate in phase 1 - drug target |
    +------------+----------+-----------+-----------+---------------------------------------------+
    | yes        | <1       | --        | C0_DT     |compound in unknown phase [#]_ - drug target |
    +------------+----------+-----------+-----------+---------------------------------------------+
    | no         | --       | yes       | DT        | drug target                                 |
    +------------+----------+-----------+-----------+---------------------------------------------+
    | no         | --       | no        | NDT       | not drug target                             |
    +------------+----------+-----------+-----------+---------------------------------------------+

    .. [#] The annotation D_DT instead of C4_DT was chosen to be consistent
       with the annotations in a previous version of the dataset.

       For the same reason the column is named DTI (drug-target interaction)
       instead of CTI (compound-target interaction)
       despite having specific annotations for clinical candidates.

    .. [#] C0_DT groups together all compounds with a max_phase not between 1 and 4.
       Since ChEMBL32 there are three possible annotations in ChEMBL
       with a max_phase value not between 1 and 4:

       - 0.5 = early phase 1 clinical trials
       - -1 = clinical phase unknown for drug or clinical candidate drug,
         i.e., where ChEMBL cannot assign a clinical phase
       - NULL = preclinical compounds with bioactivity data

       All three are grouped together into the annotation C0_DT.

    Compound-target pairs that were annotated with NDT,
    i.e., compound-target pairs that are not in the drug_mechanisms table
    and for which the target was also not in the drug_mechanisms table
    (not a comparator compound), are discarded.

    The function works through side effects: the columns *therapeutic_target*
    and *DTI* are added to ``dataset.df_result``, and ``dataset.df_result`` is
    then rebound to the rows whose annotation is not NDT.

    :param dataset: Dataset with all relevant information:

        - Pandas DataFrame with compound-target pairs
          based on activities AND drug_mechanism table
          (read columns: ``tid``, ``cpd_target_pair``, ``max_phase``)
        - set of compound-target pairs in the drug_mechanism table
        - set of targets in the drug_mechanism table
    :type dataset: Dataset
    """
    df_result = dataset.df_result

    # Add a new column *therapeutic_target* which is set to True
    # if the target is in the drug_mechanism table
    df_result["therapeutic_target"] = df_result["tid"].isin(
        dataset.drug_mechanism_targets_set
    )
    is_therapeutic_target = df_result["therapeutic_target"]

    # Membership of the pair in the drug_mechanism table is needed by every rule
    # below; compute it once instead of rescanning the frame for each rule.
    in_dm_table = df_result["cpd_target_pair"].isin(dataset.drug_mechanism_pairs_set)
    max_phase = df_result["max_phase"]

    # Assign the annotations based on the table.
    # Compound-target pairs from the drug mechanism table with a phase of 1-4
    phase_annotations = {4: "D_DT", 3: "C3_DT", 2: "C2_DT", 1: "C1_DT"}
    for phase, annotation in phase_annotations.items():
        df_result.loc[in_dm_table & (max_phase == phase), "DTI"] = annotation

    # Compounds that are in the drug_mechanism table
    # but don't have a known phase between 1-4 (e.g. 0.5, -1 or missing)
    df_result.loc[
        in_dm_table & ~max_phase.isin(list(phase_annotations)),
        "DTI",
    ] = "C0_DT"

    # Pair is not in the drug mechanism table, but the target is
    df_result.loc[~in_dm_table & is_therapeutic_target, "DTI"] = "DT"

    # Other compound-target pairs.
    # Original author's note: if the target is not a therapeutic target, the pair
    # cannot be in drug_mechanism_pairs_set; ~in_dm_table is included for clarity.
    df_result.loc[~in_dm_table & ~is_therapeutic_target, "DTI"] = "NDT"

    # Discard NDT rows by keeping only the annotations of interest
    kept_annotations = ["D_DT", "C3_DT", "C2_DT", "C1_DT", "C0_DT", "DT"]
    dataset.df_result = df_result[df_result["DTI"].isin(kept_annotations)]