Cancer Cell Lines and Drug Dose-Response Curves

In [2]:
import pandas as pd
import numpy as np
In [3]:
# Load the cell-line model metadata table (Model.csv) into a DataFrame.
model_csv_path = r"C:\Users\QBPAM\Downloads\'25 summer BigData AI Cancer class by Yongmei Wang\Model.csv"
CCLE_all = pd.read_csv(filepath_or_buffer=model_csv_path)
CCLE_all    # one row per cell line; the displayed table reports 2116 rows
Out[3]:
ModelID PatientID CellLineName StrippedCellLineName DepmapModelType OncotreeLineage OncotreePrimaryDisease OncotreeSubtype OncotreeCode PatientSubtypeFeatures ... PublicComments CCLEName HCMIID PediatricModelType ModelAvailableInDbgap ModelSubtypeFeatures WTSIMasterCellID SangerModelID COSMICID ModelIDAlias
0 ACH-000001 PT-gj46wT NIH:OVCAR-3 NIHOVCAR3 HGSOC Ovary/Fallopian Tube Ovarian Epithelial Tumor High-Grade Serous Ovarian Cancer HGSOC NaN ... NaN NIHOVCAR3_OVARY NaN False Approved for public sharing - CCLE NaN 2201.0 SIDM00105 905933.0 NaN
1 ACH-000002 PT-5qa3uk HL-60 HL60 AMLMRC Myeloid Acute Myeloid Leukemia AML with Myelodysplasia-Related Changes AMLMRC TP53(del), CDKN2A and NRAS mutations [PubMed=2... ... NaN HL60_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE NaN True Approved for public sharing - CCLE NRAS, BCOR and CDKN2A 55.0 SIDM00829 905938.0 NaN
2 ACH-000003 PT-puKIyc CACO2 CACO2 COAD Bowel Colorectal Adenocarcinoma Colon Adenocarcinoma COAD NaN ... NaN CACO2_LARGE_INTESTINE NaN False Approved for public sharing - CCLE NaN NaN SIDM00891 NaN NaN
3 ACH-000004 PT-q4K2cp HEL HEL AMLNOS Myeloid Acute Myeloid Leukemia AML, NOS AMLNOS JAK2 and TP53 mutations ... NaN HEL_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE NaN True Approved for public sharing - CCLE JAK2 and TP53 783.0 SIDM00594 907053.0 NaN
4 ACH-000005 PT-q4K2cp HEL 92.1.7 HEL9217 AML Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia AML JAK2 and TP53 mutations ... NaN HEL9217_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE NaN True Approved for public sharing - CCLE NaN NaN SIDM00593 NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2111 ACH-003473 PT-fG5tCh CCLF_PEDS_0013_T CCLFPEDS0013T ERMS Soft Tissue Rhabdomyosarcoma Embryonal Rhabdomyosarcoma ERMS NaN ... NaN NaN HCM-BROD-0006-C49 True NaN NaN NaN NaN NaN NaN
2112 ACH-003474 PT-WxfjG3 CCLF_HNSC_0001_T CCLFHNSC0001T HNSC Head and Neck Head and Neck Squamous Cell Carcinoma Head and Neck Squamous Cell Carcinoma HNSC NaN ... NaN NaN HCM-BROD-1131-C06 False NaN NaN NaN NaN NaN NaN
2113 ACH-003475 PT-ce6oqw CCLF_HNSC_0003_T CCLFHNSC0003T HNSC Head and Neck Head and Neck Squamous Cell Carcinoma Head and Neck Squamous Cell Carcinoma HNSC NaN ... NaN NaN NaN False NaN NaN NaN NaN NaN NaN
2114 ACH-003476 PT-ce6oqw CCLF_HNSC_0002_T CCLFHNSC0002T ESCC Esophagus/Stomach Esophageal Squamous Cell Carcinoma Esophageal Squamous Cell Carcinoma ESCC NaN ... NaN NaN HCM-BROD-1130-C06 False NaN NaN NaN NaN NaN NaN
2115 ACH-003480 PT-D6v5Dz CCLF_THYR_0001_T CCLFTHYR0001T THAP Thyroid Anaplastic Thyroid Cancer Anaplastic Thyroid Cancer THAP NaN ... NaN NaN NaN False NaN NaN NaN NaN NaN NaN

2116 rows × 49 columns

In [4]:
# Keep only the models whose OncotreeLineage is 'Breast'.
is_breast = CCLE_all['OncotreeLineage'].isin(['Breast'])
CCLE_breast = CCLE_all[is_breast]
CCLE_breast
Out[4]:
ModelID PatientID CellLineName StrippedCellLineName DepmapModelType OncotreeLineage OncotreePrimaryDisease OncotreeSubtype OncotreeCode PatientSubtypeFeatures ... PublicComments CCLEName HCMIID PediatricModelType ModelAvailableInDbgap ModelSubtypeFeatures WTSIMasterCellID SangerModelID COSMICID ModelIDAlias
15 ACH-000017 PT-8CE6ah SK-BR-3 SKBR3 BRCA Breast Invasive Breast Carcinoma Invasive Breast Carcinoma BRCA NaN ... NaN SKBR3_BREAST NaN False Approved for public sharing - CCLE HER2+ NaN SIDM00897 NaN NaN
17 ACH-000019 PT-viJKnw MCF7 MCF7 BRCA Breast Invasive Breast Carcinoma Invasive Breast Carcinoma BRCA NaN ... NaN MCF7_BREAST NaN False Approved for public sharing - CCLE ER+ 588.0 SIDM00148 905946.0 NaN
26 ACH-000028 PT-viJKnw KPL-1 KPL1 BRCA Breast Invasive Breast Carcinoma Invasive Breast Carcinoma BRCA NaN ... SNP fingerprinting confirms that this line is ... KPL1_BREAST NaN False Approved for public sharing - CCLE ER+ NaN SIDM00147 NaN NaN
42 ACH-000044 PT-HMBfbj MDA-MB-134-VI MDAMB134VI ILC Breast Invasive Breast Carcinoma Breast Invasive Lobular Carcinoma ILC NaN ... NaN MDAMB134VI_BREAST NaN False Approved for public sharing - CCLE luminal ER+ NaN SIDM00005 NaN NaN
95 ACH-000097 PT-k1TO7o ZR-75-1 ZR751 IDC Breast Invasive Breast Carcinoma Breast Invasive Ductal Carcinoma IDC NaN ... NaN ZR751_BREAST NaN False Approved for public sharing - CCLE luminal ER, PR+ NaN SIDM00314 NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2003 ACH-002884 PT-N65Hvq IPM-BO-053R IPMBO053R ILC Breast Invasive Breast Carcinoma Breast Invasive Lobular Carcinoma ILC NaN ... NaN NaN NaN False NaN HER2+ NaN NaN NaN NaN
2004 ACH-002885 PT-pPJgGC IPM-BO-056 IPMBO056 IDC Breast Invasive Breast Carcinoma Breast Invasive Ductal Carcinoma IDC NaN ... NaN NaN NaN False NaN ER, PR+ NaN NaN NaN NaN
2006 ACH-002921 PT-NvrCXN BCK4 BCK4 ILC Breast Invasive Breast Carcinoma Breast Invasive Lobular Carcinoma ILC NaN ... NaN NaN NaN False In Progress NaN NaN NaN NaN NaN
2018 ACH-002950 PT-hxORJf NH93T NH93T BRCA Breast Invasive Breast Carcinoma Invasive Breast Carcinoma BRCA NaN ... NaN NaN NaN False NaN TNBC NaN NaN NaN NaN
2019 ACH-002951 PT-URKT9Y NH84T NH84T BRCA Breast Invasive Breast Carcinoma Invasive Breast Carcinoma BRCA NaN ... NaN NaN NaN False NaN TNBC NaN NaN NaN NaN

96 rows × 49 columns

In [5]:
# Keep only the models whose OncotreeLineage is 'Myeloid'.
# NOTE(review): despite the "AML" in the variable name, the filter selects the
# whole Myeloid lineage, not only acute myeloid leukemia — confirm that this
# broader scope is intended.
is_myeloid = CCLE_all['OncotreeLineage'].isin(['Myeloid'])
CCLE_AML = CCLE_all[is_myeloid]
CCLE_AML
Out[5]:
ModelID PatientID CellLineName StrippedCellLineName DepmapModelType OncotreeLineage OncotreePrimaryDisease OncotreeSubtype OncotreeCode PatientSubtypeFeatures ... PublicComments CCLEName HCMIID PediatricModelType ModelAvailableInDbgap ModelSubtypeFeatures WTSIMasterCellID SangerModelID COSMICID ModelIDAlias
1 ACH-000002 PT-5qa3uk HL-60 HL60 AMLMRC Myeloid Acute Myeloid Leukemia AML with Myelodysplasia-Related Changes AMLMRC TP53(del), CDKN2A and NRAS mutations [PubMed=2... ... NaN HL60_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE NaN True Approved for public sharing - CCLE NRAS, BCOR and CDKN2A 55.0 SIDM00829 905938.0 NaN
3 ACH-000004 PT-q4K2cp HEL HEL AMLNOS Myeloid Acute Myeloid Leukemia AML, NOS AMLNOS JAK2 and TP53 mutations ... NaN HEL_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE NaN True Approved for public sharing - CCLE JAK2 and TP53 783.0 SIDM00594 907053.0 NaN
4 ACH-000005 PT-q4K2cp HEL 92.1.7 HEL9217 AML Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia AML JAK2 and TP53 mutations ... NaN HEL9217_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE NaN True Approved for public sharing - CCLE NaN NaN SIDM00593 NaN NaN
5 ACH-000006 PT-ej13Dz MONO-MAC-6 MONOMAC6 AML Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia AML KMT2A-MLLT3, TP53 Arg273His ... NaN MONOMAC6_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE NaN True Approved for public sharing - CCLE KMT2A::MLLT3, TP53, ASXL1 and U2AF1 2167.0 SIDM01023 908148.0 NaN
32 ACH-000034 PT-5qa3uk PLB-985 PLB985 AML Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia AML TP53(del), CDKN2A and NRAS mutations ... SNP fingerprinting confirms that this line is ... PLB985_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE NaN True Approved for public sharing - CCLE NaN NaN SIDM00811 NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1961 ACH-002709 PT-92FgnG CMS CMS AML Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia AML NaN ... NaN CMS_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE NaN True NaN CBFA2T3::GLIS2 and TP53 NaN NaN NaN NaN
1964 ACH-002716 PT-93dDAU MO-91 MO91 AMLMD Myeloid Acute Myeloid Leukemia AML with Minimal Differentiation AMLMD NaN ... NaN MO91_AML NaN True NaN NaN NaN NaN NaN NaN
2017 ACH-002946 PT-HryOAg UKE-1 UKE1 AML Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia AML JAK2 Val617Phe ... NaN NaN NaN True Approved for dbGaP - HMB MDS NaN NaN NaN NaN NaN
2026 ACH-002994 PT-JpUzYa WSU-AML WSUAML AML Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia AML CBFA2T3-GLIS2 [PubMed:28109323] ... NaN WSU-AML_PROSTATE NaN True In Progress CBFA2T3::GLIS2 NaN NaN NaN NaN
2085 ACH-003273 PT-44yhk7 CHRF-288-11 CHRF28811 AML Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia AML NUP98-KDM5A [PMID: 32381579], JAK2 T875N [PMID... ... NaN NaN NaN True NaN NUP98::KDM5A, TP53 and CREBBP NaN NaN NaN NaN

87 rows × 49 columns

In [6]:
# Tally how many of the selected models carry each ModelSubtypeFeatures
# annotation (value_counts skips missing values by default).
subtype_feature_column = CCLE_AML['ModelSubtypeFeatures']
subtype_feature_column.value_counts()
Out[6]:
ModelSubtypeFeatures
BCR-ABL1 positive                                      11
CBFA2T3::GLIS2 and TP53                                 2
JAK2 and TP53                                           1
NRAS, BCOR and CDKN2A                                   1
KMT2A::AFF1, FLT3-ITD, TP53, STAG2 and MGA              1
KMT2A::MLLT3, TP53, ASXL1 and U2AF1                     1
ETV6::AL133456.1 (ETV6::MN1), SF3B1, KRAS and GATA2     1
KMT2A::MLLT3, NRAS and TP53                             1
KMT2A-PTD and FLT3                                      1
MNX1 SV and PTPN11                                      1
FIP1L1::PDGFRA, KMT2A-PTD, PHF6                         1
TP53, KRAS, ASXL1 and MYC                               1
RUNX1::RUNX1T1, TP53, RAD21 and KIT                     1
PML::RARA; TP53; KRAS                                   1
BCR::ABL1;GATA2::MECOM, PTPN11, SF3B1 and RAD21         1
NPM1c, DNMT3A, NRAS                                     1
KMT2A::MLLT3; FLT3-ITD                                  1
DNMT3A, JAK2 and TP53                                   1
CBFA2T3::ABHD12 and TP53                                1
PICALM::MLLT10, TP53, PTPN11, JAK3 and CCND3            1
RAD21                                                   1
CBFB::MYH11, NRAS, GATA2 and SF3B1                      1
SFPQ::ZFP36L1, NRAS, EZH2, PHF6                         1
BCR::ABL1; TCR::MYB                                     1
KMT2A::AFDN, PTPN11, KRAS                               1
RUNX1::RUNX1T1, TP53, MYC and KIT                       1
ETV6::MN1, PTPN11, KIT and CCND3                        1
KMT2A::AFDN                                             1
DEK::NUP214, ETV6::ABL1, GATA2 and EZH2                 1
ZNF384                                                  1
MECOM SV, ETV6 and TP53                                 1
BCR::ABL1, TCR::MYB, TCR::NIPBL                         1
CBFA2T3::GLIS2                                          1
NUP98::KDM5A, TP53 and CREBBP                           1
Name: count, dtype: int64
In [7]:
# Load the replicate-level GDSC2 dose table: one row per cell line, a few
# metadata columns, then one column per drug / dose / replicate.
# low_memory=False is passed through unchanged from the original call.
drug_screen_csv_path = r"C:\Users\QBPAM\Downloads\'25 summer BigData AI Cancer class by Yongmei Wang\Drug_sensitivity_replicate-level_dose_(Sanger_GDSC2)_subsetted.csv"
drug_screen = pd.read_csv(drug_screen_csv_path, low_memory=False)
drug_screen
Out[7]:
depmap_id cell_line_display_name lineage_1 lineage_2 lineage_3 lineage_6 lineage_4 (+)-CAMPTOTHECIN (GDSC2:1003) 0.1μM rep1 PD 0325901 (GDSC2:1060) 0.25μM rep1 ENTINOSTAT (GDSC2:1593) 10.0μM rep1 ... KU-57788 (GDSC2:1038) 5.0μM rep35 KU-57788 (GDSC2:1038) 10.0μM rep35 KU-57788 (GDSC2:1038) 2.5μM rep36 KU-57788 (GDSC2:1038) 5.0μM rep36 KU-57788 (GDSC2:1038) 10.0μM rep36 GSK 269962A (GDSC2:1192) 0.0390625μM rep5 GSK 269962A (GDSC2:1192) 0.0390625μM rep6 ENTINOSTAT (GDSC2:1593) 0.03125μM rep8 ENTINOSTAT (GDSC2:1593) 0.5μM rep8 RO-3306 (GDSC2:1052) 0.0390625μM rep7
0 ACH-000973 639V Bladder/Urinary Tract Urethral Cancer Urethral Urothelial Carcinoma NaN NaN 0.083677 0.919947 0.848779 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 ACH-000757 A427 Lung Non-Small Cell Lung Cancer Lung Adenocarcinoma NaN NaN 0.664791 0.637017 0.861281 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 ACH-002209 ALLPO Lymphoid B-Cell Acute Lymphoblastic Leukemia B-Cell Acute Lymphoblastic Leukemia KMT2A NaN 0.145044 0.748603 0.014993 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 ACH-002210 ARH77 Myeloid Non-Cancerous Immortalized Blood NaN NaN 0.074499 0.626689 0.145854 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 ACH-000248 AU565 Breast Invasive Breast Carcinoma Invasive Breast Carcinoma HER2+ NaN 0.292193 0.986368 0.107727 ... 0.629204 0.379062 0.844173 0.664378 0.300903 NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
784 ACH-000452 TE8 Esophagus/Stomach Esophageal Squamous Cell Carcinoma Esophageal Squamous Cell Carcinoma NaN NaN 0.440621 0.843134 0.335483 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
785 ACH-000030 PC14 Lung Non-Small Cell Lung Cancer Lung Adenocarcinoma NaN NaN 0.508382 0.860545 0.421495 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
786 ACH-000626 U266B1 Lymphoid Mature B-Cell Neoplasms Plasma Cell Myeloma NaN NaN 0.671349 0.595165 0.176174 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
787 ACH-000835 GCT Soft Tissue Undifferentiated Pleomorphic Sarcoma/Malignant... Undifferentiated Pleomorphic Sarcoma/Malignant... NaN NaN 0.406913 0.768039 0.334517 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
788 ACH-000579 UACC257 Skin Melanoma Melanoma NaN NaN 0.999540 0.369727 0.290306 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

789 rows × 30261 columns

In [8]:
# Restrict the drug screen to the rows whose lineage_1 is 'Myeloid'.
myeloid_rows = drug_screen['lineage_1'].eq('Myeloid')
drug_screen_AML = drug_screen[myeloid_rows]
drug_screen_AML
Out[8]:
depmap_id cell_line_display_name lineage_1 lineage_2 lineage_3 lineage_6 lineage_4 (+)-CAMPTOTHECIN (GDSC2:1003) 0.1μM rep1 PD 0325901 (GDSC2:1060) 0.25μM rep1 ENTINOSTAT (GDSC2:1593) 10.0μM rep1 ... KU-57788 (GDSC2:1038) 5.0μM rep35 KU-57788 (GDSC2:1038) 10.0μM rep35 KU-57788 (GDSC2:1038) 2.5μM rep36 KU-57788 (GDSC2:1038) 5.0μM rep36 KU-57788 (GDSC2:1038) 10.0μM rep36 GSK 269962A (GDSC2:1192) 0.0390625μM rep5 GSK 269962A (GDSC2:1192) 0.0390625μM rep6 ENTINOSTAT (GDSC2:1593) 0.03125μM rep8 ENTINOSTAT (GDSC2:1593) 0.5μM rep8 RO-3306 (GDSC2:1052) 0.0390625μM rep7
3 ACH-002210 ARH77 Myeloid Non-Cancerous Immortalized Blood NaN NaN 0.074499 0.626689 0.145854 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
23 ACH-002218 CESS Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia NaN NaN 0.127404 0.745224 0.111212 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
123 ACH-002247 IM9 Myeloid Non-Cancerous Immortalized Blood NaN NaN 0.048018 0.205475 0.044806 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
137 ACH-002258 KMOE2 Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia NaN NaN 0.516539 0.129840 0.066219 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
169 ACH-000439 ME1 Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia CBFB::MYH11, NRAS, GATA2 and SF3B1 NaN 0.620186 0.736987 0.339090 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
174 ACH-002273 ML2 Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia KMT2A::AFDN NaN 0.035792 0.214482 0.047821 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
202 ACH-002290 NKM1 Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia NaN NaN 0.088198 0.455089 NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
215 ACH-000770 P31FUJ Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia NaN NaN 0.045234 0.787680 NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
228 ACH-002302 RPMI8866 Myeloid Myeloproliferative Neoplasms Chronic Myeloid Leukemia, BCR-ABL1+ BCR-ABL1 positive NaN 0.287915 0.808903 0.180591 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
258 ACH-002314 TUR Myeloid Acute Myeloid Leukemia Acute Monoblastic/Monocytic Leukemia NaN NaN 0.153453 0.884143 0.394355 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
263 ACH-002316 WIL2NS Myeloid Hereditary Spherocytosis Spherocytosis NaN NaN 0.035805 0.914805 0.045246 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
272 ACH-002262 KY821 Myeloid Acute Myeloid Leukemia Acute Myelomonocytic Leukemia NaN NaN 0.063442 0.309299 0.035446 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
301 ACH-000920 CMLT1 Myeloid Myeloproliferative Neoplasms Chronic Myeloid Leukemia, BCR-ABL1+ BCR::ABL1; TCR::MYB NaN 0.073786 0.549783 0.073457 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
324 ACH-000004 HEL Myeloid Acute Myeloid Leukemia AML, NOS JAK2 and TP53 NaN 0.068901 0.958312 0.085466 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
325 ACH-000006 MONOMAC6 Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia KMT2A::MLLT3, TP53, ASXL1 and U2AF1 NaN 0.016069 0.936632 NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
327 ACH-000295 EM2 Myeloid Myeloproliferative Neoplasms Chronic Myeloid Leukemia, BCR-ABL1+ NaN NaN 0.092259 0.634823 0.047049 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
332 ACH-000301 LAMA84 Myeloid Myeloproliferative Neoplasms Chronic Myeloid Leukemia, BCR-ABL1+ BCR-ABL1 positive NaN 0.066904 0.326717 0.031917 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
344 ACH-000113 OCIAML2 Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia NaN NaN 0.080551 0.011523 0.007210 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
345 ACH-000065 OCIAML5 Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia NaN NaN 0.140074 0.637822 0.065946 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
356 ACH-000190 HDMYZ Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia NaN NaN 0.882623 0.912756 0.848188 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
372 ACH-000198 EOL1 Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia FIP1L1::PDGFRA, KMT2A-PTD, PHF6 NaN 0.095446 0.434144 0.041979 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
392 ACH-000983 KCL22 Myeloid Myeloproliferative Neoplasms Chronic Myeloid Leukemia, BCR-ABL1+ BCR-ABL1 positive NaN 0.436350 0.473219 0.055107 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
394 ACH-000751 OCIM1 Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia NaN NaN 0.086389 0.549817 0.077968 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
395 ACH-000326 JURLMK1 Myeloid Myeloproliferative Neoplasms Chronic Myeloid Leukemia, BCR-ABL1+ BCR-ABL1 positive NaN 0.084028 0.132027 0.073368 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
401 ACH-000362 MOLM13 Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia KMT2A::MLLT3; FLT3-ITD NaN 0.088841 0.505942 0.028031 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
407 ACH-000336 OCIAML3 Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia NPM1c, DNMT3A, NRAS NaN 0.127354 0.217878 0.026207 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
435 ACH-000002 HL60 Myeloid Acute Myeloid Leukemia AML with Myelodysplasia-Related Changes NRAS, BCOR and CDKN2A NaN 0.092201 0.326975 0.038654 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
436 ACH-000551 K562 Myeloid Myeloproliferative Neoplasms Chronic Myeloid Leukemia, BCR-ABL1+ NaN NaN 0.487043 0.848782 0.495266 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
437 ACH-000386 KG1 Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia NaN NaN 0.272359 0.814012 0.107154 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
497 ACH-000072 MEG01 Myeloid Myeloproliferative Neoplasms Chronic Myeloid Leukemia, BCR-ABL1+ NaN NaN 0.518017 0.418615 0.054622 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
506 ACH-000074 KU812 Myeloid Myeloproliferative Neoplasms Chronic Myeloid Leukemia, BCR-ABL1+ BCR-ABL1 positive NaN 0.229392 0.073405 0.024598 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
540 ACH-000081 GDM1 Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia MNX1 SV and PTPN11 NaN 0.181332 0.792789 0.115995 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
543 ACH-000263 KASUMI1 Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia RUNX1::RUNX1T1, TP53, RAD21 and KIT NaN 0.551611 0.548271 0.067059 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
616 ACH-000045 MV411 Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia KMT2A::AFF1, FLT3-ITD, TP53, STAG2 and MGA NaN 0.013245 0.193469 NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
698 ACH-000168 NOMO1 Myeloid Acute Myeloid Leukemia Acute Monoblastic/Monocytic Leukemia NaN NaN 0.147914 0.394173 0.366025 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
707 ACH-000373 SKM1 Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia NaN NaN 0.061619 0.210921 0.027169 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

36 rows × 30261 columns

Drug Dose-Response Curves

In [26]:
# List every drug that appears in the screen.
# Measurement columns are named like "(+)-CAMPTOTHECIN (GDSC2:1003) 0.1μM rep1";
# the text in front of " (GDSC2:<id>)" is the drug name, e.g. "(+)-CAMPTOTHECIN"
# or "PD 0325901". Collecting the names in a set removes the repeats that come
# from the many dose/replicate columns of the same drug.

import re

columns = drug_screen.columns

# Headers that do not follow the "<name> (GDSC2:<id>)" pattern (the metadata
# columns) simply produce no match and are skipped.
header_matches = (re.match(r"(.+?)\s+\(GDSC2:\d+\)", header) for header in columns)
drug_names = {found.group(1).strip() for found in header_matches if found}

# Alphabetical listing plus the total count.
drug_list = sorted(drug_names)
print(f"Number of unique drugs: {len(drug_list)}")
print("Drugs studied:")
for name in drug_list:
    print(name)
Number of unique drugs: 169
Drugs studied:
(+)-CAMPTOTHECIN
(+)-JQ-1
1035270-39-3
1044870-39-4
284461-73-0
5XE
681640
A-1065-5
ABT737
ACETALAX
ADAVOSERTIB
AFATINIB
AFURESERTIB
AG-13736
AGI-5198
AGI-6780
AK-55409
AK175551
AK175558
AK298746
ALISERTIB
ALPELISIB
AMG-319
AOB2221
AOB2313
AOB5560
APO866
APR-246
ARAVA
ASP-7487
AT13148
AUY
AZ 960
AZ6102
AZD 2171
AZD 8186
AZD-8055
AZD3759
AZD7762
AZD8931
BDP-00009066
BERZOSERTIB
BEZ235
BI-2536
BI-97C1
BIBR-1532
BIIB-057
BIRABRESIB
BIRB 796
BKM120
BMS 536924
BMS-345541
BMS-754807
BORTEZOMIB
BPD-00008900
BVD-523
CAPIVASERTIB
CARMUSTINE
CERALASERTIB
CIS-DDP
CRA-032765
CRIZOTINIB
CYCLOPHOSPHAMIDE
CYTARABINE
CZC24832
DABRAFENIB
DACTINOMYCIN
DASATINIB
DIHYDROROTENONE
DINACICLIB
DOCETAXEL
ELEPHANTIN
ELOXATIN
ENTINOSTAT
ENTOSPLETINIB
EPIRUBICIN
EPZ-004777
EPZ015866
ERLOTINIB
EXEL-2880
EXELBINE
FLUDARA
FLUOROURACIL
FULVESTRANT
GALLIBISCOQUINAZOLE
GDC 0810
GEFITINIB
GEMCITABINE
GNE 317
GSK 1904529A
GSK 2578215A
GSK 269962A
GSK 602
GSK-2141795
GTPL8020
GX15-070
ICI-46474
IPATASERTIB
IRINOTECAN
IWP-2
KU-55933
KU-57788
LAPATINIB
LCL161
LGK-974
LJI308
M-39831
MIM 1
MIRA 1
MIRIN
MITOXANTRONE
MK 0457
MK 2206
MK-8776
MN-64
MOLIBRESIB
MST 312
NAVITOCLAX
NELARABINE
NILOTINIB
NIRAPARIB
NVP ADW742
OF-1
OLAPARIB
OSIMERTINIB
P 22077
PACLITAXEL
PALBOCICLIB
PD 0325901
PD 173074
PEVONEDISTAT
PF 4708671
PICOLINATE
PICTILISIB
PINOMETOSTAT
PLX 4720
PODOPHYLLOTOXIN BROMIDE
PYRIDOSTATIN
RAPAMYCIN
REBEMADLIN
RECLAST
RIBOCICLIB
RO-3306
RUXOLITINIB
SAVOLITINIB
SB 216763
SB 505124
SCH772984
SEPANTRONIUM BROMIDE
SINULARIN
TALAZOPARIB
TASELISIB
TENIPOSIDE
TOPOTECAN
TRAMETINIB
UMI 77
VE 821
VELBAN
VENETOCLAX
VINCRISTINE
VORINOSTAT
VX-11E
WEHI-539
WIKI 4
WNT-C59
WZ 4003
XAV-939
YK 4-279
ZM 447439
In [33]:
# List the drugs that have at least one measurement in the Myeloid subset.

import re

columns = drug_screen_AML.columns
drug_names = set()

for header in columns:
    parsed = re.match(r"(.+?)\s+\(GDSC2:\d+\)", header)
    if parsed is None:
        continue    # not a "<name> (GDSC2:<id>) ..." measurement column
    # A drug counts as tested when any of its columns holds a non-NaN value.
    if drug_screen_AML[header].notna().any():
        drug_names.add(parsed.group(1).strip())

drug_list = sorted(drug_names)
print(f"Number of unique drugs tested on Myeloid samples: {len(drug_list)}")
print("Drugs studied:")
for name in drug_list:
    print(name)
Number of unique drugs tested on Myeloid samples: 152
Drugs studied:
(+)-CAMPTOTHECIN
1035270-39-3
1044870-39-4
284461-73-0
5XE
681640
A-1065-5
ABT737
ACETALAX
ADAVOSERTIB
AFATINIB
AFURESERTIB
AGI-5198
AGI-6780
AK175551
AK175558
AK298746
ALISERTIB
ALPELISIB
AMG-319
AOB2221
AOB2313
AOB5560
APO866
APR-246
ARAVA
ASP-7487
AT13148
AUY
AZ 960
AZ6102
AZD 2171
AZD 8186
AZD3759
AZD7762
AZD8931
BDP-00009066
BERZOSERTIB
BEZ235
BI-97C1
BIBR-1532
BIIB-057
BIRABRESIB
BKM120
BMS 536924
BMS-345541
BORTEZOMIB
BPD-00008900
BVD-523
CAPIVASERTIB
CARMUSTINE
CERALASERTIB
CIS-DDP
CRA-032765
CRIZOTINIB
CYCLOPHOSPHAMIDE
CYTARABINE
CZC24832
DABRAFENIB
DACTINOMYCIN
DASATINIB
DIHYDROROTENONE
DINACICLIB
DOCETAXEL
ELEPHANTIN
ELOXATIN
ENTINOSTAT
ENTOSPLETINIB
EPIRUBICIN
EPZ-004777
EPZ015866
ERLOTINIB
EXEL-2880
EXELBINE
FLUDARA
FLUOROURACIL
FULVESTRANT
GALLIBISCOQUINAZOLE
GDC 0810
GEFITINIB
GEMCITABINE
GNE 317
GSK 1904529A
GSK 2578215A
GSK 602
GSK-2141795
GTPL8020
GX15-070
ICI-46474
IPATASERTIB
IRINOTECAN
IWP-2
LAPATINIB
LCL161
LGK-974
LJI308
M-39831
MIM 1
MIRA 1
MIRIN
MITOXANTRONE
MK 2206
MK-8776
MN-64
MOLIBRESIB
MST 312
NAVITOCLAX
NELARABINE
NILOTINIB
NIRAPARIB
NVP ADW742
OF-1
OLAPARIB
OSIMERTINIB
P 22077
PACLITAXEL
PALBOCICLIB
PD 0325901
PD 173074
PEVONEDISTAT
PICOLINATE
PICTILISIB
PINOMETOSTAT
PLX 4720
PODOPHYLLOTOXIN BROMIDE
PYRIDOSTATIN
RAPAMYCIN
REBEMADLIN
RECLAST
RUXOLITINIB
SAVOLITINIB
SCH772984
SEPANTRONIUM BROMIDE
SINULARIN
TALAZOPARIB
TASELISIB
TENIPOSIDE
TOPOTECAN
TRAMETINIB
UMI 77
VE 821
VELBAN
VENETOCLAX
VINCRISTINE
VORINOSTAT
VX-11E
WEHI-539
WIKI 4
WNT-C59
WZ 4003
XAV-939
YK 4-279
In [39]:
# Rank drugs by how many measurements they have in the Myeloid subset and keep
# the 50 with the most data points.

# Imported in this cell so it also runs on a fresh kernel executed top to
# bottom: the cell uses both names but did not import either of them.
import re
from collections import defaultdict

columns = drug_screen_AML.columns
drug_to_non_nan_count = defaultdict(int)

# Total number of non-NaN values per drug. A drug usually spans many
# dose/replicate columns, so the per-column counts are summed per drug name.
for col in columns:
    match = re.match(r"(.+?)\s+\(GDSC2:\d+\)", col)
    if match is None:
        continue    # not a "<name> (GDSC2:<id>) ..." measurement column
    drug_name = match.group(1).strip()
    non_nan_count = int(drug_screen_AML[col].count())    # Series.count() ignores NaN
    if non_nan_count > 0:
        # Drugs with no data at all never get an entry in the dict.
        drug_to_non_nan_count[drug_name] += non_nan_count

# Most-measured drugs first. sorted() is stable (also with reverse=True), so
# drugs with equal counts keep the order in which they first appear in the columns.
sorted_drugs = sorted(drug_to_non_nan_count.items(), key=lambda item: item[1], reverse=True)

# Slicing never fails, so fewer than 50 drugs simply yields a shorter table.
top_50_drugs = sorted_drugs[:50]

top_50_df = pd.DataFrame(top_50_drugs, columns=['Drug', 'Data Points'])
top_50_df.index = range(1, len(top_50_df) + 1)    # 1-based rank as the index instead of 0-based
top_50_df
Out[39]:
Drug Data Points
1 ASP-7487 904
2 PD 0325901 877
3 AFATINIB 861
4 (+)-CAMPTOTHECIN 758
5 TASELISIB 708
6 LGK-974 700
7 TRAMETINIB 699
8 DOCETAXEL 685
9 ELOXATIN 684
10 FLUOROURACIL 682
11 REBEMADLIN 663
12 CIS-DDP 656
13 FULVESTRANT 641
14 GSK-2141795 484
15 IRINOTECAN 479
16 AUY 477
17 AZD8931 470
18 GEMCITABINE 468
19 DACTINOMYCIN 467
20 BVD-523 466
21 PALBOCICLIB 466
22 PICTILISIB 465
23 SCH772984 459
24 DASATINIB 456
25 GSK 1904529A 453
26 ADAVOSERTIB 448
27 MK 2206 432
28 EPZ-004777 423
29 AZD3759 413
30 PINOMETOSTAT 401
31 ALPELISIB 396
32 PLX 4720 388
33 ARAVA 252
34 APR-246 252
35 NIRAPARIB 252
36 AZ 960 252
37 RUXOLITINIB 252
38 XAV-939 251
39 ALISERTIB 250
40 IWP-2 250
41 GX15-070 250
42 BMS-345541 250
43 CRIZOTINIB 247
44 MITOXANTRONE 245
45 VINCRISTINE 245
46 MIRIN 245
47 AOB5560 244
48 TENIPOSIDE 244
49 DIHYDROROTENONE 244
50 DINACICLIB 243
In [45]:
import re

import matplotlib.pyplot as plt

drug_of_interest = "ASP-7487"
second_drug_of_interest = "PD 0325901"


def _select_drug_columns(screen, drug):
    """Return the columns of `screen` that belong to exactly `drug`.

    A column belongs to a drug when the text in front of " (GDSC2:<id>)" equals
    the drug name. A plain substring test (`drug in col`) would also pick up
    any other compound whose name merely contains `drug`.
    """
    selected = []
    for col in screen.columns:
        name_match = re.match(r"(.+?)\s+\(GDSC2:\d+\)", col)
        if name_match is not None and name_match.group(1).strip() == drug:
            selected.append(col)
    return selected


def _column_mean_responses(screen, cols, drug):
    """Return sorted (dose, mean_response) pairs, one per dose/replicate column.

    The dose is parsed from the "<number>μM" part of the column name and the
    response is the mean of the column's numeric values across the rows of
    `screen`. Columns without any numeric value are skipped, because their mean
    is NaN: it would add an empty point to the plot and makes tuple sorting
    unreliable. Replicates are NOT merged here, so one dose can appear several
    times in the result.

    Raises:
        ValueError: if no column of `drug` yields a usable data point.
    """
    pairs = []
    for col in cols:
        dose_match = re.search(r"([\d.]+)μM", col)     # e.g. "5.0μM" -> "5.0"
        if dose_match is None:
            continue
        # errors='coerce' turns non-numeric entries into NaN, which mean() ignores.
        mean_response = pd.to_numeric(screen[col], errors='coerce').mean()
        if pd.notna(mean_response):
            pairs.append((float(dose_match.group(1)), mean_response))
    if not pairs:
        raise ValueError(f"No dose-response data found for drug {drug!r}")
    pairs.sort()
    return pairs


#filter columns for each drug
drug_cols = _select_drug_columns(drug_screen_AML, drug_of_interest)
second_drug_cols = _select_drug_columns(drug_screen_AML, second_drug_of_interest)

# ----First Drug----
dose_response_1 = _column_mean_responses(drug_screen_AML, drug_cols, drug_of_interest)
doses_1, responses_1 = zip(*dose_response_1)

# ----Second Drug----
dose_response_2 = _column_mean_responses(drug_screen_AML, second_drug_cols, second_drug_of_interest)
doses_2, responses_2 = zip(*dose_response_2)

# --- Plot ---
plt.figure(figsize = (8,6))
plt.plot(doses_1, responses_1, marker = 'o', label = drug_of_interest)
plt.plot(doses_2, responses_2, marker = 's', label = second_drug_of_interest, color='orange')
plt.xscale('log')
plt.xlabel("Dose Concentration (μM)")
plt.ylabel("Cell Viability")
plt.title(f"Dose-Response Curves in Myeloid Cell Lines: {drug_of_interest} vs {second_drug_of_interest}")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
No description has been provided for this image
In [49]:
from collections import defaultdict
import re
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# --- drug names ---
drug_of_interest = "ASP-7487"
second_drug_of_interest = "PD 0325901"


def _drug_dose_columns(screen, drug):
    """Return the columns of `screen` whose drug name is exactly `drug`.

    The drug name is the text in front of " (GDSC2:<id>)" in the column header.
    Comparing the parsed name avoids the false positives of a substring test
    (`drug in col`) when one compound name is contained in another.
    """
    selected = []
    for col in screen.columns:
        name_match = re.match(r"(.+?)\s+\(GDSC2:\d+\)", col)
        if name_match is not None and name_match.group(1).strip() == drug:
            selected.append(col)
    return selected


def _pooled_mean_by_dose(screen, cols, drug):
    """Return sorted (dose, mean_response) pairs with exactly one pair per dose.

    All numeric values from every column that shares a dose (i.e. all
    replicates, across all rows of `screen`) are pooled into one list and
    averaged, so the result is the mean over individual measurements rather
    than a mean of per-column means. Doses without any numeric value are left out.

    Raises:
        ValueError: if no dose of `drug` has any data.
    """
    dose_to_values = defaultdict(list)
    for col in cols:
        dose_match = re.search(r"([\d.]+)μM", col)     # e.g. "5.0μM" -> "5.0"
        if dose_match is None:
            continue
        # Non-numeric entries become NaN and are dropped together with real NaNs.
        responses = pd.to_numeric(screen[col], errors='coerce').dropna()
        dose_to_values[float(dose_match.group(1))].extend(responses.tolist())

    # Doses are unique dict keys, so sorting the tuples orders purely by dose.
    pairs = sorted(
        (dose, np.mean(values))
        for dose, values in dose_to_values.items()
        if values
    )
    if not pairs:
        raise ValueError(f"No dose-response data found for drug {drug!r}")
    return pairs


# --- get drug columns ---
drug_cols = _drug_dose_columns(drug_screen_AML, drug_of_interest)
second_drug_cols = _drug_dose_columns(drug_screen_AML, second_drug_of_interest)

# ---- for first drug ----
dose_response_1 = _pooled_mean_by_dose(drug_screen_AML, drug_cols, drug_of_interest)
doses_1, responses_1 = zip(*dose_response_1)

# ---- for second drug ----
dose_response_2 = _pooled_mean_by_dose(drug_screen_AML, second_drug_cols, second_drug_of_interest)
doses_2, responses_2 = zip(*dose_response_2)

# --- plot ---
plt.figure(figsize = (8, 6))
plt.plot(doses_1, responses_1, marker='o', label=drug_of_interest)
plt.plot(doses_2, responses_2, marker='s', label=second_drug_of_interest)
plt.xscale('log')
plt.xlabel("Dose Concentration (μM)")
plt.ylabel("Mean Cell Viability")
plt.title(f"Average Dose-Response Curves in Myeloid Cell Lines: {drug_of_interest} vs {second_drug_of_interest}")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
No description has been provided for this image
In [50]:
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt


def logistic4(x, A, B, C, D):
    """Four-parameter logistic curve: A + (D - A) / (1 + (x / C)**B).

    For B > 0 the curve equals D as x -> 0 and approaches A as x -> infinity,
    so D is the low-dose plateau and A the high-dose plateau (not necessarily
    the numeric max/min). C is the dose at which the curve is halfway between
    the two plateaus (the EC50) and B sets the steepness.
    """
    return A + (D - A) / (1.0 + (x / C)**B)


def _fit_logistic4(doses, responses):
    """Fit `logistic4` to one drug's (dose, response) points.

    Bounds: both plateaus in [0, 1], slope in [0, 10], EC50 between 0 and ten
    times the highest tested dose. Returns the `(popt, pcov)` pair produced by
    `scipy.optimize.curve_fit`.
    """
    doses = np.asarray(doses, dtype=float)
    responses = np.asarray(responses, dtype=float)
    return curve_fit(logistic4, doses, responses,
                     bounds=([0, 0, 0, 0], [1, 10, doses.max() * 10, 1]),
                     maxfev=10000)


def _log_dose_grid(doses, n_points=200):
    """Return `n_points` log-spaced doses spanning the tested range of `doses`."""
    return np.logspace(np.log10(min(doses)), np.log10(max(doses)), n_points)


#fit logistic curve to each drug
popt1, pcov1 = _fit_logistic4(doses_1, responses_1)
popt2, pcov2 = _fit_logistic4(doses_2, responses_2)

#generate smooth x values for plotting.
#Each drug gets a grid over its OWN tested dose range: evaluating drug 2 on
#drug 1's grid would truncate or extrapolate its curve whenever the two dose
#ranges differ.
x_smooth_1 = _log_dose_grid(doses_1)
x_smooth_2 = _log_dose_grid(doses_2)
x_smooth = x_smooth_1    # original name kept; it always described drug 1's range

#calculate fitted y values
y_fit_1 = logistic4(x_smooth_1, *popt1)
y_fit_2 = logistic4(x_smooth_2, *popt2)

#plot raw data and fitted curves
plt.figure(figsize = (8,6))
plt.scatter(doses_1, responses_1, color = 'blue', label = f'{drug_of_interest} data')
plt.plot(x_smooth_1, y_fit_1, color = 'blue', linestyle = '-', label = f'{drug_of_interest} fit')

plt.scatter(doses_2, responses_2, color='orange', label=f'{second_drug_of_interest} data')
plt.plot(x_smooth_2, y_fit_2, color = 'orange', linestyle = '-', label = f'{second_drug_of_interest} fit')

plt.xscale('log')
plt.xlabel('Dose Concentration (μM)')
plt.ylabel('Mean Cell Viability')
plt.title(f"Fitted Average Dose-Response Curves in Myeloid Cell Lines: {drug_of_interest} vs {second_drug_of_interest}")
plt.legend()
plt.grid(True)
plt.show()
No description has been provided for this image
In [ ]: