import pandas as pd
import numpy as np


df_clinical_patients = pd.read_csv("breast_msk_2018/data_clinical_patient.txt", skiprows=4, sep='\t')


df_clinical_patients


df_gene_mutations = pd.read_csv("breast_msk_2018/data_mutations.txt", sep='\t')


df_gene_mutations


df_gene_matrix = pd.read_csv("breast_msk_2018/data_gene_panel_matrix.txt", sep='\t')
df_gene_matrix


table_gene_matrix = df_gene_matrix.groupby(['mutations']).count()
table_gene_matrix


df_impact_410 = pd.read_csv("misc_data/data_gene_panel_impact410.txt", skiprows=2, sep='\t')
genes = list(df_impact_410)
genes[0] = genes[0].split()[-1]
genes[0]

'ABL1'


df_clinical_sample = pd.read_csv("breast_msk_2018/data_clinical_sample.txt", skiprows=4, sep='\t')
df_clinical_sample


df_fusions = pd.read_csv("breast_msk_2018/data_fusions.txt", sep='\t')
sorted_df_fusions = df_fusions.sort_values(by='Tumor_Sample_Barcode')
sorted_df_fusions


genes_symbols = df_fusions['Hugo_Symbol']


df_cases = pd.read_csv("breast_msk_2018/case_lists/cases_sequenced.txt", skiprows=5, sep='\t')
df_cases_sequences = list(df_cases)
df_cases_sequences[0] = df_cases_sequences[0].split()[-1]
sampled_sequences = df_cases_sequences
patients_sequenced = ["-".join(item.split("-")[:2]) for item in df_cases_sequences]


clinical_data = df_clinical_patients[df_clinical_patients['PATIENT_ID'].isin(patients_sequenced)]
clinical_data


alive = df_clinical_patients[df_clinical_patients['OS_STATUS']=="0:LIVING"]
deceased = df_clinical_patients[df_clinical_patients['OS_STATUS']=="1:DECEASED"]

alive_patients = list(alive["PATIENT_ID"])
deceased_patients = list(deceased["PATIENT_ID"])


clinical_data_gene_sequences = df_fusions[df_fusions['Tumor_Sample_Barcode'].isin(sampled_sequences)]
clinical_data_gene_sequences


def query_by_gene(dataframe_genes, hugo_symbol):
    return dataframe_genes[dataframe_genes['Hugo_Symbol']==hugo_symbol]


def add_patients(dataframe):
    temp_dataframe = dataframe.copy()
    label = lambda row:  "-".join(row["Tumor_Sample_Barcode"].split("-")[:2])
    temp_dataframe['PATIENT_ID'] = dataframe.apply(label, axis="columns") #Apply Label
    return temp_dataframe


clinical_data_with_patients = add_patients(clinical_data_gene_sequences)
query_by_gene(clinical_data_with_patients,"IGF1R")


alive_patients_genes = clinical_data_with_patients[clinical_data_with_patients['PATIENT_ID'].isin(alive_patients)]
deceased_patients_genes = clinical_data_with_patients[clinical_data_with_patients['PATIENT_ID'].isin(deceased_patients)]


alive_patients_genes


deceased_patients_genes


all_genes = list(set(clinical_data_with_patients["Hugo_Symbol"]))
len(all_genes)
alive_genes = list(set(alive_patients_genes["Hugo_Symbol"]))
deceased_genes = list(set(deceased_patients_genes["Hugo_Symbol"]))
at_risk_genes = list(set(all_genes) - set(alive_genes))
at_risk_genes

['ERCC2',
 'ITPR2',
 'ARID5B',
 'SUFU',
 'C2orf67',
 'SLC25A14',
 'MACROD2',
 'BET1',
 'C6orf10',
 'AKAP8',
 'ST7',
 'PITPNC1',
 'PRB2',
 'NUB1',
 'ADCY9',
 'DDR1',
 'HEATR2',
 'DNMT3B',
 'PDE1A',
 'KLHL13',
 'RIPK4',
 'UNC5D',
 'COPZ2',
 'DIP2B',
 'ACER1',
 'HMHA1',
 'WHSC1',
 'DPP6',
 'ZFP64',
 'TET1',
 'MET',
 'NOVA1',
 'KIAA1024',
 'HIST1H3B',
 'BRAF',
 'CTNNB1',
 'RNF43',
 'TRIM8',
 'TGFB2',
 'STK11',
 'HCK',
 'AATF',
 'E2F3',
 'ZFP90',
 'C1orf87',
 'WFDC3',
 'HIST1H4B',
 'CUL5',
 'HIST1H2BD',
 'MIR1281',
 'RGS9',
 'TIMP2',
 'FGFR1',
 'AGBL1',
 'FGFR3',
 'HIST1H2BC',
 'AGK']


query_by_gene(clinical_data_with_patients,"RIPK4")


gene_count = []
template_dict = {"Hugo_Symbol":"","Count":""}
for gene in at_risk_genes:
    temp_dict = template_dict.copy()
    sample = query_by_gene(clinical_data_with_patients,gene)
    temp_dict["Hugo_Symbol"] = gene
    temp_dict["Count"] = len(sample)
    gene_count.append(temp_dict)
    
count_frame = pd.DataFrame(data = gene_count)
count_frame.sort_values(by='Count', ascending=False)[:3]


gene_count = []
for gene in at_risk_genes:
    temp_dict = template_dict.copy()
    sample = query_by_gene(clinical_data_with_patients,gene)
    temp_dict["Hugo_Symbol"] = gene
    temp_dict["Count"] = len(set(sample["PATIENT_ID"]))
    gene_count.append(temp_dict)
    
count_frame = pd.DataFrame(data = gene_count)
count_frame.sort_values(by='Count', ascending=False)[:3]


patient_query = query_by_gene(clinical_data_with_patients,"STK11")
patients_stk11 = list(patient_query["PATIENT_ID"])
patient_query


clinical_data_with_patients[clinical_data_with_patients['PATIENT_ID'].isin(patients_stk11)]


df_clinical_patients[df_clinical_patients['PATIENT_ID'].isin(patients_stk11)]


patient_query_ADCY9 = query_by_gene(clinical_data_with_patients,"ADCY9")
patients_ADCY9 = list(patient_query_ADCY9["PATIENT_ID"])
patient_query_ADCY9


df_clinical_patients[df_clinical_patients['PATIENT_ID'].isin(patients_ADCY9)]


positive_genes = list(set(all_genes) - set(deceased_genes))

positive_gene_count = []
for gene in positive_genes:
    temp_dict = template_dict.copy()
    sample = query_by_gene(clinical_data_with_patients,gene)
    temp_dict["Hugo_Symbol"] = gene
    temp_dict["Count"] = len(set(sample["PATIENT_ID"]))
    positive_gene_count.append(temp_dict)
    
count_frame_positive = pd.DataFrame(data = positive_gene_count)
count_frame_positive.sort_values(by='Count', ascending=False)[:3]
count_frame_positive


def mutation_present(dataframe_genes, patient_id, gene):
    query_result = query_by_gene(dataframe_genes, gene)
    result = len(query_result[query_result['PATIENT_ID']== patient_id]) > 0
    return 0 if result == 0 else 1


patients_gene_network = pd.DataFrame()
patients_gene_network["PATIENT_ID"] = df_clinical_patients["PATIENT_ID"]


clinical_data_with_patients["Hugo_Symbol"]

0       BCAS3
1       BRIP1
2       ERMP1
3      TGFBR1
4       DNAH9
        ...  
538     PTPRS
539     UNC5D
540    NOTCH3
541     IGF1R
542     EPHA3
Name: Hugo_Symbol, Length: 543, dtype: object


all_genes_mapped = list(set(clinical_data_with_patients["Hugo_Symbol"]))


def create_gene_map(dataframe_genes, all_genes):
    gene_map = {}
    for gene in all_genes:
        patient_list = list(dataframe_genes[dataframe_genes['Hugo_Symbol']==gene]["PATIENT_ID"])
        gene_map[gene] = patient_list
    return gene_map


gene_map = create_gene_map(clinical_data_with_patients, all_genes_mapped)


def add_patient_gene_network(dataframe, mapped_genes, gene):
    temp_dataframe = dataframe.copy()
    patients = mapped_genes[gene]
    truth_value = lambda row: row["PATIENT_ID"] in patients
    label = lambda row:  True if truth_value(row) else False
    temp_dataframe[gene] = temp_dataframe.apply(label, axis="columns") #Apply Label
    return temp_dataframe


for gene in all_genes_mapped:
    patients_gene_network = add_patient_gene_network(patients_gene_network, gene_map, gene)
patients_gene_network


patients_gene_network[patients_gene_network['PATIENT_ID']=="P-0014576"]["DNAH9"]

1364    True
Name: DNAH9, dtype: bool


def add_class(dataframe, alive_patients_list):
    temp_dataframe = dataframe.copy()
    truth_value = lambda row: row["PATIENT_ID"] in alive_patients_list
    label = lambda row:  1 if truth_value(row) else 0
    temp_dataframe["Survived"] = temp_dataframe.apply(label, axis="columns") #Apply Label
    return temp_dataframe


from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt


patients_gene_network = add_class(patients_gene_network, alive_patients)


patients_gene_network


def create_table_of_analysis(dataframe, real_label_column_name, test_label_column_name, positive_result, negative_result):
    row_count = dataframe.shape[0]
    true_positive = dataframe.query('`{0}` == `{1}` and `{1}` == "{2}"'.format(real_label_column_name, test_label_column_name, positive_result))
    false_positive = dataframe.query('`{0}` == "{3}" and `{1}` == "{2}"'.format(real_label_column_name, test_label_column_name, positive_result, negative_result))
    true_negative = dataframe.query('`{0}` == `{1}` and `{0}` == "{2}"'.format(real_label_column_name, test_label_column_name, negative_result))
    false_negative = dataframe.query('`{0}` == "{2}" and `{1}` == "{3}"'.format(real_label_column_name, test_label_column_name, positive_result, negative_result))

    true_positive_r = true_positive.shape[0]
    false_positive_r = false_positive.shape[0]
    true_negative_r = true_negative.shape[0] 
    false_negative_r = false_negative.shape[0]
    accuracy = float((true_positive_r + true_negative_r)) / float(row_count)

    tpr = float(true_positive_r)/float((true_positive_r + false_negative_r))
    tnr = float(true_negative_r)/float((true_negative_r + false_positive_r))
    accuracy = float((true_positive_r + true_negative_r)) / float(row_count)

    d = {"TP":[true_positive_r],"FP":[false_positive_r],"TN":[true_negative_r],"FN":[false_negative_r], "Accuracy": [accuracy], "TPR":[tpr], "TNR": [tnr]}

    result_table = pd.DataFrame(data=d)

    #print(result_table)
    return result_table


x_headers = list(patients_gene_network)[1:-1]


train, test = train_test_split(patients_gene_network, test_size=0.5, random_state=42, shuffle=True)

X_train = train[x_headers].to_numpy()
y_train = train['Survived'].to_numpy()
X_test = test[x_headers].to_numpy()
Y_test = test['Survived'].to_numpy()
print(y_train)

[0 1 0 0 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 0 1 1 0 0 1 1 1 0 0 1 1 1 1 1
 1 0 1 1 1 0 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 0 0
 1 1 1 1 0 1 1 1 0 1 1 1 1 0 1 1 1 1 1 0 1 0 1 0 1 0 1 0 1 1 1 1 0 1 0 1 1
 0 1 1 1 1 1 1 1 0 1 1 0 0 1 0 1 1 0 0 0 0 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1
 1 1 0 1 0 1 1 1 0 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1
 1 0 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1 0 0 1 0 1 1 1 0 1 1 1 0 0 1 1 1 1 1
 0 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 0
 1 1 1 1 1 0 0 1 1 1 0 1 1 0 1 1 0 1 0 1 1 1 1 1 0 1 0 1 0 1 1 0 1 1 1 1 1
 0 1 1 1 1 0 1 0 1 1 1 1 1 0 1 0 1 1 1 1 1 1 0 1 1 1 0 1 0 0 1 1 1 1 1 0 1
 1 0 1 1 1 1 0 0 1 1 1 0 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1
 0 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 0 1 0
 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1
 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 1 1 1 1 1 0 1 0 1 1 1 0 1 1 0 0 1
 0 1 0 0 1 0 1 0 0 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 1
 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1
 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 0 1 1
 0 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0
 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 0 1 1 1 1 1 0 1 1 1 1
 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 0 0 1 1 0 1 1 1 1 1 0
 0 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 0 0 1 1 1
 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1
 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 0
 0 1 1 1 0 1 1 1 1 0 1 1 0 1 1 1 0 0 1 1 1 1 0 1 1 1 1]


max_accuracy = -1
df4 = pd.DataFrame(data = X_test, columns = x_headers)
d = {"Number of Trees": [np.nan], "Depth":[np.nan], "TP":[np.nan],"FP":[np.nan],"TN":[np.nan],"FN":[np.nan], "Accuracy": [np.nan], "TPR":[np.nan], "TNR": [np.nan]}

table_of_results = pd.DataFrame(data=d)

    
for total_trees in range(1,11):
    for depth in range(1,6):
        temp_dataframe = df4.copy()
        clf_model = RandomForestClassifier(max_depth=depth, n_estimators=total_trees, random_state=0) 
        clf_model.fit(X_train,y_train)

        y_predicted = clf_model.predict(X_test)
        

        temp_dataframe["Class"] = Y_test
        temp_dataframe["Predicted"] = y_predicted
        #print("Number of Trees - ", total_trees)
        #print("Overall Depth - ", depth)
        table_accuracy = create_table_of_analysis(temp_dataframe, "Class", "Predicted", 1, 0)
        table_accuracy["Number of Trees"] = total_trees
        table_accuracy["Depth"] = depth
        accuracy = table_accuracy.iloc[0]["Accuracy"]
        table_of_results = pd.concat([table_of_results,table_accuracy])
        
        if accuracy > max_accuracy:
            maximum_table = table_accuracy
            optimal_features = {"Total Trees" : total_trees, "Depth": depth}
            maximum_model = clf_model
            maximum_confusion_matrix = confusion_matrix(Y_test, y_predicted)
            max_accuracy = accuracy


table_of_results = table_of_results.dropna()
columns_entries = list(table_of_results)
print(table_of_results)

   Number of Trees  Depth     TP     FP   TN   FN  Accuracy       TPR  \
0              1.0    1.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              1.0    2.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              1.0    3.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              1.0    4.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              1.0    5.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              2.0    1.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0              2.0    2.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0              2.0    3.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0              2.0    4.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0              2.0    5.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0              3.0    1.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              3.0    2.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              3.0    3.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              3.0    4.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              3.0    5.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              4.0    1.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              4.0    2.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              4.0    3.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              4.0    4.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              4.0    5.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              5.0    1.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              5.0    2.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              5.0    3.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              5.0    4.0  691.0  186.0  1.0  0.0  0.788155  1.000000   
0              5.0    5.0  691.0  186.0  1.0  0.0  0.788155  1.000000   
0              6.0    1.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              6.0    2.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              6.0    3.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              6.0    4.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              6.0    5.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              7.0    1.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              7.0    2.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              7.0    3.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              7.0    4.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              7.0    5.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              8.0    1.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              8.0    2.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              8.0    3.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              8.0    4.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              8.0    5.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              9.0    1.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              9.0    2.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              9.0    3.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              9.0    4.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              9.0    5.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0             10.0    1.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0             10.0    2.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0             10.0    3.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0             10.0    4.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0             10.0    5.0  691.0  187.0  0.0  0.0  0.787016  1.000000   

        TNR  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.005348  
0  0.005348  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000


print(maximum_table)

    TP   FP  TN  FN  Accuracy  TPR       TNR  Number of Trees  Depth
0  691  186   1   0  0.788155  1.0  0.005348                5      4


table_of_results = table_of_results.dropna()
columns_entries = list(table_of_results)
table_of_results


fig = plt.figure()
 
# syntax for 3-D projection
ax = Axes3D(fig)

ax.scatter(table_of_results["Number of Trees"], table_of_results["Depth"], table_of_results["Accuracy"])

ax.set_xlabel(columns_entries[0])
ax.set_ylabel(columns_entries[1])
ax.set_zlabel(columns_entries[6])

ax.set_title('Accuracy')
plt.show()


maximum_model

RandomForestClassifier(max_depth=4, n_estimators=5, random_state=0)


from sklearn import tree
tree_1 = maximum_model.estimators_[0]
text_representation = tree.export_text(tree_1, feature_names= x_headers)
print(text_representation)

|--- RAD51C <= 0.50
|   |--- BRAF <= 0.50
|   |   |--- JAK2 <= 0.50
|   |   |   |--- TP53 <= 0.50
|   |   |   |   |--- class: 1.0
|   |   |   |--- TP53 >  0.50
|   |   |   |   |--- class: 1.0
|   |   |--- JAK2 >  0.50
|   |   |   |--- class: 1.0
|   |--- BRAF >  0.50
|   |   |--- class: 0.0
|--- RAD51C >  0.50
|   |--- class: 1.0


curious_genes = list(set(all_genes) - set(positive_genes) - set(at_risk_genes))
curious_genes

['AXL',
 'MYC',
 'TMPRSS2',
 'ARID2',
 'STAT5B',
 'BRIP1',
 'ACBD6',
 'ERBB2',
 'ZFHX3',
 'RUNX1',
 'NOTCH1',
 'MLL3',
 'PAX5',
 'STAT3',
 'RARA',
 'CDH1',
 'IDH2',
 'CARD11',
 'BRCA2',
 'JAK2',
 'MDC1',
 'PPM1D',
 'PTPRS',
 'NOTCH3',
 'EP300',
 'PDE1C',
 'SOX9',
 'PTEN',
 'PIK3R1',
 'CDK12',
 'IGF1R',
 'ARID1A',
 'NF1',
 'FGFR2',
 'ARID1B',
 'SF3B1',
 'SMARCA4',
 'BCAS3',
 'FH',
 'RAD51C',
 'NFKBIA',
 'KDM6A',
 'STAG2',
 'NOTCH4',
 'NOTCH2',
 'ETV6',
 'RB1',
 'SBNO2']


print(len(curious_genes))

48


train, test = train_test_split(patients_gene_network, test_size=0.5, random_state=42, shuffle=True)

X_train = train[curious_genes].to_numpy()
y_train = train['Survived'].to_numpy()
X_test = test[curious_genes].to_numpy()
Y_test = test['Survived'].to_numpy()


max_accuracy = -1
df4 = pd.DataFrame(data = X_test, columns = curious_genes)
d = {"Number of Trees": [np.nan], "Depth":[np.nan], "TP":[np.nan],"FP":[np.nan],"TN":[np.nan],"FN":[np.nan], "Accuracy": [np.nan], "TPR":[np.nan], "TNR": [np.nan]}

table_of_results = pd.DataFrame(data=d)

    
for total_trees in range(2,11):
    for depth in range(2,6):
        temp_dataframe = df4.copy()
        clf_model = RandomForestClassifier(max_depth=depth, n_estimators=total_trees, random_state=0) 
        clf_model.fit(X_train,y_train)

        y_predicted = clf_model.predict(X_test)
        

        temp_dataframe["Class"] = Y_test
        temp_dataframe["Predicted"] = y_predicted
        #print("Number of Trees - ", total_trees)
        #print("Overall Depth - ", depth)
        table_accuracy = create_table_of_analysis(temp_dataframe, "Class", "Predicted", 1, 0)
        table_accuracy["Number of Trees"] = total_trees
        table_accuracy["Depth"] = depth
        accuracy = table_accuracy.iloc[0]["Accuracy"]
        table_of_results = pd.concat([table_of_results,table_accuracy])
        
        if accuracy > max_accuracy:
            maximum_table = table_accuracy
            optimal_features = {"Total Trees" : total_trees, "Depth": depth}
            maximum_model = clf_model
            maximum_confusion_matrix = confusion_matrix(Y_test, y_predicted)
            max_accuracy = accuracy


table_of_results = table_of_results.dropna()
columns_entries = list(table_of_results)
print(table_of_results)

   Number of Trees  Depth     TP     FP   TN   FN  Accuracy       TPR  \
0              2.0    2.0  689.0  187.0  0.0  2.0  0.784738  0.997106   
0              2.0    3.0  688.0  187.0  0.0  3.0  0.783599  0.995658   
0              2.0    4.0  687.0  187.0  0.0  4.0  0.782460  0.994211   
0              2.0    5.0  686.0  186.0  1.0  5.0  0.782460  0.992764   
0              3.0    2.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0              3.0    3.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0              3.0    4.0  689.0  187.0  0.0  2.0  0.784738  0.997106   
0              3.0    5.0  689.0  187.0  0.0  2.0  0.784738  0.997106   
0              4.0    2.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0              4.0    3.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0              4.0    4.0  689.0  187.0  0.0  2.0  0.784738  0.997106   
0              4.0    5.0  689.0  187.0  0.0  2.0  0.784738  0.997106   
0              5.0    2.0  689.0  187.0  0.0  2.0  0.784738  0.997106   
0              5.0    3.0  689.0  187.0  0.0  2.0  0.784738  0.997106   
0              5.0    4.0  687.0  187.0  0.0  4.0  0.782460  0.994211   
0              5.0    5.0  687.0  187.0  0.0  4.0  0.782460  0.994211   
0              6.0    2.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              6.0    3.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              6.0    4.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0              6.0    5.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0              7.0    2.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              7.0    3.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              7.0    4.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0              7.0    5.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0              8.0    2.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0              8.0    3.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0              8.0    4.0  689.0  187.0  0.0  2.0  0.784738  0.997106   
0              8.0    5.0  689.0  187.0  0.0  2.0  0.784738  0.997106   
0              9.0    2.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              9.0    3.0  691.0  187.0  0.0  0.0  0.787016  1.000000   
0              9.0    4.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0              9.0    5.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0             10.0    2.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0             10.0    3.0  690.0  187.0  0.0  1.0  0.785877  0.998553   
0             10.0    4.0  689.0  187.0  0.0  2.0  0.784738  0.997106   
0             10.0    5.0  689.0  187.0  0.0  2.0  0.784738  0.997106   

        TNR  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.005348  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000  
0  0.000000


fig = plt.figure()
 
# syntax for 3-D projection
ax = Axes3D(fig)

ax.scatter(table_of_results["Number of Trees"], table_of_results["Depth"], table_of_results["Accuracy"])

ax.set_xlabel(columns_entries[0])
ax.set_ylabel(columns_entries[1])
ax.set_zlabel(columns_entries[6])

ax.set_title('Accuracy')
plt.show()


maximum_table


for tree_i in maximum_model.estimators_:
    text_representation = tree.export_text(tree_i, feature_names=curious_genes)
    print(text_representation)
    print("=======")

|--- PIK3R1 <= 0.50
|   |--- STAT3 <= 0.50
|   |   |--- class: 1.0
|   |--- STAT3 >  0.50
|   |   |--- class: 0.0
|--- PIK3R1 >  0.50
|   |--- class: 0.0

=======
|--- PTEN <= 0.50
|   |--- PIK3R1 <= 0.50
|   |   |--- class: 1.0
|   |--- PIK3R1 >  0.50
|   |   |--- class: 0.0
|--- PTEN >  0.50
|   |--- class: 0.0

=======
|--- ETV6 <= 0.50
|   |--- PDE1C <= 0.50
|   |   |--- class: 1.0
|   |--- PDE1C >  0.50
|   |   |--- class: 0.0
|--- ETV6 >  0.50
|   |--- class: 0.0

=======
|--- IDH2 <= 0.50
|   |--- IGF1R <= 0.50
|   |   |--- class: 1.0
|   |--- IGF1R >  0.50
|   |   |--- class: 0.0
|--- IDH2 >  0.50
|   |--- class: 0.0

=======
|--- RARA <= 0.50
|   |--- PDE1C <= 0.50
|   |   |--- class: 1.0
|   |--- PDE1C >  0.50
|   |   |--- class: 0.0
|--- RARA >  0.50
|   |--- class: 0.0

=======
|--- MLL3 <= 0.50
|   |--- RB1 <= 0.50
|   |   |--- class: 1.0
|   |--- RB1 >  0.50
|   |   |--- class: 0.0
|--- MLL3 >  0.50
|   |--- class: 0.0

=======


len(X_test) == maximum_table["TN"] + maximum_table["FN"]

0    False
dtype: bool


freq = np.histogram(Y_test, bins=[-np.inf] + [0,1] + [np.inf])[0]/Y_test.size
freq

array([0.        , 0.21298405, 0.78701595])


res = ""
import re
pattern = '([A-Z])\w+[0-9]*'
genes_of_interest = []
geneExpression = re.compile(pattern)

for tree_i in maximum_model.estimators_:
    text_representation = tree.export_text(tree_i, feature_names=curious_genes)
    
    lines = text_representation.split("\n")
    for line in lines:
        mo = geneExpression.search(line)
        if mo:
            genes_of_interest.append(mo.group())
    
genes_of_interest = list(set(genes_of_interest))
genes_of_interest

['MLL3',
 'STAT3',
 'RARA',
 'IDH2',
 'PDE1C',
 'PTEN',
 'PIK3R1',
 'ETV6',
 'IGF1R',
 'RB1']


import scipy
assert(scipy.__version__ > "1.7")

#print(train_naive_bayes)

def converter(column):
    return [2 if item else 1 for item in column]


nominal_convert = pd.DataFrame(data = df4, columns = genes_of_interest)

nominal_convert = nominal_convert.apply(lambda x: converter(x))
#nominal_convert = train_naive_bayes[genes_of_interest]


from scipy.stats.contingency import association

association_data = []

for gene_i in genes_of_interest:
    data_row = []
    for gene_j in genes_of_interest:
        
        column_1 = nominal_convert[gene_i].to_numpy()
        column_2 = nominal_convert[gene_j].to_numpy()
        
        two_links = []
        for row_i in range(len(column_1)):
            two_links_temp = [column_1[row_i],column_2[row_i]]
            two_links.append(two_links_temp)
        
        data_row.append(association(two_links, method="tschuprow"))
    association_data.append(data_row)
    

df_association = pd.DataFrame(data = association_data, columns = genes_of_interest)
df_association


import scipy
assert(scipy.__version__ > "1.7")


def converter(column):
    return [2 if item == 1 else 1 for item in column]

nominal_convert = pd.DataFrame(data = df4, columns = genes_of_interest)

nominal_convert = nominal_convert.apply(lambda x: converter(x))
#nominal_convert = train_naive_bayes[genes_of_interest]


from scipy.stats.contingency import association

association_data = []

for gene_i in genes_of_interest:
    data_row = []
    for gene_j in genes_of_interest:
        
        column_1 = nominal_convert[gene_i].to_numpy()
        column_2 = nominal_convert[gene_j].to_numpy()
        
        two_links = []
        for row_i in range(len(column_1)):
            two_links_temp = [column_1[row_i],column_2[row_i]]
            two_links.append(two_links_temp)
        
        data_row.append(association(two_links))
    association_data.append(data_row)
    

df_association = pd.DataFrame(data = association_data, columns = genes_of_interest)
df_association


from sklearn.neural_network import MLPClassifier

df4 = pd.DataFrame(data = X_test, columns = curious_genes)

clf = MLPClassifier(random_state=1, max_iter=300, activation='logistic').fit(X_train, y_train)



probability_predictions = clf.predict_proba(X_test)


temp_dataframe = df4.copy()


y_predicted = clf.predict(X_test)


temp_dataframe["Class"] = Y_test
temp_dataframe["Predicted"] = y_predicted
#print("Number of Trees - ", total_trees)
#print("Overall Depth - ", depth)
table_accuracy = create_table_of_analysis(temp_dataframe, "Class", "Predicted", 1, 0)
print(table_accuracy)

    TP   FP  TN  FN  Accuracy       TPR       TNR
0  683  186   1   8  0.779043  0.988423  0.005348


freq

array([0.        , 0.21298405, 0.78701595])


from sklearn.naive_bayes import GaussianNB
naive_bayes = GaussianNB()

df4 = pd.DataFrame(data = X_test, columns = curious_genes)


train_naive_bayes, test_naive_bayes = train_test_split(patients_gene_network, test_size=0.5, random_state=42, shuffle=True)

X_train_naive_bayes = train_naive_bayes[genes_of_interest].to_numpy()
y_train_naive_bayes = train_naive_bayes['Survived'].to_numpy()
X_test_naive_bayes = test_naive_bayes[genes_of_interest].to_numpy()
Y_test_naive_bayes = test_naive_bayes['Survived'].to_numpy()

naive_bayes.fit(X_train_naive_bayes , y_train_naive_bayes)

 
#Predict on test data
y_predicted = naive_bayes.predict(X_test_naive_bayes)

temp_dataframe = df4.copy()


y_predicted = naive_bayes.predict(X_test_naive_bayes)


temp_dataframe["Class"] = Y_test
temp_dataframe["Predicted"] = y_predicted
#print("Number of Trees - ", total_trees)
#print("Overall Depth - ", depth)
table_accuracy = create_table_of_analysis(temp_dataframe, "Class", "Predicted", 1, 0)

print(table_accuracy)

    TP   FP  TN  FN  Accuracy       TPR      TNR
0  678  183   4  13  0.776765  0.981187  0.02139


mask = patients_gene_network['Survived'] == 1

df_survived = patients_gene_network[mask]
 
# invert the boolean values
df_deceased = patients_gene_network[~mask]

space_of_deceased = freq[1]

range_end = int(len(df_survived) * space_of_deceased)

df_survived = df_survived.iloc[:range_end] 

new_df = df_survived.append(df_deceased)


naive_bayes = GaussianNB()


train_naive_bayes, test_naive_bayes = train_test_split(new_df, test_size=0.5, random_state=42, shuffle=True)

X_train_naive_bayes = train_naive_bayes[genes_of_interest].to_numpy()
y_train_naive_bayes = train_naive_bayes['Survived'].to_numpy()
X_test_naive_bayes = test_naive_bayes[genes_of_interest].to_numpy()
Y_test_naive_bayes = test_naive_bayes['Survived'].to_numpy()

naive_bayes.fit(X_train_naive_bayes , y_train_naive_bayes)

 
#Predict on test data
y_predicted = naive_bayes.predict(X_test_naive_bayes)

temp_dataframe = test_naive_bayes.copy()



temp_dataframe["Class"] = Y_test_naive_bayes
temp_dataframe["Predicted"] = y_predicted
#print("Number of Trees - ", total_trees)
#print("Overall Depth - ", depth)
table_accuracy = create_table_of_analysis(temp_dataframe, "Class", "Predicted", 1, 0)

print(table_accuracy)

    TP   FP  TN  FN  Accuracy       TPR       TNR
0  158  175   3   1  0.477745  0.993711  0.016854


freq_new_data = np.histogram(Y_test_naive_bayes, bins=[-np.inf] + [0,1] + [np.inf])[0]/Y_test_naive_bayes.size
freq_new_data

array([0.        , 0.52818991, 0.47181009])


max_accuracy = -1
d = {"Number of Trees": [np.nan], "Depth":[np.nan], "TP":[np.nan],"FP":[np.nan],"TN":[np.nan],"FN":[np.nan], "Accuracy": [np.nan], "TPR":[np.nan], "TNR": [np.nan]}

table_of_results = pd.DataFrame(data=d)

print(len(new_df))
train_dt, test_dt = train_test_split(new_df, test_size=0.5, random_state=42, shuffle=True)

X_train_dt = train_dt[curious_genes].to_numpy()
y_train_dt = train_dt['Survived'].to_numpy()
X_test_dt = test_dt[curious_genes].to_numpy()
Y_test_dt = test_dt['Survived'].to_numpy()

df4 = pd.DataFrame(data = X_test_dt, columns = curious_genes)

    
for total_trees in range(2,11):
    for depth in range(2,6):
        temp_dataframe = df4.copy()
        clf_model = RandomForestClassifier(max_depth=depth, n_estimators=total_trees, random_state=0) 
        clf_model.fit(X_train_dt,y_train_dt)

        y_predicted = clf_model.predict(X_test_dt)
        
        

        temp_dataframe["Class"] = Y_test_dt
        temp_dataframe["Predicted"] = y_predicted
        #print("Number of Trees - ", total_trees)
        #print("Overall Depth - ", depth)
        table_accuracy = create_table_of_analysis(temp_dataframe, "Class", "Predicted", 1, 0)
        table_accuracy["Number of Trees"] = total_trees
        table_accuracy["Depth"] = depth
        accuracy = table_accuracy.iloc[0]["Accuracy"]
        table_of_results = pd.concat([table_of_results,table_accuracy])
        
        if accuracy > max_accuracy:
            maximum_table = table_accuracy
            optimal_features = {"Total Trees" : total_trees, "Depth": depth}
            maximum_model = clf_model
            maximum_confusion_matrix = confusion_matrix(Y_test_dt, y_predicted)
            max_accuracy = accuracy

674


for tree_i in maximum_model.estimators_:
    text_representation = tree.export_text(tree_i, feature_names=curious_genes)
    print(text_representation)
    print("=======")

|--- PTEN <= 0.50
|   |--- PPM1D <= 0.50
|   |   |--- class: 0.0
|   |--- PPM1D >  0.50
|   |   |--- class: 1.0
|--- PTEN >  0.50
|   |--- class: 0.0

=======
|--- PTEN <= 0.50
|   |--- ETV6 <= 0.50
|   |   |--- class: 0.0
|   |--- ETV6 >  0.50
|   |   |--- class: 0.0
|--- PTEN >  0.50
|   |--- class: 0.0

=======


res = ""
import re
pattern = '([A-Z])\w+[0-9]*'
genes_of_interest = []
geneExpression = re.compile(pattern)

for tree_i in maximum_model.estimators_:
    text_representation = tree.export_text(tree_i, feature_names=curious_genes)
    
    lines = text_representation.split("\n")
    for line in lines:
        mo = geneExpression.search(line)
        if mo:
            genes_of_interest.append(mo.group())
    
genes_of_interest = list(set(genes_of_interest))
genes_of_interest

['PTEN', 'ETV6', 'PPM1D']


import scipy
assert(scipy.__version__ > "1.7")

#print(train_naive_bayes)

def converter(column):
    return [2 if item else 1 for item in column]


nominal_convert = pd.DataFrame(data = df4, columns = genes_of_interest)

nominal_convert = nominal_convert.apply(lambda x: converter(x))
#nominal_convert = train_naive_bayes[genes_of_interest]


from scipy.stats.contingency import association

association_data = []

for gene_i in genes_of_interest:
    data_row = []
    for gene_j in genes_of_interest:
        
        column_1 = nominal_convert[gene_i].to_numpy()
        column_2 = nominal_convert[gene_j].to_numpy()
        
        two_links = []
        for row_i in range(len(column_1)):
            two_links_temp = [column_1[row_i],column_2[row_i]]
            two_links.append(two_links_temp)
        
        data_row.append(association(two_links, method="tschuprow"))
    association_data.append(data_row)
    

df_association = pd.DataFrame(data = association_data, columns = genes_of_interest)
df_association


import scipy
assert(scipy.__version__ > "1.7")


def converter(column):
    return [2 if item == 1 else 1 for item in column]

nominal_convert = pd.DataFrame(data = df4, columns = genes_of_interest)

nominal_convert = nominal_convert.apply(lambda x: converter(x))
#nominal_convert = train_naive_bayes[genes_of_interest]


from scipy.stats.contingency import association

association_data = []

for gene_i in genes_of_interest:
    data_row = []
    for gene_j in genes_of_interest:
        
        column_1 = nominal_convert[gene_i].to_numpy()
        column_2 = nominal_convert[gene_j].to_numpy()
        
        two_links = []
        for row_i in range(len(column_1)):
            two_links_temp = [column_1[row_i],column_2[row_i]]
            two_links.append(two_links_temp)
        
        data_row.append(association(two_links))
    association_data.append(data_row)
    

df_association = pd.DataFrame(data = association_data, columns = genes_of_interest)
df_association

	PATIENT_ID	METASTATIC_DZ_FUP	METASTATIC_RECURRENCE_TIME_MONTHS	PRIOR_LOCAL_RECURRENCE	SEX	MENOPAUSAL_STATUS_AT_DIAGNOSIS	LATERALITY	T_STAGE	N_STAGE	M_STAGE	...	OVERALL_HR_STATUS_PATIENT	OVERALL_HER2_STATUS_PATIENT	HER2_STATUS_PRIMARY	VITAL_STATUS	LAST_CONTACT_DAYS_TO	TIME_TO_DEATH_MONTHS	DFS_MONTHS	DFS_EVENT	OS_MONTHS	OS_STATUS
0	P-0000004	Yes	446.0	No	Female	Pre	Left	T1c	N3a	M1	...	Positive	Negative	Negative	Alive	14484	NaN	1.1	1	31.5	0:LIVING
1	P-0000012	No	NaN	No	Female	Pre	Right	T2	N0	M0	...	Negative	Negative	Negative	Alive	22336	NaN	218.0	0	218.0	0:LIVING
2	P-0000015	Yes	519.0	No	Female	Pre	Right	T1b	N1mi	M0	...	Positive	Negative	Negative	Deceased	16656	548.0	68.9	1	98.0	1:DECEASED
3	P-0000041	Yes	604.0	No	Female	Pre	Left	T1b	N0	M0	...	Positive	Positive	Positive	Deceased	19364	637.0	90.2	1	123.1	1:DECEASED
4	P-0000057	Yes	459.0	No	Female	Pre	Left	TX	NX	M1	...	Positive	Negative	Negative	Deceased	15871	522.0	0.5	1	63.6	1:DECEASED
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1751	P-0018917	No	NaN	No	Female	Peri	Right	T1c	N0	M0	...	Positive	Negative	Negative	Alive	21723	NaN	2.9	0	2.9	0:LIVING
1752	P-0018963	No	NaN	No	Female	Peri	Left	T1b	N0	M0	...	Positive	Negative	Negative	Alive	17610	NaN	0.7	0	0.7	0:LIVING
1753	P-0019037	No	NaN	No	Female	Post	Right	T1b	N0	M0	...	Positive	Negative	Negative	Alive	25027	NaN	1.6	0	1.6	0:LIVING
1754	P-0019054	No	NaN	No	Female	Post	Right	T2	N1a	M0	...	Positive	Negative	Negative	Alive	23381	NaN	4.3	0	4.3	0:LIVING
1755	P-0019073	No	NaN	No	Female	Post	Right	T2	N1mi	M0	...	Positive	Negative	Negative	Alive	24691	NaN	4.5	0	4.5	0:LIVING

	Hugo_Symbol	Entrez_Gene_Id	Center	NCBI_Build	Chromosome	Start_Position	End_Position	Strand	Consequence	Variant_Classification	...	n_ref_count	n_alt_count	HGVSc	HGVSp	HGVSp_Short	Transcript_ID	RefSeq	Protein_position	Codons	Hotspot
0	ASXL2	0	MSKCC	GRCh37	2	25976426	25976426	+	missense_variant	Missense_Mutation	...	683	0	ENST00000435504.4:c.1119C>G	p.Phe373Leu	p.F373L	ENST00000435504	NaN	373.0	ttC/ttG	0
1	NOTCH1	0	MSKCC	GRCh37	9	139413071	139413071	+	missense_variant	Missense_Mutation	...	485	0	ENST00000277541.6:c.1071C>G	p.Phe357Leu	p.F357L	ENST00000277541	NM_017617.3	357.0	ttC/ttG	0
2	PTEN	0	MSKCC	GRCh37	10	89624263	89624263	+	stop_gained	Nonsense_Mutation	...	147	0	ENST00000371953.3:c.37A>T	p.Lys13Ter	p.K13*	ENST00000371953	NM_000314.4	13.0	Aaa/Taa	0
3	KDM5C	0	MSKCC	GRCh37	X	53254071	53254071	+	start_lost	Translation_Start_Site	...	387	0	ENST00000375401.3:c.1A>T	p.Met1?	p.M1?	ENST00000375401	NM_004187.3	1.0	Atg/Ttg	0
4	CDH1	0	MSKCC	GRCh37	16	68844167	68844167	+	frameshift_variant	Frame_Shift_Del	...	477	0	ENST00000261769.5:c.755del	p.Val252GlufsTer30	p.V252Efs*30	ENST00000261769	NM_004360.3	252.0	gTa/ga	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
9309	MSH6	0	MSKCC	GRCh37	2	48027313	48027313	+	missense_variant	Missense_Mutation	...	273	0	ENST00000234420.5:c.2191C>A	p.Gln731Lys	p.Q731K	ENST00000234420	NM_000179.2	731.0	Caa/Aaa	0
9310	ASXL1	0	MSKCC	GRCh37	20	31021228	31021228	+	missense_variant	Missense_Mutation	...	270	0	ENST00000375687.4:c.1227G>T	p.Lys409Asn	p.K409N	ENST00000375687	NM_015338.5	409.0	aaG/aaT	0
9311	TSC2	0	MSKCC	GRCh37	16	2104320	2104321	+	inframe_insertion	In_Frame_Ins	...	370	1	ENST00000219476.3:c.362_382dup	p.Ile127_Lys128insThrLeuPhePheLysValIle	p.I127_K128insTLFFKVI	ENST00000219476	NM_000548.3	121.0	gcc/gCCCTCTTCTTTAAGGTCATCAcc	0
9312	HIST1H2BD	0	MSKCC	GRCh37	6	26158741	26158742	+	frameshift_variant	Frame_Shift_Ins	...	136	0	ENST00000289316.2:c.345_346insCGTCACCAAG	p.Thr116ArgfsTer31	p.T116Rfs*31	ENST00000289316	NM_138720.2	115.0	-/CGTCACCAAG	0
9313	PIK3R1	0	MSKCC	GRCh37	5	67591050	67591051	+	stop_gained,frameshift_variant	Nonsense_Mutation	...	297	0	ENST00000274335.5:c.1643_1644delinsCTTGAAGAAGC...	p.Asp548AlafsTer2	p.D548Afs*2	ENST00000274335	NaN	548.0	gAC/gCTTGAAGAAGCAGGCAGCTGAG	0

	SAMPLE_ID	mutations	cna
0	P-0000004-T01-IM3	IMPACT341	IMPACT341
1	P-0000012-T02-IM3	IMPACT341	IMPACT341
2	P-0000015-T01-IM3	IMPACT341	IMPACT341
3	P-0000041-T01-IM3	IMPACT341	IMPACT341
4	P-0000057-T01-IM3	IMPACT341	IMPACT341
...	...	...	...
1913	P-0000075-T01-IM3	IMPACT341	IMPACT341
1914	P-0000244-T01-IM3	IMPACT341	IMPACT341
1915	P-0017564-T01-IM6	IMPACT468	IMPACT468
1916	P-0003591-T02-IM6	IMPACT468	IMPACT468
1917	P-0000215-T01-IM3	IMPACT341	IMPACT341

	SAMPLE_ID	cna
mutations
IMPACT341	432	432
IMPACT410	968	968
IMPACT468	518	518

	PATIENT_ID	SAMPLE_ID	SAMPLE_TYPE	SAMPLE_SITE	PRIOR_BREAST_PRIMARY	INVASIVE_CARCINOMA_DX_TIME	NGS_SAMPLE_COLLECTION_TIME	INVASIVE_CARCINOMA_DX_AGE	TUMOR_TISSUE_ORIGIN	TUMOR_SAMPLE_HISTOLOGY	...	OVERALL_HER2_STATUS	HER2_IHC_STATUS	HER2_IHC_SCORE	HER2_FISH	HER2_FISH_RATIO	ONCOTREE_CODE	CANCER_TYPE	CANCER_TYPE_DETAILED	SOMATIC_STATUS	TMB_NONSYNONYMOUS
0	P-0000004	P-0000004-T01-IM3	Primary	Treatment Naive Primary	No	444.87	445	37	Breast	Breast Invasive Ductal Carcinoma	...	Negative	Negative	0-1+	Not Applicable	Not Applicable	IDC	Breast Cancer	Breast Invasive Ductal Carcinoma	Matched	0.133333
1	P-0000012	P-0000012-T02-IM3	Primary	Treatment Naive Primary	No	516.48	517	43	Breast	Breast Invasive Ductal Carcinoma	...	Negative	Negative	0-1+	Not Applicable	Not Applicable	IDC	Breast Cancer	Breast Invasive Ductal Carcinoma	Matched	0.033333
2	P-0000015	P-0000015-T01-IM3	Metastasis	Liver	No	449.80	534	37	Breast	Breast Invasive Ductal Carcinoma	...	Negative	Negative	0+	Not Applicable	Not Applicable	IDC	Breast Cancer	Breast Invasive Ductal Carcinoma	Matched	0.233333
3	P-0000041	P-0000041-T01-IM3	Metastasis	Breast	No	513.68	618	43	Breast	Breast Invasive Ductal Carcinoma	...	Negative	Equivocal	2+	Negative	0.24	IDC	Breast Cancer	Breast Invasive Ductal Carcinoma	Matched	0.333333
4	P-0000057	P-0000057-T01-IM3	Primary	Post-Treatment Primary	No	458.36	489	38	Breast	Breast Mixed Ductal and Lobular Carcinoma	...	Negative	Negative	0-1+	Not Applicable	Not Applicable	MDLC	Breast Cancer	Breast Mixed Ductal and Lobular Carcinoma	Matched	0.166667
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1913	P-0018917	P-0018917-T01-IM6	Primary	Treatment Naive Primary	No	711.64	712	59	Breast	Breast Invasive Ductal Carcinoma	...	Negative	Negative	0-1+	Not Applicable	Not Applicable	IDC	Breast Cancer	Breast Invasive Ductal Carcinoma	Matched	0.133333
1914	P-0018963	P-0018963-T01-IM6	Primary	Treatment Naive Primary	No	578.55	579	48	Breast	Breast Invasive Ductal Carcinoma	...	Negative	Negative	0-1+	Not Applicable	Not Applicable	IDC	Breast Cancer	Breast Invasive Ductal Carcinoma	Matched	0.033333
1915	P-0019037	P-0019037-T01-IM6	Primary	Treatment Naive Primary	No	821.61	822	68	Breast	Breast Invasive Ductal Carcinoma	...	Negative	Equivocal	2+	Negative	1.2	IDC	Breast Cancer	Breast Invasive Ductal Carcinoma	Matched	0.066667
1916	P-0019054	P-0019054-T01-IM6	Primary	Treatment Naive Primary	No	764.84	767	64	Breast	Breast Invasive Lobular Carcinoma	...	Negative	Negative	0-1+	Not Applicable	Not Applicable	ILC	Breast Cancer	Breast Invasive Lobular Carcinoma	Matched	0.100000
1917	P-0019073	P-0019073-T01-IM6	Primary	Treatment Naive Primary	No	807.70	809	67	Breast	Breast Invasive Ductal Carcinoma	...	Negative	Negative	0-1+	Not Applicable	Not Applicable	IDC	Breast Cancer	Breast Invasive Ductal Carcinoma	Matched	0.033333

#¶

Import Libraries¶

Data Processing¶

Imports of Data¶

Gene Mutations Import¶

MSK-Impact¶

Get Common MSK-Impact Mutations¶

Clinical Data Filter¶

Filter by Outcome¶

Gene Query Finder¶

Discovering at risk genes¶

Example Gene (RIPK4)¶

Analysis¶

Ignoring Different Fusions¶

STK11¶

ADCY9¶

Genes found in surviving Pateints¶

Machine Learning¶

Confirming Gene Network¶

Analysis¶

Genes of Interest¶

Analysis¶

Further Analysis¶

Decision Tree Genes¶

Association Algorithm¶

Analysis¶

Neural Network with Logistic Neuron Activation¶

Analysis¶

Analysis¶

Analysis¶

FGFR2¶

BCAS3¶

RAD51C¶

Ending Notes¶

	Hugo_Symbol	Entrez_Gene_Id	Center	Tumor_Sample_Barcode	Fusion	DNA_support	RNA_support	Method	Frame	Comments
358	DDX11L2	0	MSKCC-DMP	P-0000058-T01-IM3	DDX11L2-ALK fusion	yes	unknown	NaN	unknown	NaN
357	ALK	0	MSKCC-DMP	P-0000058-T01-IM3	DDX11L2-ALK fusion	yes	unknown	NaN	unknown	NaN
23	NTRK1	0	MSKCC-DMP	P-0000104-T02-IM6	NTRK1-ZBTB7B fusion	yes	unknown	NaN	unknown	ZBTB7B (NM_001252406) - NTRK1 (NM_002529) Rea...
22	ZBTB7B	0	MSKCC-DMP	P-0000104-T02-IM6	NTRK1-ZBTB7B fusion	yes	unknown	NaN	unknown	ZBTB7B (NM_001252406) - NTRK1 (NM_002529) Rea...
296	TGFB2	0	MSKCC-DMP	P-0000187-T01-IM3	TGFB2-RB1 fusion	yes	unknown	NaN	unknown	NaN
...	...	...	...	...	...	...	...	...	...	...
438	IL10	0	MSKCC-DMP	P-0017846-T01-IM6	IL10-TNNI1 fusion	yes	unknown	NaN	unknown	IL10 (NM_000572) Rearrangement : chr1:g.201374...
445	MRE11A	0	MSKCC-DMP	P-0018120-T01-IM6	MRE11A-DISC1FP1 fusion	yes	unknown	NaN	unknown	MRE11A (NM_005591) Rearrangement: c.1783+13:MR...
444	DISC1FP1	0	MSKCC-DMP	P-0018120-T01-IM6	MRE11A-DISC1FP1 fusion	yes	unknown	NaN	unknown	MRE11A (NM_005591) Rearrangement: c.1783+13:MR...
449	ERBB2	0	MSKCC-DMP	P-0018896-T01-IM6	ARHGAP42-ERBB2 fusion	yes	unknown	NaN	unknown	ERBB2 (NM_004448) rearrangement: t(11;17)(q22....
450	ARHGAP42	0	MSKCC-DMP	P-0018896-T01-IM6	ARHGAP42-ERBB2 fusion	yes	unknown	NaN	unknown	ERBB2 (NM_004448) rearrangement: t(11;17)(q22....

	Hugo_Symbol	Entrez_Gene_Id	Center	Tumor_Sample_Barcode	Fusion	DNA_support	RNA_support	Method	Frame	Comments
0	BCAS3	0	MSKCC-DMP	P-0002841-T02-IM6	BRIP1-BCAS3 fusion	yes	unknown	NaN	unknown	BRIP1 (NM_032043) - BCAS3 (NM_001099432) rearr...
1	BRIP1	0	MSKCC-DMP	P-0002841-T02-IM6	BRIP1-BCAS3 fusion	yes	unknown	NaN	unknown	BRIP1 (NM_032043) - BCAS3 (NM_001099432) rearr...
2	ERMP1	0	MSKCC-DMP	P-0002088-T02-IM5	TGFBR1-ERMP1 fusion	yes	unknown	NaN	unknown	TGFBR1 (NM_004612) Rearrangement : chr9:g.5801...
3	TGFBR1	0	MSKCC-DMP	P-0002088-T02-IM5	TGFBR1-ERMP1 fusion	yes	unknown	NaN	unknown	TGFBR1 (NM_004612) Rearrangement : chr9:g.5801...
4	DNAH9	0	MSKCC-DMP	P-0014576-T01-IM6	MAP2K4-DNAH9 fusion	yes	unknown	NaN	unknown	DNAH9 (NM_001372) - MAP2K4 (NM_003010) rearran...
...	...	...	...	...	...	...	...	...	...	...
538	PTPRS	0	MSKCC-DMP	P-0005613-T01-IM5	UNC5D-PTPRS fusion	yes	unknown	NaN	unknown	PTPRS (NM_002850) - UNC5D (NM_080872) rearrang...
539	UNC5D	0	MSKCC-DMP	P-0005613-T01-IM5	UNC5D-PTPRS fusion	yes	unknown	NaN	unknown	PTPRS (NM_002850) - UNC5D (NM_080872) rearrang...
540	NOTCH3	0	MSKCC-DMP	P-0001308-T01-IM3	NOTCH3-intragenic	yes	unknown	NaN	unknown	None
541	IGF1R	0	MSKCC-DMP	P-0005818-T01-IM5	IGF1R-intragenic	yes	unknown	NaN	unknown	IGF1R (NM_000875) Rearrangement: c.640+53564...
542	EPHA3	0	MSKCC-DMP	P-0005818-T01-IM5	EPHA3-intragenic	yes	unknown	NaN	unknown	EPHA3 (NM_005233) Rearrangement : c.267_996du...

	Hugo_Symbol	Center	Tumor_Sample_Barcode	Fusion	DNA_support	RNA_support	Method	Frame	Comments	PATIENT_ID
40	IGF1R	MSKCC-DMP	P-0010464-T01-IM5	PPP1R3A-IGF1R fusion	yes	unknown	NaN	unknown	IGF1R (NM_000875) rearrangement: t(7;15)(q31.1...	P-0010464
268	IGF1R	MSKCC-DMP	P-0002435-T01-IM3	ACBD6-IGF1R fusion	yes	unknown	NaN	unknown	NaN	P-0002435
530	IGF1R	MSKCC-DMP	P-0003779-T01-IM5	IGF1R-intragenic	yes	unknown	NaN	unknown	IGF1R (NM_000875) rearrangement: c.954-1844_10...	P-0003779
541	IGF1R	MSKCC-DMP	P-0005818-T01-IM5	IGF1R-intragenic	yes	unknown	NaN	unknown	IGF1R (NM_000875) Rearrangement: c.640+53564...	P-0005818

	Hugo_Symbol	Entrez_Gene_Id	Center	Tumor_Sample_Barcode	Fusion	DNA_support	RNA_support	Method	Frame	Comments	PATIENT_ID
24	RARA	0	MSKCC-DMP	P-0011365-T01-IM5	COPZ2-RARA fusion	yes	unknown	NaN	unknown	RARA (NM_000964) - COPZ2 (NM_016429) rearrange...	P-0011365
25	COPZ2	0	MSKCC-DMP	P-0011365-T01-IM5	COPZ2-RARA fusion	yes	unknown	NaN	unknown	RARA (NM_000964) - COPZ2 (NM_016429) rearrange...	P-0011365
38	MYC	0	MSKCC-DMP	P-0012855-T01-IM5	MYC-intragenic	yes	unknown	NaN	unknown	MYC (NM_002467) rearrangement: c.1105_c.*1969d...	P-0012855
62	ARID5B	0	MSKCC-DMP	P-0013625-T01-IM5	ARID5B-intragenic	yes	unknown	NaN	unknown	ARID5B (NM_032199) rearrangement: c.734-289_c....	P-0013625
63	CTNNB1	0	MSKCC-DMP	P-0013625-T01-IM5	CTNNB1-intragenic	yes	unknown	NaN	unknown	CTNNB1 (NM_001904) rearrangement: c.-48-2076_c...	P-0013625
...	...	...	...	...	...	...	...	...	...	...	...
532	C1orf87	0	MSKCC-DMP	P-0002705-T01-IM3	C1orf87-ERBB2 fusion	yes	unknown	NaN	unknown	null Note: The ERBB2-C1orf87 reciprocal transl...	P-0002705
536	PTPRS	0	MSKCC-DMP	P-0005613-T01-IM5	ACER1-PTPRS fusion	yes	unknown	NaN	unknown	PTRPRS (NM_002850) - ACER1 (NM_133492) rearran...	P-0005613
537	ACER1	0	MSKCC-DMP	P-0005613-T01-IM5	ACER1-PTPRS fusion	yes	unknown	NaN	unknown	PTRPRS (NM_002850) - ACER1 (NM_133492) rearran...	P-0005613
538	PTPRS	0	MSKCC-DMP	P-0005613-T01-IM5	UNC5D-PTPRS fusion	yes	unknown	NaN	unknown	PTPRS (NM_002850) - UNC5D (NM_080872) rearrang...	P-0005613
539	UNC5D	0	MSKCC-DMP	P-0005613-T01-IM5	UNC5D-PTPRS fusion	yes	unknown	NaN	unknown	PTPRS (NM_002850) - UNC5D (NM_080872) rearrang...	P-0005613

	Hugo_Symbol	Center	Tumor_Sample_Barcode	Fusion	DNA_support	RNA_support	Method	Frame	Comments	PATIENT_ID
208	STK11	MSKCC-DMP	P-0007518-T01-IM5	STK11-SBNO2 fusion	yes	unknown	NaN	unknown	SBNO2 (NM_014963) - STK11 (NM_000455) Rearran...	P-0007518
209	STK11	MSKCC-DMP	P-0007518-T01-IM5	STK11-intragenic	yes	unknown	NaN	unknown	STK11 (NM_000455) Rearrangement : c.*16+129_75...	P-0007518
509	STK11	MSKCC-DMP	P-0003108-T01-IM5	STK11-HMHA1 fusion	yes	unknown	NaN	unknown	null Note: The HMHA1 (NM_012292) - STK11 (NM_0...	P-0003108

	Hugo_Symbol	Center	Tumor_Sample_Barcode	Fusion	DNA_support	RNA_support	Method	Frame	Comments	PATIENT_ID
207	SBNO2	MSKCC-DMP	P-0007518-T01-IM5	STK11-SBNO2 fusion	yes	unknown	NaN	unknown	SBNO2 (NM_014963) - STK11 (NM_000455) Rearran...	P-0007518
208	STK11	MSKCC-DMP	P-0007518-T01-IM5	STK11-SBNO2 fusion	yes	unknown	NaN	unknown	SBNO2 (NM_014963) - STK11 (NM_000455) Rearran...	P-0007518
209	STK11	MSKCC-DMP	P-0007518-T01-IM5	STK11-intragenic	yes	unknown	NaN	unknown	STK11 (NM_000455) Rearrangement : c.*16+129_75...	P-0007518
507	BRCA2	MSKCC-DMP	P-0003108-T01-IM5	BRCA2-intragenic	yes	unknown	NaN	unknown	null Note: The BRCA2 (NM_000059) intragenic re...	P-0003108
508	HMHA1	MSKCC-DMP	P-0003108-T01-IM5	STK11-HMHA1 fusion	yes	unknown	NaN	unknown	null Note: The HMHA1 (NM_012292) - STK11 (NM_0...	P-0003108
509	STK11	MSKCC-DMP	P-0003108-T01-IM5	STK11-HMHA1 fusion	yes	unknown	NaN	unknown	null Note: The HMHA1 (NM_012292) - STK11 (NM_0...	P-0003108
510	AXL	MSKCC-DMP	P-0003108-T01-IM5	AXL-intragenic	yes	unknown	NaN	unknown	null Note: The AXL (NM_021913) rearrangement e...	P-0003108
511	KLHL13	MSKCC-DMP	P-0003108-T01-IM5	RB1-KLHL13 fusion	yes	unknown	NaN	unknown	null Note: The RB1 (NM_000321) rearrangement e...	P-0003108
512	RB1	MSKCC-DMP	P-0003108-T01-IM5	RB1-KLHL13 fusion	yes	unknown	NaN	unknown	null Note: The RB1 (NM_000321) rearrangement e...	P-0003108

	PATIENT_ID	METASTATIC_DZ_FUP	METASTATIC_RECURRENCE_TIME_MONTHS	PRIOR_LOCAL_RECURRENCE	SEX	MENOPAUSAL_STATUS_AT_DIAGNOSIS	LATERALITY	T_STAGE	N_STAGE	M_STAGE	...	OVERALL_HR_STATUS_PATIENT	OVERALL_HER2_STATUS_PATIENT	HER2_STATUS_PRIMARY	VITAL_STATUS	LAST_CONTACT_DAYS_TO	TIME_TO_DEATH_MONTHS	DFS_MONTHS	DFS_EVENT	OS_MONTHS	OS_STATUS
413	P-0003108	Yes	504.0	No	Female	Pre	Left	T1c	N0	M0	...	Positive	Negative	Negative	Deceased	15886	523.0	60.3	1	78.9	1:DECEASED
873	P-0007518	Yes	843.0	No	Female	Post	Right	T2	N1mi	M0	...	Positive	Negative	Negative	Deceased	25741	847.0	12.5	1	16.7	1:DECEASED

	Hugo_Symbol	Entrez_Gene_Id	Center	Tumor_Sample_Barcode	Fusion	DNA_support	RNA_support	Method	Frame	Comments	PATIENT_ID
261	ADCY9	0	MSKCC-DMP	P-0003762-T02-IM5	ADCY9-CDK12 fusion	yes	unknown	NaN	unknown	ADCY9 (NM_001116) - CDK12 (NM_016507) rearrang...	P-0003762
504	ADCY9	0	MSKCC-DMP	P-0003762-T01-IM5	ADCY9-CDK12 fusion	yes	unknown	NaN	unknown	t(16;17)(p13.3;q12)(chr16:g.4033430::chr17:g.3...	P-0003762

	Hugo_Symbol	Count
0	TYK2	1
1	HELZ	1
2	SRC	2
3	NRG1	1
4	MLL2	1
...	...	...
263	MAP3K1	5
264	YES1	1
265	CCL2	1
266	LRRC4C	1
267	BCL2L14	1

	PATIENT_ID	HELZ	MLL2	SLC25A14	ADCY8	RUNX1	EED	UBAC2	FAT3	RASL10A	...	CARD11	DNAH9	LLGL1	TMEM171	CHEK1	ACTN4	CDKN1B	DNMT3A	TRPC6	HIST1H4B
0	P-0000004	False	False	False	False	False	False	False	False	False	...	False	False	False	False	False	False	False	False	False	False
1	P-0000012	False	False	False	False	False	False	False	False	False	...	False	False	False	False	False	False	False	False	False	False
2	P-0000015	False	False	False	False	False	False	False	False	False	...	False	False	False	False	False	False	False	False	False	False
3	P-0000041	False	False	False	False	False	False	False	False	False	...	False	False	False	False	False	False	False	False	False	False
4	P-0000057	False	False	False	False	False	False	False	False	False	...	False	False	False	False	False	False	False	False	False	False
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1751	P-0018917	False	False	False	False	False	False	False	False	False	...	False	False	False	False	False	False	False	False	False	False
1752	P-0018963	False	False	False	False	False	False	False	False	False	...	False	False	False	False	False	False	False	False	False	False
1753	P-0019037	False	False	False	False	False	False	False	False	False	...	False	False	False	False	False	False	False	False	False	False
1754	P-0019054	False	False	False	False	False	False	False	False	False	...	False	False	False	False	False	False	False	False	False	False
1755	P-0019073	False	False	False	False	False	False	False	False	False	...	False	False	False	False	False	False	False	False	False	False

	MLL3	STAT3	RARA	IDH2	PDE1C	PTEN	PIK3R1	ETV6	IGF1R	RB1
0	0.000000	0.005645	0.005645	0.005645	0.005645	0.006188	0.005645	0.006188	0.005041	0.007145
1	0.005645	0.000000	0.003578	0.003578	0.003578	0.004380	0.003578	0.004380	0.002529	0.005645
2	0.005645	0.003578	0.000000	0.003578	0.003578	0.004380	0.003578	0.004380	0.002529	0.005645
3	0.005645	0.003578	0.003578	0.000000	0.003578	0.004380	0.003578	0.004380	0.002529	0.005645
4	0.005645	0.003578	0.003578	0.003578	0.000000	0.004380	0.003578	0.004380	0.002529	0.005645
5	0.006188	0.004380	0.004380	0.004380	0.004380	0.000000	0.004380	0.005058	0.003572	0.006188
6	0.005645	0.003578	0.003578	0.003578	0.003578	0.004380	0.000000	0.004380	0.002529	0.005645
7	0.006188	0.004380	0.004380	0.004380	0.004380	0.005058	0.004380	0.000000	0.003572	0.006188
8	0.005041	0.002529	0.002529	0.002529	0.002529	0.003572	0.002529	0.003572	0.000000	0.005041
9	0.007145	0.005645	0.005645	0.005645	0.005645	0.006188	0.005645	0.006188	0.005041	0.000000

	MLL3	STAT3	RARA	IDH2	PDE1C	PTEN	PIK3R1	ETV6	IGF1R	RB1
0	0.000000	0.030717	0.030717	0.030717	0.030717	0.033672	0.030717	0.033672	0.027430	0.038881
1	0.030717	0.000000	0.019474	0.019474	0.019474	0.023837	0.019474	0.023837	0.013762	0.030717
2	0.030717	0.019474	0.000000	0.019474	0.019474	0.023837	0.019474	0.023837	0.013762	0.030717
3	0.030717	0.019474	0.019474	0.000000	0.019474	0.023837	0.019474	0.023837	0.013762	0.030717
4	0.030717	0.019474	0.019474	0.019474	0.000000	0.023837	0.019474	0.023837	0.013762	0.030717
5	0.033672	0.023837	0.023837	0.023837	0.023837	0.000000	0.023837	0.027524	0.019440	0.033672
6	0.030717	0.019474	0.019474	0.019474	0.019474	0.023837	0.000000	0.023837	0.013762	0.030717
7	0.033672	0.023837	0.023837	0.023837	0.023837	0.027524	0.023837	0.000000	0.019440	0.033672
8	0.027430	0.013762	0.013762	0.013762	0.013762	0.019440	0.013762	0.019440	0.000000	0.027430
9	0.038881	0.030717	0.030717	0.030717	0.030717	0.033672	0.030717	0.033672	0.027430	0.000000

Number of Trees	Depth	TP	FP	TN	FN	Accuracy	TPR	TNR
1.0	1.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
1.0	2.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
1.0	3.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
1.0	4.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
1.0	5.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
2.0	1.0	690.0	187.0	0.0	1.0	0.785877	0.998553	0.000000
2.0	2.0	690.0	187.0	0.0	1.0	0.785877	0.998553	0.000000
2.0	3.0	690.0	187.0	0.0	1.0	0.785877	0.998553	0.000000
2.0	4.0	690.0	187.0	0.0	1.0	0.785877	0.998553	0.000000
2.0	5.0	690.0	187.0	0.0	1.0	0.785877	0.998553	0.000000
3.0	1.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
3.0	2.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
3.0	3.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
3.0	4.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
3.0	5.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
4.0	1.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
4.0	2.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
4.0	3.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
4.0	4.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
4.0	5.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
5.0	1.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
5.0	2.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
5.0	3.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
5.0	4.0	691.0	186.0	1.0	0.0	0.788155	1.000000	0.005348
5.0	5.0	691.0	186.0	1.0	0.0	0.788155	1.000000	0.005348
6.0	1.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
6.0	2.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
6.0	3.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
6.0	4.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
6.0	5.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
7.0	1.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
7.0	2.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
7.0	3.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
7.0	4.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
7.0	5.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
8.0	1.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
8.0	2.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
8.0	3.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
8.0	4.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
8.0	5.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
9.0	1.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
9.0	2.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
9.0	3.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
9.0	4.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
9.0	5.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
10.0	1.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
10.0	2.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
10.0	3.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
10.0	4.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000
10.0	5.0	691.0	187.0	0.0	0.0	0.787016	1.000000	0.000000

	PTEN	ETV6	PPM1D
0	0.000000	0.005179	0.000000
1	0.005179	0.000000	0.005179
2	0.000000	0.005179	0.000000

	PTEN	ETV6	PPM1D
0	0.000000	0.022173	0.000000
1	0.022173	0.000000	0.022173
2	0.000000	0.022173	0.000000