# imports
import numpy as np
import pandas as pd
import matplotlib.cm as cm
import matplotlib.colors as colors
from IPython.display import HTML, Markdown
import requests
import time

# bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource

# display graphics
output_notebook()


gene = 'TPP1'
cell = ''


root_path = 'https://appyters.maayanlab.cloud/storage/L1000_RNAseq_Gene_Search'


# load gene info
gene_info = pd.read_csv(f"{root_path}/L1000_to_RNAseq_gene_list.tsv", sep="\t", index_col=0)
landmark = "not"
inferred_l1000 = "not"
if gene_info.loc[gene, "landmark"] == True:
    landmark = ""
if gene_info.loc[gene, "inferred l1000"] == True:
    inferred_l1000 = ""


display(Markdown(f"**Input gene: {gene}**"))
display(Markdown(f"{gene} is {landmark} a landmark gene."))
display(Markdown(f"{gene} is {inferred_l1000} an originally inferred L1000 gene."))
display(Markdown(f"{gene} is a newly inferred (our model) gene."))
display(HTML(f"""More information about {gene} can be found at the <a href="https://cfde-gene-pages.cloud/gene/{gene}?CF=false&PS=true&Ag=true" target="_blank"> Gene and Drug Landing Page Aggregator</a>"""))


display(Markdown(f"**Input cell line: {'All' if cell == '' else cell}**"))


xpr_gene_data = pd.read_feather(f"{root_path}/gene_files/{gene}.f").set_index('index')
if cell != '': 
    xpr_gene_data = xpr_gene_data[xpr_gene_data.index.map(lambda x: x.split('_')[1].split('.')[0] == cell)]

cp_gene_data = pd.read_feather(f"{root_path}/cp_gene_files/{gene}.f").set_index('index')
if cell != '': 
    cp_gene_data = cp_gene_data[cp_gene_data.index.map(lambda x: x.split('_')[1].split('.')[0] == cell)]


def map_color(cd, logfc, red_norm, blue_norm):
    v = cd*logfc

    if v < 0: 
        return '#D3D3D3'
    elif cd < 0:
        return colors.to_hex(cm.get_cmap('Reds')(red_norm(cd*logfc)))
    else:
        return colors.to_hex(cm.get_cmap('Blues')(blue_norm(cd*logfc)))


def make_plot(comb_df, gene, pert_type):
    # check if there are any results
    if comb_df.shape[0] == 0: 
        display(Markdown("### **There are no signatures in the pre-processed dataset for the chosen gene, cell line, and perturbation type inputs.**"))
        return

    # set color and size for each point on plot
    v = (comb_df['logFC']*comb_df['CD']).to_numpy()
    red_norm = colors.Normalize(vmin=min(v)-0.005, vmax=max(v)+0.005)
    blue_norm = colors.Normalize(vmin=min(v)-0.005, vmax=max(v)+0.005)

    plot_colors = [map_color(row.CD, row.logFC, red_norm, blue_norm) for row in comb_df.itertuples()]

    # generate data source
    data_source = ColumnDataSource(
        data=dict(
            x = comb_df['logFC'],
            y = comb_df['CD'].apply(abs),
            cd = comb_df['CD'],
            sig = pd.Series(comb_df.index),
            fc = comb_df['FC'], 
            logfc = comb_df['logFC'],
            colors = plot_colors, 
            sizes = [8] * comb_df.shape[0],
        )
    )

    # create hover tooltip
    tools = [
        ("Signature", "@sig"),
        ("CD Coeff", "@cd"),
        ("Fold Change", "@fc"),
        ("Log2 Fold Change", "@logfc")
    ]
    # generate plot and relevant plot labels
    plot = figure(
        plot_width=700,
        plot_height=500,
        tooltips=tools
    )
    
    plot.circle(
        'x', 'y', 
        size='sizes',
        alpha=0.7, 
        line_alpha=1,
        line_width=1, 
        line_color='colors',
        source=data_source,
        fill_color='colors', 
        name=f"{gene}_expression_in_L1000_to_RNAseq_{pert_type.replace(' ','')}_volcano_plot"
    )

    plot.yaxis.axis_label = 'Abs(CD-Coefficient)'
    plot.xaxis.axis_label = 'log2(Fold Change)'
    plot.title.text = f"Differential Expression of {gene} in RNA-seq-like {pert_type} Signatures"
    plot.title.align = 'center'
    plot.title.text_font_size = '14px'
    show(plot)


def make_tables(comb_df, pert, is_upreg):
    dir_df = comb_df[comb_df['FC'] > 1] if is_upreg else comb_df[comb_df['FC'] < 1]
    if dir_df.shape[0] == 0: 
        display(Markdown(f"### There are no {'up-regulated' if is_upreg else 'down-regulated'} signatures for the chosen gene and cell line inputs."))
        return dir_df
    dir_df = dir_df.sort_values(by='FC', ascending=not is_upreg)
    dir_df['FC'] = dir_df['FC'].apply(lambda x: f'{x:.4f}')
    dir_df['CD'] = dir_df['CD'].apply(lambda x: f'{x:.4f}')
    if pert == 'xpr':
        dir_df['KO Gene'] = dir_df.index.map(lambda x: x.split('_')[4])
    else:
        dir_df['Perturbagen'] = dir_df.index.map(lambda x: x.split('_')[4])
        dir_df['Dose'] = dir_df.index.map(lambda x: x.split('_')[5] if len(x.split('_')) == 6 else '')
    dir_df['Cell Line'] = dir_df.index.map(lambda x: x.split('_')[1])
    dir_df['Timepoint'] = dir_df.index.map(lambda x: x.split('_')[2].lower())
    dir_df = dir_df.rename(columns={
            'FC': 'Fold Change', 
            'logFC': 'Log2(Fold Change)', 
            'CD': 'CD Coefficient',
            'Rank': 'Rank in Signature'})
    dir_df.index.names = ['Signature']
    return dir_df

# create download link for table results
def download_link(df, fname):
    if df.shape[0] == 0: return ''
    csv = df.to_csv(fname, sep='\t', index=True)
    link = f'<div>Download full results: <a href="{fname}" target=_blank>{fname}</a></div>'
    return link


def enrichr(pert, top_perts, direction):
    if pert == 'CRISPR':
        desc = f"Top CRISPR targets from RNA-seq-like signatures that {direction}-regulate {gene}"
        list_url = 'https://maayanlab.cloud/Enrichr/addList'
        enrich_url = 'https://maayanlab.cloud/Enrichr/enrich'
    else:
        desc = f"Top compounds from RNA-seq-like signatures that {direction}-regulate {gene}"
        list_url = 'https://maayanlab.cloud/DrugEnrichr/addList'
        enrich_url = 'https://maayanlab.cloud/DrugEnrichr/enrich'
    payload = {
        'list': (None, '\n'.join(top_perts)),
        'description': (None, desc)
    }
    response = requests.post(list_url, files=payload)
    if not response.ok:
        raise Exception('Error analyzing gene list')
    time.sleep(0.5)
    return f"{enrich_url}?dataset={response.json()['shortId']}"

def enrichr_link(pert, df, direction, gene): 
    # check if there are any results
    if df.shape[0] < 5: 
        return(f"There are not enough {direction}-regulated signatures to submit to {'Enrichr' if pert == 'CRISPR' else 'DrugEnrichr'}.")
    comb_df = df.copy()
    comb_df['pert'] = comb_df.index.map(lambda x: x.split('_')[4])
    if direction == 'up':
        top_perts = comb_df.sort_values(by='FC', ascending=False) \
            .drop_duplicates(subset=['pert'],keep='first')['pert'][:20]
    else:
        top_perts = comb_df.sort_values(by='FC', ascending=True) \
            .drop_duplicates(subset=['pert'],keep='first')['pert'][:20]
    pert_type = 'CRISPR target genes' if pert == 'CRISPR' else 'chemical compounds'
    results_url = enrichr(pert, top_perts, direction)
    return f'<a href={results_url} target="_blank">Enrichr analysis of top 20 {pert_type} that {direction}-regulate {gene}</a>'


# CRISPR
make_plot(xpr_gene_data, gene, 'CRISPR KO')


display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} CRISPR KO signatures where {gene} is up-regulated (based on fold change)</b></div>'))
up_xpr = make_tables(xpr_gene_data, pert='xpr', is_upreg=True)
display(HTML(up_xpr[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(up_xpr[:100], f"{gene}_UpReg_L1000_CRISPR_signatures.tsv")))
display(HTML(enrichr_link('CRISPR', xpr_gene_data, 'up', gene)))


display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} CRISPR KO signatures where {gene} is down-regulated (based on fold change)</b></div>'))
down_xpr = make_tables(xpr_gene_data, pert='xpr', is_upreg=False)
display(HTML(down_xpr[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(down_xpr[:100], f"{gene}_DnReg_L1000_CRISPR_signatures.tsv")))
display(HTML(enrichr_link('CRISPR', xpr_gene_data, 'down', gene)))


# CP
make_plot(cp_gene_data, gene, 'Chemical')


display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} chemical perturbation signatures where {gene} is up-regulated (based on fold change)</b></div>'))
up_cp = make_tables(cp_gene_data, pert='cp', is_upreg=True)
display(HTML(up_cp[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(up_cp[:100], f"{gene}_UpReg_L1000_chemical_signatures.tsv")))
display(HTML(enrichr_link('chemical', cp_gene_data, 'up', gene)))


display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} chemical perturbation signatures where {gene} is down-regulated (based on fold change)</b></div>'))
down_cp = make_tables(cp_gene_data, pert='cp', is_upreg=False)
display(HTML(down_cp[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(down_cp[:100], f"{gene}_DnReg_L1000_chemical_signatures.tsv")))
display(HTML(enrichr_link('chemical', cp_gene_data, 'down', gene)))

	CD Coefficient	Fold Change	Log2(Fold Change)	Rank in Signature	KO Gene	Cell Line	Timepoint
Signature
XPR031_BICR6.311_96H_C13_OR5B21	0.0335	1.1382	0.186722	3259.0	OR5B21	BICR6.311	96h
XPR030_A549.311_96H_I15_DAO	0.0327	1.1090	0.149221	1912.0	DAO	A549.311	96h
XPR027_A549.311_96H_H18_SIGLEC1	0.0364	1.1018	0.139810	1956.0	SIGLEC1	A549.311	96h
XPR037_YAPC.311_96H_K17_FGFBP2	0.0324	1.0986	0.135708	2017.0	FGFBP2	YAPC.311	96h
XPR028_YAPC.311_96H_K19_CELA1	0.0326	1.0887	0.122669	2047.0	CELA1	YAPC.311	96h
XPR024_A375.311_96H_K01_SKP1	0.0381	1.0812	0.112676	2669.0	SKP1	A375.311	96h
XPR034_YAPC.311_96H_M20_MAP3K6	0.0390	1.0792	0.109976	1752.0	MAP3K6	YAPC.311	96h
XPR018_U251MG.311_96H_C06_IL4R	0.0337	1.0722	0.100529	2557.0	IL4R	U251MG.311	96h
XPR008_BICR6.311_96H_M08_PPARG	0.0323	1.0697	0.097219	1746.0	PPARG	BICR6.311	96h
XPR025_BICR6.311_96H_G04_STAT4	0.0350	1.0696	0.097139	2097.0	STAT4	BICR6.311	96h

	CD Coefficient	Fold Change	Log2(Fold Change)	Rank in Signature	KO Gene	Cell Line	Timepoint
Signature
XPR044_KELLY.311_96H_G24_PRKAG1	-0.0358	0.8370	-0.256732	1689.0	PRKAG1	KELLY.311	96h
XPR008_A549.311_96H_F01_GABRB2	-0.0361	0.8673	-0.205357	2193.0	GABRB2	A549.311	96h
XPR024_A549.311_96H_A09_SHTN1	-0.0349	0.8774	-0.188637	2495.0	SHTN1	A549.311	96h
XPR042_KELLY.311_96H_E09_HMOX1	-0.0361	0.8795	-0.185317	1624.0	HMOX1	KELLY.311	96h
XPR009_YAPC.311_96H_L24_P2RX1	-0.0347	0.8814	-0.182163	2265.0	P2RX1	YAPC.311	96h
XPR025_BICR6.311_96H_L10_TIRAP	-0.0392	0.8814	-0.182149	1948.0	TIRAP	BICR6.311	96h
XPR015_MCF7.311_96H_E08_CSF3R	-0.0354	0.9082	-0.138925	2147.0	CSF3R	MCF7.311	96h
XPR044_KELLY.311_96H_H20_YME1L1	-0.0386	0.9084	-0.138668	2657.0	YME1L1	KELLY.311	96h
XPR024_A549.311_96H_H16_SOCS2	-0.0370	0.9095	-0.136796	2738.0	SOCS2	A549.311	96h
XPR041_ES2.311_96H_D20_RAB27A	-0.0402	0.9121	-0.132747	2731.0	RAB27A	ES2.311	96h

	CD Coefficient	Fold Change	Log2(Fold Change)	Rank in Signature	Perturbagen	Dose	Cell Line	Timepoint
Signature
ASG003_XC.P936_24H_G04_amlodipine_10uM	0.0417	1.1567	0.209993	1558.0	amlodipine	10uM	XC.P936	24h
LJP006_MDAMB231_24H_D13_BI-2536_10uM	0.0363	1.1461	0.196711	4759.0	BI-2536	10uM	MDAMB231	24h
ERG021_PC3_24H_M18_I-BET-151_0.04uM	0.0368	1.1433	0.193228	2534.0	I-BET-151	0.04uM	PC3	24h
ASG003_XC.P905_24H_L12_AZD-8055_0.12uM	0.0412	1.1423	0.191964	1823.0	AZD-8055	0.12uM	XC.P905	24h
PAC041_U2OS_6H_P12_sirolimus_20uM	0.0383	1.1336	0.180880	1467.0	sirolimus	20uM	U2OS	6h
MOAR011_BEN_24H_G12_fdcyd_1.11uM	0.0370	1.1276	0.173276	1636.0	fdcyd	1.11uM	BEN	24h
REP.A025_MCF10A_24H_H03_AZD-2014_1.11uM	0.0370	1.1271	0.172637	2268.0	AZD-2014	1.11uM	MCF10A	24h
REP.A016_A375_24H_A19_palbociclib_10uM	0.0362	1.1231	0.167546	3418.0	palbociclib	10uM	A375	24h
ASG002_NCIH2172_24H_N14_JNJ-7706621_1.11uM	0.0361	1.1208	0.164499	2061.0	JNJ-7706621	1.11uM	NCIH2172	24h
REP.B002_A375_24H_O14_NVP-BEZ235_0.74uM	0.0386	1.1030	0.141400	3149.5	NVP-BEZ235	0.74uM	A375	24h

	CD Coefficient	Fold Change	Log2(Fold Change)	Rank in Signature	Perturbagen	Dose	Cell Line	Timepoint
Signature
LJP006_HA1E_24H_O21_QL-XII-47_1.11uM	-0.0376	0.7393	-0.435725	1585.0	QL-XII-47	1.11uM	HA1E	24h
POL001_HT29_24H_C03_narciclasine_1.11uM	-0.0367	0.7473	-0.420222	2746.0	narciclasine	1.11uM	HT29	24h
REP.B001_PC3_24H_K09_MG-132_20uM	-0.0370	0.7925	-0.335500	2667.0	MG-132	20uM	PC3	24h
LJP006_HA1E_24H_B24_JW-7-24-1_0.04uM	-0.0379	0.7976	-0.326239	2089.0	JW-7-24-1	0.04uM	HA1E	24h
MOAR012_NCIH838_24H_N07_azacitidine_10uM	-0.0375	0.7986	-0.324460	2207.0	azacitidine	10uM	NCIH838	24h
LJP005_MCF10A_3H_A23_crizotinib_0.12uM	-0.0424	0.8250	-0.277450	1855.0	crizotinib	0.12uM	MCF10A	3h
REP.A024_HT29_24H_M16_dolastatin-10_0.37uM	-0.0386	0.8377	-0.255412	2885.0	dolastatin-10	0.37uM	HT29	24h
REP.B017_MDAMB231_24H_P01_molsidomine_2.22uM	-0.0377	0.8626	-0.213259	2101.0	molsidomine	2.22uM	MDAMB231	24h
ASG002_LNCAP.FGC_24H_P24_digitoxin_0.12uM	-0.0369	0.8723	-0.197112	2893.0	digitoxin	0.12uM	LNCAP.FGC	24h
REP.A001_A375_24H_P24_penitrem-a_0.01uM	-0.0373	0.8739	-0.194453	2645.0	penitrem-a	0.01uM	A375	24h

RNA-seq-like Gene Centric Signature Reverse Search (RGCSRS)¶

CRISPR KO signatures¶

Volcano Plots¶

Tables¶

Chemical perturbation signatures¶

Volcano Plots¶

Tables¶