# imports
import numpy as np
import pandas as pd
import matplotlib.cm as cm
import matplotlib.colors as colors
from IPython.display import HTML, Markdown
import requests
import time

# bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource

# display graphics
output_notebook()


gene = 'TIMM13'
cell = ''


root_path = 'https://appyters.maayanlab.cloud/storage/L1000_RNAseq_Gene_Search'


# load gene info
gene_info = pd.read_csv(f"{root_path}/L1000_to_RNAseq_gene_list.tsv", sep="\t", index_col=0)
landmark = "not"
inferred_l1000 = "not"
if gene_info.loc[gene, "landmark"] == True:
    landmark = ""
if gene_info.loc[gene, "inferred l1000"] == True:
    inferred_l1000 = ""


display(Markdown(f"**Input gene: {gene}**"))
display(Markdown(f"{gene} is {landmark} a landmark gene."))
display(Markdown(f"{gene} is {inferred_l1000} an originally inferred L1000 gene."))
display(Markdown(f"{gene} is a newly inferred (our model) gene."))
display(HTML(f"""More information about {gene} can be found at the <a href="https://cfde-gene-pages.cloud/gene/{gene}?CF=false&PS=true&Ag=true" target="_blank"> Gene and Drug Landing Page Aggregator</a>"""))


display(Markdown(f"**Input cell line: {'All' if cell == '' else cell}**"))


xpr_gene_data = pd.read_feather(f"{root_path}/gene_files/{gene}.f").set_index('index')
if cell != '': 
    xpr_gene_data = xpr_gene_data[xpr_gene_data.index.map(lambda x: x.split('_')[1].split('.')[0] == cell)]

cp_gene_data = pd.read_feather(f"{root_path}/cp_gene_files/{gene}.f").set_index('index')
if cell != '': 
    cp_gene_data = cp_gene_data[cp_gene_data.index.map(lambda x: x.split('_')[1].split('.')[0] == cell)]


def map_color(cd, logfc, red_norm, blue_norm):
    v = cd*logfc

    if v < 0: 
        return '#D3D3D3'
    elif cd < 0:
        return colors.to_hex(cm.get_cmap('Reds')(red_norm(cd*logfc)))
    else:
        return colors.to_hex(cm.get_cmap('Blues')(blue_norm(cd*logfc)))


def make_plot(comb_df, gene, pert_type):
    # check if there are any results
    if comb_df.shape[0] == 0: 
        display(Markdown("### **There are no signatures in the pre-processed dataset for the chosen gene, cell line, and perturbation type inputs.**"))
        return

    # set color and size for each point on plot
    v = (comb_df['logFC']*comb_df['CD']).to_numpy()
    red_norm = colors.Normalize(vmin=min(v)-0.005, vmax=max(v)+0.005)
    blue_norm = colors.Normalize(vmin=min(v)-0.005, vmax=max(v)+0.005)

    plot_colors = [map_color(row.CD, row.logFC, red_norm, blue_norm) for row in comb_df.itertuples()]

    # generate data source
    data_source = ColumnDataSource(
        data=dict(
            x = comb_df['logFC'],
            y = comb_df['CD'].apply(abs),
            cd = comb_df['CD'],
            sig = pd.Series(comb_df.index),
            fc = comb_df['FC'], 
            logfc = comb_df['logFC'],
            colors = plot_colors, 
            sizes = [8] * comb_df.shape[0],
        )
    )

    # create hover tooltip
    tools = [
        ("Signature", "@sig"),
        ("CD Coeff", "@cd"),
        ("Fold Change", "@fc"),
        ("Log2 Fold Change", "@logfc")
    ]
    # generate plot and relevant plot labels
    plot = figure(
        plot_width=700,
        plot_height=500,
        tooltips=tools
    )
    
    plot.circle(
        'x', 'y', 
        size='sizes',
        alpha=0.7, 
        line_alpha=1,
        line_width=1, 
        line_color='colors',
        source=data_source,
        fill_color='colors', 
        name=f"{gene}_expression_in_L1000_to_RNAseq_{pert_type.replace(' ','')}_volcano_plot"
    )

    plot.yaxis.axis_label = 'Abs(CD-Coefficient)'
    plot.xaxis.axis_label = 'log2(Fold Change)'
    plot.title.text = f"Differential Expression of {gene} in RNA-seq-like {pert_type} Signatures"
    plot.title.align = 'center'
    plot.title.text_font_size = '14px'
    show(plot)


def make_tables(comb_df, pert, is_upreg):
    dir_df = comb_df[comb_df['FC'] > 1] if is_upreg else comb_df[comb_df['FC'] < 1]
    if dir_df.shape[0] == 0: 
        display(Markdown(f"### There are no {'up-regulated' if is_upreg else 'down-regulated'} signatures for the chosen gene and cell line inputs."))
        return dir_df
    dir_df = dir_df.sort_values(by='FC', ascending=not is_upreg)
    dir_df['FC'] = dir_df['FC'].apply(lambda x: f'{x:.4f}')
    dir_df['CD'] = dir_df['CD'].apply(lambda x: f'{x:.4f}')
    if pert == 'xpr':
        dir_df['KO Gene'] = dir_df.index.map(lambda x: x.split('_')[4])
    else:
        dir_df['Perturbagen'] = dir_df.index.map(lambda x: x.split('_')[4])
        dir_df['Dose'] = dir_df.index.map(lambda x: x.split('_')[5] if len(x.split('_')) == 6 else '')
    dir_df['Cell Line'] = dir_df.index.map(lambda x: x.split('_')[1])
    dir_df['Timepoint'] = dir_df.index.map(lambda x: x.split('_')[2].lower())
    dir_df = dir_df.rename(columns={
            'FC': 'Fold Change', 
            'logFC': 'Log2(Fold Change)', 
            'CD': 'CD Coefficient',
            'Rank': 'Rank in Signature'})
    dir_df.index.names = ['Signature']
    return dir_df

# create download link for table results
def download_link(df, fname):
    if df.shape[0] == 0: return ''
    csv = df.to_csv(fname, sep='\t', index=True)
    link = f'<div>Download full results: <a href="{fname}" target=_blank>{fname}</a></div>'
    return link


def enrichr(pert, top_perts, direction):
    if pert == 'CRISPR':
        desc = f"Top CRISPR targets from RNA-seq-like signatures that {direction}-regulate {gene}"
        list_url = 'https://maayanlab.cloud/Enrichr/addList'
        enrich_url = 'https://maayanlab.cloud/Enrichr/enrich'
    else:
        desc = f"Top compounds from RNA-seq-like signatures that {direction}-regulate {gene}"
        list_url = 'https://maayanlab.cloud/DrugEnrichr/addList'
        enrich_url = 'https://maayanlab.cloud/DrugEnrichr/enrich'
    payload = {
        'list': (None, '\n'.join(top_perts)),
        'description': (None, desc)
    }
    response = requests.post(list_url, files=payload)
    if not response.ok:
        raise Exception('Error analyzing gene list')
    time.sleep(0.5)
    return f"{enrich_url}?dataset={response.json()['shortId']}"

def enrichr_link(pert, df, direction, gene): 
    # check if there are any results
    if df.shape[0] < 5: 
        return(f"There are not enough {direction}-regulated signatures to submit to {'Enrichr' if pert == 'CRISPR' else 'DrugEnrichr'}.")
    comb_df = df.copy()
    comb_df['pert'] = comb_df.index.map(lambda x: x.split('_')[4])
    if direction == 'up':
        top_perts = comb_df.sort_values(by='FC', ascending=False) \
            .drop_duplicates(subset=['pert'],keep='first')['pert'][:20]
    else:
        top_perts = comb_df.sort_values(by='FC', ascending=True) \
            .drop_duplicates(subset=['pert'],keep='first')['pert'][:20]
    pert_type = 'CRISPR target genes' if pert == 'CRISPR' else 'chemical compounds'
    results_url = enrichr(pert, top_perts, direction)
    return f'<a href={results_url} target="_blank">Enrichr analysis of top 20 {pert_type} that {direction}-regulate {gene}</a>'


# CRISPR
make_plot(xpr_gene_data, gene, 'CRISPR KO')


display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} CRISPR KO signatures where {gene} is up-regulated (based on fold change)</b></div>'))
up_xpr = make_tables(xpr_gene_data, pert='xpr', is_upreg=True)
display(HTML(up_xpr[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(up_xpr[:100], f"{gene}_UpReg_L1000_CRISPR_signatures.tsv")))
display(HTML(enrichr_link('CRISPR', xpr_gene_data, 'up', gene)))


display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} CRISPR KO signatures where {gene} is down-regulated (based on fold change)</b></div>'))
down_xpr = make_tables(xpr_gene_data, pert='xpr', is_upreg=False)
display(HTML(down_xpr[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(down_xpr[:100], f"{gene}_DnReg_L1000_CRISPR_signatures.tsv")))
display(HTML(enrichr_link('CRISPR', xpr_gene_data, 'down', gene)))


# CP
make_plot(cp_gene_data, gene, 'Chemical')


display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} chemical perturbation signatures where {gene} is up-regulated (based on fold change)</b></div>'))
up_cp = make_tables(cp_gene_data, pert='cp', is_upreg=True)
display(HTML(up_cp[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(up_cp[:100], f"{gene}_UpReg_L1000_chemical_signatures.tsv")))
display(HTML(enrichr_link('chemical', cp_gene_data, 'up', gene)))


display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} chemical perturbation signatures where {gene} is down-regulated (based on fold change)</b></div>'))
down_cp = make_tables(cp_gene_data, pert='cp', is_upreg=False)
display(HTML(down_cp[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(down_cp[:100], f"{gene}_DnReg_L1000_chemical_signatures.tsv")))
display(HTML(enrichr_link('chemical', cp_gene_data, 'down', gene)))

	CD Coefficient	Fold Change	Log2(Fold Change)	Rank in Signature	KO Gene	Cell Line	Timepoint
Signature
XPR014_HT29.311_96H_E06_CDKN2C	0.0203	1.2418	0.312414	2356.0	CDKN2C	HT29.311	96h
XPR019_A549.311_96H_D24_PMS2	0.0219	1.2349	0.304379	1942.0	PMS2	A549.311	96h
XPR014_HT29.311_96H_L14_CLEC12A	0.0206	1.2347	0.304213	1828.0	CLEC12A	HT29.311	96h
XPR010_MCF7.311_96H_P12_PRKAA1	0.0203	1.1567	0.210014	1497.0	PRKAA1	MCF7.311	96h
XPR010_A549.311_96H_O24_BCHE	0.0212	1.1508	0.202612	3514.0	BCHE	A549.311	96h
XPR028_HT29.311_96H_P17_GPD1	0.0214	1.1421	0.191706	2482.0	GPD1	HT29.311	96h
XPR010_HT29.311_96H_A24_CACNA1H	0.0207	1.1265	0.171872	2207.0	CACNA1H	HT29.311	96h
XPR033_U251MG.311_96H_K02_SEC14L2	0.0204	1.1221	0.166189	1994.0	SEC14L2	U251MG.311	96h
XPR011_HT29.311_96H_I11_FPGS	0.0211	1.1182	0.161203	3328.0	FPGS	HT29.311	96h
XPR010_A549.311_96H_I22_APOB	0.0225	1.1174	0.160149	2672.0	APOB	A549.311	96h

	CD Coefficient	Fold Change	Log2(Fold Change)	Rank in Signature	KO Gene	Cell Line	Timepoint
Signature
XPR031_PC3.311B_96H_B16_PLXNA1	-0.0212	0.8902	-0.167869	1918.0	PLXNA1	PC3.311B	96h
XPR032_A375.311_96H_P04_IL18R1	-0.0208	0.8984	-0.154630	2173.0	IL18R1	A375.311	96h
XPR028_A375.311_96H_G16_PTPRS	-0.0198	0.9007	-0.150804	1563.0	PTPRS	A375.311	96h
XPR041_ES2.311_96H_K01_AKT1	-0.0200	0.9017	-0.149235	2281.0	AKT1	ES2.311	96h
XPR025_A549.311_96H_B07_TBC1D7	-0.0203	0.9037	-0.146012	2762.0	TBC1D7	A549.311	96h
XPR021_BICR6.311_96H_P08_NUDT19	-0.0206	0.9042	-0.145286	1993.0	NUDT19	BICR6.311	96h
XPR008_HT29.311_96H_M08_PPARG	-0.0196	0.9160	-0.126527	2553.0	PPARG	HT29.311	96h
XPR012_A549.311_96H_K05_ARL5B	-0.0229	0.9218	-0.117409	3354.0	ARL5B	A549.311	96h
XPR010_U251MG.311_96H_B03_CCNA2	-0.0215	0.9223	-0.116655	3654.0	CCNA2	U251MG.311	96h
XPRJJ002_A375_96H_G22_DOT1L	-0.0214	0.9266	-0.109988	2945.0	DOT1L	A375	96h

	CD Coefficient	Fold Change	Log2(Fold Change)	Rank in Signature	Perturbagen	Dose	Cell Line	Timepoint
Signature
PRISM001_PC3_24H_G11_CMAP-AZD-1152HQPA_1.25uM	0.0225	1.3464	0.429118	2853.0	CMAP-AZD-1152HQPA	1.25uM	PC3	24h
REP.A014_MCF7_24H_J20_cariprazine_3.33uM	0.0235	1.1718	0.228766	1266.0	cariprazine	3.33uM	MCF7	24h
MOAR010_HEC265_24H_B22_drospirenone_10uM	0.0226	1.1581	0.211703	2309.0	drospirenone	10uM	HEC265	24h
REP.A008_HT29_24H_K24_benzamil_0.04uM	0.0250	1.1570	0.210360	1394.0	benzamil	0.04uM	HT29	24h
REP.A022_MCF10A_24H_K03_urapidil_1.11uM	0.0222	1.1535	0.206074	1819.0	urapidil	1.11uM	MCF10A	24h
LJP009_YAPC_24H_K12_GSK-1059615_0.04uM	0.0252	1.1511	0.202966	1464.0	GSK-1059615	0.04uM	YAPC	24h
REP.A027_YAPC_24H_P11_EMD-1214063_0.125uM	0.0219	1.1472	0.198094	1989.0	EMD-1214063	0.125uM	YAPC	24h
REP.A015_HA1E_24H_I12_dexfenfluramine_0.04uM	0.0243	1.1328	0.179841	1183.0	dexfenfluramine	0.04uM	HA1E	24h
LJP008_MCF7_24H_G23_azacitidine_0.12uM	0.0241	1.1299	0.176205	3465.0	azacitidine	0.12uM	MCF7	24h
REP.B015_PC3_24H_F05_fomepizole_0.03uM	0.0237	1.1168	0.159384	2219.0	fomepizole	0.03uM	PC3	24h

	CD Coefficient	Fold Change	Log2(Fold Change)	Rank in Signature	Perturbagen	Dose	Cell Line	Timepoint
Signature
REP.A011_MCF7_24H_F17_nicotine_0.125uM	-0.0230	0.6817	-0.552770	1789.0	nicotine	0.125uM	MCF7	24h
MOAR009_SH4_24H_F22_ampalex_10uM	-0.0229	0.7429	-0.428745	1526.0	ampalex	10uM	SH4	24h
LJP006_BT20_3H_N02_GSK-2126458_3.33uM	-0.0260	0.7772	-0.363700	1450.0	GSK-2126458	3.33uM	BT20	3h
MOAR012_HEC251_24H_C19_PACOCF3_10uM	-0.0240	0.8122	-0.300116	2157.0	PACOCF3	10uM	HEC251	24h
REP.A015_HT29_24H_D24_MK-2461_0.04uM	-0.0222	0.8170	-0.291558	1980.0	MK-2461	0.04uM	HT29	24h
MOAR012_HCC95_24H_P22_ethaverine_10uM	-0.0232	0.8510	-0.232821	2230.0	ethaverine	10uM	HCC95	24h
LJP005_MCF7_24H_C16_vemurafenib_0.37uM	-0.0230	0.8528	-0.229650	2861.0	vemurafenib	0.37uM	MCF7	24h
FIBR027_MCLF130CN_6H_G23_RITA_4uM	-0.0224	0.8593	-0.218806	2723.0	RITA	4uM	MCLF130CN	6h
REP.B023_JURKAT_24H_H07_volasertib_2.22uM	-0.0243	0.8602	-0.217338	3070.0	volasertib	2.22uM	JURKAT	24h
PBIOA018_HCC515_24H_K15_cimetidine_1.11uM	-0.0227	0.8693	-0.202042	2293.0	cimetidine	1.11uM	HCC515	24h

RNA-seq-like Gene Centric Signature Reverse Search (RGCSRS)¶

CRISPR KO signatures¶

Volcano Plots¶

Tables¶

Chemical perturbation signatures¶

Volcano Plots¶

Tables¶