# imports
import numpy as np
import pandas as pd
import matplotlib.cm as cm
import matplotlib.colors as colors
from IPython.display import HTML, Markdown
import requests
import time

# bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource

# display graphics
output_notebook()


gene = 'BACE2'
cell = ''


root_path = 'https://appyters.maayanlab.cloud/storage/L1000_RNAseq_Gene_Search'


# load gene info
gene_info = pd.read_csv(f"{root_path}/L1000_to_RNAseq_gene_list.tsv", sep="\t", index_col=0)
landmark = "not"
inferred_l1000 = "not"
if gene_info.loc[gene, "landmark"] == True:
    landmark = ""
if gene_info.loc[gene, "inferred l1000"] == True:
    inferred_l1000 = ""


display(Markdown(f"**Input gene: {gene}**"))
display(Markdown(f"{gene} is {landmark} a landmark gene."))
display(Markdown(f"{gene} is {inferred_l1000} an originally inferred L1000 gene."))
display(Markdown(f"{gene} is a newly inferred (our model) gene."))
display(HTML(f"""More information about {gene} can be found at the <a href="https://cfde-gene-pages.cloud/gene/{gene}?CF=false&PS=true&Ag=true" target="_blank"> Gene and Drug Landing Page Aggregator</a>"""))


display(Markdown(f"**Input cell line: {'All' if cell == '' else cell}**"))


xpr_gene_data = pd.read_feather(f"{root_path}/gene_files/{gene}.f").set_index('index')
if cell != '': 
    xpr_gene_data = xpr_gene_data[xpr_gene_data.index.map(lambda x: x.split('_')[1].split('.')[0] == cell)]

cp_gene_data = pd.read_feather(f"{root_path}/cp_gene_files/{gene}.f").set_index('index')
if cell != '': 
    cp_gene_data = cp_gene_data[cp_gene_data.index.map(lambda x: x.split('_')[1].split('.')[0] == cell)]


def map_color(cd, logfc, red_norm, blue_norm):
    v = cd*logfc

    if v < 0: 
        return '#D3D3D3'
    elif cd < 0:
        return colors.to_hex(cm.get_cmap('Reds')(red_norm(cd*logfc)))
    else:
        return colors.to_hex(cm.get_cmap('Blues')(blue_norm(cd*logfc)))


def make_plot(comb_df, gene, pert_type):
    # check if there are any results
    if comb_df.shape[0] == 0: 
        display(Markdown("### **There are no signatures in the pre-processed dataset for the chosen gene, cell line, and perturbation type inputs.**"))
        return

    # set color and size for each point on plot
    v = (comb_df['logFC']*comb_df['CD']).to_numpy()
    red_norm = colors.Normalize(vmin=min(v)-0.005, vmax=max(v)+0.005)
    blue_norm = colors.Normalize(vmin=min(v)-0.005, vmax=max(v)+0.005)

    plot_colors = [map_color(row.CD, row.logFC, red_norm, blue_norm) for row in comb_df.itertuples()]

    # generate data source
    data_source = ColumnDataSource(
        data=dict(
            x = comb_df['logFC'],
            y = comb_df['CD'].apply(abs),
            cd = comb_df['CD'],
            sig = pd.Series(comb_df.index),
            fc = comb_df['FC'], 
            logfc = comb_df['logFC'],
            colors = plot_colors, 
            sizes = [8] * comb_df.shape[0],
        )
    )

    # create hover tooltip
    tools = [
        ("Signature", "@sig"),
        ("CD Coeff", "@cd"),
        ("Fold Change", "@fc"),
        ("Log2 Fold Change", "@logfc")
    ]
    # generate plot and relevant plot labels
    plot = figure(
        plot_width=700,
        plot_height=500,
        tooltips=tools
    )
    
    plot.circle(
        'x', 'y', 
        size='sizes',
        alpha=0.7, 
        line_alpha=1,
        line_width=1, 
        line_color='colors',
        source=data_source,
        fill_color='colors', 
        name=f"{gene}_expression_in_L1000_to_RNAseq_{pert_type.replace(' ','')}_volcano_plot"
    )

    plot.yaxis.axis_label = 'Abs(CD-Coefficient)'
    plot.xaxis.axis_label = 'log2(Fold Change)'
    plot.title.text = f"Differential Expression of {gene} in RNA-seq-like {pert_type} Signatures"
    plot.title.align = 'center'
    plot.title.text_font_size = '14px'
    show(plot)


def make_tables(comb_df, pert, is_upreg):
    dir_df = comb_df[comb_df['FC'] > 1] if is_upreg else comb_df[comb_df['FC'] < 1]
    if dir_df.shape[0] == 0: 
        display(Markdown(f"### There are no {'up-regulated' if is_upreg else 'down-regulated'} signatures for the chosen gene and cell line inputs."))
        return dir_df
    dir_df = dir_df.sort_values(by='FC', ascending=not is_upreg)
    dir_df['FC'] = dir_df['FC'].apply(lambda x: f'{x:.4f}')
    dir_df['CD'] = dir_df['CD'].apply(lambda x: f'{x:.4f}')
    if pert == 'xpr':
        dir_df['KO Gene'] = dir_df.index.map(lambda x: x.split('_')[4])
    else:
        dir_df['Perturbagen'] = dir_df.index.map(lambda x: x.split('_')[4])
        dir_df['Dose'] = dir_df.index.map(lambda x: x.split('_')[5] if len(x.split('_')) == 6 else '')
    dir_df['Cell Line'] = dir_df.index.map(lambda x: x.split('_')[1])
    dir_df['Timepoint'] = dir_df.index.map(lambda x: x.split('_')[2].lower())
    dir_df = dir_df.rename(columns={
            'FC': 'Fold Change', 
            'logFC': 'Log2(Fold Change)', 
            'CD': 'CD Coefficient',
            'Rank': 'Rank in Signature'})
    dir_df.index.names = ['Signature']
    return dir_df

# create download link for table results
def download_link(df, fname):
    if df.shape[0] == 0: return ''
    csv = df.to_csv(fname, sep='\t', index=True)
    link = f'<div>Download full results: <a href="{fname}" target=_blank>{fname}</a></div>'
    return link


def enrichr(pert, top_perts, direction):
    if pert == 'CRISPR':
        desc = f"Top CRISPR targets from RNA-seq-like signatures that {direction}-regulate {gene}"
        list_url = 'https://maayanlab.cloud/Enrichr/addList'
        enrich_url = 'https://maayanlab.cloud/Enrichr/enrich'
    else:
        desc = f"Top compounds from RNA-seq-like signatures that {direction}-regulate {gene}"
        list_url = 'https://maayanlab.cloud/DrugEnrichr/addList'
        enrich_url = 'https://maayanlab.cloud/DrugEnrichr/enrich'
    payload = {
        'list': (None, '\n'.join(top_perts)),
        'description': (None, desc)
    }
    response = requests.post(list_url, files=payload)
    if not response.ok:
        raise Exception('Error analyzing gene list')
    time.sleep(0.5)
    return f"{enrich_url}?dataset={response.json()['shortId']}"

def enrichr_link(pert, df, direction, gene): 
    # check if there are any results
    if df.shape[0] < 5: 
        return(f"There are not enough {direction}-regulated signatures to submit to {'Enrichr' if pert == 'CRISPR' else 'DrugEnrichr'}.")
    comb_df = df.copy()
    comb_df['pert'] = comb_df.index.map(lambda x: x.split('_')[4])
    if direction == 'up':
        top_perts = comb_df.sort_values(by='FC', ascending=False) \
            .drop_duplicates(subset=['pert'],keep='first')['pert'][:20]
    else:
        top_perts = comb_df.sort_values(by='FC', ascending=True) \
            .drop_duplicates(subset=['pert'],keep='first')['pert'][:20]
    pert_type = 'CRISPR target genes' if pert == 'CRISPR' else 'chemical compounds'
    results_url = enrichr(pert, top_perts, direction)
    return f'<a href={results_url} target="_blank">Enrichr analysis of top 20 {pert_type} that {direction}-regulate {gene}</a>'


# CRISPR
make_plot(xpr_gene_data, gene, 'CRISPR KO')


display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} CRISPR KO signatures where {gene} is up-regulated (based on fold change)</b></div>'))
up_xpr = make_tables(xpr_gene_data, pert='xpr', is_upreg=True)
display(HTML(up_xpr[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(up_xpr[:100], f"{gene}_UpReg_L1000_CRISPR_signatures.tsv")))
display(HTML(enrichr_link('CRISPR', xpr_gene_data, 'up', gene)))


display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} CRISPR KO signatures where {gene} is down-regulated (based on fold change)</b></div>'))
down_xpr = make_tables(xpr_gene_data, pert='xpr', is_upreg=False)
display(HTML(down_xpr[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(down_xpr[:100], f"{gene}_DnReg_L1000_CRISPR_signatures.tsv")))
display(HTML(enrichr_link('CRISPR', xpr_gene_data, 'down', gene)))


# CP
make_plot(cp_gene_data, gene, 'Chemical')


display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} chemical perturbation signatures where {gene} is up-regulated (based on fold change)</b></div>'))
up_cp = make_tables(cp_gene_data, pert='cp', is_upreg=True)
display(HTML(up_cp[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(up_cp[:100], f"{gene}_UpReg_L1000_chemical_signatures.tsv")))
display(HTML(enrichr_link('chemical', cp_gene_data, 'up', gene)))


display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} chemical perturbation signatures where {gene} is down-regulated (based on fold change)</b></div>'))
down_cp = make_tables(cp_gene_data, pert='cp', is_upreg=False)
display(HTML(down_cp[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(down_cp[:100], f"{gene}_DnReg_L1000_chemical_signatures.tsv")))
display(HTML(enrichr_link('chemical', cp_gene_data, 'down', gene)))

	CD Coefficient	Fold Change	Log2(Fold Change)	Rank in Signature	KO Gene	Cell Line	Timepoint
Signature
XPR016_ES2.311_96H_H02_FAM122A	0.0622	1.1990	0.261824	867.0	FAM122A	ES2.311	96h
XPR037_A549.311_96H_D06_HADH	0.0647	1.1872	0.247584	2701.0	HADH	A549.311	96h
XPR024_BICR6.311_96H_H15_SMARCD1	0.0632	1.1507	0.202481	1371.0	SMARCD1	BICR6.311	96h
XPR022_BICR6.311_96H_I06_ASNS	0.0614	1.1397	0.188600	1861.0	ASNS	BICR6.311	96h
XPR025_BICR6.311_96H_C23_SPRY1	0.0624	1.1374	0.185709	844.0	SPRY1	BICR6.311	96h
XPR026_ES2.311_96H_O17_TLR6	0.0669	1.1334	0.180689	1072.0	TLR6	ES2.311	96h
XPR043_U251MG.311_96H_F22_HADH	0.0643	1.1291	0.175182	725.0	HADH	U251MG.311	96h
XPR036_U251MG.311_96H_G06_KALRN	0.0617	1.1231	0.167449	1923.0	KALRN	U251MG.311	96h
XPR020_AGS.311_96H_H07_CDK1	0.0617	1.1212	0.165107	1053.0	CDK1	AGS.311	96h
XPR036_U251MG.311_96H_I04_CROT	0.0616	1.1199	0.163424	2517.0	CROT	U251MG.311	96h

	CD Coefficient	Fold Change	Log2(Fold Change)	Rank in Signature	KO Gene	Cell Line	Timepoint
Signature
XPR008_ES2.311_96H_O01_DMD	-0.0646	0.8245	-0.278357	2260.0	DMD	ES2.311	96h
XPR044_ES2.311_96H_K01_PKN1	-0.0633	0.8488	-0.236479	1323.0	PKN1	ES2.311	96h
XPR035_A549.311_96H_D06_TAOK2	-0.0711	0.8539	-0.227911	2613.0	TAOK2	A549.311	96h
XPR029_BICR6.311_96H_O22_NDUFA5	-0.0612	0.8593	-0.218830	1339.0	NDUFA5	BICR6.311	96h
XPR022_U251MG.311_96H_I24_PRSS36	-0.0654	0.8643	-0.210453	1288.0	PRSS36	U251MG.311	96h
XPR029_AGS.311_96H_J14_NR2C2	-0.0631	0.8698	-0.201169	1577.0	NR2C2	AGS.311	96h
XPR018_ES2.311_96H_I24_IL9	-0.0659	0.8722	-0.197240	1215.0	IL9	ES2.311	96h
XPR030_ES2.311_96H_P04_RGS8	-0.0630	0.8738	-0.194560	1963.0	RGS8	ES2.311	96h
XPR033_ES2.311_96H_P01_PDXP	-0.0683	0.8773	-0.188887	1817.0	PDXP	ES2.311	96h
XPR026_ES2.311_96H_A04_ASAH1	-0.0624	0.8800	-0.184469	1802.0	ASAH1	ES2.311	96h

	CD Coefficient	Fold Change	Log2(Fold Change)	Rank in Signature	Perturbagen	Dose	Cell Line	Timepoint
Signature
ASG002_JHH5_24H_D05_birinapant_1.11uM	0.0726	1.3708	0.455036	708.0	birinapant	1.11uM	JHH5	24h
MOAR011_OVTOKO_24H_C15_laropiprant_1.11uM	0.0784	1.2370	0.306837	1325.0	laropiprant	1.11uM	OVTOKO	24h
REP.B018_THP1_24H_H14_gefitinib_0.74uM	0.0695	1.2362	0.305876	693.0	gefitinib	0.74uM	THP1	24h
REP.B026_MDAMB231_24H_D20_aniracetam_0.74uM	0.0845	1.2317	0.300668	1805.0	aniracetam	0.74uM	MDAMB231	24h
LJP008_NPC.CAS9_24H_N09_tranylcypromine_1.11uM	0.0702	1.2281	0.296444	1096.0	tranylcypromine	1.11uM	NPC.CAS9	24h
LJP009_NPC_24H_G18_dasatinib_0.04uM	0.0691	1.2225	0.289852	1163.0	dasatinib	0.04uM	NPC	24h
CPC011_A375_6H_D22_ornidazole_10uM	0.0708	1.1997	0.262641	2291.0	ornidazole	10uM	A375	6h
REP.A002_HA1E_24H_I13_carprofen_10uM	0.0727	1.1935	0.255183	782.0	carprofen	10uM	HA1E	24h
MOAR012_HCC95_24H_D04_exisulind_10uM	0.0680	1.1926	0.254151	1339.0	exisulind	10uM	HCC95	24h
MOAR012_CJM_24H_G02_donepezil_3.33uM	0.0681	1.1894	0.250194	1022.0	donepezil	3.33uM	CJM	24h

	CD Coefficient	Fold Change	Log2(Fold Change)	Rank in Signature	Perturbagen	Dose	Cell Line	Timepoint
Signature
LJP008_A549_24H_A14_NPK76-II-72-1_3.33uM	-0.0677	0.7434	-0.427742	1165.0	NPK76-II-72-1	3.33uM	A549	24h
CPC002_HA1E_24H_A21_GP-2a_10uM	-0.0787	0.7875	-0.344605	1595.0	GP-2a	10uM	HA1E	24h
LTC005_MDAMB231_24H_M02_MK-2206_0.22uM	-0.0692	0.7938	-0.333208	1617.0	MK-2206	0.22uM	MDAMB231	24h
LJP005_HA1E_24H_B09_HG-5-113-01_1.11uM	-0.0677	0.8079	-0.307673	2231.0	HG-5-113-01	1.11uM	HA1E	24h
REP.B023_YAPC_24H_F24_diacerein_0.01uM	-0.0720	0.8131	-0.298500	1734.0	diacerein	0.01uM	YAPC	24h
REP.B009_HA1E_24H_P12_LY-215490_0.01uM	-0.0737	0.8134	-0.297979	1530.0	LY-215490	0.01uM	HA1E	24h
REP.B008_HA1E_24H_M23_elacridar_0.03uM	-0.0733	0.8316	-0.265966	1993.0	elacridar	0.03uM	HA1E	24h
REP.B004_HA1E_24H_G16_bortezomib_20uM	-0.0675	0.8327	-0.264087	3101.0	bortezomib	20uM	HA1E	24h
LJP006_HME1_3H_O23_QL-XII-47_0.12uM	-0.0754	0.8408	-0.250102	1956.0	QL-XII-47	0.12uM	HME1	3h
REP.B015_HA1E_24H_K03_propentofylline_0.25uM	-0.0693	0.8546	-0.226755	1720.0	propentofylline	0.25uM	HA1E	24h

RNA-seq-like Gene Centric Signature Reverse Search (RGCSRS)¶

CRISPR KO signatures¶

Volcano Plots¶

Tables¶

Chemical perturbation signatures¶

Volcano Plots¶

Tables¶