# imports
import numpy as np
import pandas as pd
import matplotlib.cm as cm
import matplotlib.colors as colors
from IPython.display import HTML, Markdown
import requests
import time

# bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource

# display graphics
output_notebook()


gene = 'DKK2'
cell = ''


root_path = 'https://appyters.maayanlab.cloud/storage/L1000_RNAseq_Gene_Search'


# load gene info
gene_info = pd.read_csv(f"{root_path}/L1000_to_RNAseq_gene_list.tsv", sep="\t", index_col=0)
landmark = "not"
inferred_l1000 = "not"
if gene_info.loc[gene, "landmark"] == True:
    landmark = ""
if gene_info.loc[gene, "inferred l1000"] == True:
    inferred_l1000 = ""


display(Markdown(f"**Input gene: {gene}**"))
display(Markdown(f"{gene} is {landmark} a landmark gene."))
display(Markdown(f"{gene} is {inferred_l1000} an originally inferred L1000 gene."))
display(Markdown(f"{gene} is a newly inferred (our model) gene."))
display(HTML(f"""More information about {gene} can be found at the <a href="https://cfde-gene-pages.cloud/gene/{gene}?CF=false&PS=true&Ag=true" target="_blank"> Gene and Drug Landing Page Aggregator</a>"""))


display(Markdown(f"**Input cell line: {'All' if cell == '' else cell}**"))


xpr_gene_data = pd.read_feather(f"{root_path}/gene_files/{gene}.f").set_index('index')
if cell != '': 
    xpr_gene_data = xpr_gene_data[xpr_gene_data.index.map(lambda x: x.split('_')[1].split('.')[0] == cell)]

cp_gene_data = pd.read_feather(f"{root_path}/cp_gene_files/{gene}.f").set_index('index')
if cell != '': 
    cp_gene_data = cp_gene_data[cp_gene_data.index.map(lambda x: x.split('_')[1].split('.')[0] == cell)]


def map_color(cd, logfc, red_norm, blue_norm):
    v = cd*logfc

    if v < 0: 
        return '#D3D3D3'
    elif cd < 0:
        return colors.to_hex(cm.get_cmap('Reds')(red_norm(cd*logfc)))
    else:
        return colors.to_hex(cm.get_cmap('Blues')(blue_norm(cd*logfc)))


def make_plot(comb_df, gene, pert_type):
    # check if there are any results
    if comb_df.shape[0] == 0: 
        display(Markdown("### **There are no signatures in the pre-processed dataset for the chosen gene, cell line, and perturbation type inputs.**"))
        return

    # set color and size for each point on plot
    v = (comb_df['logFC']*comb_df['CD']).to_numpy()
    red_norm = colors.Normalize(vmin=min(v)-0.005, vmax=max(v)+0.005)
    blue_norm = colors.Normalize(vmin=min(v)-0.005, vmax=max(v)+0.005)

    plot_colors = [map_color(row.CD, row.logFC, red_norm, blue_norm) for row in comb_df.itertuples()]

    # generate data source
    data_source = ColumnDataSource(
        data=dict(
            x = comb_df['logFC'],
            y = comb_df['CD'].apply(abs),
            cd = comb_df['CD'],
            sig = pd.Series(comb_df.index),
            fc = comb_df['FC'], 
            logfc = comb_df['logFC'],
            colors = plot_colors, 
            sizes = [8] * comb_df.shape[0],
        )
    )

    # create hover tooltip
    tools = [
        ("Signature", "@sig"),
        ("CD Coeff", "@cd"),
        ("Fold Change", "@fc"),
        ("Log2 Fold Change", "@logfc")
    ]
    # generate plot and relevant plot labels
    plot = figure(
        plot_width=700,
        plot_height=500,
        tooltips=tools
    )
    
    plot.circle(
        'x', 'y', 
        size='sizes',
        alpha=0.7, 
        line_alpha=1,
        line_width=1, 
        line_color='colors',
        source=data_source,
        fill_color='colors', 
        name=f"{gene}_expression_in_L1000_to_RNAseq_{pert_type.replace(' ','')}_volcano_plot"
    )

    plot.yaxis.axis_label = 'Abs(CD-Coefficient)'
    plot.xaxis.axis_label = 'log2(Fold Change)'
    plot.title.text = f"Differential Expression of {gene} in RNA-seq-like {pert_type} Signatures"
    plot.title.align = 'center'
    plot.title.text_font_size = '14px'
    show(plot)


def make_tables(comb_df, pert, is_upreg):
    dir_df = comb_df[comb_df['FC'] > 1] if is_upreg else comb_df[comb_df['FC'] < 1]
    if dir_df.shape[0] == 0: 
        display(Markdown(f"### There are no {'up-regulated' if is_upreg else 'down-regulated'} signatures for the chosen gene and cell line inputs."))
        return dir_df
    dir_df = dir_df.sort_values(by='FC', ascending=not is_upreg)
    dir_df['FC'] = dir_df['FC'].apply(lambda x: f'{x:.4f}')
    dir_df['CD'] = dir_df['CD'].apply(lambda x: f'{x:.4f}')
    if pert == 'xpr':
        dir_df['KO Gene'] = dir_df.index.map(lambda x: x.split('_')[4])
    else:
        dir_df['Perturbagen'] = dir_df.index.map(lambda x: x.split('_')[4])
        dir_df['Dose'] = dir_df.index.map(lambda x: x.split('_')[5] if len(x.split('_')) == 6 else '')
    dir_df['Cell Line'] = dir_df.index.map(lambda x: x.split('_')[1])
    dir_df['Timepoint'] = dir_df.index.map(lambda x: x.split('_')[2].lower())
    dir_df = dir_df.rename(columns={
            'FC': 'Fold Change', 
            'logFC': 'Log2(Fold Change)', 
            'CD': 'CD Coefficient',
            'Rank': 'Rank in Signature'})
    dir_df.index.names = ['Signature']
    return dir_df

# create download link for table results
def download_link(df, fname):
    if df.shape[0] == 0: return ''
    csv = df.to_csv(fname, sep='\t', index=True)
    link = f'<div>Download full results: <a href="{fname}" target=_blank>{fname}</a></div>'
    return link


def enrichr(pert, top_perts, direction):
    if pert == 'CRISPR':
        desc = f"Top CRISPR targets from RNA-seq-like signatures that {direction}-regulate {gene}"
        list_url = 'https://maayanlab.cloud/Enrichr/addList'
        enrich_url = 'https://maayanlab.cloud/Enrichr/enrich'
    else:
        desc = f"Top compounds from RNA-seq-like signatures that {direction}-regulate {gene}"
        list_url = 'https://maayanlab.cloud/DrugEnrichr/addList'
        enrich_url = 'https://maayanlab.cloud/DrugEnrichr/enrich'
    payload = {
        'list': (None, '\n'.join(top_perts)),
        'description': (None, desc)
    }
    response = requests.post(list_url, files=payload)
    if not response.ok:
        raise Exception('Error analyzing gene list')
    time.sleep(0.5)
    return f"{enrich_url}?dataset={response.json()['shortId']}"

def enrichr_link(pert, df, direction, gene): 
    # check if there are any results
    if df.shape[0] < 5: 
        return(f"There are not enough {direction}-regulated signatures to submit to {'Enrichr' if pert == 'CRISPR' else 'DrugEnrichr'}.")
    comb_df = df.copy()
    comb_df['pert'] = comb_df.index.map(lambda x: x.split('_')[4])
    if direction == 'up':
        top_perts = comb_df.sort_values(by='FC', ascending=False) \
            .drop_duplicates(subset=['pert'],keep='first')['pert'][:20]
    else:
        top_perts = comb_df.sort_values(by='FC', ascending=True) \
            .drop_duplicates(subset=['pert'],keep='first')['pert'][:20]
    pert_type = 'CRISPR target genes' if pert == 'CRISPR' else 'chemical compounds'
    results_url = enrichr(pert, top_perts, direction)
    return f'<a href={results_url} target="_blank">Enrichr analysis of top 20 {pert_type} that {direction}-regulate {gene}</a>'


# CRISPR
make_plot(xpr_gene_data, gene, 'CRISPR KO')


display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} CRISPR KO signatures where {gene} is up-regulated (based on fold change)</b></div>'))
up_xpr = make_tables(xpr_gene_data, pert='xpr', is_upreg=True)
display(HTML(up_xpr[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(up_xpr[:100], f"{gene}_UpReg_L1000_CRISPR_signatures.tsv")))
display(HTML(enrichr_link('CRISPR', xpr_gene_data, 'up', gene)))


display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} CRISPR KO signatures where {gene} is down-regulated (based on fold change)</b></div>'))
down_xpr = make_tables(xpr_gene_data, pert='xpr', is_upreg=False)
display(HTML(down_xpr[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(down_xpr[:100], f"{gene}_DnReg_L1000_CRISPR_signatures.tsv")))
display(HTML(enrichr_link('CRISPR', xpr_gene_data, 'down', gene)))

---------------------------------------------------------------------------
Exception                                 Traceback (most recent call last)
Input In [14], in <cell line: 5>()
      3 display(HTML(down_xpr[:10].to_html(escape=False, col_space=70)))
      4 display(HTML(download_link(down_xpr[:100], f"{gene}_DnReg_L1000_CRISPR_signatures.tsv")))
----> 5 display(HTML(enrichr_link('CRISPR', xpr_gene_data, 'down', gene)))

Input In [11], in enrichr_link(pert, df, direction, gene)
     30     top_perts = comb_df.sort_values(by='FC', ascending=True) \
     31         .drop_duplicates(subset=['pert'],keep='first')['pert'][:20]
     32 pert_type = 'CRISPR target genes' if pert == 'CRISPR' else 'chemical compounds'
---> 33 results_url = enrichr(pert, top_perts, direction)
     34 return f'<a href={results_url} target="_blank">Enrichr analysis of top 20 {pert_type} that {direction}-regulate {gene}</a>'

Input In [11], in enrichr(pert, top_perts, direction)
     14 response = requests.post(list_url, files=payload)
     15 if not response.ok:
---> 16     raise Exception('Error analyzing gene list')
     17 time.sleep(0.5)
     18 return f"{enrich_url}?dataset={response.json()['shortId']}"

Exception: Error analyzing gene list


# CP
make_plot(cp_gene_data, gene, 'Chemical')


display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} chemical perturbation signatures where {gene} is up-regulated (based on fold change)</b></div>'))
up_cp = make_tables(cp_gene_data, pert='cp', is_upreg=True)
display(HTML(up_cp[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(up_cp[:100], f"{gene}_UpReg_L1000_chemical_signatures.tsv")))
display(HTML(enrichr_link('chemical', cp_gene_data, 'up', gene)))


display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} chemical perturbation signatures where {gene} is down-regulated (based on fold change)</b></div>'))
down_cp = make_tables(cp_gene_data, pert='cp', is_upreg=False)
display(HTML(down_cp[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(down_cp[:100], f"{gene}_DnReg_L1000_chemical_signatures.tsv")))
display(HTML(enrichr_link('chemical', cp_gene_data, 'down', gene)))

	CD Coefficient	Fold Change	Log2(Fold Change)	Rank in Signature	KO Gene	Cell Line	Timepoint
Signature
XPR027_PC3.311B_96H_H11_ZEB2	0.0275	18.6944	4.224537	225.0	ZEB2	PC3.311B	96h
XPR025_U251MG.311_96H_D23_TACSTD2	0.0272	3.8173	1.932553	156.0	TACSTD2	U251MG.311	96h
XPR041_KELLY.311_96H_E24_AKR1B1	0.0362	2.5970	1.376872	340.0	AKR1B1	KELLY.311	96h
XPR040_KELLY.311_96H_C23_DNMT3A	0.0290	2.1628	1.112877	515.0	DNMT3A	KELLY.311	96h
XPR032_HT29.311_96H_N23_MAP2	0.0327	1.8446	0.883330	110.0	MAP2	HT29.311	96h
XPR041_KELLY.311_96H_J23_HIF1A	0.0346	1.8435	0.882446	293.0	HIF1A	KELLY.311	96h
XPR012_MCF7.311_96H_O06_ATG5	0.0278	1.7853	0.836127	168.0	ATG5	MCF7.311	96h
XPR043_KELLY.311_96H_B03_CCNA2	0.0271	1.7236	0.785387	807.0	CCNA2	KELLY.311	96h
XPR036_PC3.311B_96H_N24_SH3BP4	0.0279	1.6776	0.746417	513.0	SH3BP4	PC3.311B	96h
XPR041_KELLY.311_96H_P19_NDUFS1	0.0320	1.5572	0.638954	734.0	NDUFS1	KELLY.311	96h

	CD Coefficient	Fold Change	Log2(Fold Change)	Rank in Signature	KO Gene	Cell Line	Timepoint
Signature
XPR031_U251MG.311_96H_E03_ORAI3	-0.0337	0.2437	-2.037008	195.0	ORAI3	U251MG.311	96h
XPR041_KELLY.311_96H_I18_FDPS	-0.0360	0.3222	-1.634040	170.0	FDPS	KELLY.311	96h
XPR031_U251MG.311_96H_H02_POU2AF1	-0.0289	0.3375	-1.567209	231.0	POU2AF1	U251MG.311	96h
XPR043_KELLY.311_96H_I06_HMGCS1	-0.0279	0.4272	-1.226987	435.0	HMGCS1	KELLY.311	96h
XPR035_MCF7.311_96H_J04_P4HA2	-0.0356	0.4516	-1.146922	250.0	P4HA2	MCF7.311	96h
XPR033_MCF7.311_96H_F18_PSMD1	-0.0273	0.4878	-1.035751	382.0	PSMD1	MCF7.311	96h
XPR025_U251MG.311_96H_E12_STX11	-0.0273	0.5111	-0.968387	229.0	STX11	U251MG.311	96h
XPR038_KELLY.311_96H_P10_EBNA1BP2	-0.0288	0.5255	-0.928106	401.0	EBNA1BP2	KELLY.311	96h
XPR042_KELLY.311_96H_D02_COPA	-0.0285	0.5758	-0.796269	533.0	COPA	KELLY.311	96h
XPR009_PC3.311B_96H_F20_CDK2	-0.0264	0.5806	-0.784444	331.0	CDK2	PC3.311B	96h

RNA-seq-like Gene Centric Signature Reverse Search (RGCSRS)¶

CRISPR KO signatures¶

Volcano Plots¶

Tables¶

Chemical perturbation signatures¶

Volcano Plots¶

Tables¶