In [1]:
# imports
import numpy as np
import pandas as pd
import matplotlib.cm as cm
import matplotlib.colors as colors
from IPython.display import HTML, Markdown
import requests
import time

# bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource

# display graphics
output_notebook()
Loading BokehJS ...
In [2]:
gene = 'BACE2'
cell = ''

RNA-seq-like Gene Centric Signature Reverse Search (RGCSRS)¶

In [3]:
root_path = 'https://appyters.maayanlab.cloud/storage/L1000_RNAseq_Gene_Search'
In [4]:
# load gene info
gene_info = pd.read_csv(f"{root_path}/L1000_to_RNAseq_gene_list.tsv", sep="\t", index_col=0)
landmark = "not"
inferred_l1000 = "not"
if gene_info.loc[gene, "landmark"] == True:
    landmark = ""
if gene_info.loc[gene, "inferred l1000"] == True:
    inferred_l1000 = ""
In [5]:
display(Markdown(f"**Input gene: {gene}**"))
display(Markdown(f"{gene} is {landmark} a landmark gene."))
display(Markdown(f"{gene} is {inferred_l1000} an originally inferred L1000 gene."))
display(Markdown(f"{gene} is a newly inferred (our model) gene."))
display(HTML(f"""More information about {gene} can be found at the <a href="https://cfde-gene-pages.cloud/gene/{gene}?CF=false&PS=true&Ag=true" target="_blank"> Gene and Drug Landing Page Aggregator</a>"""))

Input gene: BACE2

BACE2 is a landmark gene.

BACE2 is not an originally inferred L1000 gene.

BACE2 is a newly inferred (our model) gene.

More information about BACE2 can be found at the Gene and Drug Landing Page Aggregator
In [6]:
display(Markdown(f"**Input cell line: {'All' if cell == '' else cell}**"))

Input cell line: All

This Appyter provides visualizations of the top 5% of RNA-seq-like signatures induced by CRISPR knockouts and chemical perturbagens. Signatures are computed from transformed data profiles from the LINCS L1000 data. The transformation was performed using a two-step model:

  1. A cycleGAN model was used to first predict the RNA-seq expression of the 978 L1000 landmark genes
  2. A fully connected neural network was used to extrapolate the predicted RNA-seq expression of the 978 landmark genes to a full set of 23,164 genes

Signatures were computed using the characteristic direction method (Clark et al., 2014), as implemented here.

Each gene was pre-queried across all available RNA-seq-like signatures, and the top signatures where a gene is up or down-regulated are returned for each gene.

In [7]:
xpr_gene_data = pd.read_feather(f"{root_path}/gene_files/{gene}.f").set_index('index')
if cell != '': 
    xpr_gene_data = xpr_gene_data[xpr_gene_data.index.map(lambda x: x.split('_')[1].split('.')[0] == cell)]

cp_gene_data = pd.read_feather(f"{root_path}/cp_gene_files/{gene}.f").set_index('index')
if cell != '': 
    cp_gene_data = cp_gene_data[cp_gene_data.index.map(lambda x: x.split('_')[1].split('.')[0] == cell)]
In [8]:
def map_color(cd, logfc, red_norm, blue_norm):
    v = cd*logfc

    if v < 0: 
        return '#D3D3D3'
    elif cd < 0:
        return colors.to_hex(cm.get_cmap('Reds')(red_norm(cd*logfc)))
    else:
        return colors.to_hex(cm.get_cmap('Blues')(blue_norm(cd*logfc)))
In [9]:
def make_plot(comb_df, gene, pert_type):
    # check if there are any results
    if comb_df.shape[0] == 0: 
        display(Markdown("### **There are no signatures in the pre-processed dataset for the chosen gene, cell line, and perturbation type inputs.**"))
        return

    # set color and size for each point on plot
    v = (comb_df['logFC']*comb_df['CD']).to_numpy()
    red_norm = colors.Normalize(vmin=min(v)-0.005, vmax=max(v)+0.005)
    blue_norm = colors.Normalize(vmin=min(v)-0.005, vmax=max(v)+0.005)

    plot_colors = [map_color(row.CD, row.logFC, red_norm, blue_norm) for row in comb_df.itertuples()]

    # generate data source
    data_source = ColumnDataSource(
        data=dict(
            x = comb_df['logFC'],
            y = comb_df['CD'].apply(abs),
            cd = comb_df['CD'],
            sig = pd.Series(comb_df.index),
            fc = comb_df['FC'], 
            logfc = comb_df['logFC'],
            colors = plot_colors, 
            sizes = [8] * comb_df.shape[0],
        )
    )

    # create hover tooltip
    tools = [
        ("Signature", "@sig"),
        ("CD Coeff", "@cd"),
        ("Fold Change", "@fc"),
        ("Log2 Fold Change", "@logfc")
    ]
    # generate plot and relevant plot labels
    plot = figure(
        plot_width=700,
        plot_height=500,
        tooltips=tools
    )
    
    plot.circle(
        'x', 'y', 
        size='sizes',
        alpha=0.7, 
        line_alpha=1,
        line_width=1, 
        line_color='colors',
        source=data_source,
        fill_color='colors', 
        name=f"{gene}_expression_in_L1000_to_RNAseq_{pert_type.replace(' ','')}_volcano_plot"
    )

    plot.yaxis.axis_label = 'Abs(CD-Coefficient)'
    plot.xaxis.axis_label = 'log2(Fold Change)'
    plot.title.text = f"Differential Expression of {gene} in RNA-seq-like {pert_type} Signatures"
    plot.title.align = 'center'
    plot.title.text_font_size = '14px'
    show(plot)
In [10]:
def make_tables(comb_df, pert, is_upreg):
    dir_df = comb_df[comb_df['FC'] > 1] if is_upreg else comb_df[comb_df['FC'] < 1]
    if dir_df.shape[0] == 0: 
        display(Markdown(f"### There are no {'up-regulated' if is_upreg else 'down-regulated'} signatures for the chosen gene and cell line inputs."))
        return dir_df
    dir_df = dir_df.sort_values(by='FC', ascending=not is_upreg)
    dir_df['FC'] = dir_df['FC'].apply(lambda x: f'{x:.4f}')
    dir_df['CD'] = dir_df['CD'].apply(lambda x: f'{x:.4f}')
    if pert == 'xpr':
        dir_df['KO Gene'] = dir_df.index.map(lambda x: x.split('_')[4])
    else:
        dir_df['Perturbagen'] = dir_df.index.map(lambda x: x.split('_')[4])
        dir_df['Dose'] = dir_df.index.map(lambda x: x.split('_')[5] if len(x.split('_')) == 6 else '')
    dir_df['Cell Line'] = dir_df.index.map(lambda x: x.split('_')[1])
    dir_df['Timepoint'] = dir_df.index.map(lambda x: x.split('_')[2].lower())
    dir_df = dir_df.rename(columns={
            'FC': 'Fold Change', 
            'logFC': 'Log2(Fold Change)', 
            'CD': 'CD Coefficient',
            'Rank': 'Rank in Signature'})
    dir_df.index.names = ['Signature']
    return dir_df

# create download link for table results
def download_link(df, fname):
    if df.shape[0] == 0: return ''
    csv = df.to_csv(fname, sep='\t', index=True)
    link = f'<div>Download full results: <a href="{fname}" target=_blank>{fname}</a></div>'
    return link
In [11]:
def enrichr(pert, top_perts, direction):
    if pert == 'CRISPR':
        desc = f"Top CRISPR targets from RNA-seq-like signatures that {direction}-regulate {gene}"
        list_url = 'https://maayanlab.cloud/Enrichr/addList'
        enrich_url = 'https://maayanlab.cloud/Enrichr/enrich'
    else:
        desc = f"Top compounds from RNA-seq-like signatures that {direction}-regulate {gene}"
        list_url = 'https://maayanlab.cloud/DrugEnrichr/addList'
        enrich_url = 'https://maayanlab.cloud/DrugEnrichr/enrich'
    payload = {
        'list': (None, '\n'.join(top_perts)),
        'description': (None, desc)
    }
    response = requests.post(list_url, files=payload)
    if not response.ok:
        raise Exception('Error analyzing gene list')
    time.sleep(0.5)
    return f"{enrich_url}?dataset={response.json()['shortId']}"

def enrichr_link(pert, df, direction, gene): 
    # check if there are any results
    if df.shape[0] < 5: 
        return(f"There are not enough {direction}-regulated signatures to submit to {'Enrichr' if pert == 'CRISPR' else 'DrugEnrichr'}.")
    comb_df = df.copy()
    comb_df['pert'] = comb_df.index.map(lambda x: x.split('_')[4])
    if direction == 'up':
        top_perts = comb_df.sort_values(by='FC', ascending=False) \
            .drop_duplicates(subset=['pert'],keep='first')['pert'][:20]
    else:
        top_perts = comb_df.sort_values(by='FC', ascending=True) \
            .drop_duplicates(subset=['pert'],keep='first')['pert'][:20]
    pert_type = 'CRISPR target genes' if pert == 'CRISPR' else 'chemical compounds'
    results_url = enrichr(pert, top_perts, direction)
    return f'<a href={results_url} target="_blank">Enrichr analysis of top 20 {pert_type} that {direction}-regulate {gene}</a>'

CRISPR KO signatures¶

Volcano Plots¶

In the following volcano plot, each point represents a single CRISPR knockout signature. The x-position indicates the log2(fold change) of the expression of the chosen gene in the signature, while the y-position indicates the absolute value of the characteristic direction coefficient of the chosen gene.

Note that the fold change and characteristic direction coefficients of the gene are not necessarily in the same direction for each signature; this is because in cases where a gene is both up- and down-regulated between replicate samples, the characteristic direction method prioritizes the more consistent direction of movement, which may not be consistent with the fold change. To read more about the characteristic direction method, please refer to Clark et al., 2014.

Points with same-direction fold change and CD coefficient values are highlighted by coloring them blue (up-regulated) or red (down-regulated). Darker colored points indicate higher differential expression of the gene in the corresponding signature.

Drag the plot to pan around. Use the toolbar to the right of the plot to zoom, reset the plot view, or download the plot.

In [12]:
# CRISPR
make_plot(xpr_gene_data, gene, 'CRISPR KO')

Tables¶

The tables below display the characteristic direction (CD) coefficients, fold change values, and log2(fold change) values correponding to the expression of the chosen gene in each CRIPSR KO signature.

The rank of the gene in the signature is determined by its fold change relative to the fold change of the other genes that are regulated in the same direction; if a gene is ranked 1 in a signature where the gene is up-regulated, that means that out of all genes up-regulated in the signature, the input gene had the highest fold change and was the most up-regulated.

While only the top 10 signatures for each direction are displayed, below each table is a link to download the top 50 signatures for each direction.

A link to the Enrichr analysis results of the top 20 unique perturbations from the top signatures that up or down-regulate the input gene can be found below each table as well.

In [13]:
display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} CRISPR KO signatures where {gene} is up-regulated (based on fold change)</b></div>'))
up_xpr = make_tables(xpr_gene_data, pert='xpr', is_upreg=True)
display(HTML(up_xpr[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(up_xpr[:100], f"{gene}_UpReg_L1000_CRISPR_signatures.tsv")))
display(HTML(enrichr_link('CRISPR', xpr_gene_data, 'up', gene)))
Top CRISPR KO signatures where BACE2 is up-regulated (based on fold change)
CD Coefficient Fold Change Log2(Fold Change) Rank in Signature KO Gene Cell Line Timepoint
Signature
XPR016_ES2.311_96H_H02_FAM122A 0.0622 1.1990 0.261824 867.0 FAM122A ES2.311 96h
XPR037_A549.311_96H_D06_HADH 0.0647 1.1872 0.247584 2701.0 HADH A549.311 96h
XPR024_BICR6.311_96H_H15_SMARCD1 0.0632 1.1507 0.202481 1371.0 SMARCD1 BICR6.311 96h
XPR022_BICR6.311_96H_I06_ASNS 0.0614 1.1397 0.188600 1861.0 ASNS BICR6.311 96h
XPR025_BICR6.311_96H_C23_SPRY1 0.0624 1.1374 0.185709 844.0 SPRY1 BICR6.311 96h
XPR026_ES2.311_96H_O17_TLR6 0.0669 1.1334 0.180689 1072.0 TLR6 ES2.311 96h
XPR043_U251MG.311_96H_F22_HADH 0.0643 1.1291 0.175182 725.0 HADH U251MG.311 96h
XPR036_U251MG.311_96H_G06_KALRN 0.0617 1.1231 0.167449 1923.0 KALRN U251MG.311 96h
XPR020_AGS.311_96H_H07_CDK1 0.0617 1.1212 0.165107 1053.0 CDK1 AGS.311 96h
XPR036_U251MG.311_96H_I04_CROT 0.0616 1.1199 0.163424 2517.0 CROT U251MG.311 96h
Download full results: BACE2_UpReg_L1000_CRISPR_signatures.tsv
Enrichr analysis of top 20 CRISPR target genes that up-regulate BACE2
In [14]:
display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} CRISPR KO signatures where {gene} is down-regulated (based on fold change)</b></div>'))
down_xpr = make_tables(xpr_gene_data, pert='xpr', is_upreg=False)
display(HTML(down_xpr[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(down_xpr[:100], f"{gene}_DnReg_L1000_CRISPR_signatures.tsv")))
display(HTML(enrichr_link('CRISPR', xpr_gene_data, 'down', gene)))
Top CRISPR KO signatures where BACE2 is down-regulated (based on fold change)
CD Coefficient Fold Change Log2(Fold Change) Rank in Signature KO Gene Cell Line Timepoint
Signature
XPR008_ES2.311_96H_O01_DMD -0.0646 0.8245 -0.278357 2260.0 DMD ES2.311 96h
XPR044_ES2.311_96H_K01_PKN1 -0.0633 0.8488 -0.236479 1323.0 PKN1 ES2.311 96h
XPR035_A549.311_96H_D06_TAOK2 -0.0711 0.8539 -0.227911 2613.0 TAOK2 A549.311 96h
XPR029_BICR6.311_96H_O22_NDUFA5 -0.0612 0.8593 -0.218830 1339.0 NDUFA5 BICR6.311 96h
XPR022_U251MG.311_96H_I24_PRSS36 -0.0654 0.8643 -0.210453 1288.0 PRSS36 U251MG.311 96h
XPR029_AGS.311_96H_J14_NR2C2 -0.0631 0.8698 -0.201169 1577.0 NR2C2 AGS.311 96h
XPR018_ES2.311_96H_I24_IL9 -0.0659 0.8722 -0.197240 1215.0 IL9 ES2.311 96h
XPR030_ES2.311_96H_P04_RGS8 -0.0630 0.8738 -0.194560 1963.0 RGS8 ES2.311 96h
XPR033_ES2.311_96H_P01_PDXP -0.0683 0.8773 -0.188887 1817.0 PDXP ES2.311 96h
XPR026_ES2.311_96H_A04_ASAH1 -0.0624 0.8800 -0.184469 1802.0 ASAH1 ES2.311 96h
Download full results: BACE2_DnReg_L1000_CRISPR_signatures.tsv
Enrichr analysis of top 20 CRISPR target genes that down-regulate BACE2

Chemical perturbation signatures¶

Volcano Plots¶

In the following volcano plot, each point represents a single chemical perturbation signature. The x-position indicates the log2(fold change) of the expression of the chosen gene in the signature, while the y-position indicates the absolute value of the characteristic direction coefficient of the chosen gene.

Note that the fold change and characteristic direction coefficients of the gene are not necessarily in the same direction for each signature; this is because in cases where a gene is both up- and down-regulated between replicate samples, the characteristic direction method prioritizes the more consistent direction of movement, which may not be consistent with the fold change. To read more about the characteristic direction method, please refer to Clark et al., 2014.

Points with same-direction fold change and CD coefficient values are highlighted by coloring them blue (up-regulated) or red (down-regulated). Darker colored points indicate higher differential expression of the gene in the corresponding signature.

Drag the plot to pan around. Use the toolbar to the right of the plot to zoom, reset the plot view, or download the plot.

In [15]:
# CP
make_plot(cp_gene_data, gene, 'Chemical')

Tables¶

The tables below display the characteristic direction (CD) coefficients, fold change values, and log2(fold change) values correponding to the expression of the chosen gene in each chemical perturbation signature.

The rank of the gene in the signature is determined by its fold change relative to the fold change of the other genes that are regulated in the same direction; if a gene is ranked 1 in a signature where the gene is up-regulated, that means that out of all genes up-regulated in the signature, the input gene had the highest fold change and was the most up-regulated.

While only the top 10 signatures for each direction are displayed, below each table is a link to download the top 50 signatures for each direction.

A link to the Enrichr analysis results of the top 20 unique perturbations from the top signatures that up or down-regulate the input gene can be found below each table as well.

In [16]:
display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} chemical perturbation signatures where {gene} is up-regulated (based on fold change)</b></div>'))
up_cp = make_tables(cp_gene_data, pert='cp', is_upreg=True)
display(HTML(up_cp[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(up_cp[:100], f"{gene}_UpReg_L1000_chemical_signatures.tsv")))
display(HTML(enrichr_link('chemical', cp_gene_data, 'up', gene)))
Top chemical perturbation signatures where BACE2 is up-regulated (based on fold change)
CD Coefficient Fold Change Log2(Fold Change) Rank in Signature Perturbagen Dose Cell Line Timepoint
Signature
ASG002_JHH5_24H_D05_birinapant_1.11uM 0.0726 1.3708 0.455036 708.0 birinapant 1.11uM JHH5 24h
MOAR011_OVTOKO_24H_C15_laropiprant_1.11uM 0.0784 1.2370 0.306837 1325.0 laropiprant 1.11uM OVTOKO 24h
REP.B018_THP1_24H_H14_gefitinib_0.74uM 0.0695 1.2362 0.305876 693.0 gefitinib 0.74uM THP1 24h
REP.B026_MDAMB231_24H_D20_aniracetam_0.74uM 0.0845 1.2317 0.300668 1805.0 aniracetam 0.74uM MDAMB231 24h
LJP008_NPC.CAS9_24H_N09_tranylcypromine_1.11uM 0.0702 1.2281 0.296444 1096.0 tranylcypromine 1.11uM NPC.CAS9 24h
LJP009_NPC_24H_G18_dasatinib_0.04uM 0.0691 1.2225 0.289852 1163.0 dasatinib 0.04uM NPC 24h
CPC011_A375_6H_D22_ornidazole_10uM 0.0708 1.1997 0.262641 2291.0 ornidazole 10uM A375 6h
REP.A002_HA1E_24H_I13_carprofen_10uM 0.0727 1.1935 0.255183 782.0 carprofen 10uM HA1E 24h
MOAR012_HCC95_24H_D04_exisulind_10uM 0.0680 1.1926 0.254151 1339.0 exisulind 10uM HCC95 24h
MOAR012_CJM_24H_G02_donepezil_3.33uM 0.0681 1.1894 0.250194 1022.0 donepezil 3.33uM CJM 24h
Download full results: BACE2_UpReg_L1000_chemical_signatures.tsv
Enrichr analysis of top 20 chemical compounds that up-regulate BACE2
In [17]:
display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} chemical perturbation signatures where {gene} is down-regulated (based on fold change)</b></div>'))
down_cp = make_tables(cp_gene_data, pert='cp', is_upreg=False)
display(HTML(down_cp[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(down_cp[:100], f"{gene}_DnReg_L1000_chemical_signatures.tsv")))
display(HTML(enrichr_link('chemical', cp_gene_data, 'down', gene)))
Top chemical perturbation signatures where BACE2 is down-regulated (based on fold change)
CD Coefficient Fold Change Log2(Fold Change) Rank in Signature Perturbagen Dose Cell Line Timepoint
Signature
LJP008_A549_24H_A14_NPK76-II-72-1_3.33uM -0.0677 0.7434 -0.427742 1165.0 NPK76-II-72-1 3.33uM A549 24h
CPC002_HA1E_24H_A21_GP-2a_10uM -0.0787 0.7875 -0.344605 1595.0 GP-2a 10uM HA1E 24h
LTC005_MDAMB231_24H_M02_MK-2206_0.22uM -0.0692 0.7938 -0.333208 1617.0 MK-2206 0.22uM MDAMB231 24h
LJP005_HA1E_24H_B09_HG-5-113-01_1.11uM -0.0677 0.8079 -0.307673 2231.0 HG-5-113-01 1.11uM HA1E 24h
REP.B023_YAPC_24H_F24_diacerein_0.01uM -0.0720 0.8131 -0.298500 1734.0 diacerein 0.01uM YAPC 24h
REP.B009_HA1E_24H_P12_LY-215490_0.01uM -0.0737 0.8134 -0.297979 1530.0 LY-215490 0.01uM HA1E 24h
REP.B008_HA1E_24H_M23_elacridar_0.03uM -0.0733 0.8316 -0.265966 1993.0 elacridar 0.03uM HA1E 24h
REP.B004_HA1E_24H_G16_bortezomib_20uM -0.0675 0.8327 -0.264087 3101.0 bortezomib 20uM HA1E 24h
LJP006_HME1_3H_O23_QL-XII-47_0.12uM -0.0754 0.8408 -0.250102 1956.0 QL-XII-47 0.12uM HME1 3h
REP.B015_HA1E_24H_K03_propentofylline_0.25uM -0.0693 0.8546 -0.226755 1720.0 propentofylline 0.25uM HA1E 24h
Download full results: BACE2_DnReg_L1000_chemical_signatures.tsv
Enrichr analysis of top 20 chemical compounds that down-regulate BACE2
In [ ]: