In [1]:
# imports
import numpy as np
import pandas as pd
import matplotlib.cm as cm
import matplotlib.colors as colors
from IPython.display import HTML, Markdown
import requests
import time

# bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource

# display graphics
output_notebook()
Loading BokehJS ...
In [2]:
gene = 'TPP1'
cell = ''

RNA-seq-like Gene Centric Signature Reverse Search (RGCSRS)¶

In [3]:
root_path = 'https://appyters.maayanlab.cloud/storage/L1000_RNAseq_Gene_Search'
In [4]:
# load gene info
gene_info = pd.read_csv(f"{root_path}/L1000_to_RNAseq_gene_list.tsv", sep="\t", index_col=0)
landmark = "not"
inferred_l1000 = "not"
if gene_info.loc[gene, "landmark"] == True:
    landmark = ""
if gene_info.loc[gene, "inferred l1000"] == True:
    inferred_l1000 = ""
In [5]:
display(Markdown(f"**Input gene: {gene}**"))
display(Markdown(f"{gene} is {landmark} a landmark gene."))
display(Markdown(f"{gene} is {inferred_l1000} an originally inferred L1000 gene."))
display(Markdown(f"{gene} is a newly inferred (our model) gene."))
display(HTML(f"""More information about {gene} can be found at the <a href="https://cfde-gene-pages.cloud/gene/{gene}?CF=false&PS=true&Ag=true" target="_blank"> Gene and Drug Landing Page Aggregator</a>"""))

Input gene: TPP1

TPP1 is not a landmark gene.

TPP1 is an originally inferred L1000 gene.

TPP1 is a newly inferred (our model) gene.

More information about TPP1 can be found at the Gene and Drug Landing Page Aggregator
In [6]:
display(Markdown(f"**Input cell line: {'All' if cell == '' else cell}**"))

Input cell line: All

This Appyter provides visualizations of the top 5% of RNA-seq-like signatures induced by CRISPR knockouts and chemical perturbagens. Signatures are computed from transformed data profiles from the LINCS L1000 data. The transformation was performed using a two-step model:

  1. A cycleGAN model was used to first predict the RNA-seq expression of the 978 L1000 landmark genes
  2. A fully connected neural network was used to extrapolate the predicted RNA-seq expression of the 978 landmark genes to a full set of 23,164 genes

Signatures were computed using the characteristic direction method (Clark et al., 2014), as implemented here.

Each gene was pre-queried across all available RNA-seq-like signatures, and the top signatures where a gene is up or down-regulated are returned for each gene.

In [7]:
xpr_gene_data = pd.read_feather(f"{root_path}/gene_files/{gene}.f").set_index('index')
if cell != '': 
    xpr_gene_data = xpr_gene_data[xpr_gene_data.index.map(lambda x: x.split('_')[1].split('.')[0] == cell)]

cp_gene_data = pd.read_feather(f"{root_path}/cp_gene_files/{gene}.f").set_index('index')
if cell != '': 
    cp_gene_data = cp_gene_data[cp_gene_data.index.map(lambda x: x.split('_')[1].split('.')[0] == cell)]
In [8]:
def map_color(cd, logfc, red_norm, blue_norm):
    v = cd*logfc

    if v < 0: 
        return '#D3D3D3'
    elif cd < 0:
        return colors.to_hex(cm.get_cmap('Reds')(red_norm(cd*logfc)))
    else:
        return colors.to_hex(cm.get_cmap('Blues')(blue_norm(cd*logfc)))
In [9]:
def make_plot(comb_df, gene, pert_type):
    # check if there are any results
    if comb_df.shape[0] == 0: 
        display(Markdown("### **There are no signatures in the pre-processed dataset for the chosen gene, cell line, and perturbation type inputs.**"))
        return

    # set color and size for each point on plot
    v = (comb_df['logFC']*comb_df['CD']).to_numpy()
    red_norm = colors.Normalize(vmin=min(v)-0.005, vmax=max(v)+0.005)
    blue_norm = colors.Normalize(vmin=min(v)-0.005, vmax=max(v)+0.005)

    plot_colors = [map_color(row.CD, row.logFC, red_norm, blue_norm) for row in comb_df.itertuples()]

    # generate data source
    data_source = ColumnDataSource(
        data=dict(
            x = comb_df['logFC'],
            y = comb_df['CD'].apply(abs),
            cd = comb_df['CD'],
            sig = pd.Series(comb_df.index),
            fc = comb_df['FC'], 
            logfc = comb_df['logFC'],
            colors = plot_colors, 
            sizes = [8] * comb_df.shape[0],
        )
    )

    # create hover tooltip
    tools = [
        ("Signature", "@sig"),
        ("CD Coeff", "@cd"),
        ("Fold Change", "@fc"),
        ("Log2 Fold Change", "@logfc")
    ]
    # generate plot and relevant plot labels
    plot = figure(
        plot_width=700,
        plot_height=500,
        tooltips=tools
    )
    
    plot.circle(
        'x', 'y', 
        size='sizes',
        alpha=0.7, 
        line_alpha=1,
        line_width=1, 
        line_color='colors',
        source=data_source,
        fill_color='colors', 
        name=f"{gene}_expression_in_L1000_to_RNAseq_{pert_type.replace(' ','')}_volcano_plot"
    )

    plot.yaxis.axis_label = 'Abs(CD-Coefficient)'
    plot.xaxis.axis_label = 'log2(Fold Change)'
    plot.title.text = f"Differential Expression of {gene} in RNA-seq-like {pert_type} Signatures"
    plot.title.align = 'center'
    plot.title.text_font_size = '14px'
    show(plot)
In [10]:
def make_tables(comb_df, pert, is_upreg):
    dir_df = comb_df[comb_df['FC'] > 1] if is_upreg else comb_df[comb_df['FC'] < 1]
    if dir_df.shape[0] == 0: 
        display(Markdown(f"### There are no {'up-regulated' if is_upreg else 'down-regulated'} signatures for the chosen gene and cell line inputs."))
        return dir_df
    dir_df = dir_df.sort_values(by='FC', ascending=not is_upreg)
    dir_df['FC'] = dir_df['FC'].apply(lambda x: f'{x:.4f}')
    dir_df['CD'] = dir_df['CD'].apply(lambda x: f'{x:.4f}')
    if pert == 'xpr':
        dir_df['KO Gene'] = dir_df.index.map(lambda x: x.split('_')[4])
    else:
        dir_df['Perturbagen'] = dir_df.index.map(lambda x: x.split('_')[4])
        dir_df['Dose'] = dir_df.index.map(lambda x: x.split('_')[5] if len(x.split('_')) == 6 else '')
    dir_df['Cell Line'] = dir_df.index.map(lambda x: x.split('_')[1])
    dir_df['Timepoint'] = dir_df.index.map(lambda x: x.split('_')[2].lower())
    dir_df = dir_df.rename(columns={
            'FC': 'Fold Change', 
            'logFC': 'Log2(Fold Change)', 
            'CD': 'CD Coefficient',
            'Rank': 'Rank in Signature'})
    dir_df.index.names = ['Signature']
    return dir_df

# create download link for table results
def download_link(df, fname):
    if df.shape[0] == 0: return ''
    csv = df.to_csv(fname, sep='\t', index=True)
    link = f'<div>Download full results: <a href="{fname}" target=_blank>{fname}</a></div>'
    return link
In [11]:
def enrichr(pert, top_perts, direction):
    if pert == 'CRISPR':
        desc = f"Top CRISPR targets from RNA-seq-like signatures that {direction}-regulate {gene}"
        list_url = 'https://maayanlab.cloud/Enrichr/addList'
        enrich_url = 'https://maayanlab.cloud/Enrichr/enrich'
    else:
        desc = f"Top compounds from RNA-seq-like signatures that {direction}-regulate {gene}"
        list_url = 'https://maayanlab.cloud/DrugEnrichr/addList'
        enrich_url = 'https://maayanlab.cloud/DrugEnrichr/enrich'
    payload = {
        'list': (None, '\n'.join(top_perts)),
        'description': (None, desc)
    }
    response = requests.post(list_url, files=payload)
    if not response.ok:
        raise Exception('Error analyzing gene list')
    time.sleep(0.5)
    return f"{enrich_url}?dataset={response.json()['shortId']}"

def enrichr_link(pert, df, direction, gene): 
    # check if there are any results
    if df.shape[0] < 5: 
        return(f"There are not enough {direction}-regulated signatures to submit to {'Enrichr' if pert == 'CRISPR' else 'DrugEnrichr'}.")
    comb_df = df.copy()
    comb_df['pert'] = comb_df.index.map(lambda x: x.split('_')[4])
    if direction == 'up':
        top_perts = comb_df.sort_values(by='FC', ascending=False) \
            .drop_duplicates(subset=['pert'],keep='first')['pert'][:20]
    else:
        top_perts = comb_df.sort_values(by='FC', ascending=True) \
            .drop_duplicates(subset=['pert'],keep='first')['pert'][:20]
    pert_type = 'CRISPR target genes' if pert == 'CRISPR' else 'chemical compounds'
    results_url = enrichr(pert, top_perts, direction)
    return f'<a href={results_url} target="_blank">Enrichr analysis of top 20 {pert_type} that {direction}-regulate {gene}</a>'

CRISPR KO signatures¶

Volcano Plots¶

In the following volcano plot, each point represents a single CRISPR knockout signature. The x-position indicates the log2(fold change) of the expression of the chosen gene in the signature, while the y-position indicates the absolute value of the characteristic direction coefficient of the chosen gene.

Note that the fold change and characteristic direction coefficients of the gene are not necessarily in the same direction for each signature; this is because in cases where a gene is both up- and down-regulated between replicate samples, the characteristic direction method prioritizes the more consistent direction of movement, which may not be consistent with the fold change. To read more about the characteristic direction method, please refer to Clark et al., 2014.

Points with same-direction fold change and CD coefficient values are highlighted by coloring them blue (up-regulated) or red (down-regulated). Darker colored points indicate higher differential expression of the gene in the corresponding signature.

Drag the plot to pan around. Use the toolbar to the right of the plot to zoom, reset the plot view, or download the plot.

In [12]:
# CRISPR
make_plot(xpr_gene_data, gene, 'CRISPR KO')

Tables¶

The tables below display the characteristic direction (CD) coefficients, fold change values, and log2(fold change) values correponding to the expression of the chosen gene in each CRIPSR KO signature.

The rank of the gene in the signature is determined by its fold change relative to the fold change of the other genes that are regulated in the same direction; if a gene is ranked 1 in a signature where the gene is up-regulated, that means that out of all genes up-regulated in the signature, the input gene had the highest fold change and was the most up-regulated.

While only the top 10 signatures for each direction are displayed, below each table is a link to download the top 50 signatures for each direction.

A link to the Enrichr analysis results of the top 20 unique perturbations from the top signatures that up or down-regulate the input gene can be found below each table as well.

In [13]:
display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} CRISPR KO signatures where {gene} is up-regulated (based on fold change)</b></div>'))
up_xpr = make_tables(xpr_gene_data, pert='xpr', is_upreg=True)
display(HTML(up_xpr[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(up_xpr[:100], f"{gene}_UpReg_L1000_CRISPR_signatures.tsv")))
display(HTML(enrichr_link('CRISPR', xpr_gene_data, 'up', gene)))
Top CRISPR KO signatures where TPP1 is up-regulated (based on fold change)
CD Coefficient Fold Change Log2(Fold Change) Rank in Signature KO Gene Cell Line Timepoint
Signature
XPR031_BICR6.311_96H_C13_OR5B21 0.0335 1.1382 0.186722 3259.0 OR5B21 BICR6.311 96h
XPR030_A549.311_96H_I15_DAO 0.0327 1.1090 0.149221 1912.0 DAO A549.311 96h
XPR027_A549.311_96H_H18_SIGLEC1 0.0364 1.1018 0.139810 1956.0 SIGLEC1 A549.311 96h
XPR037_YAPC.311_96H_K17_FGFBP2 0.0324 1.0986 0.135708 2017.0 FGFBP2 YAPC.311 96h
XPR028_YAPC.311_96H_K19_CELA1 0.0326 1.0887 0.122669 2047.0 CELA1 YAPC.311 96h
XPR024_A375.311_96H_K01_SKP1 0.0381 1.0812 0.112676 2669.0 SKP1 A375.311 96h
XPR034_YAPC.311_96H_M20_MAP3K6 0.0390 1.0792 0.109976 1752.0 MAP3K6 YAPC.311 96h
XPR018_U251MG.311_96H_C06_IL4R 0.0337 1.0722 0.100529 2557.0 IL4R U251MG.311 96h
XPR008_BICR6.311_96H_M08_PPARG 0.0323 1.0697 0.097219 1746.0 PPARG BICR6.311 96h
XPR025_BICR6.311_96H_G04_STAT4 0.0350 1.0696 0.097139 2097.0 STAT4 BICR6.311 96h
Download full results: TPP1_UpReg_L1000_CRISPR_signatures.tsv
Enrichr analysis of top 20 CRISPR target genes that up-regulate TPP1
In [14]:
display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} CRISPR KO signatures where {gene} is down-regulated (based on fold change)</b></div>'))
down_xpr = make_tables(xpr_gene_data, pert='xpr', is_upreg=False)
display(HTML(down_xpr[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(down_xpr[:100], f"{gene}_DnReg_L1000_CRISPR_signatures.tsv")))
display(HTML(enrichr_link('CRISPR', xpr_gene_data, 'down', gene)))
Top CRISPR KO signatures where TPP1 is down-regulated (based on fold change)
CD Coefficient Fold Change Log2(Fold Change) Rank in Signature KO Gene Cell Line Timepoint
Signature
XPR044_KELLY.311_96H_G24_PRKAG1 -0.0358 0.8370 -0.256732 1689.0 PRKAG1 KELLY.311 96h
XPR008_A549.311_96H_F01_GABRB2 -0.0361 0.8673 -0.205357 2193.0 GABRB2 A549.311 96h
XPR024_A549.311_96H_A09_SHTN1 -0.0349 0.8774 -0.188637 2495.0 SHTN1 A549.311 96h
XPR042_KELLY.311_96H_E09_HMOX1 -0.0361 0.8795 -0.185317 1624.0 HMOX1 KELLY.311 96h
XPR009_YAPC.311_96H_L24_P2RX1 -0.0347 0.8814 -0.182163 2265.0 P2RX1 YAPC.311 96h
XPR025_BICR6.311_96H_L10_TIRAP -0.0392 0.8814 -0.182149 1948.0 TIRAP BICR6.311 96h
XPR015_MCF7.311_96H_E08_CSF3R -0.0354 0.9082 -0.138925 2147.0 CSF3R MCF7.311 96h
XPR044_KELLY.311_96H_H20_YME1L1 -0.0386 0.9084 -0.138668 2657.0 YME1L1 KELLY.311 96h
XPR024_A549.311_96H_H16_SOCS2 -0.0370 0.9095 -0.136796 2738.0 SOCS2 A549.311 96h
XPR041_ES2.311_96H_D20_RAB27A -0.0402 0.9121 -0.132747 2731.0 RAB27A ES2.311 96h
Download full results: TPP1_DnReg_L1000_CRISPR_signatures.tsv
Enrichr analysis of top 20 CRISPR target genes that down-regulate TPP1

Chemical perturbation signatures¶

Volcano Plots¶

In the following volcano plot, each point represents a single chemical perturbation signature. The x-position indicates the log2(fold change) of the expression of the chosen gene in the signature, while the y-position indicates the absolute value of the characteristic direction coefficient of the chosen gene.

Note that the fold change and characteristic direction coefficients of the gene are not necessarily in the same direction for each signature; this is because in cases where a gene is both up- and down-regulated between replicate samples, the characteristic direction method prioritizes the more consistent direction of movement, which may not be consistent with the fold change. To read more about the characteristic direction method, please refer to Clark et al., 2014.

Points with same-direction fold change and CD coefficient values are highlighted by coloring them blue (up-regulated) or red (down-regulated). Darker colored points indicate higher differential expression of the gene in the corresponding signature.

Drag the plot to pan around. Use the toolbar to the right of the plot to zoom, reset the plot view, or download the plot.

In [15]:
# CP
make_plot(cp_gene_data, gene, 'Chemical')

Tables¶

The tables below display the characteristic direction (CD) coefficients, fold change values, and log2(fold change) values correponding to the expression of the chosen gene in each chemical perturbation signature.

The rank of the gene in the signature is determined by its fold change relative to the fold change of the other genes that are regulated in the same direction; if a gene is ranked 1 in a signature where the gene is up-regulated, that means that out of all genes up-regulated in the signature, the input gene had the highest fold change and was the most up-regulated.

While only the top 10 signatures for each direction are displayed, below each table is a link to download the top 50 signatures for each direction.

A link to the Enrichr analysis results of the top 20 unique perturbations from the top signatures that up or down-regulate the input gene can be found below each table as well.

In [16]:
display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} chemical perturbation signatures where {gene} is up-regulated (based on fold change)</b></div>'))
up_cp = make_tables(cp_gene_data, pert='cp', is_upreg=True)
display(HTML(up_cp[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(up_cp[:100], f"{gene}_UpReg_L1000_chemical_signatures.tsv")))
display(HTML(enrichr_link('chemical', cp_gene_data, 'up', gene)))
Top chemical perturbation signatures where TPP1 is up-regulated (based on fold change)
CD Coefficient Fold Change Log2(Fold Change) Rank in Signature Perturbagen Dose Cell Line Timepoint
Signature
ASG003_XC.P936_24H_G04_amlodipine_10uM 0.0417 1.1567 0.209993 1558.0 amlodipine 10uM XC.P936 24h
LJP006_MDAMB231_24H_D13_BI-2536_10uM 0.0363 1.1461 0.196711 4759.0 BI-2536 10uM MDAMB231 24h
ERG021_PC3_24H_M18_I-BET-151_0.04uM 0.0368 1.1433 0.193228 2534.0 I-BET-151 0.04uM PC3 24h
ASG003_XC.P905_24H_L12_AZD-8055_0.12uM 0.0412 1.1423 0.191964 1823.0 AZD-8055 0.12uM XC.P905 24h
PAC041_U2OS_6H_P12_sirolimus_20uM 0.0383 1.1336 0.180880 1467.0 sirolimus 20uM U2OS 6h
MOAR011_BEN_24H_G12_fdcyd_1.11uM 0.0370 1.1276 0.173276 1636.0 fdcyd 1.11uM BEN 24h
REP.A025_MCF10A_24H_H03_AZD-2014_1.11uM 0.0370 1.1271 0.172637 2268.0 AZD-2014 1.11uM MCF10A 24h
REP.A016_A375_24H_A19_palbociclib_10uM 0.0362 1.1231 0.167546 3418.0 palbociclib 10uM A375 24h
ASG002_NCIH2172_24H_N14_JNJ-7706621_1.11uM 0.0361 1.1208 0.164499 2061.0 JNJ-7706621 1.11uM NCIH2172 24h
REP.B002_A375_24H_O14_NVP-BEZ235_0.74uM 0.0386 1.1030 0.141400 3149.5 NVP-BEZ235 0.74uM A375 24h
Download full results: TPP1_UpReg_L1000_chemical_signatures.tsv
Enrichr analysis of top 20 chemical compounds that up-regulate TPP1
In [17]:
display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} chemical perturbation signatures where {gene} is down-regulated (based on fold change)</b></div>'))
down_cp = make_tables(cp_gene_data, pert='cp', is_upreg=False)
display(HTML(down_cp[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(down_cp[:100], f"{gene}_DnReg_L1000_chemical_signatures.tsv")))
display(HTML(enrichr_link('chemical', cp_gene_data, 'down', gene)))
Top chemical perturbation signatures where TPP1 is down-regulated (based on fold change)
CD Coefficient Fold Change Log2(Fold Change) Rank in Signature Perturbagen Dose Cell Line Timepoint
Signature
LJP006_HA1E_24H_O21_QL-XII-47_1.11uM -0.0376 0.7393 -0.435725 1585.0 QL-XII-47 1.11uM HA1E 24h
POL001_HT29_24H_C03_narciclasine_1.11uM -0.0367 0.7473 -0.420222 2746.0 narciclasine 1.11uM HT29 24h
REP.B001_PC3_24H_K09_MG-132_20uM -0.0370 0.7925 -0.335500 2667.0 MG-132 20uM PC3 24h
LJP006_HA1E_24H_B24_JW-7-24-1_0.04uM -0.0379 0.7976 -0.326239 2089.0 JW-7-24-1 0.04uM HA1E 24h
MOAR012_NCIH838_24H_N07_azacitidine_10uM -0.0375 0.7986 -0.324460 2207.0 azacitidine 10uM NCIH838 24h
LJP005_MCF10A_3H_A23_crizotinib_0.12uM -0.0424 0.8250 -0.277450 1855.0 crizotinib 0.12uM MCF10A 3h
REP.A024_HT29_24H_M16_dolastatin-10_0.37uM -0.0386 0.8377 -0.255412 2885.0 dolastatin-10 0.37uM HT29 24h
REP.B017_MDAMB231_24H_P01_molsidomine_2.22uM -0.0377 0.8626 -0.213259 2101.0 molsidomine 2.22uM MDAMB231 24h
ASG002_LNCAP.FGC_24H_P24_digitoxin_0.12uM -0.0369 0.8723 -0.197112 2893.0 digitoxin 0.12uM LNCAP.FGC 24h
REP.A001_A375_24H_P24_penitrem-a_0.01uM -0.0373 0.8739 -0.194453 2645.0 penitrem-a 0.01uM A375 24h
Download full results: TPP1_DnReg_L1000_chemical_signatures.tsv
Enrichr analysis of top 20 chemical compounds that down-regulate TPP1
In [ ]: