In [1]:
# imports
import numpy as np
import pandas as pd
import matplotlib.cm as cm
import matplotlib.colors as colors
from IPython.display import HTML, Markdown
import requests
import time

# bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource

# display graphics
output_notebook()
Loading BokehJS ...
In [2]:
gene = 'TIMM13'
cell = ''

RNA-seq-like Gene Centric Signature Reverse Search (RGCSRS)¶

In [3]:
root_path = 'https://appyters.maayanlab.cloud/storage/L1000_RNAseq_Gene_Search'
In [4]:
# load gene info
gene_info = pd.read_csv(f"{root_path}/L1000_to_RNAseq_gene_list.tsv", sep="\t", index_col=0)
landmark = "not"
inferred_l1000 = "not"
if gene_info.loc[gene, "landmark"] == True:
    landmark = ""
if gene_info.loc[gene, "inferred l1000"] == True:
    inferred_l1000 = ""
In [5]:
display(Markdown(f"**Input gene: {gene}**"))
display(Markdown(f"{gene} is {landmark} a landmark gene."))
display(Markdown(f"{gene} is {inferred_l1000} an originally inferred L1000 gene."))
display(Markdown(f"{gene} is a newly inferred (our model) gene."))
display(HTML(f"""More information about {gene} can be found at the <a href="https://cfde-gene-pages.cloud/gene/{gene}?CF=false&PS=true&Ag=true" target="_blank"> Gene and Drug Landing Page Aggregator</a>"""))

Input gene: TIMM13

TIMM13 is not a landmark gene.

TIMM13 is an originally inferred L1000 gene.

TIMM13 is a newly inferred (our model) gene.

More information about TIMM13 can be found at the Gene and Drug Landing Page Aggregator
In [6]:
display(Markdown(f"**Input cell line: {'All' if cell == '' else cell}**"))

Input cell line: All

This Appyter provides visualizations of the top 5% of RNA-seq-like signatures induced by CRISPR knockouts and chemical perturbagens. Signatures are computed from transformed data profiles from the LINCS L1000 data. The transformation was performed using a two-step model:

  1. A cycleGAN model was used to first predict the RNA-seq expression of the 978 L1000 landmark genes
  2. A fully connected neural network was used to extrapolate the predicted RNA-seq expression of the 978 landmark genes to a full set of 23,164 genes

Signatures were computed using the characteristic direction method (Clark et al., 2014), as implemented here.

Each gene was pre-queried across all available RNA-seq-like signatures, and the top signatures where a gene is up or down-regulated are returned for each gene.

In [7]:
xpr_gene_data = pd.read_feather(f"{root_path}/gene_files/{gene}.f").set_index('index')
if cell != '': 
    xpr_gene_data = xpr_gene_data[xpr_gene_data.index.map(lambda x: x.split('_')[1].split('.')[0] == cell)]

cp_gene_data = pd.read_feather(f"{root_path}/cp_gene_files/{gene}.f").set_index('index')
if cell != '': 
    cp_gene_data = cp_gene_data[cp_gene_data.index.map(lambda x: x.split('_')[1].split('.')[0] == cell)]
In [8]:
def map_color(cd, logfc, red_norm, blue_norm):
    v = cd*logfc

    if v < 0: 
        return '#D3D3D3'
    elif cd < 0:
        return colors.to_hex(cm.get_cmap('Reds')(red_norm(cd*logfc)))
    else:
        return colors.to_hex(cm.get_cmap('Blues')(blue_norm(cd*logfc)))
In [9]:
def make_plot(comb_df, gene, pert_type):
    # check if there are any results
    if comb_df.shape[0] == 0: 
        display(Markdown("### **There are no signatures in the pre-processed dataset for the chosen gene, cell line, and perturbation type inputs.**"))
        return

    # set color and size for each point on plot
    v = (comb_df['logFC']*comb_df['CD']).to_numpy()
    red_norm = colors.Normalize(vmin=min(v)-0.005, vmax=max(v)+0.005)
    blue_norm = colors.Normalize(vmin=min(v)-0.005, vmax=max(v)+0.005)

    plot_colors = [map_color(row.CD, row.logFC, red_norm, blue_norm) for row in comb_df.itertuples()]

    # generate data source
    data_source = ColumnDataSource(
        data=dict(
            x = comb_df['logFC'],
            y = comb_df['CD'].apply(abs),
            cd = comb_df['CD'],
            sig = pd.Series(comb_df.index),
            fc = comb_df['FC'], 
            logfc = comb_df['logFC'],
            colors = plot_colors, 
            sizes = [8] * comb_df.shape[0],
        )
    )

    # create hover tooltip
    tools = [
        ("Signature", "@sig"),
        ("CD Coeff", "@cd"),
        ("Fold Change", "@fc"),
        ("Log2 Fold Change", "@logfc")
    ]
    # generate plot and relevant plot labels
    plot = figure(
        plot_width=700,
        plot_height=500,
        tooltips=tools
    )
    
    plot.circle(
        'x', 'y', 
        size='sizes',
        alpha=0.7, 
        line_alpha=1,
        line_width=1, 
        line_color='colors',
        source=data_source,
        fill_color='colors', 
        name=f"{gene}_expression_in_L1000_to_RNAseq_{pert_type.replace(' ','')}_volcano_plot"
    )

    plot.yaxis.axis_label = 'Abs(CD-Coefficient)'
    plot.xaxis.axis_label = 'log2(Fold Change)'
    plot.title.text = f"Differential Expression of {gene} in RNA-seq-like {pert_type} Signatures"
    plot.title.align = 'center'
    plot.title.text_font_size = '14px'
    show(plot)
In [10]:
def make_tables(comb_df, pert, is_upreg):
    dir_df = comb_df[comb_df['FC'] > 1] if is_upreg else comb_df[comb_df['FC'] < 1]
    if dir_df.shape[0] == 0: 
        display(Markdown(f"### There are no {'up-regulated' if is_upreg else 'down-regulated'} signatures for the chosen gene and cell line inputs."))
        return dir_df
    dir_df = dir_df.sort_values(by='FC', ascending=not is_upreg)
    dir_df['FC'] = dir_df['FC'].apply(lambda x: f'{x:.4f}')
    dir_df['CD'] = dir_df['CD'].apply(lambda x: f'{x:.4f}')
    if pert == 'xpr':
        dir_df['KO Gene'] = dir_df.index.map(lambda x: x.split('_')[4])
    else:
        dir_df['Perturbagen'] = dir_df.index.map(lambda x: x.split('_')[4])
        dir_df['Dose'] = dir_df.index.map(lambda x: x.split('_')[5] if len(x.split('_')) == 6 else '')
    dir_df['Cell Line'] = dir_df.index.map(lambda x: x.split('_')[1])
    dir_df['Timepoint'] = dir_df.index.map(lambda x: x.split('_')[2].lower())
    dir_df = dir_df.rename(columns={
            'FC': 'Fold Change', 
            'logFC': 'Log2(Fold Change)', 
            'CD': 'CD Coefficient',
            'Rank': 'Rank in Signature'})
    dir_df.index.names = ['Signature']
    return dir_df

# create download link for table results
def download_link(df, fname):
    if df.shape[0] == 0: return ''
    csv = df.to_csv(fname, sep='\t', index=True)
    link = f'<div>Download full results: <a href="{fname}" target=_blank>{fname}</a></div>'
    return link
In [11]:
def enrichr(pert, top_perts, direction):
    if pert == 'CRISPR':
        desc = f"Top CRISPR targets from RNA-seq-like signatures that {direction}-regulate {gene}"
        list_url = 'https://maayanlab.cloud/Enrichr/addList'
        enrich_url = 'https://maayanlab.cloud/Enrichr/enrich'
    else:
        desc = f"Top compounds from RNA-seq-like signatures that {direction}-regulate {gene}"
        list_url = 'https://maayanlab.cloud/DrugEnrichr/addList'
        enrich_url = 'https://maayanlab.cloud/DrugEnrichr/enrich'
    payload = {
        'list': (None, '\n'.join(top_perts)),
        'description': (None, desc)
    }
    response = requests.post(list_url, files=payload)
    if not response.ok:
        raise Exception('Error analyzing gene list')
    time.sleep(0.5)
    return f"{enrich_url}?dataset={response.json()['shortId']}"

def enrichr_link(pert, df, direction, gene): 
    # check if there are any results
    if df.shape[0] < 5: 
        return(f"There are not enough {direction}-regulated signatures to submit to {'Enrichr' if pert == 'CRISPR' else 'DrugEnrichr'}.")
    comb_df = df.copy()
    comb_df['pert'] = comb_df.index.map(lambda x: x.split('_')[4])
    if direction == 'up':
        top_perts = comb_df.sort_values(by='FC', ascending=False) \
            .drop_duplicates(subset=['pert'],keep='first')['pert'][:20]
    else:
        top_perts = comb_df.sort_values(by='FC', ascending=True) \
            .drop_duplicates(subset=['pert'],keep='first')['pert'][:20]
    pert_type = 'CRISPR target genes' if pert == 'CRISPR' else 'chemical compounds'
    results_url = enrichr(pert, top_perts, direction)
    return f'<a href={results_url} target="_blank">Enrichr analysis of top 20 {pert_type} that {direction}-regulate {gene}</a>'

CRISPR KO signatures¶

Volcano Plots¶

In the following volcano plot, each point represents a single CRISPR knockout signature. The x-position indicates the log2(fold change) of the expression of the chosen gene in the signature, while the y-position indicates the absolute value of the characteristic direction coefficient of the chosen gene.

Note that the fold change and characteristic direction coefficients of the gene are not necessarily in the same direction for each signature; this is because in cases where a gene is both up- and down-regulated between replicate samples, the characteristic direction method prioritizes the more consistent direction of movement, which may not be consistent with the fold change. To read more about the characteristic direction method, please refer to Clark et al., 2014.

Points with same-direction fold change and CD coefficient values are highlighted by coloring them blue (up-regulated) or red (down-regulated). Darker colored points indicate higher differential expression of the gene in the corresponding signature.

Drag the plot to pan around. Use the toolbar to the right of the plot to zoom, reset the plot view, or download the plot.

In [12]:
# CRISPR
make_plot(xpr_gene_data, gene, 'CRISPR KO')

Tables¶

The tables below display the characteristic direction (CD) coefficients, fold change values, and log2(fold change) values correponding to the expression of the chosen gene in each CRIPSR KO signature.

The rank of the gene in the signature is determined by its fold change relative to the fold change of the other genes that are regulated in the same direction; if a gene is ranked 1 in a signature where the gene is up-regulated, that means that out of all genes up-regulated in the signature, the input gene had the highest fold change and was the most up-regulated.

While only the top 10 signatures for each direction are displayed, below each table is a link to download the top 50 signatures for each direction.

A link to the Enrichr analysis results of the top 20 unique perturbations from the top signatures that up or down-regulate the input gene can be found below each table as well.

In [13]:
display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} CRISPR KO signatures where {gene} is up-regulated (based on fold change)</b></div>'))
up_xpr = make_tables(xpr_gene_data, pert='xpr', is_upreg=True)
display(HTML(up_xpr[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(up_xpr[:100], f"{gene}_UpReg_L1000_CRISPR_signatures.tsv")))
display(HTML(enrichr_link('CRISPR', xpr_gene_data, 'up', gene)))
Top CRISPR KO signatures where TIMM13 is up-regulated (based on fold change)
CD Coefficient Fold Change Log2(Fold Change) Rank in Signature KO Gene Cell Line Timepoint
Signature
XPR014_HT29.311_96H_E06_CDKN2C 0.0203 1.2418 0.312414 2356.0 CDKN2C HT29.311 96h
XPR019_A549.311_96H_D24_PMS2 0.0219 1.2349 0.304379 1942.0 PMS2 A549.311 96h
XPR014_HT29.311_96H_L14_CLEC12A 0.0206 1.2347 0.304213 1828.0 CLEC12A HT29.311 96h
XPR010_MCF7.311_96H_P12_PRKAA1 0.0203 1.1567 0.210014 1497.0 PRKAA1 MCF7.311 96h
XPR010_A549.311_96H_O24_BCHE 0.0212 1.1508 0.202612 3514.0 BCHE A549.311 96h
XPR028_HT29.311_96H_P17_GPD1 0.0214 1.1421 0.191706 2482.0 GPD1 HT29.311 96h
XPR010_HT29.311_96H_A24_CACNA1H 0.0207 1.1265 0.171872 2207.0 CACNA1H HT29.311 96h
XPR033_U251MG.311_96H_K02_SEC14L2 0.0204 1.1221 0.166189 1994.0 SEC14L2 U251MG.311 96h
XPR011_HT29.311_96H_I11_FPGS 0.0211 1.1182 0.161203 3328.0 FPGS HT29.311 96h
XPR010_A549.311_96H_I22_APOB 0.0225 1.1174 0.160149 2672.0 APOB A549.311 96h
Download full results: TIMM13_UpReg_L1000_CRISPR_signatures.tsv
Enrichr analysis of top 20 CRISPR target genes that up-regulate TIMM13
In [14]:
display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} CRISPR KO signatures where {gene} is down-regulated (based on fold change)</b></div>'))
down_xpr = make_tables(xpr_gene_data, pert='xpr', is_upreg=False)
display(HTML(down_xpr[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(down_xpr[:100], f"{gene}_DnReg_L1000_CRISPR_signatures.tsv")))
display(HTML(enrichr_link('CRISPR', xpr_gene_data, 'down', gene)))
Top CRISPR KO signatures where TIMM13 is down-regulated (based on fold change)
CD Coefficient Fold Change Log2(Fold Change) Rank in Signature KO Gene Cell Line Timepoint
Signature
XPR031_PC3.311B_96H_B16_PLXNA1 -0.0212 0.8902 -0.167869 1918.0 PLXNA1 PC3.311B 96h
XPR032_A375.311_96H_P04_IL18R1 -0.0208 0.8984 -0.154630 2173.0 IL18R1 A375.311 96h
XPR028_A375.311_96H_G16_PTPRS -0.0198 0.9007 -0.150804 1563.0 PTPRS A375.311 96h
XPR041_ES2.311_96H_K01_AKT1 -0.0200 0.9017 -0.149235 2281.0 AKT1 ES2.311 96h
XPR025_A549.311_96H_B07_TBC1D7 -0.0203 0.9037 -0.146012 2762.0 TBC1D7 A549.311 96h
XPR021_BICR6.311_96H_P08_NUDT19 -0.0206 0.9042 -0.145286 1993.0 NUDT19 BICR6.311 96h
XPR008_HT29.311_96H_M08_PPARG -0.0196 0.9160 -0.126527 2553.0 PPARG HT29.311 96h
XPR012_A549.311_96H_K05_ARL5B -0.0229 0.9218 -0.117409 3354.0 ARL5B A549.311 96h
XPR010_U251MG.311_96H_B03_CCNA2 -0.0215 0.9223 -0.116655 3654.0 CCNA2 U251MG.311 96h
XPRJJ002_A375_96H_G22_DOT1L -0.0214 0.9266 -0.109988 2945.0 DOT1L A375 96h
Download full results: TIMM13_DnReg_L1000_CRISPR_signatures.tsv
Enrichr analysis of top 20 CRISPR target genes that down-regulate TIMM13

Chemical perturbation signatures¶

Volcano Plots¶

In the following volcano plot, each point represents a single chemical perturbation signature. The x-position indicates the log2(fold change) of the expression of the chosen gene in the signature, while the y-position indicates the absolute value of the characteristic direction coefficient of the chosen gene.

Note that the fold change and characteristic direction coefficients of the gene are not necessarily in the same direction for each signature; this is because in cases where a gene is both up- and down-regulated between replicate samples, the characteristic direction method prioritizes the more consistent direction of movement, which may not be consistent with the fold change. To read more about the characteristic direction method, please refer to Clark et al., 2014.

Points with same-direction fold change and CD coefficient values are highlighted by coloring them blue (up-regulated) or red (down-regulated). Darker colored points indicate higher differential expression of the gene in the corresponding signature.

Drag the plot to pan around. Use the toolbar to the right of the plot to zoom, reset the plot view, or download the plot.

In [15]:
# CP
make_plot(cp_gene_data, gene, 'Chemical')

Tables¶

The tables below display the characteristic direction (CD) coefficients, fold change values, and log2(fold change) values correponding to the expression of the chosen gene in each chemical perturbation signature.

The rank of the gene in the signature is determined by its fold change relative to the fold change of the other genes that are regulated in the same direction; if a gene is ranked 1 in a signature where the gene is up-regulated, that means that out of all genes up-regulated in the signature, the input gene had the highest fold change and was the most up-regulated.

While only the top 10 signatures for each direction are displayed, below each table is a link to download the top 50 signatures for each direction.

A link to the Enrichr analysis results of the top 20 unique perturbations from the top signatures that up or down-regulate the input gene can be found below each table as well.

In [16]:
display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} chemical perturbation signatures where {gene} is up-regulated (based on fold change)</b></div>'))
up_cp = make_tables(cp_gene_data, pert='cp', is_upreg=True)
display(HTML(up_cp[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(up_cp[:100], f"{gene}_UpReg_L1000_chemical_signatures.tsv")))
display(HTML(enrichr_link('chemical', cp_gene_data, 'up', gene)))
Top chemical perturbation signatures where TIMM13 is up-regulated (based on fold change)
CD Coefficient Fold Change Log2(Fold Change) Rank in Signature Perturbagen Dose Cell Line Timepoint
Signature
PRISM001_PC3_24H_G11_CMAP-AZD-1152HQPA_1.25uM 0.0225 1.3464 0.429118 2853.0 CMAP-AZD-1152HQPA 1.25uM PC3 24h
REP.A014_MCF7_24H_J20_cariprazine_3.33uM 0.0235 1.1718 0.228766 1266.0 cariprazine 3.33uM MCF7 24h
MOAR010_HEC265_24H_B22_drospirenone_10uM 0.0226 1.1581 0.211703 2309.0 drospirenone 10uM HEC265 24h
REP.A008_HT29_24H_K24_benzamil_0.04uM 0.0250 1.1570 0.210360 1394.0 benzamil 0.04uM HT29 24h
REP.A022_MCF10A_24H_K03_urapidil_1.11uM 0.0222 1.1535 0.206074 1819.0 urapidil 1.11uM MCF10A 24h
LJP009_YAPC_24H_K12_GSK-1059615_0.04uM 0.0252 1.1511 0.202966 1464.0 GSK-1059615 0.04uM YAPC 24h
REP.A027_YAPC_24H_P11_EMD-1214063_0.125uM 0.0219 1.1472 0.198094 1989.0 EMD-1214063 0.125uM YAPC 24h
REP.A015_HA1E_24H_I12_dexfenfluramine_0.04uM 0.0243 1.1328 0.179841 1183.0 dexfenfluramine 0.04uM HA1E 24h
LJP008_MCF7_24H_G23_azacitidine_0.12uM 0.0241 1.1299 0.176205 3465.0 azacitidine 0.12uM MCF7 24h
REP.B015_PC3_24H_F05_fomepizole_0.03uM 0.0237 1.1168 0.159384 2219.0 fomepizole 0.03uM PC3 24h
Download full results: TIMM13_UpReg_L1000_chemical_signatures.tsv
Enrichr analysis of top 20 chemical compounds that up-regulate TIMM13
In [17]:
display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {cell} chemical perturbation signatures where {gene} is down-regulated (based on fold change)</b></div>'))
down_cp = make_tables(cp_gene_data, pert='cp', is_upreg=False)
display(HTML(down_cp[:10].to_html(escape=False, col_space=70)))
display(HTML(download_link(down_cp[:100], f"{gene}_DnReg_L1000_chemical_signatures.tsv")))
display(HTML(enrichr_link('chemical', cp_gene_data, 'down', gene)))
Top chemical perturbation signatures where TIMM13 is down-regulated (based on fold change)
CD Coefficient Fold Change Log2(Fold Change) Rank in Signature Perturbagen Dose Cell Line Timepoint
Signature
REP.A011_MCF7_24H_F17_nicotine_0.125uM -0.0230 0.6817 -0.552770 1789.0 nicotine 0.125uM MCF7 24h
MOAR009_SH4_24H_F22_ampalex_10uM -0.0229 0.7429 -0.428745 1526.0 ampalex 10uM SH4 24h
LJP006_BT20_3H_N02_GSK-2126458_3.33uM -0.0260 0.7772 -0.363700 1450.0 GSK-2126458 3.33uM BT20 3h
MOAR012_HEC251_24H_C19_PACOCF3_10uM -0.0240 0.8122 -0.300116 2157.0 PACOCF3 10uM HEC251 24h
REP.A015_HT29_24H_D24_MK-2461_0.04uM -0.0222 0.8170 -0.291558 1980.0 MK-2461 0.04uM HT29 24h
MOAR012_HCC95_24H_P22_ethaverine_10uM -0.0232 0.8510 -0.232821 2230.0 ethaverine 10uM HCC95 24h
LJP005_MCF7_24H_C16_vemurafenib_0.37uM -0.0230 0.8528 -0.229650 2861.0 vemurafenib 0.37uM MCF7 24h
FIBR027_MCLF130CN_6H_G23_RITA_4uM -0.0224 0.8593 -0.218806 2723.0 RITA 4uM MCLF130CN 6h
REP.B023_JURKAT_24H_H07_volasertib_2.22uM -0.0243 0.8602 -0.217338 3070.0 volasertib 2.22uM JURKAT 24h
PBIOA018_HCC515_24H_K15_cimetidine_1.11uM -0.0227 0.8693 -0.202042 2293.0 cimetidine 1.11uM HCC515 24h
Download full results: TIMM13_DnReg_L1000_chemical_signatures.tsv
Enrichr analysis of top 20 chemical compounds that down-regulate TIMM13
In [ ]: