This appyter creates a variety of visualizations for enrichment analysis results for one selected Enrichr library, and may be run as either a standalone appyter from the Appyter Catalog or programmatically from the Enrichr results page.
For simplicity, the only inputs for this appyter are a gene list and one library. Other parameters are set to default values in the cell below. You can download the notebook, change these parameters, and rerun it if you wish.
The pre-processed libraries used to create the scatter plot and hexagonal canvas visualizations can be found here.
A link to the full analysis results on the Enrichr website can be found at the bottom of this page.
# Scatter Plot Imports
from maayanlab_bioinformatics.enrichment import enrich_crisp
import matplotlib as mpl
import matplotlib.colors as colors
import base64
# Bar Chart Imports
import pandas as pd
import numpy as np
import json
import requests
import matplotlib.pyplot as plt
import seaborn as sns
import time
from matplotlib.ticker import MaxNLocator
from IPython.display import display, FileLink, Markdown, HTML
# Hexagonal Canvas Imports
import json
import math
import uuid
import urllib
from textwrap import dedent
from string import Template
from operator import itemgetter
# Manhattan Plot Imports
import matplotlib.patches as mpatches
import matplotlib.cm as cm
# Bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import HoverTool, CustomJS, ColumnDataSource, Span
from bokeh.layouts import layout, row, column, gridplot
from bokeh.palettes import all_palettes
output_notebook()
gene_list_input = '''IAH1
2510006D16RIK
WDR42A
ENTPD5
FBXO3
TRAP1
4732466D17RIK
OXSM
CRADD
ADHFE1
2410018G20RIK
PLEKHA7
1810044D09RIK
ZFYVE20
TMEM80
LASS2
GSTZ1
CDAN1
PSMC6
TMEM77
ASB9
YME1L1
ASF1A
TFAM
4921530L18RIK
TLN1
4933407N01RIK
ALDH8A1
39509
2310026E23RIK
NLRX1
2700046G09RIK
EI24
D4BWG0951E
STXBP2
IPP
CISD1
NFS1
GPR155
RAB11FIP2
LOC100044139
KDR
SEPHS2
EXOSC4
RILP
HOXA7
B3BP
AFAP1L1
TMEM86A
CNTD1
KLF12
1700123L14RIK
APOOL
FZD5
TGDS
AI316807
VPS13B
MCAT
OVOL1
D130020L05RIK
PARP16
ELL3
COQ10A
WBSCR18
PLSCR2
VAMP8
TLCD1
TCN2
GORASP1
A930041I02RIK
SFXN5
FBXL3
SF1
FBXL6
PKIG
BC038156
5730414N17RIK
CEP68
ATP6V1B2
UNC119B
HPN
OSGEPL1
RFESD
ZFP748
3110048L19RIK
PTTG1IP
FARS2
TMEM186
ADH5
4932432K03RIK
MRPL9
1700023B02RIK
ZC3H12C
ARSK
SCP2
BC016495
CDK5RAP1
ARSG
LIPT1
PRKACA
C330002I19RIK
ZRSR1
LYPLA1
SLC33A1
NSUN3
2410012H22RIK
4632404H12RIK
IFT122
LOC100048387
LOC100047292
2610036D13RIK
1110032A03RIK
WDR34
ITFG1
LYRM2
CLDN10
PRPF18
ALDH6A1
NME7
LYRM5
UFC1
RAB1
ATPAF1
9030420J04RIK
ABHD3
INTU
ADK
TFB1M
PCSK7
VLDLR
MRPL35
LRRC1
WDR24
KALRN
MYNN
UBIE
TMEM166
THTPA
1700034H14RIK
PSMC3IP
9230114K14RIK
MED14
LOC100047604
ATXN2
LOC623451
MYO6
5430437P03RIK
TMED4
TASP1
AP4S1
SMYD4
4921508D12RIK
PCMTD2
WDR20A
2410166I05RIK
DHRS1
UBE2E1
GNMT
2210016F16RIK
MPP7
MIPOL1
LOC674449
AGBL3
AI931714
COL4A4
TSR2
NOTUM
ANKRD42
NUPL2
RP23-195K8.6
ZFP148
ENSMUSG00000074286
LRRC19
MTMR14
GBE1
NUDT6
PHF7
ZDHHC5
5730403B10RIK
SIP1
NUDT12
GYS2
ESM1
SIPA1L1
SPTLC1
ZFP11
PSMB1
NPY
METTL7A
RIOK2
METTL8
MDH1
AW209491
2700038C09RIK
MGAT1
PAIP1
CD55
KLHDC4
2610019F03RIK
TOR1A
FKBPL
A530050D06RIK
HSD3B2
TNFSF5IP1
DOLPP1
ATAD3A
LIFR
LRRC61
FAH
SLC7A6OS
SLC25A39
SLC9A6
UBOX5
LOC100046168
7530414M10RIK
9630013D21RIK
D630023B12RIK
0610013E23RIK
4732435N03RIK
SLC25A40
ZFAND1
ACAA1A
CREBL2
VWCE
YARS2
4932438A13RIK
NOL7
CLCC1
2810432D09RIK
6530415H11RIK
PMPCB
NDUFV1
SIAE
PMS1
ARHGEF12
SLC30A6
TRPC2
CCDC16
LRRC40
POLRMT
TIMM44
LRRC44
KMO
DNAJC18
DNAJC19
DALRD3
DMXL1
CACNB4
NAT9
SMO
4833426J09RIK
4933403G14RIK
LOC100044400
2310068J16RIK
ACBD4
FGFR4
RNF167
ZCCHC3
1810049H13RIK
ZFP106
CNO
2610528K11RIK
TPMT
1200014M14RIK
BRI3
A930005H10RIK
ARHGAP18
C330018D20RIK
LOC100046254
SAT2
LRRC56
TM7SF3
AKR7A5
3110057O12RIK
ABHD11
HYI
TXNDC4
COX15
ZKSCAN1
SAC3D1
FN3K
PAICS
RHBDD3
PEX1
ABHD14A
NAP1L1
TMLHE
RBKS
NSMCE4A
TMBIM4
CHPT1
LOC100047214
4930432O21RIK
MTFR1
SYBL1
ACO1
ENY2
FECH
MOBKL2B
GAL3ST2
NDUFB6
USP34
GLO1
PROZ
2010309E21RIK
3110001I20RIK
PITPNC1
H2AFJ
LOC100041586
AFMID
GYK
RDH14
HIBCH
RBM39
FAHD1
RWDD3
ZFP655
TMEM30A
CPT1A
ZFP775
GADD45GIP1
ASCC1
LRRC8A
ANXA13
CABLES1
KLF1
MUT
1810019D21RIK
D730039F16RIK
2610528J11RIK
SLC25A16
2310042D19RIK
SBK1
C1D
CAT
DEFB29
RPS6KB1
ALDH1A3
TOMM70A
CLEC2H
2310009A05RIK
RQCD1
TRIM37
A230062G08RIK
ZFP650
1700001L05RIK
DHTKD1
ZFP787
NXT2
1110003E01RIK
TM2D2
AQP11
ZBTB44
ORC5L
LOC100047782
RABEPK
NR3C1
WDR89
GPHN
RPS6KA5
GK5
DDT
SRR
MAT2B
NAGLU
SCRN3
BPNT1
FBXO9
NEO1
SCYL1
FBXO8
2210011C24RIK
TRIM23
POLI
PGM2'''
# Enrichr library analysed by every visualization in this notebook
enrichr_library = 'ChEA_2016'

# One gene symbol per input line. Surrounding whitespace is trimmed and blank
# lines are dropped so that empty strings are never treated as genes.
genes = [x.strip() for x in gene_list_input.split('\n')]
genes = [x for x in genes if x]
# Enrichr API Function for Manhattan Plot and Bar Chart
# Takes a gene list and Enrichr libraries as input
def Enrichr_API(enrichr_gene_list, all_libraries):
    """Submit a gene list to Enrichr and fetch results for each library.

    Parameters:
        enrichr_gene_list: iterable of gene symbols; joined with newlines
            and uploaded through the `addList` endpoint.
        all_libraries: non-empty sequence of Enrichr library names.

    Returns:
        A list `[results_df, short_results_df, all_terms, all_pvalues,
        all_adjusted_pvalues, short_id]`. `results_df` holds every result
        row of the LAST library (plus a 'library' column containing the
        library name with underscores removed) and `short_results_df` its
        first 10 rows. The three `all_*` lists contain one list per library,
        taken from columns 1, 2 and 6 of the first 10 result rows.
        `short_id` is the `shortId` of the uploaded list, as a string.

    Raises:
        ValueError: if `all_libraries` is empty.
        Exception: if either Enrichr request returns a non-OK status.
    """
    if not all_libraries:
        # Without this guard the return statement would fail on unbound locals.
        raise ValueError('At least one Enrichr library is required')

    all_terms = []
    all_pvalues = []
    all_adjusted_pvalues = []

    # Upload the gene list once; the returned list id is reused for every
    # library instead of re-uploading the same genes on each iteration.
    payload = {
        'list': (None, '\n'.join(enrichr_gene_list)),
        'description': (None, '')
    }
    response = requests.post('http://maayanlab.cloud/Enrichr/addList', files=payload)
    if not response.ok:
        raise Exception('Error analyzing gene list')
    upload = json.loads(response.text)
    user_list_id = upload['userListId']
    short_id = upload["shortId"]

    ENRICHR_URL = 'http://maayanlab.cloud/Enrichr/enrich'
    query_string = '?userListId=%s&backgroundType=%s'

    for library_name in all_libraries:
        # Short pause before each query (kept from the original request pacing)
        time.sleep(0.5)
        response = requests.get(
            ENRICHR_URL + query_string % (user_list_id, library_name)
        )
        if not response.ok:
            raise Exception('Error fetching enrichment results')
        data = json.loads(response.text)

        short_results_df = pd.DataFrame(data[library_name][0:10])
        all_terms.append(list(short_results_df[1]))
        all_pvalues.append(list(short_results_df[2]))
        all_adjusted_pvalues.append(list(short_results_df[6]))

        results_df = pd.DataFrame(data[library_name])
        # adds library name to the data frame so the libraries can be distinguished
        results_df['library'] = library_name.replace('_', '')

    return [results_df, short_results_df, all_terms, all_pvalues, all_adjusted_pvalues, str(short_id)]
The scatter plot is organized so that similar gene sets are clustered together. The larger blue points represent significantly enriched terms - the darker the blue, the more significant the term and the smaller the p-value. The gray points are not significant.
Hovering over points will display the associated gene set name and the p-value. You may have to zoom in using the toolbar next to the plot in order to see details in densely-populated portions. Plots can also be downloaded as an svg using the save function on the toolbar.
For creating and comparing up to 9 scatter plots at once, use the standalone Scatter Plot Visualization Appyter.
# --- Scatter plot ---
# p-value threshold below which a term is treated as significant
significance_value = 0.05

# --- Bar chart ---
figure_file_format = ['png', 'svg']
output_file_name = 'Enrichr_results_bar'
color = 'lightskyblue'
# One output file name per requested image format
final_output_file_names = [
    f'{output_file_name}.{file_type}' for file_type in figure_file_format
]

# --- Hexagonal canvas ---
canvas_color = 'Blue'
num_hex_colored = 10

# --- Manhattan plot ---
manhattan_colors = ['#003f5c', '#7a5195', '#ef5675', '#ffa600']
# Scatter Plot Functions
def download_library(library_name):
    """Download the pre-processed scatter-plot data for one Enrichr library.

    Parameters:
        library_name: Enrichr library name; used as the CSV file name.

    Returns:
        `(genes, library_data, df)` where `genes` is the module-level input
        gene list, `library_data` is a list of `[name, genes_string]` pairs
        built from the 'Name' and 'Genes' columns, and `df` is the full
        DataFrame read from the CSV. If the CSV cannot be read, a message
        is displayed and `(-1, -1, -1)` is returned instead.
    """
    library_url = ('https://raw.githubusercontent.com/MaayanLab/Enrichr-Viz-Appyter/master/Enrichr-Processed-Library-Storage/Scatterplot/Libraries/'
                   + library_name + '.csv')
    try:
        df = pd.read_csv(library_url)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt / SystemExit
        # still propagate; any read or parse failure takes this path.
        display(Markdown("Failed to retrieve the selected pre-processed library."))
        return -1, -1, -1

    library_data = [list(pair) for pair in zip(df['Name'].tolist(), df['Genes'].tolist())]
    return genes, library_data, df
# Enrichment analysis
def get_library_iter(library_data):
    """Yield `(term, gene_list)` pairs from `[term, genes_string]` entries.

    The genes string of each entry is split on single spaces.
    """
    for entry in library_data:
        yield entry[0], entry[1].split(' ')
def get_enrichment_results(genes, library_data):
    """Run `enrich_crisp` for `genes` against the library and return the
    results as a list ordered by ascending p-value.
    """
    ranked = list(enrich_crisp(genes, get_library_iter(library_data), 20000, True))
    ranked.sort(key=lambda result: result[1].pvalue)
    return ranked
def get_pvalue(row, unzipped_results, all_results):
    """Return the p-value of the enrichment result matching `row['Name']`.

    `unzipped_results[0]` holds the term names in the same order as
    `all_results`; terms without an enrichment result get a p-value of 1.
    """
    term_names = list(unzipped_results[0])
    try:
        position = term_names.index(row['Name'])
    except ValueError:
        # Term has no enrichment result
        return 1
    return all_results[position][1].pvalue
# Call enrichment results and return a plot and dataframe for Scatter Plot
def get_plot(library_name):
    """Build the Bokeh scatter plot for one pre-processed library.

    Returns `(plot, df)`, or `(-1, -1)` when the library could not be
    downloaded. When enrichment produced results, `df` gains a 'p value'
    column and significant terms are drawn larger and in shades of blue;
    otherwise every point is drawn small and gray.
    """
    query_genes, library_data, df = download_library(library_name)

    # download_library reports failure through -1 placeholders
    if query_genes == -1:
        return -1, -1

    all_results = get_enrichment_results(query_genes, library_data)
    unzipped_results = list(zip(*all_results))

    if len(all_results) != 0:
        # attach each term's p-value to the library dataframe
        df['p value'] = df.apply(
            lambda entry: get_pvalue(entry, unzipped_results, all_results), axis=1)

        # color scale: darker blue for smaller p-values
        cmap = mpl.cm.get_cmap('Blues_r')
        norm = colors.Normalize(vmin=df['p value'].min(), vmax=significance_value * 2)

        my_colors = []
        my_sizes = []
        for p_value in df['p value']:
            if p_value < significance_value:
                my_colors.append(mpl.colors.to_hex(cmap(norm(p_value))))
                my_sizes.append(12)
            else:
                my_colors.append('#808080')
                my_sizes.append(6)

        point_data = dict(
            x=df['x'],
            y=df['y'],
            gene_set=df['Name'],
            p_value=df['p value'],
            colors=my_colors,
            sizes=my_sizes,
        )
        tooltip_html = """
<div style="margin: 10">
<div style="margin: 0 auto; width:200px;">
<span style="font-size: 12px; font-weight: bold;">Gene Set:</span>
<span style="font-size: 12px">@gene_set</span>
<span style="font-size: 12px; font-weight: bold;">p-value:</span>
<span style="font-size: 12px">@p_value</span>
</div>
</div>
"""
    else:
        print("There are no enriched terms for your input gene set in the ", library_name, " library.")
        point_count = len(df.index)
        point_data = dict(
            x=df['x'],
            y=df['y'],
            gene_set=df['Name'],
            colors=['#808080'] * point_count,
            sizes=[6] * point_count,
        )
        tooltip_html = """
<div style="margin: 10">
<div style="margin: 0 auto; width:200px;">
<span style="font-size: 12px; font-weight: bold;">Gene Set:</span>
<span style="font-size: 12px">@gene_set</span>
</div>
</div>
"""

    source = ColumnDataSource(data=point_data)
    hover_emb = HoverTool(names=["df"], tooltips=tooltip_html)

    plot_emb = figure(
        plot_width=700,
        plot_height=700,
        tools=[hover_emb, 'pan', 'wheel_zoom', 'reset', 'save'],
    )

    # hide tick marks and tick labels on both axes
    for axis in (plot_emb.xaxis, plot_emb.yaxis):
        axis.major_tick_line_color = None
        axis.minor_tick_line_color = None
        axis.major_label_text_font_size = '0pt'

    plot_emb.circle('x', 'y', size='sizes', alpha=0.7, line_alpha=0,
                    line_width=0.01, source=source, fill_color='colors', name="df")
    plot_emb.output_backend = "svg"
    return plot_emb, df
# Display Scatter Plot
plot, df = get_plot(enrichr_library)
# get_plot returns -1 in place of the figure when the library was unavailable
scatter_unavailable = plot == -1
if scatter_unavailable:
    display(Markdown("Unable to create scatter plot visualization."))
else:
    show(plot)
The bar chart shows the top 10 enriched terms in the chosen library, along with their corresponding p-values. Colored bars correspond to terms with significant p-values (<0.05). An asterisk (*) next to a p-value indicates the term also has a significant adjusted p-value (<0.05).
The bar chart can be downloaded as an image using the links below the figure.
For creating customized bar charts for multiple libraries at once, use the standalone Bar Chart Appyter.
# Bar Chart Functions
def enrichr_figure(all_terms, all_pvalues, all_adjusted_pvalues, plot_names, all_libraries, bar_color):
    """Draw, save, and show the bar chart for the first library's top terms.

    Parameters:
        all_terms, all_pvalues, all_adjusted_pvalues: lists with one list
            per library; only index 0 is plotted.
        plot_names: output file names; the figure is saved once per name
            so that the download links shown below the chart resolve.
        all_libraries: library names; index 0 becomes the plot title
            (underscores replaced by spaces).
        bar_color: fill color for bars whose p-value is below 0.05.
    """
    # Bar colors: non-significant bars are light grey, unless the chosen
    # color is itself light grey, in which case they are white with a border.
    if bar_color != 'lightgrey':
        bar_color_not_sig = 'lightgrey'
        edgecolor = None
        linewidth = 0
    else:
        bar_color_not_sig = 'white'
        edgecolor = 'black'
        linewidth = 1

    plt.figure(figsize=(24, 12))

    i = 0
    neg_log_pvalues = np.log10(all_pvalues[i]) * -1
    bar_colors = [bar_color if (x < 0.05) else bar_color_not_sig for x in all_pvalues[i]]
    ax = sns.barplot(x=neg_log_pvalues, y=all_terms[i], palette=bar_colors,
                     edgecolor=edgecolor, linewidth=linewidth)
    # Term names are written inside the plot area, so the y axis is hidden
    ax.get_yaxis().set_visible(False)
    ax.set_title(all_libraries[i].replace('_', ' '), fontsize=26)
    ax.set_xlabel('−log₁₀(p‐value)', fontsize=25)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.tick_params(axis='x', which='major', labelsize=20)
    # Integer ticks are useless when every bar is shorter than 1
    if max(neg_log_pvalues) < 1:
        ax.xaxis.set_ticks(np.arange(0, max(neg_log_pvalues), 0.1))

    for ii, term in enumerate(all_terms[i]):
        pvalue_text = str(np.format_float_scientific(all_pvalues[i][ii], precision=2))
        # An asterisk marks terms whose adjusted p-value is also below 0.05
        separator = ' *' if all_adjusted_pvalues[i][ii] < 0.05 else ' '
        annot = separator.join([term, pvalue_text])

        title_start = max(ax.get_xlim()) / 200
        ax.text(title_start, ii, annot, ha='left', wrap=True, fontsize=26)

    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    # Save results under every requested file name (previously plot_names was
    # accepted but never used, leaving the download links without a file)
    for plot_name in plot_names:
        plt.savefig(plot_name, bbox_inches='tight')

    # Show plot
    plt.show()
# Display Bar Chart
results = Enrichr_API(genes, [enrichr_library])
enrichr_figure(
    results[2],                # terms
    results[3],                # p-values
    results[4],                # adjusted p-values
    final_output_file_names,
    [enrichr_library],
    color,
)

# Download Bar Chart: one link per requested image format
for file_type, file in zip(figure_file_format, final_output_file_names):
    display(FileLink(file, result_html_prefix=f'Download {file_type}: '))
Each hexagon in the hexagonal canvas plot represents one gene set from the selected library. The hexagons are colored based on the Jaccard similarity index between the input gene list and the gene set represented by the hexagon, with brighter color indicating higher similarity. Hexagons that are grouped together represent similar gene sets.
Hovering over a hexagon will display the name of the gene set and the associated similarity index.
For creating customized hexagonal canvas plots for up to two libraries at once, use the standalone Hexagonal Canvas Appyter.
# Hexagonal Canvas Functions
def library_processing():
    """Download the annealed library and score each gene set against `genes`.

    Reads the pre-annealed text file for the module-level `enrichr_library`.
    Each line is split on a double tab into a name and a tab-separated gene
    field; the last item of the gene field is dropped (presumably the
    trailing newline token - confirm against the file format).

    Returns:
        `(anneal_list, x_dimension, y_dimension)` where `anneal_list` is a
        list of `(name, gene_list, jaccard_index)` tuples in file order, the
        index being rounded to 5 decimals, and both dimensions equal
        ceil(sqrt(number of gene sets)). On download failure a message is
        displayed and `([], -1, -1)` is returned.
    """
    # `import urllib` alone does not guarantee the `request` submodule is loaded
    import urllib.request

    library_url = ('https://raw.githubusercontent.com/MaayanLab/Enrichr-Viz-Appyter/master/Enrichr-Processed-Library-Storage/Annealing/Annealed-Libraries/'
                   + enrichr_library + '.txt')
    raw_library_data = []
    try:
        with urllib.request.urlopen(library_url) as f:
            for line in f.readlines():
                raw_library_data.append(line.decode('utf-8').split("\t\t"))
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt / SystemExit propagate
        display(Markdown("Failed to retrieve the selected annealed library."))
        return [], -1, -1

    name = []
    gene_list = []
    for entry in raw_library_data:
        if len(entry) < 2:
            # Blank or malformed line without a gene field
            continue
        name.append(entry[0])
        gene_list.append(entry[1].split('\t')[:-1])

    # Jaccard index = |intersection| / |union|, computed on sets so that
    # duplicates do not distort the score and membership tests are O(1).
    query_genes = set(genes)
    indices = []
    for members in gene_list:
        member_set = set(members)
        union_size = len(member_set | query_genes)
        index = len(member_set & query_genes) / union_size if union_size else 0
        indices.append(round(index, 5))

    # determine the dimensions of the (square) canvas
    x_dimension = math.ceil(math.sqrt(len(indices)))
    y_dimension = math.ceil(math.sqrt(len(indices)))

    anneal_list = list(zip(name, gene_list, indices))
    return anneal_list, x_dimension, y_dimension
def unzip_list(anneal_list):
    """Transpose a list of equal-length tuples into a list of column tuples."""
    return list(zip(*anneal_list))
# define a list of colors for the hexagonal canvas
def get_color(anneal_list, cut_off_value, x_dimension, y_dimension):
    """Compute one hex color string per canvas cell.

    Parameters:
        anneal_list: list of `(name, gene_list, index)` tuples.
        cut_off_value: minimum index for a hexagon to be drawn in
            `canvas_color`; the sentinel 2.0 means "use the index of the
            `num_hex_colored`-th highest-scoring gene set".
        x_dimension, y_dimension: canvas size; cells beyond the end of
            `anneal_list` are white.

    Returns:
        `(color_list, max_index, cut_off_value)` - the colors, the largest
        index in `anneal_list`, and the cut-off actually applied.
    """
    total_cells = x_dimension * y_dimension
    if not anneal_list:
        # Nothing to score: every cell is blank
        return ["#FFFFFF"] * total_cells, 0, cut_off_value

    # Deal with cut_off_value (only color the most significant hexagons)
    if cut_off_value == 2.0:
        sort_list = sorted(anneal_list, key=itemgetter(2), reverse=True)
        # Clamp so libraries with fewer gene sets than num_hex_colored
        # do not raise IndexError
        rank = min(int(num_hex_colored), len(sort_list)) - 1
        cut_off_value = sort_list[rank][2]

    # Per-channel amounts subtracted from white at full similarity;
    # an unrecognized canvas_color subtracts nothing.
    channel_weights = {
        'Red': (0.0, 0.8, 0.8),
        'Yellow': (0.0, 0.3, 1.0),
        'Purple': (0.5, 1.0, 0.0),
        'Pink': (0.0, 1.0, 0.2),
        'Orange': (0.0, 0.45, 1.0),
        'Green': (1.0, 0.0, 1.0),
        'Blue': (1.0, 0.9, 0.0),
    }
    r_value, g_value, b_value = channel_weights.get(canvas_color, (0, 0, 0))

    index_values = unzip_list(anneal_list)[2]
    max_index = max(index_values)
    # Scale to [0, 1] relative to the best score (left as-is when all zero)
    if max_index != 0:
        scaled_list = [value / max_index for value in index_values]
    else:
        scaled_list = index_values

    color_list = []
    for i in range(total_cells):
        if i >= len(index_values):
            # Padding cell with no gene set behind it
            color_list.append("#FFFFFF")
        elif float(index_values[i]) >= cut_off_value:
            color_list.append(mpl.colors.to_hex((1 - scaled_list[i] * r_value,
                                                 1 - scaled_list[i] * g_value,
                                                 1 - scaled_list[i] * b_value)))
        else:
            # Below the cut-off: grayscale proportional to similarity
            color_list.append(mpl.colors.to_hex((1 - scaled_list[i],
                                                 1 - scaled_list[i],
                                                 1 - scaled_list[i])))
    return color_list, max_index, cut_off_value
def init_chart():
    """Register the hexagonal-canvas drawing routine as a RequireJS module.

    Displays a script tag for require.js, then a script that configures the
    d3 / d3-hexbin paths and defines a module under a freshly generated id.
    The module exports `function(figure_id, numA, numB, colorList,
    libraryList, indices)`, which draws a numA-column by numB-row hexagon
    grid into the given SVG element. Returns the generated module id.
    """
    chart_id = f'mychart-{uuid.uuid4()}'

    loader_tag = HTML('<script src="/static/components/requirejs/require.js"></script>')
    display(loader_tag)

    module_template = Template(dedent('''
<script>
require.config({
paths: {
'd3': 'https://cdnjs.cloudflare.com/ajax/libs/d3/5.16.0/d3.min',
'd3-hexbin': 'https://d3js.org/d3-hexbin.v0.2.min',
},
shim: {
'd3-hexbin': ['d3']
}
})
// If we configure mychart via url, we can eliminate this define here
define($chart_id, ['d3', 'd3-hexbin'], function(d3, d3_hexbin) {
return function (figure_id, numA, numB, colorList, libraryList, indices) {
var margin = {top: 50, right: 20, bottom: 20, left: 50},
width = 850 - margin.left - margin.right,
height = 350 - margin.top - margin.bottom;
// append the svg object to the body of the page
var svG = d3.select('#' + figure_id)
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom)
.append("g")
.attr("transform", "translate(" + margin.left + "," + margin.top + ")");
//The number of columns and rows of the heatmap
var MapColumns = numA,
MapRows = numB;
//The maximum radius the hexagons can have to still fit the screen
var hexRadius = d3.min([width/((MapColumns + 0.5) * Math.sqrt(3)), height/((MapRows + 1/3) * 1.5)]);
//Calculate the center position of each hexagon
var points = [];
for (var i = 0; i < MapRows; i++) {
for (var j = 0; j < MapColumns; j++) {
var x = hexRadius * j * Math.sqrt(3)
//Offset each uneven row by half of a "hex-width" to the right
if(i%2 === 1) x += (hexRadius * Math.sqrt(3))/2
var y = hexRadius * i * 1.5
points.push([x,y])
}
}
//Set the hexagon radius
var hexbin = d3_hexbin.hexbin().radius(hexRadius);
svG.append("g")
.selectAll(".hexagon")
.data(hexbin(points))
.enter().append("path")
.attr("class", "hexagon")
.attr("d", function (d) {
return "M" + d.x + "," + d.y + hexbin.hexagon();
})
.attr("stroke", "black")
.attr("stroke-width", "1px")
.style("fill", function (d,i) { return colorList[i]; })
.on("mouseover", mover)
.on("mouseout", mout)
.append("svg:title")
.text(function(d,i) { return libraryList[i].concat(" ").concat(indices[i]); });
// Mouseover function
function mover(d) {
d3.select(this)
.transition().duration(10)
.style("fill-opacity", 0.3)
};
// Mouseout function
function mout(d) {
d3.select(this)
.transition().duration(10)
.style("fill-opacity", 1)
};
}
})
</script>
'''))
    # repr() turns the id into a quoted literal usable inside the script
    module_script = module_template.substitute(chart_id=repr(chart_id))
    display(HTML(module_script))
    return chart_id
def Canvas(numA, numB, colorList, libraryList, indices):
    """Render the hexagonal canvas into a new SVG element.

    Registers the chart module through `init_chart`, then displays an empty
    `<svg>` with a fresh id and a script that calls the module with the
    grid size (numA columns, numB rows), per-hexagon colors, gene set
    names, and index values. Arguments are embedded via `repr`.
    """
    chart_id = init_chart()
    figure_id = 'fig-' + str(uuid.uuid4())

    invocation = Template(dedent('''
<svg id=$figure_id></svg>
<script>
require([$chart_id], function(mychart) {
mychart($figure_id, $numA, $numB, $colorList, $libraryList, $indices)
})
</script>
'''))
    substitutions = {
        'chart_id': repr(chart_id),
        'figure_id': repr(figure_id),
        'numA': repr(numA),
        'numB': repr(numB),
        'colorList': repr(colorList),
        'libraryList': repr(libraryList),
        'indices': repr(indices),
    }
    display(HTML(invocation.substitute(substitutions)))
# Display Hexagonal Canvas
anneal_list, x_dimension, y_dimension = library_processing()

# library_processing reports a failed download with negative dimensions
if x_dimension >= 0:
    # 2.0 asks get_color to derive the cut-off from num_hex_colored
    color_list, scaling_factor, cut_off_value = get_color(anneal_list, 2.0, x_dimension, y_dimension)
    unzipped_anneal_list = unzip_list(anneal_list)
    Canvas(
        x_dimension,
        y_dimension,
        color_list,
        list(unzipped_anneal_list[0]),   # gene set names
        list(unzipped_anneal_list[2]),   # similarity indices
    )
else:
    display(Markdown("Unable to create hexagonal canvas visualization."))
In the Manhattan plot below, each line on the x-axis denotes a single gene set from the selected library, while the y-axis measures the −log₁₀(p‐value) for each gene set.
Hovering over a point will display the name of the gene set and the associated p-value. You can also zoom, pan, and save the plot as an svg using the toolbar on the right.
For creating customized static and dynamic Manhattan plots to compare multiple libraries at once, use the standalone Manhattan Plot Appyter.
# Manhattan Plot Functions
# Processes Enrichr data for Manhattan plots
def get_data(genes):
    """Fetch Enrichr results for `enrichr_library` and shape them for plotting.

    Returns `(sorted_data, groups)`: a DataFrame with columns 'Gene Set',
    '-log(p value)' and 'Library', ordered by gene set name, and the same
    frame grouped by 'Library'.
    """
    # Empty frame that fixes the column layout before results are appended
    empty_frame = pd.DataFrame({"Gene Set": [], "-log(p value)": [], "Library": []})

    # full result table for the selected library
    results_df = Enrichr_API(genes, [enrichr_library])[0]
    terms = list(results_df[1])
    neg_log_pvalues = np.log10(list(results_df[2])) * -1
    libraries = list(results_df['library'])

    # order rows by term name, then split back into columns
    ordered_rows = sorted(zip(terms, neg_log_pvalues, libraries), key=itemgetter(0))
    columns = list(zip(*ordered_rows))
    data = pd.DataFrame({
        "Gene Set": columns[0],
        "-log(p value)": columns[1],
        "Library": columns[2],
    })

    sorted_data = pd.concat([empty_frame, data])
    # group data by library
    return sorted_data, sorted_data.groupby("Library")
# Create Manhattan Plots
def manhattan(sorted_data):
    """Build the Bokeh Manhattan plot from the frame produced by `get_data`.

    Gene set names form a categorical x axis (labels hidden) and the
    '-log(p value)' column is the y value. Each library's points use the
    next color in `manhattan_colors`, wrapping around at the end.
    """
    # one sub-frame per library (library names are stored without underscores)
    library_frames = [
        sorted_data[sorted_data['Library'] == library_name.replace('_', '')]
        for library_name in [enrichr_library]
    ]

    xaxis_terms = []
    for frame in library_frames:
        xaxis_terms.extend(frame["Gene Set"].values.tolist())

    # define the output figure and the features we want
    p = figure(x_range=xaxis_terms, plot_height=300, plot_width=750,
               tools='pan, box_zoom, hover, reset, save')

    renderers = []
    color_index = 0
    for frame in library_frames:
        # wrap around once the palette is exhausted
        if color_index >= len(manhattan_colors):
            color_index = 0
        shade = manhattan_colors[color_index]

        log_values = frame["-log(p value)"].values.tolist()
        # recover the actual p value from -log(p value) for the tooltip
        actual_pvalues = ["{:.5e}".format(10 ** (-1 * log_value)) for log_value in log_values]

        source = ColumnDataSource(data=dict(
            x=frame["Gene Set"].values.tolist(),
            y=log_values,
            pvalue=actual_pvalues,
        ))

        renderers.append(p.circle(x='x', y='y', size=5, fill_color=shade,
                                  line_color=shade, line_width=1, source=source))
        color_index += 1

    p.background_fill_color = 'white'
    p.xaxis.major_tick_line_color = None
    p.xaxis.major_label_text_font_size = '0pt'
    p.y_range.start = 0
    p.yaxis.axis_label = '-log(p value)'
    p.hover.tooltips = [
        ("Gene Set", "@x"),
        ("p value", "@pvalue"),
    ]
    p.output_backend = "svg"

    return p
# Display Manhattan Plot
sorted_data, groups = get_data(genes)
manhattan_plot = manhattan(sorted_data)
show(manhattan_plot)
The volcano plot shows the significance of each gene set from the selected library versus its odds ratio. Each point represents a single gene set; the x-axis measures the odds ratio calculated for the gene set (ranging from 0 to infinity), while the y-axis gives the -log(p-value) of the gene set.
Larger blue points represent significant terms (p-value < 0.05); smaller gray points represent non-significant terms. The darker the blue color of a point, the more significant it is.
Hovering over points will display the corresponding gene set term, the p-value, and the odds ratio. You may have to zoom in using the toolbar next to the plot in order to see details in densely-populated portions. Plots can also be downloaded as an svg using the save function on the toolbar.
def get_library(lib_name):
    '''
    Fetch a library from the Enrichr geneSetLibrary endpoint and return a
    dictionary mapping each term to the keys of its gene entry. An empty
    dictionary is returned when the response status is not 200.
    '''
    endpoint = 'https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=json&libraryName='
    resp = requests.get(endpoint + lib_name)
    if resp.status_code != 200:
        return {}
    terms = resp.json()[lib_name]['terms']
    return {term: members.keys() for term, members in terms.items()}
def volcano_plot(library_name):
    '''
    Make volcano plot of odds ratio vs. significance for input library.

    Parameters:
        library_name: Enrichr library to fetch and test the module-level
            `genes` list against.

    Returns:
        A Bokeh figure with odds ratio on the x axis and -log10(p-value) on
        the y axis, or None (after printing a message) when the library
        could not be fetched. Terms with p-value < 0.05 are drawn larger
        and in shades of blue; all other terms are small and gray.
    '''
    # Use the requested library rather than the global `enrichr_library`,
    # so the parameter is honored.
    lib = get_library(library_name)
    if not lib:
        print('Failed to access library, please try again later.')
        return

    # NOTE(review): `lib` is passed as a dict here, whereas the scatter plot
    # passes an iterator of (term, gene_set) pairs - confirm enrich_crisp
    # accepts both forms.
    enrich_results = enrich_crisp(genes, lib, 20000, True)

    res_df = pd.DataFrame(
        [[term, res.pvalue, res.odds_ratio] for (term, res) in enrich_results],
        columns=['term', 'pvalue', 'odds_ratio']
    )
    res_df['log_pval'] = np.negative(np.log10(res_df['pvalue']))

    # Color scale: darker blue for smaller p-values
    cmap = mpl.cm.get_cmap('Blues_r')
    cnorm = colors.Normalize(vmin=res_df['pvalue'].min(), vmax=0.1)

    my_colors = []
    my_sizes = []
    for row in res_df.itertuples():
        if row.pvalue < 0.05:
            my_colors.append(mpl.colors.to_hex(cmap(cnorm(row.pvalue))))
            my_sizes.append(12)
        else:
            my_colors.append('#808080')
            my_sizes.append(6)

    source = ColumnDataSource(
        data=dict(
            x=res_df['odds_ratio'],
            y=res_df['log_pval'],
            gene_set=res_df['term'],
            p_value=res_df['pvalue'],
            odds_r=res_df['odds_ratio'],
            colors=my_colors,
            sizes=my_sizes
        )
    )

    hover_emb = HoverTool(
        names=["res_df"],
        tooltips="""
<div style="margin: 10">
<div style="margin: 0 auto; width:200px;">
<span style="font-size: 12px; font-weight: bold;">Term:</span>
<span style="font-size: 12px">@gene_set<br></span>
<span style="font-size: 12px; font-weight: bold;">P-Value:</span>
<span style="font-size: 12px">@p_value<br></span>
<span style="font-size: 12px; font-weight: bold;">Odds Ratio:</span>
<span style="font-size: 12px">@odds_r<br></span>
</div>
</div>
"""
    )

    tools_emb = [hover_emb, 'pan', 'wheel_zoom', 'reset', 'save']
    plot_emb = figure(
        plot_width=700,
        plot_height=700,
        tools=tools_emb
    )
    plot_emb.circle(
        'x', 'y', size='sizes',
        alpha=0.7, line_alpha=0,
        line_width=0.01, source=source,
        fill_color='colors', name="res_df"
    )
    plot_emb.xaxis.axis_label = "Odds Ratio"
    plot_emb.yaxis.axis_label = "-log10(p-value)"
    plot_emb.output_backend = "svg"

    return plot_emb
# Display Volcano Plot
# Stored under its own name so the scatter plot's `plot` variable is not
# overwritten by the volcano figure.
volcano = volcano_plot(enrichr_library)
# volcano_plot returns None when the library could not be fetched
if volcano is not None:
    show(volcano)
A downloadable table displaying the names, p-values, and q-values of significant terms in the selected library.
# Output a table of significant p-values and q-values
def get_qvalues(df):
    """Return, for each entry of df['Name'], the column-6 value of the
    matching row in the Enrichr results table (`results[0]`, indexed by
    its term column 1).
    """
    enrichr_table = pd.DataFrame(results[0]).set_index(1)
    return [enrichr_table.loc[name][6] for name in df['Name'].to_list()]
def create_download_link(df, title = "Download CSV file of this table", filename = "data.csv"):
    """Return an HTML anchor that downloads `df` as a CSV file.

    The CSV (without the index) is base64-encoded and embedded in the
    link as a data URI; `title` is the link text and `filename` the
    suggested download name.
    """
    csv_text = df.to_csv(index=False)
    payload = base64.b64encode(csv_text.encode()).decode()
    anchor = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    return HTML(anchor.format(payload=payload, title=title, filename=filename))
# `df` is only a DataFrame when the scatter-plot library was loaded
# (get_plot returns -1 in its place otherwise), and it only has a 'p value'
# column when enrichment produced results. Checking `df` directly avoids
# calling `.columns` on the -1 placeholder.
if isinstance(df, pd.DataFrame) and 'p value' in df.columns:
    sorted_df = df.sort_values(by = ['p value'])
    filtered_df = sorted_df[sorted_df['p value'] <= significance_value].reset_index()
    filtered_df['q value'] = get_qvalues(filtered_df)

    # Only show the table and download link when something is significant
    if len(filtered_df) != 0:
        table_columns = ['Name', 'p value', 'q value']
        display(HTML(f"<strong>Table of significant p-values for {enrichr_library.replace('_', ' ')}</strong>"))
        display(HTML(filtered_df[table_columns].to_html(index = False)))
        display(create_download_link(filtered_df[table_columns]))
| Name | p value | q value |
|---|---|---|
| E2F1 18555785 ChIP-Seq MESCs Mouse | 6.060960e-13 | 3.716090e-10 |
| JARID1A 20064375 ChIP-Seq MESCs Mouse | 5.352762e-08 | 1.640762e-05 |
| PPARA 22158963 ChIP-Seq LIVER Mouse | 9.164746e-07 | 1.875273e-04 |
| ZFX 18555785 ChIP-Seq MESCs Mouse | 3.744051e-06 | 5.772505e-04 |
| NELFA 20434984 ChIP-Seq ESCs Mouse | 7.518581e-06 | 9.246285e-04 |
| RXR 22158963 ChIP-Seq LIVER Mouse | 1.462445e-05 | 1.406845e-03 |
| SRF 21415370 ChIP-Seq HL-1 Mouse | 1.603250e-05 | 1.406845e-03 |
| MYC 18358816 ChIP-ChIP MESCs Mouse | 4.885044e-05 | 3.773184e-03 |
| TRIM28 19339689 ChIP-ChIP MESCs Mouse | 1.798112e-04 | 1.235035e-02 |
| ESRRB 18555785 ChIP-Seq MESCs Mouse | 4.815307e-04 | 2.965336e-02 |
| ERG 20887958 ChIP-Seq HPC-7 Mouse | 6.274334e-04 | 3.521191e-02 |
| MYC 18555785 ChIP-Seq MESCs Mouse | 1.142352e-03 | 5.692329e-02 |
| PPARG 23326641 ChIP-Seq C3H10T1-2 Mouse | 1.205655e-03 | 5.692329e-02 |
| GATA4 21415370 ChIP-Seq HL-1 Mouse | 3.759397e-03 | 1.661201e-01 |
| TAF7L 23326641 ChIP-Seq C3H10T1-2 Mouse | 4.114736e-03 | 1.688551e-01 |
| MYC 19030024 ChIP-ChIP MESCs Mouse | 5.143164e-03 | 1.970175e-01 |
| TBP 23326641 ChIP-Seq C3H10T1-2 Mouse | 5.432506e-03 | 1.970175e-01 |
| YY1 23942234 ChIP-Seq MYOBLASTS AND MYOTUBES Mouse | 6.784255e-03 | 2.329321e-01 |
| ZFP42 18358816 ChIP-ChIP MESCs Mouse | 7.921552e-03 | 2.512311e-01 |
| PDX1 19855005 ChIP-ChIP MIN6 Mouse | 8.172982e-03 | 2.512311e-01 |
| MYCN 18555785 ChIP-Seq MESCs Mouse | 1.249324e-02 | 3.687142e-01 |
| CREM 20920259 ChIP-Seq GC1-SPG Mouse | 1.465862e-02 | 3.969904e-01 |
| GATA1 22383799 ChIP-Seq G1ME Mouse | 1.473669e-02 | 3.969904e-01 |
| SPI1 22790984 ChIP-Seq ERYTHROLEUKEMIA Mouse | 1.586868e-02 | 3.983796e-01 |
| MYC 19079543 ChIP-ChIP MESCs Mouse | 1.609528e-02 | 3.983796e-01 |
| EKLF 21900194 ChIP-Seq ERYTHROCYTE Mouse | 1.710874e-02 | 4.068736e-01 |
| THAP11 20581084 ChIP-Seq MESCs Mouse | 2.187233e-02 | 5.000593e-01 |
| HOXB4 20404135 ChIP-ChIP EML Mouse | 2.398898e-02 | 5.188658e-01 |
| CNOT3 19339689 ChIP-ChIP MESCs Mouse | 2.429504e-02 | 5.188658e-01 |
| HCFC1 20581084 ChIP-Seq MESCs Mouse | 3.085746e-02 | 6.140645e-01 |
| FOXO3 22982991 ChIP-Seq MACROPHAGES Mouse | 3.168178e-02 | 6.140645e-01 |
| FOXO1 23066095 ChIP-Seq LIVER Mouse | 3.202325e-02 | 6.140645e-01 |
| PPARG 19300518 ChIP-PET 3T3-L1 Mouse | 3.385716e-02 | 6.277320e-01 |
| ESR1 17901129 ChIP-ChIP LIVER Mouse | 4.204529e-02 | 7.486599e-01 |
| TCFAP2C 20176728 ChIP-ChIP TROPHOBLAST STEM CELLS Mouse | 4.219204e-02 | 7.486599e-01 |
| TBX5 21415370 ChIP-Seq HL-1 Mouse | 4.731004e-02 | 8.159190e-01 |
# Get complete enrichment analysis results from Enrichr
# results[5] is the short id of the uploaded gene list. The link uses the
# same maayanlab.cloud host as the API calls elsewhere in this notebook.
url = 'https://maayanlab.cloud/Enrichr/enrich?dataset=' + results[5]
display(HTML(f'<span><a href="{url}">Access the complete enrichment analysis on the Enrichr website. </a></span>'))