# Scatter Plot Imports
from maayanlab_bioinformatics.enrichment import enrich_crisp
import matplotlib as mpl
import matplotlib.colors as colors
import base64

# Bar Chart Imports
import pandas as pd 
import numpy as np
import json
import requests
import matplotlib.pyplot as plt
import seaborn as sns
import time
from matplotlib.ticker import MaxNLocator
from IPython.display import display, FileLink, Markdown, HTML

# Hexagonal Canvas Imports
import json
import math
import uuid
import urllib
from textwrap import dedent
from string import Template
from operator import itemgetter

# Manhattan Plot Imports
import matplotlib.patches as mpatches
import matplotlib.cm as cm

# Bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import HoverTool, CustomJS, ColumnDataSource, Span
from bokeh.layouts import layout, row, column, gridplot
from bokeh.palettes import all_palettes
output_notebook()


gene_list_input = '''IAH1
2510006D16RIK
WDR42A
ENTPD5
FBXO3
TRAP1
4732466D17RIK
OXSM
CRADD
ADHFE1
2410018G20RIK
PLEKHA7
1810044D09RIK
ZFYVE20
TMEM80
LASS2
GSTZ1
CDAN1
PSMC6
TMEM77
ASB9
YME1L1
ASF1A
TFAM
4921530L18RIK
TLN1
4933407N01RIK
ALDH8A1
39509
2310026E23RIK
NLRX1
2700046G09RIK
EI24
D4BWG0951E
STXBP2
IPP
CISD1
NFS1
GPR155
RAB11FIP2
LOC100044139
KDR
SEPHS2
EXOSC4
RILP
HOXA7
B3BP
AFAP1L1
TMEM86A
CNTD1
KLF12
1700123L14RIK
APOOL
FZD5
TGDS
AI316807
VPS13B
MCAT
OVOL1
D130020L05RIK
PARP16
ELL3
COQ10A
WBSCR18
PLSCR2
VAMP8
TLCD1
TCN2
GORASP1
A930041I02RIK
SFXN5
FBXL3
SF1
FBXL6
PKIG
BC038156
5730414N17RIK
CEP68
ATP6V1B2
UNC119B
HPN
OSGEPL1
RFESD
ZFP748
3110048L19RIK
PTTG1IP
FARS2
TMEM186
ADH5
4932432K03RIK
MRPL9
1700023B02RIK
ZC3H12C
ARSK
SCP2
BC016495
CDK5RAP1
ARSG
LIPT1
PRKACA
C330002I19RIK
ZRSR1
LYPLA1
SLC33A1
NSUN3
2410012H22RIK
4632404H12RIK
IFT122
LOC100048387
LOC100047292
2610036D13RIK
1110032A03RIK
WDR34
ITFG1
LYRM2
CLDN10
PRPF18
ALDH6A1
NME7
LYRM5
UFC1
RAB1
ATPAF1
9030420J04RIK
ABHD3
INTU
ADK
TFB1M
PCSK7
VLDLR
MRPL35
LRRC1
WDR24
KALRN
MYNN
UBIE
TMEM166
THTPA
1700034H14RIK
PSMC3IP
9230114K14RIK
MED14
LOC100047604
ATXN2
LOC623451
MYO6
5430437P03RIK
TMED4
TASP1
AP4S1
SMYD4
4921508D12RIK
PCMTD2
WDR20A
2410166I05RIK
DHRS1
UBE2E1
GNMT
2210016F16RIK
MPP7
MIPOL1
LOC674449
AGBL3
AI931714
COL4A4
TSR2
NOTUM
ANKRD42
NUPL2
RP23-195K8.6
ZFP148
ENSMUSG00000074286
LRRC19
MTMR14
GBE1
NUDT6
PHF7
ZDHHC5
5730403B10RIK
SIP1
NUDT12
GYS2
ESM1
SIPA1L1
SPTLC1
ZFP11
PSMB1
NPY
METTL7A
RIOK2
METTL8
MDH1
AW209491
2700038C09RIK
MGAT1
PAIP1
CD55
KLHDC4
2610019F03RIK
TOR1A
FKBPL
A530050D06RIK
HSD3B2
TNFSF5IP1
DOLPP1
ATAD3A
LIFR
LRRC61
FAH
SLC7A6OS
SLC25A39
SLC9A6
UBOX5
LOC100046168
7530414M10RIK
9630013D21RIK
D630023B12RIK
0610013E23RIK
4732435N03RIK
SLC25A40
ZFAND1
ACAA1A
CREBL2
VWCE
YARS2
4932438A13RIK
NOL7
CLCC1
2810432D09RIK
6530415H11RIK
PMPCB
NDUFV1
SIAE
PMS1
ARHGEF12
SLC30A6
TRPC2
CCDC16
LRRC40
POLRMT
TIMM44
LRRC44
KMO
DNAJC18
DNAJC19
DALRD3
DMXL1
CACNB4
NAT9
SMO
4833426J09RIK
4933403G14RIK
LOC100044400
2310068J16RIK
ACBD4
FGFR4
RNF167
ZCCHC3
1810049H13RIK
ZFP106
CNO
2610528K11RIK
TPMT
1200014M14RIK
BRI3
A930005H10RIK
ARHGAP18
C330018D20RIK
LOC100046254
SAT2
LRRC56
TM7SF3
AKR7A5
3110057O12RIK
ABHD11
HYI
TXNDC4
COX15
ZKSCAN1
SAC3D1
FN3K
PAICS
RHBDD3
PEX1
ABHD14A
NAP1L1
TMLHE
RBKS
NSMCE4A
TMBIM4
CHPT1
LOC100047214
4930432O21RIK
MTFR1
SYBL1
ACO1
ENY2
FECH
MOBKL2B
GAL3ST2
NDUFB6
USP34
GLO1
PROZ
2010309E21RIK
3110001I20RIK
PITPNC1
H2AFJ
LOC100041586
AFMID
GYK
RDH14
HIBCH
RBM39
FAHD1
RWDD3
ZFP655
TMEM30A
CPT1A
ZFP775
GADD45GIP1
ASCC1
LRRC8A
ANXA13
CABLES1
KLF1
MUT
1810019D21RIK
D730039F16RIK
2610528J11RIK
SLC25A16
2310042D19RIK
SBK1
C1D
CAT
DEFB29
RPS6KB1
ALDH1A3
TOMM70A
CLEC2H
2310009A05RIK
RQCD1
TRIM37
A230062G08RIK
ZFP650
1700001L05RIK
DHTKD1
ZFP787
NXT2
1110003E01RIK
TM2D2
AQP11
ZBTB44
ORC5L
LOC100047782
RABEPK
NR3C1
WDR89
GPHN
RPS6KA5
GK5
DDT
SRR
MAT2B
NAGLU
SCRN3
BPNT1
FBXO9
NEO1
SCYL1
FBXO8
2210011C24RIK
TRIM23
POLI
PGM2'''
enrichr_library = 'ChEA_2016'


# One gene symbol per line. Surrounding whitespace is stripped and blank lines
# (for example a trailing newline in the pasted input) are dropped so that no
# empty string ends up in the gene list.
genes = [line.strip() for line in gene_list_input.splitlines() if line.strip()]


# Enrichr API Function for Manhattan Plot and Bar Chart
# Takes a gene list and Enrichr libraries as input
def Enrichr_API(enrichr_gene_list, all_libraries):
    """Upload a gene list to Enrichr and fetch enrichment results per library.

    The gene list is uploaded once through the ``addList`` endpoint; the list
    id that comes back is then used to query the ``enrich`` endpoint for each
    library in ``all_libraries``.

    Args:
        enrichr_gene_list: iterable of gene symbol strings.
        all_libraries: iterable of Enrichr library names; must not be empty.

    Returns:
        A list ``[results_df, short_results_df, all_terms, all_pvalues,
        all_adjusted_pvalues, short_id]``. ``results_df`` (all rows, plus a
        ``'library'`` column) and ``short_results_df`` (first 10 rows) belong
        to the LAST library queried. ``all_terms``, ``all_pvalues`` and
        ``all_adjusted_pvalues`` hold one list per library, taken from result
        columns 1, 2 and 6 of the first 10 rows. ``short_id`` is the
        ``shortId`` of the uploaded list, as a string.

    Raises:
        ValueError: if ``all_libraries`` is empty.
        Exception: if either Enrichr request returns a non-OK status.
    """
    all_libraries = list(all_libraries)
    if not all_libraries:
        # Without at least one library there is nothing to put in the result
        # data frames; fail before touching the network.
        raise ValueError('all_libraries must contain at least one library name')

    all_terms = []
    all_pvalues = []
    all_adjusted_pvalues = []

    # The upload does not depend on the library, so do it once up front.
    payload = {
        'list': (None, '\n'.join(enrichr_gene_list)),
        'description': (None, '')
    }
    response = requests.post('https://maayanlab.cloud/Enrichr/addList', files=payload)
    if not response.ok:
        raise Exception('Error analyzing gene list')

    upload = json.loads(response.text)
    user_list_id = upload['userListId']
    short_id = upload['shortId']

    enrich_url = 'https://maayanlab.cloud/Enrichr/enrich'
    query_string = '?userListId=%s&backgroundType=%s'

    for library_name in all_libraries:
        # Short pause before each query, as in the original request sequence.
        time.sleep(0.5)
        response = requests.get(enrich_url + query_string % (user_list_id, library_name))
        if not response.ok:
            raise Exception('Error fetching enrichment results')

        data = json.loads(response.text)

        short_results_df = pd.DataFrame(data[library_name][0:10])
        all_terms.append(list(short_results_df[1]))
        all_pvalues.append(list(short_results_df[2]))
        all_adjusted_pvalues.append(list(short_results_df[6]))

        results_df = pd.DataFrame(data[library_name])
        # adds library name to the data frame so the libraries can be distinguished
        results_df['library'] = library_name.replace('_', '')

    return [results_df, short_results_df, all_terms, all_pvalues, all_adjusted_pvalues, str(short_id)]


# Scatter Plot Parameters
# p-value threshold below which a gene set is highlighted
significance_value = 0.05

# Bar Chart Parameters
figure_file_format = ['png', 'svg']
output_file_name = 'Enrichr_results_bar'
color = 'lightskyblue'
# one output file name per requested figure format
final_output_file_names = [
    f'{output_file_name}.{extension}' for extension in figure_file_format
]

# Hexagonal Canvas Parameters
canvas_color = 'Blue'
num_hex_colored = 10

# Manhattan Plot Parameters
manhattan_colors = [
    '#003f5c',
    '#7a5195',
    '#ef5675',
    '#ffa600',
]


# Scatter Plot Functions

def download_library(library_name):
    """Download the pre-processed scatter plot data for ``library_name``.

    Args:
        library_name: name of the library; used as the CSV file name in the
            Enrichr-Viz-Appyter storage repository.

    Returns:
        ``(genes, library_data, df)`` where ``genes`` is the module-level input
        gene list, ``library_data`` is a list of ``[name, genes_string]`` pairs
        built from the ``Name`` and ``Genes`` columns, and ``df`` is the
        downloaded data frame. On download failure a message is displayed and
        ``(-1, -1, -1)`` is returned.
    """
    library_url = (
        'https://raw.githubusercontent.com/MaayanLab/Enrichr-Viz-Appyter/master/'
        'Enrichr-Processed-Library-Storage/Scatterplot/Libraries/' + library_name + '.csv'
    )
    try:
        df = pd.read_csv(library_url)
    except Exception:
        # Any download/parse failure is reported to the user; unlike a bare
        # ``except`` this lets KeyboardInterrupt and SystemExit propagate.
        display(Markdown("Failed to retrieve the selected pre-processed library."))
        return -1, -1, -1

    library_data = [list(pair) for pair in zip(df['Name'].tolist(), df['Genes'].tolist())]
    return genes, library_data, df

# Enrichment analysis
def get_library_iter(library_data):
    """Yield ``(term, gene_set)`` for each library entry.

    Each entry is indexable: element 0 is the term name and element 1 is a
    single-space-separated string of genes, which is split into a list.
    """
    for entry in library_data:
        yield entry[0], entry[1].split(' ')

def get_enrichment_results(genes, library_data):
    """Run ``enrich_crisp`` for ``genes`` against the library and return the
    ``(term, result)`` pairs as a list sorted by ascending ``result.pvalue``."""
    library_terms = get_library_iter(library_data)
    ranked = list(enrich_crisp(genes, library_terms, 20000, True))
    ranked.sort(key=lambda entry: entry[1].pvalue)
    return ranked

def get_pvalue(row, unzipped_results, all_results):
    """Return the enrichment p-value for the term in ``row['Name']``.

    ``unzipped_results[0]`` holds the term names in the same order as
    ``all_results``; terms without an enrichment result get a p-value of 1.
    """
    term_names = list(unzipped_results[0])
    target = row['Name']
    try:
        position = term_names.index(target)
    except ValueError:
        # term has no enrichment result
        return 1
    return all_results[position][1].pvalue
    
# Call enrichment results and return a plot and dataframe for Scatter Plot
def get_plot(library_name):
    """Build the Bokeh scatter plot for ``library_name``.

    Returns ``(plot, df)``: the Bokeh figure and the library data frame (with a
    ``'p value'`` column added when enrichment produced results). Returns
    ``(-1, -1)`` when the library could not be downloaded.
    """
    genes, library_data, df = download_library(library_name)

    # download_library hands back -1 sentinels when the library is unavailable
    if genes == -1:
        return -1, -1

    all_results = get_enrichment_results(genes, library_data)
    unzipped_results = list(zip(*all_results))

    if len(all_results) != 0:
        # attach each gene set's p-value to the data frame
        df['p value'] = df.apply(lambda row: get_pvalue(row, unzipped_results, all_results), axis=1)

        # colour scale: smallest p-value up to twice the significance threshold
        cmap = mpl.cm.get_cmap('Blues_r')
        norm = colors.Normalize(vmin=df['p value'].min(), vmax=significance_value*2)

        # significant gene sets are drawn larger and shaded; the rest are small and grey
        p_values = df['p value']
        is_significant = [p < significance_value for p in p_values]
        my_colors = [
            mpl.colors.to_hex(cmap(norm(p))) if hit else '#808080'
            for p, hit in zip(p_values, is_significant)
        ]
        my_sizes = [12 if hit else 6 for hit in is_significant]

        source = ColumnDataSource(
            data=dict(
                x=df['x'],
                y=df['y'],
                gene_set=df['Name'],
                p_value=df['p value'],
                colors=my_colors,
                sizes=my_sizes,
            )
        )

        hover_emb = HoverTool(names=["df"], tooltips="""
            <div style="margin: 10">
                <div style="margin: 0 auto; width:200px;">
                    <span style="font-size: 12px; font-weight: bold;">Gene Set:</span>
                    <span style="font-size: 12px">@gene_set</span>
                    <span style="font-size: 12px; font-weight: bold;">p-value:</span>
                    <span style="font-size: 12px">@p_value</span>
                </div>
            </div>
            """)
    else:
        print("There are no enriched terms for your input gene set in the ", library_name, " library.")
        point_count = len(df.index)

        # nothing is enriched: every point is small and grey
        source = ColumnDataSource(
            data=dict(
                x=df['x'],
                y=df['y'],
                gene_set=df['Name'],
                colors=['#808080'] * point_count,
                sizes=[6] * point_count,
            )
        )

        hover_emb = HoverTool(names=["df"], tooltips="""
            <div style="margin: 10">
                <div style="margin: 0 auto; width:200px;">
                    <span style="font-size: 12px; font-weight: bold;">Gene Set:</span>
                    <span style="font-size: 12px">@gene_set</span>
                </div>
            </div>
            """)

    plot_emb = figure(
        plot_width=700,
        plot_height=700,
        tools=[hover_emb, 'pan', 'wheel_zoom', 'reset', 'save'],
    )

    # hide tick marks and tick labels on both axes
    for axis in (plot_emb.xaxis, plot_emb.yaxis):
        axis.major_tick_line_color = None
        axis.minor_tick_line_color = None
        axis.major_label_text_font_size = '0pt'

    plot_emb.circle(
        'x', 'y',
        size='sizes',
        alpha=0.7,
        line_alpha=0,
        line_width=0.01,
        source=source,
        fill_color='colors',
        name="df",
    )

    plot_emb.output_backend = "svg"

    return plot_emb, df


# Display Scatter Plot
# get_plot returns (-1, -1) when the library could not be loaded
plot, df = get_plot(enrichr_library)
if plot != -1:
    show(plot)
else:
    display(Markdown("Unable to create scatter plot visualization."))


# Bar Chart Functions
# Takes all terms, all p-values, all adjusted p-values, plot title, Enrichr libraries, and specified figure format
def enrichr_figure(all_terms, all_pvalues, all_adjusted_pvalues, plot_names, all_libraries, bar_color):
    """Draw, save and show a bar chart of -log10(p-value) for the first library.

    Only index 0 of each per-library list is plotted.

    Args:
        all_terms: per-library lists of term names.
        all_pvalues: per-library lists of p-values, aligned with ``all_terms``.
        all_adjusted_pvalues: per-library lists of adjusted p-values; terms
            with an adjusted p-value below 0.05 are marked with ``*``.
        plot_names: file names the figure is saved to (one per output format).
        all_libraries: library names; underscores become spaces in the title.
        bar_color: colour for bars whose p-value is below 0.05.
    """
    # Non-significant bars are light grey, unless light grey is the chosen bar
    # colour; then they are white with a black outline so they stay visible.
    if bar_color != 'lightgrey':
        bar_color_not_sig = 'lightgrey'
        edgecolor = None
        linewidth = 0
    else:
        bar_color_not_sig = 'white'
        edgecolor = 'black'
        linewidth = 1

    plt.figure(figsize=(24, 12))

    i = 0
    terms = all_terms[i]
    pvalues = all_pvalues[i]
    neg_log_pvalues = np.log10(pvalues) * -1

    bar_colors = [bar_color if (x < 0.05) else bar_color_not_sig for x in pvalues]
    ax = sns.barplot(x=neg_log_pvalues, y=terms, palette=bar_colors, edgecolor=edgecolor, linewidth=linewidth)
    # Term names are written inside the plot area instead of on the y axis.
    ax.axes.get_yaxis().set_visible(False)
    ax.set_title(all_libraries[i].replace('_', ' '), fontsize=26)
    ax.set_xlabel('−log₁₀(p‐value)', fontsize=25)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.tick_params(axis='x', which='major', labelsize=20)
    if max(neg_log_pvalues) < 1:
        # Integer ticks would leave the axis nearly empty; use 0.1 steps.
        ax.xaxis.set_ticks(np.arange(0, max(neg_log_pvalues), 0.1))

    for ii, term in enumerate(terms):
        formatted_pvalue = str(np.format_float_scientific(pvalues[ii], precision=2))
        # '*' marks terms that are significant after multiple-testing adjustment
        separator = '  *' if all_adjusted_pvalues[i][ii] < 0.05 else '  '
        annot = separator.join([term, formatted_pvalue])

        title_start = max(ax.axes.get_xlim())/200
        ax.text(title_start, ii, annot, ha='left', wrap=True, fontsize=26)

    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    # Save the figure under every requested file name so that download links
    # built from the same names point at files that exist.
    for plot_name in plot_names:
        plt.savefig(plot_name, bbox_inches='tight')

    # Show plot
    plt.show()


# Display Bar Chart
# Query Enrichr; results[2:5] are the terms, p-values and adjusted p-values
results = Enrichr_API(genes, [enrichr_library])
enrichr_figure(*results[2:5], final_output_file_names, [enrichr_library], color)
# Offer one download link per figure format
for file_type, file in zip(figure_file_format, final_output_file_names):
    display(FileLink(file, result_html_prefix=f'Download {file_type}: '))


# Hexagonal Canvas Functions

def library_processing():
    """Download the annealed library for the hexagonal canvas and score each gene set.

    The library file is read line by line: the term name and the gene list are
    separated by two tabs, genes are separated by single tabs, and the last
    tab-separated field of each line is discarded.

    Returns:
        ``(anneal_list, x_dimension, y_dimension)`` where ``anneal_list`` is a
        list of ``(name, gene_list, score)`` tuples and both dimensions are
        ``ceil(sqrt(number of gene sets))``. On download failure a message is
        displayed and ``([], -1, -1)`` is returned.
    """
    # ``import urllib`` alone does not guarantee that the ``request``
    # submodule is loaded, so import it explicitly here.
    import urllib.request

    library_url = (
        'https://raw.githubusercontent.com/MaayanLab/Enrichr-Viz-Appyter/master/'
        'Enrichr-Processed-Library-Storage/Annealing/Annealed-Libraries/' + enrichr_library + '.txt'
    )

    raw_library_data = []
    try:
        with urllib.request.urlopen(library_url) as f:
            for line in f.readlines():
                raw_library_data.append(line.decode('utf-8').split("\t\t"))
    except Exception:
        # Report download/decoding problems to the user; unlike a bare
        # ``except`` this lets KeyboardInterrupt and SystemExit propagate.
        display(Markdown("Failed to retrieve the selected annealed library."))
        return [], -1, -1

    name = [entry[0] for entry in raw_library_data]
    gene_list = [entry[1].split('\t')[:-1] for entry in raw_library_data]

    # Set lookup keeps the overlap count linear in the size of each gene set.
    input_genes = set(genes)
    input_size = len(genes)

    indices = []
    for gene_set in gene_list:
        overlap = sum(1 for gene in gene_set if gene in input_genes)
        # NOTE(review): the original code called this a Jaccard index, but the
        # denominator is |A| + |B| rather than |A union B|. The formula is kept
        # unchanged so canvas scores stay the same; confirm which is intended.
        indices.append(round(overlap / (len(gene_set) + input_size), 5))

    # the canvas is a square grid large enough to hold every gene set
    x_dimension = math.ceil(math.sqrt(len(indices)))
    y_dimension = x_dimension

    anneal_list = list(zip(name, gene_list, indices))

    return anneal_list, x_dimension, y_dimension

def unzip_list(anneal_list):
    """Transpose a list of equal-length records into a list of column tuples."""
    return [column for column in zip(*anneal_list)]

# define a list of colors for the hexagonal canvas
def get_color(anneal_list, cut_off_value, x_dimension, y_dimension):
    """Compute one hex colour per canvas cell.

    Args:
        anneal_list: list of ``(name, gene_list, score)`` entries.
        cut_off_value: minimum score for a cell to be drawn in the canvas
            colour. The sentinel ``2.0`` means "use the score of the
            ``num_hex_colored``-th highest entry" (or of the lowest entry when
            the library has fewer entries than that).
        x_dimension, y_dimension: canvas size; cells beyond the end of
            ``anneal_list`` are white.

    Returns:
        ``(color_list, max_index, cut_off_value)``: the colours, the maximum
        score used for scaling, and the cut-off actually applied.
    """
    total_cells = x_dimension * y_dimension

    if not anneal_list:
        # Nothing to colour; avoids max()/indexing errors on an empty library.
        return ["#FFFFFF"] * total_cells, 0, cut_off_value

    scores = [entry[2] for entry in anneal_list]

    # Deal with cut_off_value (only color the most significant hexagons)
    if cut_off_value == 2.0:
        ranked_scores = sorted(scores, reverse=True)
        # Clamp so libraries smaller than num_hex_colored do not raise IndexError.
        top_n = min(int(num_hex_colored), len(ranked_scores))
        cut_off_value = ranked_scores[top_n - 1]

    # Per-channel weights: each channel is computed as 1 - scaled_score * weight,
    # so a weight of 0 leaves that channel at full intensity.
    channel_weights = {
        'Red': (0.0, 0.8, 0.8),
        'Yellow': (0.0, 0.3, 1.0),
        'Purple': (0.5, 1.0, 0.0),
        'Pink': (0.0, 1.0, 0.2),
        'Orange': (0.0, 0.45, 1.0),
        'Green': (1.0, 0.0, 1.0),
        'Blue': (1.0, 0.9, 0.0),
    }
    r_value, g_value, b_value = channel_weights.get(canvas_color, (0, 0, 0))

    max_index = max(scores)
    if max_index != 0:
        scaled_list = [score / max_index for score in scores]
    else:
        scaled_list = scores

    color_list = []
    for i in range(total_cells):
        if i >= len(scores):
            # padding cell with no gene set
            color_list.append("#FFFFFF")
        elif float(scores[i]) >= cut_off_value:
            scaled = scaled_list[i]
            color_list.append(mpl.colors.to_hex(
                (1 - scaled * r_value, 1 - scaled * g_value, 1 - scaled * b_value)))
        else:
            # below the cut-off: grey level proportional to the scaled score
            grey = 1 - scaled_list[i]
            color_list.append(mpl.colors.to_hex((grey, grey, grey)))
    return color_list, max_index, cut_off_value

def init_chart():
  """Register the hexagonal-canvas chart as a RequireJS module.

  Displays the RequireJS loader and a script that defines a module under a
  freshly generated id, then returns that id.
  """
  chart_id = f'mychart-{uuid.uuid4()}'
  display(HTML('<script src="/static/components/requirejs/require.js"></script>'))
  chart_script = Template(dedent('''
  <script>
  require.config({
    paths: {
      'd3': 'https://cdnjs.cloudflare.com/ajax/libs/d3/5.16.0/d3.min',
      'd3-hexbin': 'https://d3js.org/d3-hexbin.v0.2.min',
    },
    shim: {
      'd3-hexbin': ['d3']
    }
  })

  // If we configure mychart via url, we can eliminate this define here
  define($chart_id, ['d3', 'd3-hexbin'], function(d3, d3_hexbin) {
    return function (figure_id, numA, numB, colorList, libraryList, indices) {
      var margin = {top: 50, right: 20, bottom: 20, left: 50},
        width = 850 - margin.left - margin.right,
        height = 350 - margin.top - margin.bottom;

      // append the svg object to the body of the page
      var svG = d3.select('#' + figure_id)
                  .attr("width", width + margin.left + margin.right)
                  .attr("height", height + margin.top + margin.bottom)
                  .append("g")
                  .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
      
      //The number of columns and rows of the heatmap
      var MapColumns = numA,
          MapRows = numB;

      //The maximum radius the hexagons can have to still fit the screen
      var hexRadius = d3.min([width/((MapColumns + 0.5) * Math.sqrt(3)), height/((MapRows + 1/3) * 1.5)]);

      //Calculate the center position of each hexagon
      var points = [];
      for (var i = 0; i < MapRows; i++) {
          for (var j = 0; j < MapColumns; j++) {
              var x = hexRadius * j * Math.sqrt(3)
              //Offset each uneven row by half of a "hex-width" to the right
              if(i%2 === 1) x += (hexRadius * Math.sqrt(3))/2
              var y = hexRadius * i * 1.5
              points.push([x,y])
          }
      }

      //Set the hexagon radius
      var hexbin = d3_hexbin.hexbin().radius(hexRadius);

      svG.append("g")
        .selectAll(".hexagon")
        .data(hexbin(points))
        .enter().append("path")
        .attr("class", "hexagon")
        .attr("d", function (d) {
            return "M" + d.x + "," + d.y + hexbin.hexagon();
        })
        .attr("stroke", "black")
        .attr("stroke-width", "1px")
        .style("fill", function (d,i) { return colorList[i]; })
        .on("mouseover", mover)
        .on("mouseout", mout)
        .append("svg:title")
        .text(function(d,i) { return libraryList[i].concat(" ").concat(indices[i]); });

      // Mouseover function
      function mover(d) {
      d3.select(this)
        .transition().duration(10)  
        .style("fill-opacity", 0.3)
      };

      // Mouseout function
      function mout(d) { 
      d3.select(this)
        .transition().duration(10)
        .style("fill-opacity", 1)
      };

  }

  })
  </script>
  '''))
  # repr() turns the id into a quoted literal for the define() call
  display(HTML(chart_script.substitute(chart_id=repr(chart_id))))
  return chart_id

def Canvas(numA, numB, colorList, libraryList, indices):
  """Render the hexagonal canvas.

  Registers the chart module via ``init_chart``, then displays an ``<svg>``
  element with a fresh id and a script that calls the chart with the
  ``repr()`` of each argument substituted into the JavaScript call.
  """
  chart_id = init_chart()
  figure_id = 'fig-' + str(uuid.uuid4())
  canvas_markup = Template(dedent('''
  <svg id=$figure_id></svg>
  <script>
  require([$chart_id], function(mychart) {
    mychart($figure_id, $numA, $numB, $colorList, $libraryList, $indices)
  })
  </script>
  '''))
  display(HTML(canvas_markup.substitute(
      chart_id=repr(chart_id),
      figure_id=repr(figure_id),
      numA=repr(numA),
      numB=repr(numB),
      colorList=repr(colorList),
      libraryList=repr(libraryList),
      indices=repr(indices),
  )))


# Display Hexagonal Canvas
# library_processing reports failure through negative dimensions
anneal_list, x_dimension, y_dimension = library_processing()
if x_dimension >= 0:
    # 2.0 asks get_color to derive the cut-off from the top-scoring hexagons
    color_list, scaling_factor, cut_off_value = get_color(anneal_list, 2.0, x_dimension, y_dimension)
    unzipped_anneal_list = unzip_list(anneal_list)
    Canvas(
        x_dimension,
        y_dimension,
        color_list,
        list(unzipped_anneal_list[0]),
        list(unzipped_anneal_list[2]),
    )
else:
    display(Markdown("Unable to create hexagonal canvas visualization."))


# Manhattan Plot Functions

# Processes Enrichr data for Manhattan plots
def get_data(genes):
    """Fetch Enrichr results for ``enrichr_library`` and shape them for the Manhattan plot.

    Args:
        genes: list of input gene symbols.

    Returns:
        ``(sorted_data, groups)``: a data frame with columns ``"Gene Set"``,
        ``"-log(p value)"`` and ``"Library"``, ordered by gene set name, and
        that frame grouped by ``"Library"``.
    """
    # get enrichr results from the library selected
    results_df = Enrichr_API(genes, [enrichr_library])[0]

    # result column 1 holds the term names and column 2 the p-values
    neg_log_pvalues = np.log10(list(results_df[2])) * -1

    rows = sorted(
        zip(list(results_df[1]), neg_log_pvalues, list(results_df['library'])),
        key=itemgetter(0),
    )

    # Build the frame directly instead of concatenating onto an empty frame,
    # which pandas has deprecated.
    sorted_data = pd.DataFrame(rows, columns=["Gene Set", "-log(p value)", "Library"])

    # group data by library
    groups = sorted_data.groupby("Library")
    return sorted_data, groups

# Create Manhattan Plots
def manhattan(sorted_data):
    """Build the Bokeh Manhattan plot: one point per gene set, y = -log(p value).

    ``sorted_data`` needs the columns ``"Gene Set"``, ``"-log(p value)"`` and
    ``"Library"``. Returns the Bokeh figure.
    """
    # one sub-frame per library; the Library column stores names without underscores
    library_labels = [library.replace('_', '') for library in [enrichr_library]]
    list_of_df = [sorted_data[sorted_data['Library'] == label] for label in library_labels]

    # the categorical x axis is every gene set name, library by library
    list_of_xaxis_values = []
    for frame in list_of_df:
        list_of_xaxis_values.extend(frame["Gene Set"].values.tolist())

    p = figure(
        x_range=list_of_xaxis_values,
        plot_height=300,
        plot_width=750,
        tools='pan, box_zoom, hover, reset, save',
    )

    color_index = 0
    for frame in list_of_df:
        # start over when there are more libraries than colours
        if color_index >= len(manhattan_colors):
            color_index = 0
        shade = manhattan_colors[color_index]

        log_values = frame["-log(p value)"].values.tolist()
        # recover the p-value from its -log10 for the hover tooltip
        actual_pvalues = ["{:.5e}".format(10**(-1*log_value)) for log_value in log_values]

        source = ColumnDataSource(data=dict(
            x=frame["Gene Set"].values.tolist(),
            y=log_values,
            pvalue=actual_pvalues,
        ))

        p.circle(x='x', y='y', size=5, fill_color=shade, line_color=shade, line_width=1, source=source)
        color_index += 1

    p.background_fill_color = 'white'
    # gene set names are only shown on hover, not on the axis
    p.xaxis.major_tick_line_color = None
    p.xaxis.major_label_text_font_size = '0pt'
    p.y_range.start = 0
    p.yaxis.axis_label = '-log(p value)'

    p.hover.tooltips = [
        ("Gene Set", "@x"),
        ("p value", "@pvalue"),
    ]
    p.output_backend = "svg"

    return p


# Display Manhattan Plot
# Fetch the plot data, build the figure, then render it
sorted_data, groups = get_data(genes)
manhattan_plot = manhattan(sorted_data)
show(manhattan_plot)


def get_library(lib_name):
    '''
    Fetch the gene set library from Enrichr and return a dictionary mapping
    each term to the keys of its gene entry. An empty dictionary is returned
    when the response status is not 200.
    '''
    resp = requests.get('https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=json&libraryName=' + lib_name)
    if resp.status_code != 200:
        return {}

    terms = resp.json()[lib_name]['terms']
    return {term: members.keys() for term, members in terms.items()}

def volcano_plot(library_name):
    '''
    Make volcano plot of odds ratio vs. significance for input library.

    library_name: name of the Enrichr library to fetch and test against.

    Returns the Bokeh figure, or None (after printing a message) when the
    library could not be retrieved.
    '''
    # Use the requested library rather than the module-level default.
    lib = get_library(library_name)
    if not lib:
        print('Failed to access library, please try again later.')
        return None

    enrich_results = enrich_crisp(genes, lib, 20000, True)

    res_df = pd.DataFrame(
        [[term, res.pvalue, res.odds_ratio] for (term, res) in enrich_results],
        columns=['term', 'pvalue', 'odds_ratio']
    )

    res_df['log_pval'] = np.negative(np.log10(res_df['pvalue']))

    # colour scale: smallest p-value up to 0.1
    cmap = mpl.cm.get_cmap('Blues_r')
    cnorm = colors.Normalize(vmin=res_df['pvalue'].min(), vmax=0.1)

    # terms with p < 0.05 are drawn larger and shaded; the rest are small and grey
    my_colors = []
    my_sizes = []
    for row in res_df.itertuples():
        if row.pvalue < 0.05:
            my_colors.append(mpl.colors.to_hex(cmap(cnorm(row.pvalue))))
            my_sizes.append(12)
        else:
            my_colors.append('#808080')
            my_sizes.append(6)

    source = ColumnDataSource(
        data=dict(
            x=res_df['odds_ratio'],
            y=res_df['log_pval'],
            gene_set=res_df['term'],
            p_value=res_df['pvalue'],
            odds_r=res_df['odds_ratio'],
            colors=my_colors,
            sizes=my_sizes
        )
    )

    hover_emb = HoverTool(
        names=["res_df"], 
        tooltips="""
        <div style="margin: 10">
            <div style="margin: 0 auto; width:200px;">
                <span style="font-size: 12px; font-weight: bold;">Term:</span>
                <span style="font-size: 12px">@gene_set<br></span>
                <span style="font-size: 12px; font-weight: bold;">P-Value:</span>
                <span style="font-size: 12px">@p_value<br></span>
                <span style="font-size: 12px; font-weight: bold;">Odds Ratio:</span>
                <span style="font-size: 12px">@odds_r<br></span>
            </div>
        </div>
        """
    )

    tools_emb = [hover_emb, 'pan', 'wheel_zoom', 'reset', 'save']

    plot_emb = figure(
        plot_width=700,
        plot_height=700,
        tools=tools_emb
    )

    plot_emb.circle(
        'x', 'y', size='sizes',
        alpha=0.7, line_alpha=0,
        line_width=0.01, source=source,
        fill_color='colors', name="res_df"
    )

    plot_emb.xaxis.axis_label = "Odds Ratio"
    plot_emb.yaxis.axis_label = "-log10(p-value)"

    plot_emb.output_backend = "svg"

    return plot_emb


# Keep the volcano figure in its own variable: `plot` still refers to the
# scatter plot, which the p-value table section checks later on.
volcano = volcano_plot(enrichr_library)
# volcano_plot returns None when the library could not be fetched
if volcano is not None:
    show(volcano)


# Output a table of significant p-values and q-values
def get_qvalues(df):
    """Look up column 6 of the Enrichr results (``results[0]``, indexed by its
    column 1) for every entry of ``df['Name']`` and return the values as a list."""
    enrichr_table = pd.DataFrame(results[0]).set_index(1)
    return [enrichr_table.loc[name][6] for name in df['Name'].to_list()]

def create_download_link(df, title = "Download CSV file of this table", filename = "data.csv"):
    """Return an HTML anchor that downloads ``df`` as a CSV file.

    The CSV (written without the index) is base64-encoded and embedded in the
    link as a ``data:`` URI; ``title`` is the link text and ``filename`` the
    suggested download name.
    """
    encoded_csv = base64.b64encode(df.to_csv(index=False).encode())
    payload = encoded_csv.decode()
    return HTML(
        f'<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    )

# `df` comes from the scatter plot step: it is -1 when that step failed and
# only has a 'p value' column when enrichment produced results. Checking the
# frame itself is reliable even if `plot` has been reassigned since then.
if isinstance(df, pd.DataFrame) and 'p value' in df.columns:
    sorted_df = df.sort_values(by=['p value'])
    filtered_df = sorted_df[sorted_df['p value'] <= significance_value].reset_index()
    filtered_df['q value'] = get_qvalues(filtered_df)
    if len(filtered_df) != 0:
        table_columns = ['Name', 'p value', 'q value']
        display(HTML(f"<strong>Table of significant p-values for {enrichr_library.replace('_', ' ')}</strong>"))
        display(HTML(filtered_df[table_columns].to_html(index=False)))
        display(create_download_link(filtered_df[table_columns]))


# Get complete enrichment analysis results from Enrichr 
# results[5] is the short id of the gene list submitted to Enrichr
url = f'https://amp.pharm.mssm.edu/Enrichr/enrich?dataset={results[5]}'
display(HTML(f'<span><a href="{url}">Access the complete enrichment analysis on the Enrichr website. </a></span>'))

Name	p value	q value
E2F1 18555785 ChIP-Seq MESCs Mouse	6.060960e-13	3.716090e-10
JARID1A 20064375 ChIP-Seq MESCs Mouse	5.352762e-08	1.640762e-05
PPARA 22158963 ChIP-Seq LIVER Mouse	9.164746e-07	1.875273e-04
ZFX 18555785 ChIP-Seq MESCs Mouse	3.744051e-06	5.772505e-04
NELFA 20434984 ChIP-Seq ESCs Mouse	7.518581e-06	9.246285e-04
RXR 22158963 ChIP-Seq LIVER Mouse	1.462445e-05	1.406845e-03
SRF 21415370 ChIP-Seq HL-1 Mouse	1.603250e-05	1.406845e-03
MYC 18358816 ChIP-ChIP MESCs Mouse	4.885044e-05	3.773184e-03
TRIM28 19339689 ChIP-ChIP MESCs Mouse	1.798112e-04	1.235035e-02
ESRRB 18555785 ChIP-Seq MESCs Mouse	4.815307e-04	2.965336e-02
ERG 20887958 ChIP-Seq HPC-7 Mouse	6.274334e-04	3.521191e-02
MYC 18555785 ChIP-Seq MESCs Mouse	1.142352e-03	5.692329e-02
PPARG 23326641 ChIP-Seq C3H10T1-2 Mouse	1.205655e-03	5.692329e-02
GATA4 21415370 ChIP-Seq HL-1 Mouse	3.759397e-03	1.661201e-01
TAF7L 23326641 ChIP-Seq C3H10T1-2 Mouse	4.114736e-03	1.688551e-01
MYC 19030024 ChIP-ChIP MESCs Mouse	5.143164e-03	1.970175e-01
TBP 23326641 ChIP-Seq C3H10T1-2 Mouse	5.432506e-03	1.970175e-01
YY1 23942234 ChIP-Seq MYOBLASTS AND MYOTUBES Mouse	6.784255e-03	2.329321e-01
ZFP42 18358816 ChIP-ChIP MESCs Mouse	7.921552e-03	2.512311e-01
PDX1 19855005 ChIP-ChIP MIN6 Mouse	8.172982e-03	2.512311e-01
MYCN 18555785 ChIP-Seq MESCs Mouse	1.249324e-02	3.687142e-01
CREM 20920259 ChIP-Seq GC1-SPG Mouse	1.465862e-02	3.969904e-01
GATA1 22383799 ChIP-Seq G1ME Mouse	1.473669e-02	3.969904e-01
SPI1 22790984 ChIP-Seq ERYTHROLEUKEMIA Mouse	1.586868e-02	3.983796e-01
MYC 19079543 ChIP-ChIP MESCs Mouse	1.609528e-02	3.983796e-01
EKLF 21900194 ChIP-Seq ERYTHROCYTE Mouse	1.710874e-02	4.068736e-01
THAP11 20581084 ChIP-Seq MESCs Mouse	2.187233e-02	5.000593e-01
HOXB4 20404135 ChIP-ChIP EML Mouse	2.398898e-02	5.188658e-01
CNOT3 19339689 ChIP-ChIP MESCs Mouse	2.429504e-02	5.188658e-01
HCFC1 20581084 ChIP-Seq MESCs Mouse	3.085746e-02	6.140645e-01
FOXO3 22982991 ChIP-Seq MACROPHAGES Mouse	3.168178e-02	6.140645e-01
FOXO1 23066095 ChIP-Seq LIVER Mouse	3.202325e-02	6.140645e-01
PPARG 19300518 ChIP-PET 3T3-L1 Mouse	3.385716e-02	6.277320e-01
ESR1 17901129 ChIP-ChIP LIVER Mouse	4.204529e-02	7.486599e-01
TCFAP2C 20176728 ChIP-ChIP TROPHOBLAST STEM CELLS Mouse	4.219204e-02	7.486599e-01
TBX5 21415370 ChIP-Seq HL-1 Mouse	4.731004e-02	8.159190e-01

Enrichment Analysis Visualizer

Scatter Plot

Bar Chart

Hexagonal Canvas

Manhattan Plot

Volcano Plot

Table of significant p-values

Link to Enrichr