# Scatter Plot Imports
from maayanlab_bioinformatics.enrichment import enrich_crisp
import matplotlib as mpl

# Bar Chart Imports
import pandas as pd 
import numpy as np
import re
import hashlib
import json
import requests
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import seaborn as sns
import time
from matplotlib.ticker import MaxNLocator
from IPython.display import display, FileLink, HTML

# Hexagonal Canvas Imports
import math
import uuid
import urllib
from textwrap import dedent
from string import Template
from operator import itemgetter

# Bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.palettes import Category20
output_notebook()


gene_list_input = ['GSR', 'DIO1', 'HBA1', 'HBA2', 'IL1B', 'IL6', 'APOA1', 'ALB', 'IFNG', 'CTH', 'SERPINA3', 'RELA', 'SERPINE1', 'HBB', 'SAA4', 'MPO', 'TNF', 'MTR', 'RFK', 'ALOX5', 'PNPO', 'LDLR', 'INSR', 'ABCA1', 'MTHFR', 'ALOX5AP', 'F2', 'SOD3', 'KMO', 'NFKB1', 'ALOX15B', 'SOD1', 'NFKB2', 'SOD2', 'F7', 'CAT', 'SAA2', 'SAA1', 'CRP', 'TXN', 'SAA2-SAA4', 'PLAT', 'PLG', 'INS', 'ICAM1', 'PTGS1', 'PTGS2', 'PRDX4', 'PRDX3', 'CBS', 'PRDX5', 'PRDX2', 'KYNU', 'PRDX1', 'FLAD1', 'CCL2', 'XDH', 'GGT1', 'SCARB1', 'APOB']
enrichr_library = 'GTEx_Aging_Signatures_2021'


genes = [x.strip() for x in gene_list_input]


# Error handling
class NoResults(Exception):
    pass 
class APIFailure(Exception):
    pass


# Enrichr API Function for Manhattan Plot and Bar Chart
# Takes a gene list and Enrichr libraries as input
def Enrichr_API(enrichr_gene_list, all_libraries):

    all_terms = []
    all_pvalues =[] 
    all_adjusted_pvalues = []

    for library_name in all_libraries : 
        ENRICHR_URL = 'https://maayanlab.cloud/Enrichr/addList'
        genes_str = '\n'.join(enrichr_gene_list)
        description = ''
        payload = {
            'list': (None, genes_str),
            'description': (None, description)
        }

        response = requests.post(ENRICHR_URL, files=payload)
        if not response.ok:
            raise APIFailure

        data = json.loads(response.text)
        time.sleep(0.5)
        ENRICHR_URL = 'https://maayanlab.cloud/Enrichr/enrich'
        query_string = '?userListId=%s&backgroundType=%s'
        user_list_id = data['userListId']
        short_id = data["shortId"]
        gene_set_library = library_name
        response = requests.get(
            ENRICHR_URL + query_string % (user_list_id, gene_set_library)
         )
        if not response.ok:
            raise APIFailure

        data = json.loads(response.text)

        if len(data[library_name]) == 0:
            raise NoResults

        short_results_df  = pd.DataFrame(data[library_name][0:10])
        all_terms.append(list(short_results_df[1]))
        all_pvalues.append(list(short_results_df[2]))
        all_adjusted_pvalues.append(list(short_results_df[6]))
        
        results_df  = pd.DataFrame(data[library_name])
        # adds library name to the data frame so the libraries can be distinguished
        results_df['library'] = library_name.replace('_', '')

    return [results_df, short_results_df, all_terms, all_pvalues, all_adjusted_pvalues, str(short_id)]


# Scatter Plot Parameters
significance_value = 0.05

# Bar Chart Parameters
figure_file_format = ['png', 'svg']
output_file_name = 'Enrichr_results_bar'
color = 'lightskyblue'
final_output_file_names = ['{0}.{1}'.format(output_file_name, file_type) for file_type in figure_file_format]

# Hexagonal Canvas Parameters
canvas_color = 'Blue'
num_hex_colored = 10

# Manhattan Plot Parameters
manhattan_colors = ['#003f5c', '#7a5195', '#ef5675', '#ffa600']


### Scatter Plot Functions
def download_library(library_name):
    try: 
        df = pd.read_csv(f"https://raw.githubusercontent.com/MaayanLab/Enrichr-Viz-Appyter/master/Enrichr-Processed-Library-Storage/Clustered_Scatterplots/{library_name}.csv")
        df = df.dropna(subset=['term', 'genes'])
        return df, True
    except:
        display(HTML("<div style='font-size:1.5rem; padding:1.5rem 0;'>Failed to retrieve the selected pre-processed library.</div>"))
        return None, False

def get_library_iter(libdf):
    for row in libdf.itertuples():
        term = row.term
        gene_set = row.genes.split(' ')
        yield term, gene_set

def get_enrichment_results(genes, libdf):
    results = enrich_crisp(genes, get_library_iter(libdf), 20000, True)
    return {res[0]: res[1].pvalue for res in results}

def get_pvalue(row, enrich_results):
    if row['term'] in enrich_results.keys():
        return enrich_results[row['term']]
    else:
        return 1

def str_to_int(s, i):
    '''
    Computes the sha256 hash for the input string {s}, then converts 
    it into an int mod {i}. 
    '''
    s = re.sub(r"\([^()]*\)", "", s).strip()
    byte_string = bytearray(s, "utf8")
    return int(hashlib.sha256(byte_string).hexdigest(), base=16) % i

def lighten(cols, step):
    '''
    Lightens a list of hex colors {cols} by {step} number of times. 
    '''
    for _ in range(step):
        cols = [int(col[1:], 16) for col in cols]
        cols = [f"#{(col & 0x7f7f7f) << 1:06x}" for col in cols]
    return cols

def get_colors(df):
    clusters = pd.unique(df['cluster']).tolist()
    unused_colors = list(Category20[20])
    unused_colors = unused_colors + lighten(unused_colors, 1) + lighten(unused_colors, 2)
    colors = []
    for c in range(len(clusters)):
        clust_color = Category20[20][str_to_int(clusters[c], 20)]
        if clust_color in colors:
            if len(unused_colors) > 0:
                clust_color = unused_colors[0]
                unused_colors.remove(clust_color)
            else:
                clust_color = colors[c % 60]
        else:
            unused_colors.remove(clust_color)
        colors.append(clust_color)
    color_mapper = {clusters[i]: colors[i] for i in range(len(clusters))}
    return color_mapper

def get_top_10(df):
    top_indices = df[df['pvalue'] < significance_value].nsmallest(10, 'pvalue')['term'].tolist()
    return top_indices   

def get_plot(library_name):
    df, df_exists = download_library(library_name)
    if not df_exists:
        return -1, -1
    
    enrich_results = get_enrichment_results(genes, df)
    
    # add p value to the dataframe
    df['pvalue'] = df.apply(lambda row: get_pvalue(row, enrich_results), axis=1)

    # get appropriate color and size attributes
    color_mapper = get_colors(df)

    # normalize pvalues for coloring
    norm_pval = colors.Normalize(-0.3, 1)

    # get top 5 for each cluster
    top_terms = get_top_10(df)

    # add attributes to df
    df['color'] = df['cluster'].apply(lambda x: color_mapper[x])
    df['lw'] = df['term'].apply(lambda x: 2 if x in top_terms else 0.5)
    df['la'] = df['term'].apply(lambda x: 1 if x in top_terms else 0)
    df['size'] = df['pvalue'].apply(lambda x: 12 if x < significance_value else 5)
    df['fa'] = df['pvalue'].apply(lambda x: norm_pval(-np.log10(x)))

    if len(enrich_results.keys()) == 0:
        display(HTML(f"<div style='font-size:1.5rem; padding:1rem 0;'>There are no enriched terms for your input gene set in the <b>{library_name}</b> library.</div>"))
        tooltips = [
            ("Gene Set", "@gene_set"),
            ("Cluster", "@label")
        ]
    
    else: 
        tooltips = [
            ("Gene Set", "@gene_set"),
            ("p-value", "@p_value"),
            ("Cluster", "@label")
        ]
        
    hover_emb = HoverTool(tooltips=tooltips)
    tools_emb = [hover_emb, 'pan', 'wheel_zoom', 'reset', 'save']

    plot_emb = figure(
        plot_width=700, 
        plot_height=700, 
        tools=tools_emb
    )

    source = ColumnDataSource(
        data=dict(
            x = df['x'],
            y = df['y'],
            gene_set = df['term'],
            p_value = df['pvalue'],
            sizes = df['size'],
            colors = df['color'],
            fill_alphas = df['fa'],
            label = df['cluster'],
            line_widths = df['lw'],
            line_alphas = df['la'],
            line_colors = ['#000000']*df.shape[0]
        )
    )

    # hide axis labels and grid lines
    plot_emb.xaxis.major_tick_line_color = None
    plot_emb.xaxis.minor_tick_line_color = None
    plot_emb.yaxis.major_tick_line_color = None
    plot_emb.yaxis.minor_tick_line_color = None
    plot_emb.xaxis.major_label_text_font_size = '0pt'
    plot_emb.yaxis.major_label_text_font_size = '0pt' 

    plot_emb.output_backend = "svg"    
    
    plot_emb.xaxis.axis_label = "UMAP_1"
    plot_emb.yaxis.axis_label = "UMAP_2"
    
    s = plot_emb.scatter(
            'x', 
            'y', 
            size = 'sizes', 
            source = source, 
            color = 'colors', 
            fill_alpha = 'fill_alphas',
            line_alpha = 'line_alphas',
            line_width = 'line_widths',
            line_color = 'line_colors'
        )
    
    return plot_emb, df


# Display Scatter Plot
plot, df = get_plot(enrichr_library)
if plot == -1:
    display(HTML("<div style='font-size:1rem;'>Unable to create scatter plot visualization.</div>"))
else:
    display(HTML(f"<div style='font-size:1.5rem;'>Scatter plot visualization for {enrichr_library}.</div>"))
    show(plot)


# Bar Chart Functions
# Takes all terms, all p-values, all adjusted p-values, plot title, Enrichr libraries, and specified figure format
def enrichr_figure(all_terms, all_pvalues, all_adjusted_pvalues, plot_names, all_libraries, bar_color): 
    # Bar colors
    if bar_color != 'lightgrey':
        bar_color_not_sig = 'lightgrey'
        edgecolor=None
        linewidth=0
    else:
        bar_color_not_sig = 'white'
        edgecolor='black'
        linewidth=1    

    plt.figure(figsize=(24, 12))
    
    i = 0
    bar_colors = [bar_color if (x < 0.05) else bar_color_not_sig for x in all_pvalues[i]]
    fig = sns.barplot(x=np.log10(all_pvalues[i])*-1, y=all_terms[i], palette=bar_colors, edgecolor=edgecolor, linewidth=linewidth)
    fig.axes.get_yaxis().set_visible(False)
    fig.set_title(all_libraries[i].replace('_', ' '), fontsize=26)
    fig.set_xlabel('−log₁₀(p‐value)', fontsize=25)
    fig.xaxis.set_major_locator(MaxNLocator(integer=True))
    fig.tick_params(axis='x', which='major', labelsize=20)
    if max(np.log10(all_pvalues[i])*-1)<1:
        fig.xaxis.set_ticks(np.arange(0, max(np.log10(all_pvalues[i])*-1), 0.1))
    for ii,annot in enumerate(all_terms[i]):
        if all_adjusted_pvalues[i][ii] < 0.05:
            annot = '  *'.join([annot, str(str(np.format_float_scientific(all_pvalues[i][ii], precision=2)))]) 
        else:
            annot = '  '.join([annot, str(str(np.format_float_scientific(all_pvalues[i][ii], precision=2)))])

        title_start= max(fig.axes.get_xlim())/200
        fig.text(title_start, ii, annot, ha='left', wrap = True, fontsize = 26)

    fig.spines['right'].set_visible(False)
    fig.spines['top'].set_visible(False)

    for plot_name in plot_names:
        plt.savefig(plot_name, bbox_inches = 'tight')
    
    # Show plot 
    plt.show()


# Display Bar Chart
try:
    results = Enrichr_API(genes, [enrichr_library])
    enrichr_figure(results[2], results[3], results[4], final_output_file_names, [enrichr_library], color)
    # Download Bar Chart
    for i, file in enumerate(final_output_file_names):
        display(FileLink(file, result_html_prefix=str('Download ' + figure_file_format[i] + ': ')))
except APIFailure:
    results = []
    display(HTML("<div style='font-size:1.5rem; padding:1rem 0;'><b>Unable to display plot because of a bad response from the Enrichr API</b></div>"))
    display(HTML("<div style='font-size:1rem; padding:1rem 0;'>Please try again later."))
except NoResults:
    results = []
    display(HTML("<div style='font-size:1.5rem; padding:1rem 0;'><b>Unable to display plot because there were no results returned from Enrichr</b></div>"))
    display(HTML("<div style='font-size:1rem; padding:1rem 0;'>Please try again with a different Enrichr library, or a different query gene list."))


# Hexagonal Canvas Functions

def library_processing():
    # Downloads library data for the hexagonal canvas
    # Library data is pre-annealed so the canvas will have the most similar gene sets closest together
    raw_library_data = []

    try:
        library_name = enrichr_library
        with urllib.request.urlopen('https://raw.githubusercontent.com/MaayanLab/Enrichr-Viz-Appyter/master/Enrichr-Processed-Library-Storage/Annealing/Annealed-Libraries/' + enrichr_library + '.txt') as f:
            for line in f.readlines():
                raw_library_data.append(line.decode('utf-8').split("\t\t"))
        name = []
        gene_list = []
    except:
        display(HTML("<div style='font-size:1.5rem; padding:1.5rem 0;'>Failed to retrieve the selected annealed library.</div>"))
        return [], -1, -1

    for i in range(len(raw_library_data)):
        name += [raw_library_data[i][0]]
        raw_genes = raw_library_data[i][1].split('\t')
        gene_list += [raw_genes[:-1]]

    library_data = [list(a) for a in zip(name, gene_list)]

    # raw_library_data: a 2D list where the first element is the name and the second element is a list of genes associated with that name

    jaccard_indices = []
    indices = []

    for gene_set in library_data:
        intersection = [value for value in gene_set[1] if value in genes]
        index = len(intersection)/(len(gene_set[1]) + len(genes))
        jaccard_indices += [[gene_set[0], index]]
        indices += [round(index, 5)]

    # determine the dimensions of the canvas
    x_dimension = math.ceil(math.sqrt(len(indices)))
    y_dimension = math.ceil(math.sqrt(len(indices)))

    # zip name, gene_list, indices, and blank list for neighbor score then add dummy entries to the zipped list
    anneal_list = list(zip(name, gene_list, indices))

    return anneal_list, x_dimension, y_dimension

def unzip_list(anneal_list):
    unzipped_list = zip(*anneal_list)
    return list(unzipped_list)

# define a list of colors for the hexagonal canvas
def get_color(anneal_list, cut_off_value, x_dimension, y_dimension):

    # Deal with cut_off_value (only color the most significant 10/20 hexagons)
    if cut_off_value == 2.0:
        sort_list = sorted(anneal_list, key=itemgetter(2), reverse=True)
        cut_off_value = sort_list[int(num_hex_colored)-1][2]

    r_value = 0
    g_value = 0
    b_value = 0

    if canvas_color == 'Red':
        r_value = 0.0
        g_value = 0.8
        b_value = 0.8
    if canvas_color == 'Yellow':
        r_value = 0.0
        g_value = 0.3
        b_value = 1.0
    if canvas_color == 'Purple':
        r_value = 0.5
        g_value = 1.0
        b_value = 0.0
    if canvas_color == 'Pink':
        r_value = 0.0
        g_value = 1.0
        b_value = 0.2
    if canvas_color == 'Orange':
        r_value = 0.0
        g_value = 0.45
        b_value = 1.0
    if canvas_color == 'Green':
        r_value = 1.0
        g_value = 0.0
        b_value = 1.0
    if canvas_color == 'Blue':
        r_value = 1.0
        g_value = 0.9
        b_value = 0.0

    color_list = []

    unzipped_anneal_list = unzip_list(anneal_list)

    max_index = max(unzipped_anneal_list[2])

    if max_index != 0:
        scaled_list = [i/max_index for i in unzipped_anneal_list[2]]
    else:
        scaled_list = unzipped_anneal_list[2]

    for i in range(x_dimension*y_dimension):
        if i < len(unzipped_anneal_list[2]) and float(unzipped_anneal_list[2][i]) >= cut_off_value:
            color_list += [mpl.colors.to_hex((1-scaled_list[i]*r_value, 
            1-scaled_list[i]*g_value, 1-scaled_list[i]*b_value))]
        elif i < len(unzipped_anneal_list[2]):
            color_list += [mpl.colors.to_hex((1-scaled_list[i], 
            1-scaled_list[i], 1-scaled_list[i]))]
        else:
            color_list += ["#FFFFFF"]
    return color_list, max_index, cut_off_value

def init_chart():
  chart_id = 'mychart-' + str(uuid.uuid4())
  display(HTML('<script src="/static/components/requirejs/require.js"></script>'))
  display(HTML(Template(dedent('''
  <script>
  require.config({
    paths: {
      'd3': 'https://cdnjs.cloudflare.com/ajax/libs/d3/5.16.0/d3.min',
      'd3-hexbin': 'https://d3js.org/d3-hexbin.v0.2.min',
    },
    shim: {
      'd3-hexbin': ['d3']
    }
  })

  // If we configure mychart via url, we can eliminate this define here
  define($chart_id, ['d3', 'd3-hexbin'], function(d3, d3_hexbin) {
    return function (figure_id, numA, numB, colorList, libraryList, indices) {
      var margin = {top: 50, right: 20, bottom: 20, left: 50},
        width = 850 - margin.left - margin.right,
        height = 350 - margin.top - margin.bottom;

      // append the svg object to the body of the page
      var svG = d3.select('#' + figure_id)
                  .attr("width", width + margin.left + margin.right)
                  .attr("height", height + margin.top + margin.bottom)
                  .append("g")
                  .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
      
      //The number of columns and rows of the heatmap
      var MapColumns = numA,
          MapRows = numB;

      //The maximum radius the hexagons can have to still fit the screen
      var hexRadius = d3.min([width/((MapColumns + 0.5) * Math.sqrt(3)), height/((MapRows + 1/3) * 1.5)]);

      //Calculate the center position of each hexagon
      var points = [];
      for (var i = 0; i < MapRows; i++) {
          for (var j = 0; j < MapColumns; j++) {
              var x = hexRadius * j * Math.sqrt(3)
              //Offset each uneven row by half of a "hex-width" to the right
              if(i%2 === 1) x += (hexRadius * Math.sqrt(3))/2
              var y = hexRadius * i * 1.5
              points.push([x,y])
          }
      }

      //Set the hexagon radius
      var hexbin = d3_hexbin.hexbin().radius(hexRadius);

      svG.append("g")
        .selectAll(".hexagon")
        .data(hexbin(points))
        .enter().append("path")
        .attr("class", "hexagon")
        .attr("d", function (d) {
            return "M" + d.x + "," + d.y + hexbin.hexagon();
        })
        .attr("stroke", "black")
        .attr("stroke-width", "1px")
        .style("fill", function (d,i) { return colorList[i]; })
        .on("mouseover", mover)
        .on("mouseout", mout)
        .append("svg:title")
        .text(function(d,i) { return libraryList[i].concat(" ").concat(indices[i]); });

      // Mouseover function
      function mover(d) {
      d3.select(this)
        .transition().duration(10)  
        .style("fill-opacity", 0.3)
      };

      // Mouseout function
      function mout(d) { 
      d3.select(this)
        .transition().duration(10)
        .style("fill-opacity", 1)
      };

  }

  })
  </script>
  ''')).substitute({ 'chart_id': repr(chart_id) })))
  return chart_id

def Canvas(numA, numB, colorList, libraryList, indices):
  chart_id = init_chart()
  display(HTML(Template(dedent('''
  <svg id=$figure_id></svg>
  <script>
  require([$chart_id], function(mychart) {
    mychart($figure_id, $numA, $numB, $colorList, $libraryList, $indices)
  })
  </script>
  ''')).substitute({
      'chart_id': repr(chart_id),
      'figure_id': repr('fig-' + str(uuid.uuid4())),
      'numA': repr(numA),
      'numB': repr(numB),
      'colorList': repr(colorList),
      'libraryList': repr(libraryList),
      'indices': repr(indices)
  })))


# Display Hexagonal Canvas
anneal_list, x_dimension, y_dimension = library_processing()
if x_dimension < 0:
    display(HTML("<div style='font-size:1rem;'>Unable to create hexagonal canvas visualization.</div>"))
else:
    color_list, scaling_factor, cut_off_value = get_color(anneal_list, 2.0, x_dimension, y_dimension)
    unzipped_anneal_list = unzip_list(anneal_list)
    if len(set(unzipped_anneal_list[2])) > 1:
        Canvas(x_dimension, y_dimension, color_list, list(unzipped_anneal_list[0]), list(unzipped_anneal_list[2]))
    else:
        display(HTML("<div style='font-size:1.5rem; padding:1rem 0;'><b>Unable to display plot because there were no results returned from Enrichr</b></div>"))
        display(HTML("<div style='font-size:1rem; padding:1rem 0;'>Please try again with a different Enrichr library, or a different query gene list."))


# Manhattan Plot Functions

# Processes Enrichr data for Manhattan plots
def get_data(genes):
    # Process Enrichr data
    sorted_data = pd.DataFrame({"Gene Set": [], "-log(p value)": [], "Library": []})

    # get enrichr results from the library selected
    results_df = Enrichr_API(genes, [enrichr_library])[0]

    all_terms = []
    all_pvalues = []
    library_names = []

    all_terms.append(list(results_df[1]))
    all_pvalues.append(list(results_df[2]))
    library_names.append(list(results_df['library']))

    x=np.log10(all_pvalues[0])*-1
    sorted_terms = list(zip(all_terms[0], x, library_names[0]))
    sorted_terms = sorted(sorted_terms, key = itemgetter(0))
    unzipped_sorted_list = list(zip(*sorted_terms))

    data = pd.DataFrame({"Gene Set": unzipped_sorted_list[0], "-log(p value)": unzipped_sorted_list[1], "Library": unzipped_sorted_list[2]})

    sorted_data = pd.concat([sorted_data, data])

    # group data by library
    groups = sorted_data.groupby("Library")
    return sorted_data, groups

# Create Manhattan Plots
def manhattan(sorted_data):
    # split data frame into smaller data frames by library
    list_of_df = []
    for library_name in [enrichr_library]:
        library_name = library_name.replace('_', '')
        df_new = sorted_data[sorted_data['Library'] == library_name]
        list_of_df += [df_new]

    list_of_xaxis_values = []
    for df in list_of_df:  
        list_of_xaxis_values += df["Gene Set"].values.tolist()

    # define the output figure and the features we want
    p = figure(x_range = list_of_xaxis_values, plot_height=300, plot_width=750, tools='pan, box_zoom, hover, reset, save')

    # loop over all libraries
    r = []
    color_index = 0
    for df in list_of_df:
        if color_index >= len(manhattan_colors):
            color_index = 0 

        # calculate actual p value from -log(p value)
        actual_pvalues = []
        for log_value in df["-log(p value)"].values.tolist():
            actual_pvalues += ["{:.5e}".format(10**(-1*log_value))]

        # define ColumnDataSource with our data for this library
        source = ColumnDataSource(data=dict(
            x = df["Gene Set"].values.tolist(),
            y = df["-log(p value)"].values.tolist(),
            pvalue = actual_pvalues,
        ))
    
        # plot data from this library
        r += [p.circle(x = 'x', y = 'y', size=5, fill_color=manhattan_colors[color_index], line_color = manhattan_colors[color_index], line_width=1, source = source)]
        color_index += 1

    p.background_fill_color = 'white'
    p.xaxis.major_tick_line_color = None 
    p.xaxis.major_label_text_font_size = '0pt'
    p.y_range.start = 0
    p.yaxis.axis_label = '-log(p value)'

    p.hover.tooltips = [
        ("Gene Set", "@x"),
        ("p value", "@pvalue"),
    ]
    p.output_backend = "svg"
    
    # returns the plot
    return p


# Display Manhattan Plot
try:
    sorted_data, groups = get_data(genes)
    show(manhattan(sorted_data))
except APIFailure:
    display(HTML("<div style='font-size:1.5rem; padding:1rem 0;'><b>Unable to display plot because of a bad response from the Enrichr API</b></div>"))
    display(HTML("<div style='font-size:1rem; padding:1rem 0;'>Please try again later."))
except NoResults:
    display(HTML("<div style='font-size:1.5rem; padding:1rem 0;'><b>Unable to display plot because there were no results returned from Enrichr</b></div>"))
    display(HTML("<div style='font-size:1rem; padding:1rem 0;'>Please try again with a different Enrichr library, or a different query gene list."))


def get_library(lib_name):
    '''
    Returns a dictionary mapping each term from the input library to 
    its associated geneset. 
    '''
    raw_lib_data = []

    with urllib.request.urlopen('https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=' + lib_name) as f:
        for line in f.readlines():
            raw_lib_data.append(line.decode("utf-8").split("\t\t"))

    name = []
    gene_list = []
    lib_data = {}

    for i in range(len(raw_lib_data)):
        name += [raw_lib_data[i][0]]
        raw_genes = raw_lib_data[i][1].replace('\t', ' ')
        gene_list += [raw_genes[:-1]]
    
    lib_data = {a[0]:a[1].split(' ') for a in zip(name, gene_list)}
    return lib_data


def volcano_plot(library_name, lib):
    '''
    Make volcano plot of odds ratio vs. significance for input library.
    '''
    enrich_results = enrich_crisp(genes, lib, 21000, True)

    res_df = pd.DataFrame(
        [ [
            term, 
            res.pvalue, 
            res.odds_ratio
        ] for (term, res) in enrich_results ], 
        columns=['term', 'pvalue', 'odds_ratio']
    )

    res_df['log_pval'] = np.negative(np.log10(res_df['pvalue']))

    cmap = mpl.cm.get_cmap('Blues_r')
    cnorm = mpl.colors.Normalize(vmin = res_df['pvalue'].min(), vmax = 0.1)

    my_colors = []
    my_sizes = []
    for row in res_df.itertuples():
        if row.pvalue < 0.05:
            my_colors += [mpl.colors.to_hex(cmap(cnorm(row.pvalue)))]
            my_sizes += [12]
        else:
            my_colors += ['#808080']
            my_sizes += [6]

    source = ColumnDataSource(
        data=dict(
            x = res_df['odds_ratio'],
            y = res_df['log_pval'],
            gene_set = res_df['term'],
            p_value = res_df['pvalue'],
            odds_r = res_df['odds_ratio'],
            colors = my_colors,
            sizes = my_sizes
        )
    )

    hover_emb = HoverTool(
        names=["res_df"], 
        tooltips="""
        <div style="margin: 10">
            <div style="margin: 0 auto; width:200px;">
                <span style="font-size: 12px; font-weight: bold;">Term:</span>
                <span style="font-size: 12px">@gene_set<br></span>
                <span style="font-size: 12px; font-weight: bold;">P-Value:</span>
                <span style="font-size: 12px">@p_value<br></span>
                <span style="font-size: 12px; font-weight: bold;">Odds Ratio:</span>
                <span style="font-size: 12px">@odds_r<br></span>
            </div>
        </div>
        """
    )

    tools_emb = [hover_emb, 'pan', 'wheel_zoom', 'reset', 'save']

    plot_emb = figure(
        plot_width = 700, 
        plot_height = 700,
        tools=tools_emb
    )

    plot_emb.circle(
        'x', 'y', size = 'sizes', 
        alpha = 0.7, line_alpha = 0, 
        line_width = 0.01, source = source, 
        fill_color = 'colors', name = "res_df"
    )

    plot_emb.xaxis.axis_label = "Odds Ratio"
    plot_emb.yaxis.axis_label = "-log10(p-value)"

    plot_emb.output_backend = "svg"
    
    return plot_emb


lib_data = get_library(enrichr_library)
if lib_data == {}:
    display(HTML("<div style='font-size:1.5rem; padding:1.5rem 0;'>Failed to access Enrichr library API, please try again later.</div>"))
else:
    try:
        plot = volcano_plot(enrichr_library, lib_data)
        show(plot)
    except NoResults:
        display(HTML("<div style='font-size:1.5rem; padding:1rem 0;'><b>Unable to display plot because there were no results returned from Enrichr</b></div>"))
        display(HTML("<div style='font-size:1rem; padding:1rem 0;'>Please try again with a different Enrichr library, or a different query gene list."))


# Output a table of significant p-values and q-values

def create_download_link(df, title = "Download CSV file of the full table of significant results", filename = "Enrichment_Analysis_Visualizer_data.csv"):  
    csv = df.to_csv(filename, index = False)
    html = f'<a href="{filename}" target=_blank>{title}</a>'
    return HTML(html)

if len(results) > 0:
    res_df = pd.DataFrame(results[0]).rename(columns={
        0: 'rank',
        1: 'term',
        2: 'p-value',
        3: 'zscore',
        4: 'combined_score',
        5: 'overlap_genes',
        6: 'q-value'
    })
    sorted_res_df = res_df.sort_values(by='p-value', ascending=True)
    filtered_res_df = sorted_res_df[sorted_res_df['p-value'] <= significance_value].reset_index()
    display(HTML(f"<strong>Table of top 10 significant p-values and q-values for {enrichr_library.replace('_', ' ')}</strong>"))
    display(HTML(filtered_res_df[['term', 'p-value', 'q-value', 'overlap_genes']][:10].to_html(index = False)))
    display(create_download_link(filtered_res_df[['term', 'p-value', 'q-value']]))
else:
    display(HTML("<div style='font-size:1.5rem; padding:1rem 0;'><b>Unable to display table</b></div>"))
    display(HTML("<div style='font-size:1rem; padding:1rem 0;'>Please try again later, or start over with a new input list and/or library."))


# Get complete enrichment analysis results from Enrichr 
if len(results) > 0:
    url = 'https://maayanlab.cloud/Enrichr/enrich?dataset=' + results[5]
    display(HTML(f"<div style='font-size:1.25rem; padding:1rem 0;'><a href='https://maayanlab.cloud/Enrichr/enrich?dataset={results[5]}'>Access the complete enrichment analysis results on the Enrichr website. </a></div>"))
else:
    display(HTML("<div style='font-size:1.5rem; padding:1rem 0;'><b>No Enrichr results available for the current query</b></div>"))
    display(HTML("<div style='font-size:1rem; padding:1rem 0;'>Please try again with a different input list and/or library."))

term	p-value	q-value	overlap_genes
GTEx Blood 20-29 vs 70-79 Up	1.865373e-10	4.010551e-08	[CRP, SERPINA3, IFNG, ALB, SAA4, SAA1, APOA1, SAA2, PLG, F2, APOB]
GTEx Spleen 20-29 vs 40-49 Up	7.780455e-07	2.787996e-05	[CRP, SERPINA3, SERPINE1, SAA4, SAA2, PLG, F2, ALOX15B]
GTEx Breast 20-29 vs 70-79 Up	7.780455e-07	2.787996e-05	[IL6, IFNG, SERPINE1, HBB, CCL2, HBA2, HBA1, PTGS2]
GTEx Blood 20-29 vs 60-69 Up	7.780455e-07	2.787996e-05	[IFNG, ALB, SAA4, SAA2, PLG, PTGS2, APOB, MPO]
GTEx Liver 20-29 vs 70-79 Down	7.780455e-07	2.787996e-05	[CRP, IL6, SERPINE1, SAA1, SAA2, SOD2, MPO, ICAM1]
GTEx Testis 20-29 vs 70-79 Up	7.780455e-07	2.787996e-05	[IL6, SERPINE1, HBB, CCL2, HBA2, HBA1, PTGS2, TNF]
GTEx SmallIntestine 20-29 vs 40-49 Up	9.628636e-06	2.300174e-04	[CRP, ALB, HBB, HBA2, PLG, HBA1, F2]
GTEx Blood 20-29 vs 50-59 Up	9.628636e-06	2.300174e-04	[CRP, IFNG, ALB, SAA4, SAA1, SAA2, PLG]
GTEx SmallIntestine 20-29 vs 60-69 Up	9.628636e-06	2.300174e-04	[CRP, APOA1, HBB, HBA2, PLG, HBA1, F2]
GTEx AdiposeTissue 20-29 vs 40-49 Down	1.022470e-04	8.793240e-04	[IL6, IL1B, CCL2, PTGS2, MPO, LDLR]

Enrichment Analysis Visualizer¶

Scatter Plot¶

Bar Chart¶

Hexagonal Canvas¶

Manhattan Plot¶

Volcano Plot¶

Table of significant p-values¶

Link to Enrichr¶