import io
import json
import uuid
import time
import logging
import requests
import numpy as np
import pandas as pd
import urllib.parse
import plotly.express as px
import scipy.stats as st
import traceback
from contextlib import contextmanager
from textwrap import dedent
from functools import partial, lru_cache
from IPython.display import HTML, Markdown, display

logger = logging.getLogger('')
formatter = logging.Formatter('%(message)s')
console = logging.StreamHandler()
console.setFormatter(formatter)
logger.addHandler(console)

from itables_javascript import show
import itables.options as opt
opt.maxBytes = 0
opt.showIndex = False

class RenderJSON(object):
    ''' https://gist.github.com/t27/48b3ac73a1479914f9fe9383e5d45325 '''
    def __init__(self, json_data):
        if isinstance(json_data, dict) or isinstance(json_data, list):
            self.json_str = json.dumps(json_data)
        else:
            self.json_str = json_data
        self.uuid = str(uuid.uuid4())
        
    def _ipython_display_(self):
        display(HTML('<div id="{}" style="height: auto; width:100%;"></div>'.format(self.uuid)))
        display(HTML("""<script>
        require(["https://rawgit.com/caldwell/renderjson/master/renderjson.js"], function() {
          renderjson.set_show_to_level(1)
          document.getElementById('%s').appendChild(renderjson(%s))
        });</script>
        """ % (self.uuid, self.json_str)))

def combine_pvalues(pvalues, method='fisher', select='pvalue'):
    ''' A helper for accessing this method via pd.agg which expects a scaler result
    '''
    statistic, pvalue = st.combine_pvalues(pvalues, method=method)
    return dict(statistic=statistic, pvalue=pvalue)[select]

def combine_zscores(zscores):
    ''' A helper method for combining zscores via Stouffer method
    '''
    return zscores.sum() / np.sqrt(zscores.shape[0])

def one(it):
    try:
        return next(iter(it))
    except:
        return None

@contextmanager
def ignore_exceptions(error_message=None):
    ''' Show but ignore exceptions that occur within the context
    Usage:
    with ignore_exceptions():
        my_fallible_operation()
    '''
    try:
        yield
    except Exception as e:
        if error_message:
            logger.error(error_message)
        else:
            traceback.print_exc()


gene_input = 'TPP1'


@lru_cache()
def mygene_info_query(geneId):
    res = requests.get('https://mygene.info/v3/query', params=dict(q=geneId))
    results = res.json()
    return results


gene_results = mygene_info_query(gene_input)
display(RenderJSON(gene_results))


if len(gene_results['hits']) == 0:
    raise Exception(f"Gene with identifier {gene_input} could not be found in MyGeneInfo!")
# find exact match
geneId = one(hit['_id'] for hit in gene_results['hits'] if hit['symbol'] == gene_input)
if not geneId:
    # Select the most likely gene (mygeneinfo sorts by likelyhood)
    _, geneId = max(
        (hit['_score'], hit['_id'])
        for hit in gene_results['hits']
        if hit.keys() & {'_score', 'symbol'}
    )
display(Markdown(f"**GeneID: {geneId}**"))


@lru_cache()
def mygene_info(geneId):
    req = requests.get(
        f"https://mygene.info/v3/gene/{geneId}"
    )
    return req.json()

gene_info = mygene_info(geneId)
display(RenderJSON(gene_info))
display(Markdown(f"### Gene Symbol: {gene_info['symbol']}"))


@lru_cache()
def gtex_singleTissueEqtl(geneSymbol, datasetId='gtex_v8'):
    res = requests.get(
        'https://gtexportal.org/rest/v1/association/singleTissueEqtl',
        params=dict(
            format='json',
            geneSymbol=geneSymbol,
            datasetId=datasetId,
        )
    )
    results = res.json()['singleTissueEqtl']
    if len(results) == 0: raise Exception(f"No information for gene with identifier {geneSymbol} found in GTEx")
    return pd.DataFrame(results)


with ignore_exceptions(f"Gene with identifier {gene_info['symbol']} currently not available in GTEx"):
    gtex_results = gtex_singleTissueEqtl(gene_info['symbol'])
    columns = list(gtex_results.columns)
    columns.insert(0, columns.pop(columns.index('nes')))
    columns.insert(0, columns.pop(columns.index('pValue')))
    columns.insert(0, columns.pop(columns.index('tissueSiteDetailId')))
    gtex_results = gtex_results[columns]
    show(gtex_results, order=[[gtex_results.columns.get_loc('pValue'), 'asc']])


with ignore_exceptions('Could not process GTEx output'):
    gtex_combined_stouffer_statistic = gtex_results.groupby('tissueSiteDetailId')['pValue'] \
        .agg(partial(combine_pvalues, method='stouffer', select='statistic')) \
        .to_frame('combined_stouffer_statistic') \
        .reset_index() \
        .sort_values('combined_stouffer_statistic', ascending=False)
    gtex_combined_stouffer_statistic['group'] = gtex_combined_stouffer_statistic['tissueSiteDetailId'].apply(lambda name: name.split('_', maxsplit=1)[0])

    fig = px.bar(
        gtex_combined_stouffer_statistic,
        y='combined_stouffer_statistic',
        x='tissueSiteDetailId',
        color='group',
        orientation='v',
        title=f"Tissues with significant expression of {gene_info['symbol']} in GTEx",
        height=1000,
    )
    fig.show()


import matplotlib.cm as cm
import matplotlib.colors as colors
from bokeh.io import output_notebook
from bokeh.plotting import figure, show as bokeh_show
from bokeh.models import ColumnDataSource

# display graphics
output_notebook()

root_path = 'https://appyters.maayanlab.cloud/storage/L1000_RNAseq_Gene_Search'

def map_color(cd, logfc, red_norm, blue_norm):
    v = cd*logfc

    if v < 0: 
        return '#D3D3D3'
    elif cd < 0:
        return colors.to_hex(cm.get_cmap('Reds')(red_norm(cd*logfc)))
    else:
        return colors.to_hex(cm.get_cmap('Blues')(blue_norm(cd*logfc)))

def make_plot(comb_df, gene, pert_type):
    # check if there are any results
    if comb_df.shape[0] == 0: 
        display(Markdown("### **There are no signatures in the pre-processed dataset for the chosen gene, cell line, and perturbation type inputs.**"))
        return

    # set color and size for each point on plot
    v = (comb_df['logFC']*comb_df['CD']).to_numpy()
    red_norm = colors.Normalize(vmin=min(v)-0.005, vmax=max(v)+0.005)
    blue_norm = colors.Normalize(vmin=min(v)-0.005, vmax=max(v)+0.005)

    plot_colors = [map_color(row.CD, row.logFC, red_norm, blue_norm) for row in comb_df.itertuples()]

    # generate data source
    data_source = ColumnDataSource(
        data=dict(
            x = comb_df['logFC'],
            y = comb_df['CD'].apply(abs),
            cd = comb_df['CD'],
            sig = pd.Series(comb_df.index),
            fc = comb_df['FC'], 
            logfc = comb_df['logFC'],
            colors = plot_colors, 
            sizes = [8] * comb_df.shape[0],
        )
    )

    # create hover tooltip
    tools = [
        ("Signature", "@sig"),
        ("CD Coeff", "@cd"),
        ("Fold Change", "@fc"),
        ("Log2 Fold Change", "@logfc")
    ]
    # generate plot and relevant plot labels
    plot = figure(
        plot_width=700,
        plot_height=500,
        tooltips=tools
    )
    
    plot.circle(
        'x', 'y', 
        size='sizes',
        alpha=0.7, 
        line_alpha=1,
        line_width=1, 
        line_color='colors',
        source=data_source,
        fill_color='colors', 
        name=f"{gene}_expression_in_L1000_to_RNAseq_{pert_type.replace(' ','')}_volcano_plot"
    )

    plot.yaxis.axis_label = 'Abs(CD-Coefficient)'
    plot.xaxis.axis_label = 'log2(Fold Change)'
    plot.title.text = f"Differential Expression of {gene} in RNA-seq-like {pert_type} Signatures"
    plot.title.align = 'center'
    plot.title.text_font_size = '14px'
    bokeh_show(plot)

def make_tables(comb_df, pert, is_upreg):
    dir_df = comb_df[comb_df['FC'] > 1] if is_upreg else comb_df[comb_df['FC'] < 1]
    if dir_df.shape[0] == 0: 
        display(Markdown(f"### There are no {'up-regulated' if is_upreg else 'down-regulated'} signatures for the chosen gene and cell line inputs."))
        return dir_df
    dir_df = dir_df.sort_values(by='FC', ascending=not is_upreg)
    dir_df['FC'] = dir_df['FC'].apply(lambda x: f'{x:.4f}')
    dir_df['CD'] = dir_df['CD'].apply(lambda x: f'{x:.4f}')
    if pert == 'xpr':
        dir_df['KO Gene'] = dir_df.index.map(lambda x: x.split('_')[4])
    else:
        dir_df['Perturbagen'] = dir_df.index.map(lambda x: x.split('_')[4])
        dir_df['Dose'] = dir_df.index.map(lambda x: x.split('_')[5] if len(x.split('_')) == 6 else '')
    dir_df['Cell Line'] = dir_df.index.map(lambda x: x.split('_')[1])
    dir_df['Timepoint'] = dir_df.index.map(lambda x: x.split('_')[2].lower())
    dir_df = dir_df.rename(columns={
            'FC': 'Fold Change', 
            'logFC': 'Log2(Fold Change)', 
            'CD': 'CD Coefficient',
            'Rank': 'Rank in Signature'})
    dir_df.index.names = ['Signature']
    return dir_df

def download_link(df, fname):
    if df.shape[0] == 0: return ''
    csv = df.to_csv(fname, sep='\t', index=True)
    link = f'<div>Download full results: <a href="{fname}" target=_blank>{fname}</a></div>'
    return link

def enrichr(pert, top_perts, direction, gene):
    if pert == 'CRISPR':
        desc = f"Top CRISPR targets from RNA-seq-like signatures that {direction}-regulate {gene}"
        list_url = 'https://maayanlab.cloud/Enrichr/addList'
        enrich_url = 'https://maayanlab.cloud/Enrichr/enrich'
    else:
        desc = f"Top compounds from RNA-seq-like signatures that {direction}-regulate {gene}"
        list_url = 'https://maayanlab.cloud/DrugEnrichr/addList'
        enrich_url = 'https://maayanlab.cloud/DrugEnrichr/enrich'
    payload = {
        'list': (None, '\n'.join(top_perts)),
        'description': (None, desc)
    }
    response = requests.post(list_url, files=payload)
    if not response.ok:
        raise Exception('Error analyzing gene list')
    time.sleep(0.5)
    return f"{enrich_url}?dataset={response.json()['shortId']}"

def enrichr_link(pert, df, direction, gene): 
    # check if there are any results
    if df.shape[0] < 5: 
        return(f"There are not enough {direction}-regulated signatures to submit to {'Enrichr' if pert == 'CRISPR' else 'DrugEnrichr'}.")
    comb_df = df.copy()
    comb_df['pert'] = comb_df.index.map(lambda x: x.split('_')[4])
    if direction == 'up':
        top_perts = comb_df.sort_values(by='FC', ascending=False) \
            .drop_duplicates(subset=['pert'],keep='first')['pert'][:20]
    else:
        top_perts = comb_df.sort_values(by='FC', ascending=True) \
            .drop_duplicates(subset=['pert'],keep='first')['pert'][:20]
    pert_type = 'CRISPR target genes' if pert == 'CRISPR' else 'chemical compounds'
    results_url = enrichr(pert, top_perts, direction, gene)
    return f'<a href={results_url} target="_blank">Enrichr analysis of top 20 {pert_type} that {direction}-regulate {gene}</a>'

def l1000_rnaseq_directional_gene_search(gene=None, gene_data=None, pert=None, pert_full_name=None, pert_name=None, is_upreg=None):
    direction = 'up' if is_upreg else 'down'
    display(HTML(f'<div style="font-size:1rem;padding=1rem;"><b>Top {pert_full_name} signatures where {gene} is {direction}-regulated (based on fold change)</b></div>'))
    tbl = make_tables(gene_data, pert='cp', is_upreg=is_upreg)
    display(HTML(tbl[:10].to_html(escape=False, col_space=70)))
    display(HTML(download_link(tbl[:100], f"{gene}_{direction.capitalize()}Reg_L1000_{pert_name}_signatures.tsv")))
    display(HTML(enrichr_link(pert_name, gene_data, direction, gene)))

def l1000_rnaseq_gene_search(gene=None, gene_data=None, pert=None, pert_full_name=None, pert_name=None):
    make_plot(gene_data, gene_info['symbol'], pert_name)
    l1000_rnaseq_directional_gene_search(
        gene=gene, gene_data=gene_data, pert=pert, pert_full_name=pert_full_name, pert_name=pert_name, is_upreg=True,
    )
    l1000_rnaseq_directional_gene_search(
        gene=gene, gene_data=gene_data, pert=pert, pert_full_name=pert_full_name, pert_name=pert_name, is_upreg=False,
    )


with ignore_exceptions(f"No information for gene with identifier {gene_info['symbol']} found in L1000"):
  l1000_rnaseq_gene_search(
    gene=gene_info['symbol'],
    gene_data=pd.read_feather(f"{root_path}/gene_files/{gene_info['symbol']}.f").set_index('index'),
    pert='xpr',
    pert_name='CRISPR',
    pert_full_name='CRISPR KO',
  )
  l1000_rnaseq_gene_search(
    gene=gene_info['symbol'],
    gene_data=pd.read_feather(f"{root_path}/cp_gene_files/{gene_info['symbol']}.f").set_index('index'),
    pert='cp',
    pert_name='chemical',
    pert_full_name='Chemical Perturbation',
  )


@lru_cache()
def impc_phenotype(marker_gene, rows=20):
    res = requests.get(
        f"https://www.ebi.ac.uk/mi/impc/solr/genotype-phenotype/select",
        params=dict(
            q=f"marker_symbol:{marker_gene}",
            rows=rows,
        ),
    )
    response = res.json()['response']
    if response['numFound'] == 0: raise Exception('No phenotypes found')
    return pd.DataFrame(response['docs']).sort_values('p_value', ascending=True)


with ignore_exceptions(f"No information for gene with identifier {gene_info['symbol']} found in IPMC"):
    impc_results = impc_phenotype(gene_info['symbol'].capitalize())
    show(impc_results[[
        'marker_accession_id',
        'mp_term_id',
        'mp_term_name',
        'assertion_type',
        'p_value',
        'phenotyping_center',
        'percentage_change',
        'statistical_method',
    ]])


with ignore_exceptions('IPMC Results could not be processed'):
    impc_combined_stouffer_statistic = impc_results.groupby('mp_term_name')['p_value'] \
        .agg([
            ('combined_stouffer_statistic', partial(combine_pvalues, method='stouffer', select='pvalue')),
        ]) \
        .reset_index() \
        .sort_values('combined_stouffer_statistic', ascending=False)
    impc_combined_stouffer_statistic['-logp(combined_stouffer_statistic)'] = -np.log10(impc_combined_stouffer_statistic['combined_stouffer_statistic'])

    fig = px.bar(
        impc_combined_stouffer_statistic,
        x='-logp(combined_stouffer_statistic)',
        y='mp_term_name',
        text='mp_term_name',
        orientation='h',
        title=f"Phenotype known to be associated with {gene_info['symbol']} from IMPC",
    )
    fig.update_yaxes(showticklabels=False)
    fig.update_traces(texttemplate='%{text}', textposition='auto', insidetextanchor='start')
    fig.show()


@lru_cache()
def glygen_geneNameSearch(recommended_gene_name, organism_taxon_id=9606):
    res = requests.get(
        'https://api.glygen.org/directsearch/gene/',
        params=dict(
            query=json.dumps(dict(
                recommended_gene_name=recommended_gene_name,
                organism=dict(
                    id=organism_taxon_id
                ),
            )),
        ),
        verify=False, # not sure why on my system I get SSL errors
    )
    return res.json()


with ignore_exceptions(f"No information for gene with identifier {gene_info['symbol']} found in GlyGen"):
    glygen_geneInfo = glygen_geneNameSearch(gene_info['symbol'])
    display(RenderJSON(glygen_geneInfo))
    d = pd.DataFrame(glygen_geneInfo['results'][0]['glycosylation'])
    d['evidence'] = d['evidence'].apply(
        lambda evidence: ' '.join(f"<a href='{e['url']}'>{e['url']}></a>" for e in evidence if 'url' in e)
    )
    show(d, order=[[d.columns.get_loc('residue'), 'asc']])
    display(d)


@lru_cache()
def ldh_gene_lookup(id):
    req = requests.get(
        f"https://genboree.org/cfde-gene/Gene/id/{urllib.parse.quote(id)}",
        headers={
            'Accept': 'application/json',
        },
    )
    assert req.status_code == 200, f"Gene with identifier {id} currently not available in exRNA LDH"
    results = req.json()
    return results


with ignore_exceptions(f"Gene with identifier {gene_info['symbol']} not available in exRNA LDH"):
    ldh_results = ldh_gene_lookup(gene_info['symbol'])
    display(Markdown(f"[View On LDH](https://genboree.org/cfde-gene/ui/id/{urllib.parse.quote(gene_info['symbol'])})"))
    display(RenderJSON(ldh_results))


with ignore_exceptions(f"No information for gene with identifier {gene_info['symbol']} found in HuBMAP ASCT+B"):
  req = requests.get('https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=HuBMAP_ASCT_plus_B_augmented_w_RNAseq_Coexpression')
  HuBMAP_ASCT_plus_B_gmt = pd.DataFrame({
    term: {
      gene: True
      for gene in geneset
      if gene
    }
    for line in req.text.splitlines()
    for (term, _, *geneset) in (line.strip().split("\t"),)
  })
  show(HuBMAP_ASCT_plus_B_gmt.loc[gene_info['symbol']].dropna().reset_index().rename({ 'index': 'Term' }, axis=1))


display(Markdown(f"""
<div style="display: flex; align-items: flex-start">
    <img class="img-fluid" style="width: 250px" src="/CFDE-Gene-Partnership/static/logos/Metabolomics.png" />
    <div>
        <p><a href="https://metabolomicsworkbench.org/">https://metabolomicsworkbench.org/</a></p>
        <p>The National Institutes of Health (NIH) Common Fund Metabolomics Program was developed with the goal of increasing national capacity in metabolomics by supporting the development of next generation technologies, providing training and mentoring opportunities, increasing the inventory and availability of high quality reference standards, and promoting data sharing and collaboration.</p>
        <p><a href="https://sc-cfdewebdev.sdsc.edu/MetGENE/index.php?GeneID={geneId}&species=hsa&GeneIDType=ENTREZID">MetGENE</a> identifies the pathways and reactions catalyzed by the given gene {gene_info['symbol']}, its related metabolites and the studies in Metabolomics Workbench with data on such metabolites.</a></p>
    </div>
</div>
"""))


with ignore_exceptions(f"No information for gene with identifier {gene_info['symbol']} found in MetGene"):
    metgene_table = requests.get(f"https://sc-cfdewebdev.sdsc.edu/MetGENE/mgSummary.php?species=hsa&GeneSym={gene_info['symbol']}&GeneID={geneId}&viewType=json").json()
    display(HTML(f"""
    <div style="display: flex; align-items: flex-start">
        <table>
            <thead>
                <tr>
                <th>&nbsp;</th>
                <th><a href="https://sc-cfdewebdev.sdsc.edu/MetGENE/pathways.php?species=hsa&GeneIDType=ENTREZID&anatomy=NA&disease=NA&phenotype=NA&GeneInfoStr={geneId}">Pathways</a></th>
                <th><a href="https://sc-cfdewebdev.sdsc.edu/MetGENE/reactions.php?species=hsa&GeneIDType=ENTREZID&anatomy=NA&disease=NA&phenotype=NA&GeneInfoStr={geneId}">Reactions</a></th>
                <th><a href="https://sc-cfdewebdev.sdsc.edu/MetGENE/metabolites.php?species=hsa&GeneIDType=ENTREZID&anatomy=NA&disease=NA&phenotype=NA&GeneInfoStr={geneId}">Metabolites</a></th>
                <th><a href="https://sc-cfdewebdev.sdsc.edu/MetGENE/studies.php?species=hsa&GeneIDType=ENTREZID&anatomy=NA&disease=NA&phenotype=NA&GeneInfoStr={geneId}">Studies</a></th>
                </tr>
            </thead>
            <tbody>{''.join(f"<tr><td>{r['Genes']}</td><td>{r['Pathways']}</td><td>{r['Reactions']}</td><td>{r['Metabolites']}</td><td>{r['Studies']}</td></tr>" for r in metgene_table)}</tbody>
        </table>
    <div style="display: flex; flex-wrap: wrap;">
        <img
            style="width: 100%;"
            src="https://sc-cfdewebdev.sdsc.edu/MetGENE/mgSummary.php?species=hsa&GeneSym={gene_info['symbol']}&GeneID={geneId}&viewType=png"
            alt="MetGene Summary"
        />
    </div>
    </div>
    """))


@lru_cache()
def idg_pharos_geneDiseaseAssociations(associatedTarget):
    req = requests.post(
        'https://pharos-api.ncats.io/graphql',
        headers={
            'Content-Type': 'application/json',
            'Accept': 'application/json',
        },
        json={
            'query': dedent(f'''
            query associatedDiseases {{
                diseases(filter: {{associatedTarget: {json.dumps(associatedTarget)} }}) {{
                    diseases {{
                        associations {{
                            name
                            evidence
                            pvalue
                            zscore
                        }}
                    }}
                }}
            }}
            ''')
        },
    )
    results = req.json()['data']
    return results


with ignore_exceptions(f"No information for gene with identifier {gene_info['symbol']} found in Pharos"):
    idg_pharos_api_results = idg_pharos_geneDiseaseAssociations(gene_info['symbol'])

    idg_pharos_results = pd.DataFrame(
        association
        for disease in idg_pharos_api_results['diseases']['diseases']
        for association in disease['associations']
    )

    def compute_score(r):
        if pd.notnull(r['pvalue']):
            return r['pvalue']
        elif pd.notnull(r['zscore']):
            return 1-st.distributions.norm.cdf(r['zscore'])
        else:
            # we give na's 0.5 pvalue to show up in the plot albiet, insignificantly
            return 0.5

    # convert zscores to pvalues if present instead of pvalue
    idg_pharos_results['pvalue'] = idg_pharos_results.apply(compute_score, axis=1)
    show(idg_pharos_results, order=[[idg_pharos_results.columns.get_loc('pvalue'), 'asc']])


with ignore_exceptions('Pharos results could not be processed'):
    idg_pharos_combined_stouffer_statistic = idg_pharos_results.groupby('name')['pvalue'] \
        .agg([
            ('combined_stouffer_statistic', partial(combine_pvalues, method='stouffer', select='pvalue')),
        ]) \
        .reset_index() \
        .sort_values('combined_stouffer_statistic', ascending=False)
    idg_pharos_combined_stouffer_statistic['-logp(combined_stouffer_statistic)'] = -np.log10(idg_pharos_combined_stouffer_statistic['combined_stouffer_statistic'])

    fig = px.bar(
        idg_pharos_combined_stouffer_statistic,
        x='-logp(combined_stouffer_statistic)',
        y='name',
        text='name',
        orientation='h',
        title=f"Disease known to be associated with {gene_info['symbol']} from IDG's Pharos",
    )
    fig.update_yaxes(showticklabels=False)
    fig.update_traces(texttemplate='%{text}', textposition='auto', insidetextanchor='start')
    fig.show()


@lru_cache()
def idg_harmonizome_geneInfo(gene, showAssociations=True, version='1.0'):
    res = requests.get(
        f"https://maayanlab.cloud/Harmonizome/api/{urllib.parse.quote(version)}/gene/{urllib.parse.quote(gene_info['symbol'])}",
        params=dict(
            showAssociations=json.dumps(showAssociations),
        ),
    )
    return res.json()


with ignore_exceptions(f"No information for gene with identifier {gene_info['symbol']} found in Harmonizome"):
    idg_harmonizome_api_geneInfo = idg_harmonizome_geneInfo(gene_info['symbol'])
    display(RenderJSON(idg_harmonizome_api_geneInfo))
    idg_harmonizome_geneAssociations = pd.DataFrame([
        dict(
            **geneAssociation['geneSet'],
            **geneAssociation,
        )
        for geneAssociation in idg_harmonizome_api_geneInfo['associations']
    ]).drop(['geneSet', 'thresholdValue', 'href'], axis=1).dropna()
    show(
        idg_harmonizome_geneAssociations,
        order=[[idg_harmonizome_geneAssociations.columns.get_loc('standardizedValue'), 'desc']]
    )


with ignore_exceptions('Harmonizome results could not be procesed'):
    idg_harmonizome_geneAssociations['direction'] = idg_harmonizome_geneAssociations['standardizedValue'].apply(
        lambda v: 'up' if v > 0 else 'down'
    )
    idg_harmonizome_geneAssociations['absoluteZscore'] = np.abs(idg_harmonizome_geneAssociations['standardizedValue'])
    idg_harmonizome_geneAssociations = idg_harmonizome_geneAssociations.sort_values('absoluteZscore')
    idg_harmonizome_geneAssociations_top_10_up = idg_harmonizome_geneAssociations[idg_harmonizome_geneAssociations['direction'] == 'up'].iloc[-10:]
    idg_harmonizome_geneAssociations_top_10_down = idg_harmonizome_geneAssociations[idg_harmonizome_geneAssociations['direction'] == 'down'].iloc[-10:]

    fig = px.bar(
        pd.concat([
            idg_harmonizome_geneAssociations_top_10_up,
            idg_harmonizome_geneAssociations_top_10_down,
        ], axis=0),
        x='absoluteZscore',
        y='name',
        orientation='h',
        barmode='group',
        color='direction',
        facet_row='direction',
        text='name',
        title=f"Significant associations with {gene_info['symbol']} in IDG's Harmonizome",
        height=1000,
        width=1000,
    )
    fig.update_yaxes(matches=None, showticklabels=False)
    fig.update_traces(texttemplate='%{text}', textposition='inside', insidetextanchor='start')
    fig.show()


@lru_cache()
def archs4_tissue_expression(search, species='human'):
    res = requests.get(
        f"https://maayanlab.cloud/archs4/search/loadExpressionTissue.php",
        params=dict(
            search=search,
            species='human',
            type='tissue',
        ),
    )
    df = pd.read_csv(io.StringIO(res.text)).dropna()
    df.index = pd.MultiIndex.from_tuples(
        df['id'].apply(lambda id: id.split('.')),
        names=['', 'system', 'organ', 'tissue'],
    )
    return df.reset_index().drop(['id', ''], axis=1)


with ignore_exceptions(f"No information for gene with identifier {gene_info['symbol']} found in ARCHS4"):
    archs4_tissue_results = archs4_tissue_expression(gene_info['symbol'])
    show(archs4_tissue_results)


@lru_cache()
def uniprot_genecentric(gene, offset=0, size=100):
    req = requests.get(
        f"https://www.ebi.ac.uk/proteins/api/genecentric",
        params=dict(
            gene=gene,
            offset=offset,
            size=size,
        )
    )
    return req.json()


with ignore_exceptions(f"No information for gene with identifier {gene_info['symbol']} found in UniProt"):
    uniprot_geneinfo = uniprot_genecentric(gene_info['symbol'])
    show(pd.DataFrame([
        dict(related_record, subject=record['gene']['accession'])
        for record in uniprot_geneinfo
        for related_record in [record['gene'], *record.get('relatedGene',[])]
    ]))

CFDE Gene-Centric Appyter: TPP1¶

MyGeneInfo: Query¶

MyGeneInfo¶

Primary Information¶

GTEx¶

LINCS¶

L1000 RNAseq Gene Centric Signature Reverse Search (RGCSRS)¶

International Mouse Phenotyping Consortium (IMPC)¶

GlyGen¶

exRNA¶

HuBMAP¶

Metabolomics¶

Secondary Information¶

IDG¶

Pharos¶

Harmonizome¶

ARCHS4¶

UnitProt¶