from collections import Counter

import pandas as pd
import numpy as np

# Display / graphing
from IPython.display import display, HTML
import plotly.express as px
import plotly.graph_objects as go
import sklearn.metrics
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

# API access
import requests
import io
import time
import json


# Notebook display util functions
def make_clickable(link):
    return f'<a target="_blank" href="{link}">{link}</a>'

table_number = 0
figure_number = 0
def figure_header(label,title):
    global table_number
    global figure_number
    if label == 'Table':
        table_number += 1
        label = f'Table {table_number}'
    elif label == 'Figure':
        figure_number += 1
        label = f'Figure {figure_number}'
    display(HTML(f"<div style='font-size:1.25rem; padding:1rem 0;'><b>{label}</b>: {title}</div>"))
    
def figure_legend(label,title,content=''):
    global table_number
    global figure_number
    if label == 'Table':
        label = f'Table {table_number}'
    elif label == 'Figure':
        label = f'Figure {figure_number}'
    display(HTML(f'<style>div.caption {{text-align: center;}}</style><div class=caption><b>{label}</b>: <i>{title}</i>. {content} </div>'))
    
def drugenrichr_link(short_id):
    display(HTML(f'<span><a href="https://maayanlab.cloud/DrugEnrichr/enrich?dataset={short_id}">Access the complete enrichment analysis on the DrugEnrichr website. </a></span>'))


def DrugEnrichr_API(drug_list, description):
    # takes a drug list and provides a shortID to view enrichment analysis results on DrugEnrichr website
    global short_id
    DRUG_ENRICHR_URL = 'http://amp.pharm.mssm.edu/DrugEnrichr/addList'
    drugs_str = '\n'.join(drug_list)
    payload = {
        'list': (None, drugs_str),
        'description': (None, description)
    }

    response = requests.post(DRUG_ENRICHR_URL, files=payload)
    if not response.ok:
        raise Exception('Error analyzing drug list')

    data = json.loads(response.text)
    short_id = data['shortId']
    user_list_id = data['userListId']

    #sends generated userListId to DrugEnrichr enrichment API endpoint
    results = {}
    DRUG_ENRICHR_URL = 'http://amp.pharm.mssm.edu/DrugEnrichr/enrich'
    query_string = '?userListId=%s&backgroundType=%s'
    for drug_set_library in ['L1000FWD_GO_Biological_Processes_Up', 'L1000FWD_GO_Biological_Processes_Down',
                   'SIDER_Side_Effects']:
        response = requests.get(
            DRUG_ENRICHR_URL + query_string % (user_list_id, drug_set_library)
         )
        if not response.ok:
            raise Exception('Error fetching enrichment results')
            
        data = response.json()[drug_set_library]
        try:
            short_results_df  = pd.DataFrame(data).sort_values(by = [2])
            results[drug_set_library] = {
                'terms': short_results_df[1].head(10).tolist(),
                'p-values': short_results_df[2].head(10).tolist(),
                'adjusted p-values': short_results_df[6].head(10).tolist()
            }
        except KeyError:
            pass
        
    return results
            
        
# Create bar plot of top 10 enriched terms in L1000FWD & SIDER libraries
def bar_plot(results): 
    # Bar colors
    bar_color = 'pink'
    bar_color_not_sig = 'lightgrey'
    edgecolor=None
    linewidth=0    

    for library, values in results.items():
        plt.figure(figsize=(24, 12))
        bar_colors = [bar_color if (x < 0.05) else bar_color_not_sig for x in values['p-values']]
        fig = sns.barplot(x=np.log10(values['p-values'])*-1,
                          y= values['terms'],
                          palette=bar_colors,
                          edgecolor=edgecolor,
                          linewidth=linewidth)

        fig.axes.get_yaxis().set_visible(False)
        fig.set_title(library.replace('_', ' '), fontsize=26)
        fig.set_xlabel('−log₁₀(p‐value)', fontsize=25)
        fig.xaxis.set_major_locator(MaxNLocator(integer=True))
        fig.tick_params(axis='x', which='major', labelsize=20)
        if max(np.log10(results[library]['p-values'])*-1)<1:
            fig.xaxis.set_ticks(np.arange(0, max(np.log10(results[library]['p-values'])*-1), 0.1))
        for index,term in enumerate(results[library]['terms']):
            if results[library]['adjusted p-values'][index] < 0.05:
                term = '  *'.join([term,
                                    str(str(np.format_float_scientific(results[library]['p-values'][index],
                                    precision=2)))]) 
            else:
                term = '  '.join([term,
                                   str(str(np.format_float_scientific(results[library]['p-values'][index],
                                   precision=2)))])

            title_start= max(fig.axes.get_xlim())/200
            fig.text(title_start, index, term, ha='left', wrap = True, fontsize = 26)

        fig.spines['right'].set_visible(False)
        fig.spines['top'].set_visible(False)
        plt.show()


matrix_files = {'L1000 Signature Similarity': 'https://appyters.maayanlab.cloud/storage/DrugShot/L1000_similarity_matrix.npz',
                'AutoRIF Literature Co-mentions': 'https://appyters.maayanlab.cloud/storage/DrugShot/AutoRIF_cooccurence_matrix.tsv.gz',
                'DrugRIF Literature Co-mentions' : 'https://appyters.maayanlab.cloud/storage/DrugShot/DrugRIF_cooccurence_matrix.tsv.gz'
                }


RIF = pd.read_csv("https://appyters.maayanlab.cloud/storage/DrugShot/AutoRIF.tsv.gz",
                      sep = '\t',
                      usecols = ['name','PMID']).set_index('name')


i = 0
results = []
pubmed_url = ("https://eutils.ncbi.nlm.nih.gov/entrez"+
             "/eutils/esearch.fcgi"+
             "?db=pubmed&term={}"+
             "&retmax=100000"+
             "&retstart={}"+
             "&retmode=json")
res = requests.get(pubmed_url.format('positive regulation of lymphocyte proliferation', i)).json()
if int(res['esearchresult']['count']) > 0:
    while i <= int(res['esearchresult']['count']):
        results.extend(requests.get(pubmed_url.format('positive regulation of lymphocyte proliferation',i)).json()['esearchresult']['idlist'])
        i += len(res['esearchresult']['idlist'])
        time.sleep(0.2)
results = list(map(int, results))


# Retrieve associated drugs based on drug:search term co-mentions in the literature
df_associated = pd.DataFrame(RIF[RIF['PMID'].isin(results) == True].index.value_counts())\
.rename(columns = {'name':'Publications with Search Term'})
# Get counts of each drug in the association file
drug_counts = Counter(RIF[RIF.index.isin(df_associated.index)].index)
# Calculate fraction of publications associated with drug+term / drug only
normalized_fraction = [(df_associated.loc[index][0]/drug_counts[index]) for index in df_associated.index]
df_associated['Publications with Search Term / Total Publications'] = normalized_fraction
# Calculate (Publications with Search Term * Normalized Fraction) used as metric to rank unweighted drug set",
df_associated['Publications with Search Term * Normalized Fraction'] = df_associated['Publications with Search Term'] * df_associated['Publications with Search Term / Total Publications']
del(RIF)


associated_table = df_associated.sort_values(by = ['Publications with Search Term'], ascending = False)
associated_table.to_csv('positive regulation of lymphocyte proliferation'.replace(' ','_')+'_associated_drug_table.csv')
figure_header('Table', 'Top Associated Compounds<br>({})</br>'.format(make_clickable('positive regulation of lymphocyte proliferation'.replace(' ','_')+'_associated_drug_table.csv')))
display(associated_table[associated_table.columns[0:2]].head(20))
figure_legend('Table', 'Top 20 Drugs associated with '+'positive regulation of lymphocyte proliferation')
del(df_associated)


fig = px.scatter(associated_table.reset_index().rename(columns = {'index':'chemical'}),
                 x = 'Publications with Search Term',
                 y = 'Publications with Search Term / Total Publications',
                 hover_name = 'chemical',
                 title='positive regulation of lymphocyte proliferation')
fig.show()


associated_compounds = associated_table.sort_values(by = 'Publications with Search Term * Normalized Fraction',
                                                    kind = 'stable',
                                                    ascending = False)[0:50].index.str.upper().tolist()


# Load correlation matrix into pandas DataFrame
response = requests.get(matrix_files["L1000 Signature Similarity"])
coexpression_matrix = np.load(io.BytesIO(response.content), allow_pickle = True)
mat = pd.DataFrame(data = coexpression_matrix['correlations'], columns = coexpression_matrix['index'],
                               index = coexpression_matrix['index'])
del(coexpression_matrix)


# Calculate average similarity for each drug in the prediction matrix with the associated drug set
mat = mat.loc[mat.index.str.upper().isin(associated_compounds)]
print(f'{len(mat.index)} small molecules from the associated drug set matched in the similarity matrix')
mat.loc['Similarity_Score'] = mat[mat.columns].mean()
mat.sort_values(by = ['Similarity_Score'], axis = 1, ascending = False, inplace = True)

28 small molecules from the associated drug set matched in the similarity matrix


# Calculating FPR and TPR for literature co-mentions predictions
y_score = mat.loc['Similarity_Score'].sort_values(ascending = False)
true_indices = sorted([y_score.\
                       index.str.upper().\
                       get_loc(x) for x in associated_compounds \
                       if x in mat.index.str.upper()])
y_true = np.zeros(len(mat.columns))
np.put(y_true, true_indices, 1)
fpr, tpr, thresholds = sklearn.metrics.roc_curve(y_true, y_score)
auc_score = sklearn.metrics.roc_auc_score(y_true, y_score)
# Plot dataframe
data = pd.DataFrame(data=[y_score.index,y_true,fpr,tpr],
                   index = ['name','label','fpr','tpr']).T
# ROC Curve
fig = go.Figure()
name = f"(AUC={auc_score:.4f})"
fig.add_trace(go.Scatter(x=data['fpr'],
                         y=data['tpr'],
                         text = data['name'],
                         showlegend=True,
                         name=name,
                         mode='lines'))
fig.add_shape(
    type='line',
    line=dict(dash='dash'),
    x0=0,
    x1=1,
    y0=0,
    y1=1)
fig.layout.update(
    title = f'ROC Curve for Unweighted Drug Set Rankings in L1000 Signature Similarity Matrix',
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
    yaxis=dict(scaleanchor="x", scaleratio=0.65),
    xaxis=dict(constrain='domain'),
    width=800, height=500
)
fig.show()


predicted_table = pd.DataFrame(mat.loc['Similarity_Score'])
predicted_table = predicted_table[~predicted_table.index.str.upper().isin(associated_compounds)]
filename = 'positive regulation of lymphocyte proliferation'.replace(' ','_')+"_L1000 Signature Similarity".replace(' ','_')+'_predicted_drug_table.csv'
predicted_table.to_csv(filename)
predicted_table.to_csv(filename)
figure_header('Table', 'Top Predicted Compounds From L1000 Signature Similarity<br>({})</br>'.format(make_clickable(filename)))
display(predicted_table.head(20))
figure_legend('Table', 'Top 20 drugs predicted to be associated with {0} based on {1}'.format('positive regulation of lymphocyte proliferation','L1000 Signature Similarity'))


drug_list = predicted_table.index[0:50].tolist()
results = DrugEnrichr_API(drug_list,'%s (DrugShot L1000 Signature Similarity)'% 'positive regulation of lymphocyte proliferation')
bar_plot(results)


drugenrichr_link(short_id)

	Publications with Search Term	Publications with Search Term / Total Publications
ionomycin	26	0.005950
cyclosporin-a	26	0.000872
dinoprostone	26	0.000908
sirolimus	25	0.001174
dexamethasone	21	0.000395
retinol	15	0.000330
tretinoin	14	0.000616
cyclophosphamide	11	0.000200
resveratrol	10	0.001055
imatinib	9	0.000841
tacrolimus	9	0.000540
cholecalciferol	9	0.000327
ethanolamine	7	0.000033
calcitriol	7	0.000484
broxuridine	7	0.000539
azacitidine	7	0.000956
succinimide	6	0.001067
dronabinol	6	0.000801
mycophenolic acid	6	0.000716
daunorubicin	6	0.000088

	Similarity_Score
BIX-01294	0.191447
purvalanol-a	0.186698
TL-HRAS-24	0.175511
SA-25547	0.175222
avicin-d	0.174788
KIN001-055	0.174067
MG-132	0.173868
neratinib	0.172500
afatinib	0.171939
crizotinib	0.171688
BRD-K40329609	0.169068
BRD-K01896723	0.168310
VU-0418939-2	0.168204
SA-1478088	0.168009
BRD-K14749055	0.167835
BVT-948	0.167308
WYE-125132	0.166824
BRD-K65603160	0.166754
ispinesib	0.166449
IKK-16	0.166317

DrugShot¶

Load AutoRIF¶

Query Term Using PubMed Eutils API¶

Associated Drug Table¶

Scatter Plot of Drug Frequency in Literature¶

Predicted Drugs From L1000 Signature Similarity¶

DrugEnrichr Analysis of L1000 Signature Similarity Predicted Drugs¶