import re
import math
import time
import requests
import pandas as pd
import json
import scipy.stats as st
from IPython.display import display, IFrame, Markdown, HTML
import seaborn as sns
import matplotlib.pyplot as plt
from umap import UMAP
from sklearn.manifold import TSNE
from maayanlab_bioinformatics.normalization import quantile_normalize, zscore_normalize
from maayanlab_bioinformatics.harmonization import ncbi_genes_lookup
from tqdm import tqdm
import plotly.express as px
import numpy as np
from matplotlib.ticker import MaxNLocator


METADATA_API = "https://maayanlab.cloud/sigcom-lincs/metadata-api"
DATA_API = "https://maayanlab.cloud/sigcom-lincs/data-api/api/v1"
CLUSTERGRAMMER_URL = 'https://maayanlab.cloud/clustergrammer/matrix_upload/'
S3_PREFIX = "https://appyters.maayanlab.cloud/storage/LDP3Consensus/"
drugmonizome_meta_api = "https://maayanlab.cloud/drugmonizome/metadata-api"
drugmonizome_data_api = "https://maayanlab.cloud/drugmonizome/data-api/api/v1"
enrichr_api = 'https://maayanlab.cloud/Enrichr/'


table = 1
figure = 1


up_gene_sets = 'up.gmt'
down_gene_sets = 'down.gmt'


gene_set_direction = None
if up_gene_sets == '':
    gene_set_direction = "down"
    print("Up gene-sets was not uploaded. Gene-set direction is set to down.")
elif down_gene_sets == '':
    gene_set_direction = "up"
    print("Down gene-sets was not uploaded. Gene-set direction is set to up.")


datasets = ['LINCS L1000 CRISPR Perturbations (2021)', 'LINCS L1000 Chemical Perturbations (2021)']
drugmonizome_datasets = ['L1000FWD_GO_Biological_Processes_drugsetlibrary_up', 'L1000FWD_GO_Biological_Processes_drugsetlibrary_down', 'L1000FWD_KEGG_Pathways_drugsetlibrary_up', 'L1000FWD_KEGG_Pathways_drugsetlibrary_down']
transcription_libraries = []
pathways_libraries = ['KEGG_2021_Human', 'WikiPathway_2021_Human']    
ontologies_libraries = ['GO_Biological_Process_2021', 'MGI_Mammalian_Phenotype_Level_4_2021'] 
diseases_drugs_libraries = []


alpha = 0.5
perc = 0.1
top_perts = 50
consensus_method = 'z-score'


ncbi_lookup = ncbi_genes_lookup('Mammalia/Homo_sapiens')
print('Loaded NCBI genes!')

Loaded NCBI genes!


signatures = {}
if not up_gene_sets == '':
    with open(up_gene_sets) as upfile:
        for line in upfile:
            unpacked = line.strip().split("\t")
            if len(unpacked) < 3:
                raise ValueError("GMT is not formatted properly, please consult the README of the appyter for proper formatting")
            sigid = unpacked[0]
            geneset = unpacked[2:]
            genes = []
            for i in geneset:
                gene = i.split(",")[0]
                gene_name = ncbi_lookup(gene.upper())
                if gene_name:
                    genes.append(gene_name)
            signatures[sigid] = {
                "up_genes": genes,
                "down_genes": []
            }
if not down_gene_sets == '':
    with open(down_gene_sets) as downfile:
        for line in downfile:
            unpacked = line.strip().split("\t")
            if len(unpacked) < 3:
                raise ValueError("GMT is not formatted properly, please consult the README of the appyter for proper formatting")
            sigid = unpacked[0]
            geneset = unpacked[2:]
            if sigid not in signatures and gene_set_direction == None:
                raise ValueError("%s did not match any of the up signatures, make sure that the signature names are the same for both up and down genes"%sigid)
            else:
                genes = []
                for i in geneset:
                    gene = i.split(",")[0]
                    gene_name = ncbi_lookup(gene)
                    if gene_name:
                        genes.append(gene_name)
                if sigid in signatures:
                    signatures[sigid]["down_genes"] = genes
                else:
                    signatures[sigid] = {
                        "up_genes": [],
                        "down_genes": genes
                    }


enrichr_libraries = transcription_libraries + pathways_libraries + ontologies_libraries + diseases_drugs_libraries


dataset_map = {
  "LINCS L1000 Antibody Perturbations (2021)": "l1000_aby",
  "LINCS L1000 Ligand Perturbations (2021)": "l1000_lig",
  "LINCS L1000 Overexpression Perturbations (2021)": "l1000_oe",
  "LINCS L1000 CRISPR Perturbations (2021)": "l1000_xpr",
  "LINCS L1000 shRNA Perturbations (2021)": "l1000_shRNA",
  "LINCS L1000 Chemical Perturbations (2021)": "l1000_cp",
  "LINCS L1000 siRNA Perturbations (2021)": "l1000_siRNA"
}

labeller = {
  "LINCS L1000 Antibody Perturbations (2021)": "antibody",
  "LINCS L1000 Ligand Perturbations (2021)": "ligand",
  "LINCS L1000 Overexpression Perturbations (2021)": "overexpression",
  "LINCS L1000 CRISPR Perturbations (2021)": "CRISPR",
  "LINCS L1000 shRNA Perturbations (2021)": "shRNA",
  "LINCS L1000 Chemical Perturbations (2021)": "chemical",
  "LINCS L1000 siRNA Perturbations (2021)": "siRNA"
}

gene_page = {
  "LINCS L1000 Ligand Perturbations (2021)",
  "LINCS L1000 Overexpression Perturbations (2021)",
  "LINCS L1000 CRISPR Perturbations (2021)",
  "LINCS L1000 shRNA Perturbations (2021)",
  "LINCS L1000 siRNA Perturbations (2021)"
}

drug_page = {
  "LINCS L1000 Chemical Perturbations (2021)": "l1000_cp",
}


# functions
def convert_genes(up_genes=[], down_genes=[]):
    try:
        payload = {
           "filter": {
               "where": {
                   "meta.symbol": {"inq": up_genes + down_genes}
               }
           }
        }
        timeout = 0.5
        for i in range(5):
            res = requests.post(METADATA_API + "/entities/find", json=payload)
            if res.ok:
                break
            else:
                time.sleep(timeout)
                if res.status_code >= 500:
                    timeout = timeout * 2
        else:
            raise Exception(res.text)
        results = res.json()
        up = set(up_genes)
        down = set(down_genes)
        if len(up_genes) == 0 or len(down_genes) == 0:
            converted = {
                "entities": [],
            }
        else:
            converted = {
                "up_entities": [],
                "down_entities": []
            }
        for i in results:
            symbol = i["meta"]["symbol"]
            if "entities" in converted:
                converted["entities"].append(i["id"])
            elif symbol in up:
                converted["up_entities"].append(i["id"])
            elif symbol in down:
                converted["down_entities"].append(i["id"])
        return converted
    except Exception as e:
        print(e)


def signature_search(genes, library):
    try:
        payload = {
            **genes,
            "database": library,
            "limit": 1000
        }
        timeout = 0.5
        for i in range(5):
            endpoint = "/enrich/rank" if "entities" in payload else "/enrich/ranktwosided"
            res = requests.post(DATA_API + endpoint, json=payload)
            if res.ok:
                break
            else:
                time.sleep(timeout)
                if res.status_code >= 500:
                    timeout = timeout * 2
        else:
            raise Exception(res.text)
        
        return res.json()["results"]
    except Exception as e:
        print(e)

def resolve_rank(s, gene_set_direction):
    try:
        sigs = {}
        for i in s:
            if i["p-value"] < alpha:
                uid = i["uuid"]
                direction = "up" if i["zscore"] > 0 else "down"
                if direction == gene_set_direction:
                    i["type"] = "mimicker"
                    sigs[uid] = i
                else:
                    i["type"] = "reverser"
                    sigs[uid] = i
            
        payload = {
            "filter": {
                "where": {
                    "id": {"inq": list(sigs.keys())}
                },
                "fields": [
                    "id",
                    "meta.pert_name",
                    "meta.pert_type",
                    "meta.pert_time",
                    "meta.pert_dose",
                    "meta.cell_line",
                    "meta.local_id"
                ]
            }
        }
        timeout = 0.5
        for i in range(5):
            res = requests.post(METADATA_API + "/signatures/find", json=payload)
            if res.ok:
                break
            else:
                time.sleep(timeout)
                if res.status_code >= 500:
                    timeout = timeout * 2
        else:
            raise Exception(res.text)
        results = res.json()
        signatures = {
            "mimickers": {},
            "reversers": {}
        }
        for sig in results:
            uid = sig["id"]
            scores = sigs[uid]
            sig["scores"] = scores
            if "pert_name" in sig["meta"]:
                local_id = sig["meta"].get("local_id", None)
                if scores["type"] == "mimicker":
                    pert_name = sig["meta"].get("pert_name", None)
                    local_id = "%s_%s"%(pert_name, local_id.replace("_%s"%pert_name, ""))
                    signatures["mimickers"][local_id] = {
                        "pert_name": sig["meta"].get("pert_name", None),
                        "pert_time": sig["meta"].get("pert_time", None),
                        "pert_dose": sig["meta"].get("pert_dose", None),
                        "cell_line": sig["meta"].get("cell_line", None),
                        "z-score": abs(scores.get("zscore", 0)),
                        "p-value": scores.get("p-value", 0)
                    }
                elif scores["type"] == "reverser":
                    pert_name = sig["meta"].get("pert_name", None)
                    local_id = "%s_%s"%(pert_name, local_id.replace("_%s"%pert_name, ""))
                    signatures["reversers"][local_id] = {
                        "pert_name": sig["meta"].get("pert_name", None),
                        "pert_time": sig["meta"].get("pert_time", None),
                        "pert_dose": sig["meta"].get("pert_dose", None),
                        "cell_line": sig["meta"].get("cell_line", None),
                        "z-score": abs(scores.get("zscore", 0)),
                        "p-value": scores.get("p-value", 0)
                    }
        return signatures

    except Exception as e:
        print(e)


def resolve_ranktwosided(s):
    try:
        sigs = {}
        for i in s:
            if i['p-down'] < alpha and i['p-up'] < alpha:
                uid = i["uuid"]
                i['z-sum (abs)'] = abs(i['z-sum'])
                if i['z-sum'] > 0:
                    i["type"] = "mimicker"
                    sigs[uid] = i
                elif i['z-sum'] < 0:
                    i["type"] = "reverser"
                    sigs[uid] = i
            
        payload = {
            "filter": {
                "where": {
                    "id": {"inq": list(sigs.keys())}
                },
                "fields": [
                    "id",
                    "meta.pert_name",
                    "meta.pert_type",
                    "meta.pert_time",
                    "meta.pert_dose",
                    "meta.cell_line",
                    "meta.local_id"
                ]
            }
        }
        timeout = 0.5
        for i in range(5):
            res = requests.post(METADATA_API + "/signatures/find", json=payload)
            if res.ok:
                break
            else:
                time.sleep(timeout)
                if res.status_code >= 500:
                    timeout = timeout * 2
        else:
            raise Exception(res.text)
        results = res.json()
        signatures = {
            "mimickers": {},
            "reversers": {}
        }
        for sig in results:
            uid = sig["id"]
            scores = sigs[uid]
            sig["scores"] = scores
            if "pert_name" in sig["meta"]:
                local_id = sig["meta"].get("local_id", None)
                if scores["type"] == "mimicker" and len(signatures["mimickers"]) < 100:
                    pert_name = sig["meta"].get("pert_name", None)
                    local_id = "%s_%s"%(pert_name, local_id.replace("_%s"%pert_name, ""))
                    signatures["mimickers"][local_id] = {
                        "pert_name": sig["meta"].get("pert_name", None),
                        "pert_time": sig["meta"].get("pert_time", None),
                        "pert_dose": sig["meta"].get("pert_dose", None),
                        "cell_line": sig["meta"].get("cell_line", None),
                        "z-sum": scores.get("z-sum (abs)", 0)
                    }
                elif scores["type"] == "reverser" and len(signatures["reversers"]) < 100:
                    pert_name = sig["meta"].get("pert_name", None)
                    local_id = "%s_%s"%(pert_name, local_id.replace("_%s"%pert_name, ""))
                    signatures["reversers"][local_id] = {
                        "pert_name": sig["meta"].get("pert_name", None),
                        "pert_time": sig["meta"].get("pert_time", None),
                        "pert_dose": sig["meta"].get("pert_dose", None),
                        "cell_line": sig["meta"].get("cell_line", None),
                        "z-sum": scores.get("z-sum (abs)", 0)
                    }
        return signatures

    except Exception as e:
        print(e)


# enriched = {lib:{"mimickers": {}, "reversers": {}} for lib in datasets}
enriched = {"mimickers": {lib: {} for lib in datasets}, "reversers": {lib: {} for lib in datasets}}
metadata = {}
for k,sig in tqdm(signatures.items()):    
    try:
        time.sleep(0.1)
        genes = convert_genes(sig["up_genes"],sig["down_genes"])
        if ("entities" in genes and len(genes["entities"]) > 5) or (len(genes["up_entities"]) > 5 and len(genes["down_entities"]) > 5):
            for lib in datasets:
                library = dataset_map[lib]
                s = signature_search(genes, library)
                if gene_set_direction == None:
                    sigs = resolve_ranktwosided(s)
                else:
                    sigs = resolve_rank(s, gene_set_direction)
                enriched["mimickers"][lib][k] = sigs["mimickers"]
                enriched["reversers"][lib][k] = sigs["reversers"]
                for direction, entries in sigs.items():
                    for label, meta in entries.items():
                        if label not in metadata:
                            metadata[label] = {
                                "pert_name": meta.get("pert_name", None),
                                "pert_time": meta.get("pert_time", None),
                                "pert_dose": meta.get("pert_dose", None),
                                "cell_line": meta.get("cell_line", None),
                            }
                time.sleep(0.1)
    except Exception as e:
        print(e)

100%|██████████| 1/1 [00:08<00:00,  8.72s/it]


def clustergrammer(df, name, figure, label="Clustergrammer"):
    clustergram_df = df.rename(columns={i:"Signature: %s"%i for i in df.columns}, index={i:"Drug: %s"%i for i in df.index})
    clustergram_df.to_csv(name, sep="\t")
    response = ''
    timeout = 0.5
    for i in range(5):
        try:
            res = requests.post(CLUSTERGRAMMER_URL, files={'file': open(name, 'rb')})
            if not res.ok:
                response = res.text
                time.sleep(timeout)
                if res.status_code >= 500:
                    timeout = timeout * 2
            else:
                clustergrammer_url = res.text.replace("http:","https:")   
                break
        except Exception as e:
            response = e
            time.sleep(2)
    else:
        if type(response) == Exception:
            raise response
        else:
            raise Exception(response)
    display(IFrame(clustergrammer_url, width="1000", height="1000"))
    display(Markdown("**Figure %d** %s [Go to url](%s)"%(figure, label, clustergrammer_url)))
    figure += 1
    return figure

cmap = sns.cubehelix_palette(50, hue=0.05, rot=0, light=1, dark=0)

def heatmap(df, filename, figure, label, width=15, height=15):
    fig = plt.figure(figsize=(width,height))
    cg = sns.clustermap(df, cmap=cmap, figsize=(width, height))
    cg.ax_row_dendrogram.set_visible(False)
    cg.ax_col_dendrogram.set_visible(False)
    display(cg)
    plt.show()
    cg.savefig(filename)
    display(Markdown("**Figure %d** %s"%(figure, label)))
    figure+=1
    return figure

def make_clickable(link):
    # target _blank to open new window
    # extract clickable text to display for your link
    text = link.split('=')[1]
    return f'<a target="_blank" href="{link}">{text}</a>'


annot_dict = {}
def bar_chart(enrichment, title=''):
    bar_color = 'mediumspringgreen'
    bar_color_not_sig = 'lightgrey'
    edgecolor=None
    linewidth=0
    if len(enrichment) > 10:
        enrichment = enrichment[0:10]
    enrichment_names = [i["name"] for i in enrichment]
    enrichment_scores = [i["pval"] for i in enrichment]
    plt.figure(figsize=(10,4))
    bar_colors = [bar_color if (x < 0.05) else bar_color_not_sig for x in enrichment_scores]
    fig = sns.barplot(x=np.log10(enrichment_scores)*-1, y=enrichment_names, palette=bar_colors, edgecolor=edgecolor, linewidth=linewidth)
    fig.axes.get_yaxis().set_visible(False)
    fig.set_title(title.replace('_',' '),fontsize=20)
    fig.set_xlabel('-Log10(p-value)',fontsize=19)
    fig.xaxis.set_major_locator(MaxNLocator(integer=True))
    fig.tick_params(axis='x', which='major', labelsize=20)
    if max(np.log10(enrichment_scores)*-1)<1:
        fig.xaxis.set_ticks(np.arange(0, max(np.log10(enrichment_scores)*-1), 0.1))
    for ii,annot in enumerate(enrichment_names):
        if annot in annot_dict.keys():
            annot = annot_dict[annot]
        if enrichment_scores[ii] < 0.05:
            annot = '  *'.join([annot, str(str(np.format_float_scientific(enrichment_scores[ii],precision=2)))]) 
        else:
            annot = '  '.join([annot, str(str(np.format_float_scientific(enrichment_scores[ii],precision=2)))])

        title_start= max(fig.axes.get_xlim())/200
        fig.text(title_start,ii,annot,ha='left',wrap = True, fontsize = 12)
        fig.patch.set_edgecolor('black')  
        fig.patch.set_linewidth('2')
    plt.show()
        

def get_drugmonizome_plot(consensus, label, figure, dataset):
    payload = {
        "filter":{
            "where": {
                "meta.Name": {
                    "inq": [i.lower() for i in set(consensus['pert name'])]
                }
            }
        }
    }

    res = requests.post(drugmonizome_meta_api + "/entities/find", json=payload)

    entities = {}
    for i in res.json():
        name = i["meta"]["Name"]
        uid = i["id"]
        if name not in entities:
            entities[name] = uid
            
    query = {
        "entities": list(entities.values()),
        "limit": 1000,
        "database": dataset
    }

    res = requests.post(drugmonizome_data_api + "/enrich/overlap", json=query)

    scores = res.json()["results"]
    uids = {i["uuid"]: i for i in scores}

    payload = {
        "filter":{
            "where": {
                "id": {
                    "inq": list(uids.keys())
                }
            }
        }
    }

    res = requests.post(drugmonizome_meta_api + "/signatures/find", json=payload)

    sigs = res.json()
    sigs = res.json()
    scores = []
    for i in sigs:
        score = uids[i["id"]]
        scores.append({
            "name": i["meta"]["Term"][0]["Name"],
            "pval": score["p-value"]
        })
    
    scores.sort(key=lambda x: x['pval'])
    if len(scores) > 0:
        bar_chart(scores, dataset.replace("setlibrary", " set library"))
        display(Markdown("**Figure %d** %s"%(figure, label)))
        figure += 1
    return figure

def get_enrichr_bar(userListId, enrichr_library, figure, label):
    query_string = '?userListId=%s&backgroundType=%s'
    res = requests.get(
        enrichr_api + 'enrich' + query_string % (userListId, enrichr_library)
     )
    if not res.ok:
        raise Exception('Error fetching enrichment results')

    data = res.json()[enrichr_library]
    scores = [{"name": i[1], "pval": i[2]} for i in data]
    scores.sort(key=lambda x: x['pval'])
    if len(scores) > 0:
        bar_chart(scores, enrichr_library)
        display(Markdown("**Figure %d** %s"%(figure, label)))
        figure +=1
    return figure

def enrichment(consensus, label, figure):
    gene_names = [i.upper() for i in set(consensus['pert name'])]
    genes_str = '\n'.join(gene_names)
    description = label
    payload = {
        'list': (None, genes_str),
        'description': (None, description)
    }

    res = requests.post(enrichr_api + 'addList', files=payload)
    if not res.ok:
        raise Exception('Error analyzing gene list')

    data = res.json()
    shortId = data["shortId"]
    userListId = data["userListId"]
    display(Markdown("Enrichr Link: https://maayanlab.cloud/Enrichr/enrich?dataset=%s"%shortId))
    for d in enrichr_libraries:
        l = "Enrichr %s top ranked terms for %s"%(d.replace("_", " "), label)
        figure = get_enrichr_bar(userListId, d, figure, l)
    return figure


score_field = "z-sum" if gene_set_direction == None else "z-score"


direction = "mimickers"
alternate = "mimicking"
for lib in datasets:
    library = dataset_map[lib]
    display(Markdown("#### Consensus %s %s signatures"%(alternate, labeller[lib])), display_id=alternate+lib)
    index = set()
    sig_dict = enriched[direction][lib]
    for v in sig_dict.values():
        index = index.union(v.keys())
    df = pd.DataFrame(0, index=index, columns=sig_dict.keys())
    for sig_name,v in sig_dict.items():
        for local_id, meta in v.items():
            df.at[local_id, sig_name] = meta[score_field]
    df = df[(df>0).sum(1) > len(df.columns) * perc]
    if consensus_method == 'z-score':
        df = df.loc[df.sum(1).sort_values(ascending=False).index[0:top_perts]]
    else:
        df = df.loc[(df > 0).sum(1).sort_values(ascending=False).index[0:top_perts]]
    filename = "sig_matrix_%s_%s.tsv"%(library.replace(" ","_"), direction)
    df.to_csv(filename, sep="\t")
    display(Markdown("Download score matrix for %s %s signatures ([download](./%s))"%
                     (alternate, labeller[lib], filename)))

    
    if lib in gene_page:
#         "pert_name": sig["meta"].get("pert_name", None),
#         "pert_time": sig["meta"].get("pert_time", None),
#         "pert_dose": sig["meta"].get("pert_dose", None),
#         "cell_line": sig["meta"].get("cell_line", None),
        stat_df = pd.DataFrame(index=df.index, columns=["pert name", "pert time", "cell line", "count", "z-sum", "Enrichr gene page"])
        stat_df['count'] = (df > 0).sum(1)
        # Compute zstat and p value
        stat_df["z-sum"] = df.sum(1)
        for i in stat_df.index:
            stat_df.at[i, "pert name"] = metadata[i]["pert_name"]
            stat_df.at[i, "pert time"] = metadata[i]["pert_time"]
            stat_df.at[i, "cell line"] = metadata[i]["cell_line"]
        stat_df['Enrichr gene page'] = ["https://maayanlab.cloud/Enrichr/#find!gene=%s"%i for i in stat_df["pert name"]]
        stat_df = stat_df.fillna("-")
        filename = "sig_stat_%s_%s.tsv"%(lib.replace(" ","_"), direction)
        stat_df.to_csv(filename, sep="\t")
        stat_df['Enrichr gene page'] = stat_df['Enrichr gene page'].apply(make_clickable)
        stat_html = stat_df.head(25).to_html(escape=False)
        display(HTML(stat_html))
    else:
        stat_df = pd.DataFrame(index=df.index, columns=["pert name", "pert dose", "pert time", "cell line", "count", "z-sum"])
        stat_df['count'] = (df > 0).sum(1)
        stat_df["z-sum"] = df.sum(1)
        
        for i in stat_df.index:
            stat_df.at[i, "pert name"] = metadata[i]["pert_name"]
            stat_df.at[i, "pert dose"] = metadata[i]["pert_dose"]
            stat_df.at[i, "pert time"] = metadata[i]["pert_time"]
            stat_df.at[i, "cell line"] = metadata[i]["cell_line"]
        stat_df = stat_df.fillna("-")
        filename = "sig_stat_%s_%s.tsv"%(library.replace(" ","_"), direction)
        stat_df.to_csv(filename, sep="\t")
        display(stat_df.head(25))
    display(Markdown("**Table %d** Top 25 consensus %s %s signatures([download](./%s))"%
                     (table, alternate, labeller[lib], filename)))

    table+=1

    
#     display(df.head())
#     display(Markdown("**Table %d** Consensus %s %s signatures ([download](./%s))"%
#                      (table, alternate, labeller[lib], filename)))

#     table+=1
    if len(df.index) > 1:
        consensus_norm = quantile_normalize(df)
        display(Markdown("##### Clustergrammer for %s %s perturbagens"%(alternate, labeller[lib])), display_id="%s-clustergrammer-%s"%(alternate, lib))
        label = "Clustergrammer of consensus %s perturbagens of L1000 %s perturbations(2021) (quantile normalized scores)"%(alternate, labeller[lib])
        name = "clustergrammer_%s_%s.tsv"%(library.replace(" ", "_"), direction)
        figure = clustergrammer(consensus_norm, name, figure, label)

        display(Markdown("#### Heatmap for %s %s perturbagens"%(alternate, labeller[lib])), display_id="%s-heatmap-%s"%(alternate, lib))
        label = "Heatmap of consensus %s perturbagens of L1000 %s perturbations(2021) (quantile normalized scores)"%(alternate, labeller[lib])
        name = "heatmap_%s_%s.png"%(library.replace(" ", "_"), direction)
        figure = heatmap(consensus_norm, name, figure, label)
        if len(set(stat_df["pert name"])) > 5:
            if lib in drug_page:
                display(Markdown("#### Drugmonizome enrichment analysis for the consensus %s %s perturbagens"% (alternate, labeller[lib])))
                for d in drugmonizome_datasets:
                    label = "%s top ranked enriched terms for %s %s perturbagens"%(d.replace("_", " "), alternate, labeller[lib])
                    figure = get_drugmonizome_plot(stat_df, label, figure, d)
            elif lib in gene_page:
                display(Markdown("#### Enrichr link to analyze enriched terms for the consensus %s %s perturbagens"% (alternate, labeller[lib])))
                label = "%s L1000 %s perturbagens"%(alternate, labeller[lib])
                figure = enrichment(stat_df, label, figure)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/tmp/ipykernel_41/1720778184.py in <module>
     78         label = "Heatmap of consensus %s perturbagens of L1000 %s perturbations(2021) (quantile normalized scores)"%(alternate, labeller[lib])
     79         name = "heatmap_%s_%s.png"%(library.replace(" ", "_"), direction)
---> 80         figure = heatmap(consensus_norm, name, figure, label)
     81         if len(set(stat_df["pert name"])) > 5:
     82             if lib in drug_page:

/tmp/ipykernel_41/4247677548.py in heatmap(df, filename, figure, label, width, height)
     32 def heatmap(df, filename, figure, label, width=15, height=15):
     33     fig = plt.figure(figsize=(width,height))
---> 34     cg = sns.clustermap(df, cmap=cmap, figsize=(width, height))
     35     cg.ax_row_dendrogram.set_visible(False)
     36     cg.ax_col_dendrogram.set_visible(False)

/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py in inner_f(*args, **kwargs)
     44             )
     45         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46         return f(**kwargs)
     47     return inner_f
     48 

/usr/local/lib/python3.8/dist-packages/seaborn/matrix.py in clustermap(data, pivot_kws, method, metric, z_score, standard_scale, figsize, cbar_kws, row_cluster, col_cluster, row_linkage, col_linkage, row_colors, col_colors, mask, dendrogram_ratio, colors_ratio, cbar_pos, tree_kws, **kwargs)
   1404                           colors_ratio=colors_ratio, cbar_pos=cbar_pos)
   1405 
-> 1406     return plotter.plot(metric=metric, method=method,
   1407                         colorbar_kws=cbar_kws,
   1408                         row_cluster=row_cluster, col_cluster=col_cluster,

/usr/local/lib/python3.8/dist-packages/seaborn/matrix.py in plot(self, metric, method, colorbar_kws, row_cluster, col_cluster, row_linkage, col_linkage, tree_kws, **kws)
   1217         colorbar_kws = {} if colorbar_kws is None else colorbar_kws
   1218 
-> 1219         self.plot_dendrograms(row_cluster, col_cluster, metric, method,
   1220                               row_linkage=row_linkage, col_linkage=col_linkage,
   1221                               tree_kws=tree_kws)

/usr/local/lib/python3.8/dist-packages/seaborn/matrix.py in plot_dendrograms(self, row_cluster, col_cluster, metric, method, row_linkage, col_linkage, tree_kws)
   1072         # PLot the column dendrogram
   1073         if col_cluster:
-> 1074             self.dendrogram_col = dendrogram(
   1075                 self.data2d, metric=metric, method=method, label=False,
   1076                 axis=1, ax=self.ax_col_dendrogram, linkage=col_linkage,

/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py in inner_f(*args, **kwargs)
     44             )
     45         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46         return f(**kwargs)
     47     return inner_f
     48 

/usr/local/lib/python3.8/dist-packages/seaborn/matrix.py in dendrogram(data, linkage, axis, label, metric, method, rotate, tree_kws, ax)
    777 
    778     """
--> 779     plotter = _DendrogramPlotter(data, linkage=linkage, axis=axis,
    780                                  metric=metric, method=method,
    781                                  label=label, rotate=rotate)

/usr/local/lib/python3.8/dist-packages/seaborn/matrix.py in __init__(self, data, linkage, metric, method, axis, label, rotate)
    590         else:
    591             self.linkage = linkage
--> 592         self.dendrogram = self.calculate_dendrogram()
    593 
    594         # Dendrogram ends are always at multiples of 5, who knows why

/usr/local/lib/python3.8/dist-packages/seaborn/matrix.py in calculate_dendrogram(self)
    669             "reordered_ind" which indicates the re-ordering of the matrix
    670         """
--> 671         return hierarchy.dendrogram(self.linkage, no_plot=True,
    672                                     color_threshold=-np.inf)
    673 

/usr/local/lib/python3.8/dist-packages/scipy/cluster/hierarchy.py in dendrogram(Z, p, truncate_mode, color_threshold, get_leaves, orientation, labels, count_sort, distance_sort, show_leaf_counts, no_plot, no_labels, leaf_font_size, leaf_rotation, leaf_label_func, show_contracted, link_color_func, ax, above_threshold_color)
   3299         raise ValueError("Dimensions of Z and labels must be consistent.")
   3300 
-> 3301     is_valid_linkage(Z, throw=True, name='Z')
   3302     Zs = Z.shape
   3303     n = Zs[0] + 1

/usr/local/lib/python3.8/dist-packages/scipy/cluster/hierarchy.py in is_valid_linkage(Z, warning, throw, name)
   2270             raise ValueError('Linkage matrix %smust have 4 columns.' % name_str)
   2271         if Z.shape[0] == 0:
-> 2272             raise ValueError('Linkage must be computed on at least two '
   2273                              'observations.')
   2274         n = Z.shape[0]

ValueError: Linkage must be computed on at least two observations.

<Figure size 1080x1080 with 0 Axes>


direction = "reversers"
alternate = "reversing"
for lib in datasets:
    library = dataset_map[lib]
    display(Markdown("#### Consensus %s %s signatures"%(alternate, labeller[lib])), display_id=alternate+lib)
    index = set()
    sig_dict = enriched[direction][lib]
    for v in sig_dict.values():
        index = index.union(v.keys())
    df = pd.DataFrame(0, index=index, columns=sig_dict.keys())
    for sig_name,v in sig_dict.items():
        for local_id, meta in v.items():
            df.at[local_id, sig_name] = meta[score_field]
    df = df[(df>0).sum(1) > len(df.columns) * perc]
    if consensus_method == 'z-score':
        df = df.loc[df.sum(1).sort_values(ascending=False).index[0:top_perts]]
    else:
        df = df.loc[(df > 0).sum(1).sort_values(ascending=False).index[0:top_perts]]
    filename = "sig_matrix_%s_%s.tsv"%(library.replace(" ","_"), direction)
    df.to_csv(filename, sep="\t")
    display(Markdown("Download score matrix for %s %s signatures ([download](./%s))"%
                     (alternate, labeller[lib], filename)))

    
    if lib in gene_page:
#         "pert_name": sig["meta"].get("pert_name", None),
#         "pert_time": sig["meta"].get("pert_time", None),
#         "pert_dose": sig["meta"].get("pert_dose", None),
#         "cell_line": sig["meta"].get("cell_line", None),
        stat_df = pd.DataFrame(index=df.index, columns=["pert name", "pert time", "cell line", "count", "z-sum", "Enrichr gene page"])
        stat_df['count'] = (df > 0).sum(1)
        # Compute zstat and p value
        stat_df["z-sum"] = df.sum(1)
        for i in stat_df.index:
            stat_df.at[i, "pert name"] = metadata[i]["pert_name"]
            stat_df.at[i, "pert time"] = metadata[i]["pert_time"]
            stat_df.at[i, "cell line"] = metadata[i]["cell_line"]
        stat_df['Enrichr gene page'] = ["https://maayanlab.cloud/Enrichr/#find!gene=%s"%i for i in stat_df["pert name"]]
        stat_df = stat_df.fillna("-")
        filename = "sig_stat_%s_%s.tsv"%(lib.replace(" ","_"), direction)
        stat_df.to_csv(filename, sep="\t")
        stat_df['Enrichr gene page'] = stat_df['Enrichr gene page'].apply(make_clickable)
        stat_html = stat_df.head(25).to_html(escape=False)
        display(HTML(stat_html))
    else:
        stat_df = pd.DataFrame(index=df.index, columns=["pert name", "pert dose", "pert time", "cell line", "count", "z-sum"])
        stat_df['count'] = (df > 0).sum(1)
        stat_df["z-sum"] = df.sum(1)
        
        for i in stat_df.index:
            stat_df.at[i, "pert name"] = metadata[i]["pert_name"]
            stat_df.at[i, "pert dose"] = metadata[i]["pert_dose"]
            stat_df.at[i, "pert time"] = metadata[i]["pert_time"]
            stat_df.at[i, "cell line"] = metadata[i]["cell_line"]
        stat_df = stat_df.fillna("-")
        filename = "sig_stat_%s_%s.tsv"%(library.replace(" ","_"), direction)
        stat_df.to_csv(filename, sep="\t")
        display(stat_df.head(25))
    display(Markdown("**Table %d** Top 25 consensus %s %s signatures([download](./%s))"%
                     (table, alternate, labeller[lib], filename)))

    table+=1

    
#     display(df.head())
#     display(Markdown("**Table %d** Consensus %s %s signatures ([download](./%s))"%
#                      (table, alternate, labeller[lib], filename)))

#     table+=1
    if len(df.index) > 1:
        consensus_norm = quantile_normalize(df)
        display(Markdown("##### Clustergrammer for %s %s perturbagens"%(alternate, labeller[lib])), display_id="%s-clustergrammer-%s"%(alternate, lib))
        label = "Clustergrammer of consensus %s perturbagens of L1000 %s perturbations(2021) (quantile normalized scores)"%(alternate, labeller[lib])
        name = "clustergrammer_%s_%s.tsv"%(library.replace(" ", "_"), direction)
        figure = clustergrammer(consensus_norm, name, figure, label)

        display(Markdown("#### Heatmap for %s %s perturbagens"%(alternate, labeller[lib])), display_id="%s-heatmap-%s"%(alternate, lib))
        label = "Heatmap of consensus %s perturbagens of L1000 %s perturbations(2021) (quantile normalized scores)"%(alternate, labeller[lib])
        name = "heatmap_%s_%s.png"%(library.replace(" ", "_"), direction)
        figure = heatmap(consensus_norm, name, figure, label)
        if len(set(stat_df["pert name"])) > 5:
            if lib in drug_page:
                display(Markdown("#### Drugmonizome enrichment analysis for the consensus %s %s perturbagens"% (alternate, labeller[lib])))
                for d in drugmonizome_datasets:
                    label = "%s top ranked enriched terms for %s %s perturbagens"%(d.replace("_", " ").replace("setlibrary", " set library"), alternate, labeller[lib])
                    figure = get_drugmonizome_plot(stat_df, label, figure, d)
            elif lib in gene_page:
                display(Markdown("#### Enrichr link to analyze enriched terms for the consensus %s %s perturbagens"% (alternate, labeller[lib])))
                label = "%s L1000 %s perturbagens"%(alternate, labeller[lib])
                figure = enrichment(stat_df, label, figure)

	pert name	pert time	cell line	count	z-sum	Enrichr gene page
VAV1_XPR027_MCF7.311_96H_O09	VAV1	96 h	MCF7	1	9	VAV1
NR2C1_XPR021_HT29.311_96H_H10	NR2C1	96 h	HT29	1	8	NR2C1
RHD_XPR023_MCF7.311_96H_A05	RHD	96 h	MCF7	1	8	RHD
CD3D_XPR013_U251MG.311_96H_P14	CD3D	96 h	U251MG	1	8	CD3D
SIRT6_XPRJJ002_A375_96H_L17	SIRT6	96 h	A375	1	8	SIRT6
CD151_XPR013_ES2.311_96H_P01	CD151	96 h	ES2	1	8	CD151
DDI2_XPR015_HT29.311_96H_D04	DDI2	96 h	HT29	1	8	DDI2
MICB_XPR020_U251MG.311_96H_B17	MICB	96 h	U251MG	1	8	MICB
None_XPR045_HS944T.311_96H_O03_BRDN0003230890	-	96 h	HS944T	1	8	None
ZNF300_XPR027_U251MG.311_96H_J06	ZNF300	96 h	U251MG	1	8	ZNF300
None_XPR043_U251MG.311_96H_K22_BRDN0001145640	-	96 h	U251MG	1	8	None
UCN2_XPR026_AGS.311_96H_J20	UCN2	96 h	AGS	1	8	UCN2
CREG1_XPR015_HT29.311_96H_K07	CREG1	96 h	HT29	1	8	CREG1
LDLRAD3_XPR019_MCF7.311_96H_B07	LDLRAD3	96 h	MCF7	1	8	LDLRAD3
BST1_XPR028_HT29.311_96H_A21	BST1	96 h	HT29	1	8	BST1
PAWR_XPR031_ES2.311_96H_O03	PAWR	96 h	ES2	1	8	PAWR
ACVR1B_XPR035_A375.311_96H_M09	ACVR1B	96 h	A375	1	7	ACVR1B
SCUBE1_XPR023_ES2.311_96H_P13	SCUBE1	96 h	ES2	1	7	SCUBE1
CD7_XPR014_HT29.311_96H_K09	CD7	96 h	HT29	1	7	CD7
PDXK_XPR035_HT29.311_96H_M06	PDXK	96 h	HT29	1	7	PDXK
NR2C1_XPR021_A375.311_96H_H10	NR2C1	96 h	A375	1	7	NR2C1
IFI30_XPR017_PC3.311B_96H_H18	IFI30	96 h	PC3	1	7	IFI30
BAZ1B_XPR034_MCF7.311_96H_E22	BAZ1B	96 h	MCF7	1	7	BAZ1B
PRMT7_XPRJJ002_A375_96H_B22	PRMT7	96 h	A375	1	7	PRMT7
SLC39A8_XPR024_AGS.311_96H_B23	SLC39A8	96 h	AGS	1	7	SLC39A8

Consensus Perturbagens of 1 DHEA signatures from SigCom LINCS¶

Gene Harmonization¶

Input Signatures Metadata¶

SigCom LINCS Signature Search¶

Consensus Analysis¶

Mimickers¶

Consensus mimicking CRISPR signatures¶

Clustergrammer for mimicking CRISPR perturbagens¶

Heatmap for mimicking CRISPR perturbagens¶

Reversers¶

References¶