Imports and helper functions

Contents

Imports and helper functions#

import os
import sys

import numpy as np
import pandas as pd
import torch
from scipy import stats
from sklearn.preprocessing import MinMaxScaler

import joypy
import matplotlib.pyplot as plt
import mplscience
import seaborn as sns

import scanpy as sc
import scvelo as scv
import scvi
from scvelo.plotting.simulation import compute_dynamics
from velovi import VELOVI

sys.path.append("../..")
from paths import DATA_DIR, FIG_DIR

Global seed set to 0

General settings#

scvi.settings.dl_pin_memory_gpu_training = False

sns.reset_defaults()
sns.reset_orig()
scv.settings.set_figure_params('scvelo', dpi_save=400, dpi=80, transparent=True, fontsize=20, color_map='viridis')

SAVE_FIGURES = True
if SAVE_FIGURES:
    os.makedirs(FIG_DIR / 'permutation' / 'old_brain', exist_ok=True)

Function definitions#

def fit_velovi(bdata):
    VELOVI.setup_anndata(bdata, spliced_layer="Ms", unspliced_layer="Mu")

    vae = VELOVI(bdata)
    vae.train()

    df = vae.history["elbo_train"].iloc[20:].reset_index().rename(columns={'elbo_train': 'elbo'})
    df['set'] = 'train'

    _df = vae.history["elbo_validation"].iloc[20:].reset_index().rename(columns={'elbo_validation': 'elbo'})
    _df['set'] = 'validation'

    df = pd.concat([df, _df], axis=0).reset_index(drop=True)

    with mplscience.style_context():
        sns.set_style(style="whitegrid")
        fig, ax = plt.subplots(figsize=(6, 4))
        sns.lineplot(data=df, x='epoch', y='elbo', hue='set', palette=['#0173B2', '#DE8F05'], ax=ax)

    latent_time = vae.get_latent_time(n_samples=25)
    velocities = vae.get_velocity(n_samples=25, velo_statistic="mean")

    t = latent_time
    scaling = 20 / t.max(0)

    bdata.layers["velocities_velovi"] = velocities / scaling
    bdata.layers["latent_time_velovi"] = latent_time

    bdata.var["fit_alpha"] = vae.get_rates()["alpha"] / scaling
    bdata.var["fit_beta"] = vae.get_rates()["beta"] / scaling
    bdata.var["fit_gamma"] = vae.get_rates()["gamma"] / scaling
    bdata.var["fit_t_"] = (
        torch.nn.functional.softplus(vae.module.switch_time_unconstr)
        .detach()
        .cpu()
        .numpy()
    ) * scaling
    bdata.layers["fit_t"] = latent_time.values * scaling[np.newaxis, :]
    bdata.var['fit_scaling'] = 1.0

    return vae

def shuffle_layer_celltype(adata, labels_key: str, layer: str = "Mu"):
    bdata = adata.copy()
    labels = bdata.obs[labels_key]
    unspliced = bdata.layers[layer]
    for l in np.unique(labels):
        mask = np.asarray(labels == l)
        unspliced_ct = unspliced[mask].copy()
        for g in range(unspliced.shape[1]):
            unspliced_ct[:, g] = np.random.permutation(unspliced_ct[:, g])
        bdata.layers[layer][mask] = unspliced_ct
    return bdata

def plot_phase_portrait(adata, gene, color, permuted=False, figsize=(6, 6)):
    fig, ax = plt.subplots(figsize=figsize)

    df = pd.DataFrame(
        {
            'unspliced': adata[:, gene].layers['Mu'].squeeze().copy(),
            'spliced': adata[:, gene].layers['Ms'].squeeze().copy(),
            'color': color
        }
    )

    with mplscience.style_context():
        sns.scatterplot(data=df, x='spliced', y='unspliced', c=color, s=25, ax=ax);

        _, unspliced, spliced = compute_dynamics(adata, basis=gene, extrapolate=True, sort=True)
        df = pd.DataFrame(
            {
                'unspliced': unspliced.squeeze(),
                'spliced': spliced.squeeze(),
            }
        )

        ax.plot(spliced, unspliced, color="purple", linewidth=2)

        spliced_steady_state = np.linspace(np.min(spliced), np.max(spliced))
        unspliced_steady_state = adata.var.loc[gene, 'fit_gamma'] / adata.var.loc[gene, 'fit_beta'] * (spliced_steady_state - np.min(spliced_steady_state)) + np.min(unspliced)
        ax.plot(spliced_steady_state, unspliced_steady_state, color='purple', linestyle="--", linewidth=2);

    ax.axis('off')
    
    if SAVE_FIGURES:
        if permuted:
            fname = f'phase_portrait_{gene}_permuted'
        else:
            fname = f'phase_portrait_{gene}'
        fig.savefig(
            FIG_DIR / 'permutation' / 'old_brain' / f'{fname}.svg',
            format="svg",
            transparent=True,
            bbox_inches='tight'
        )

def plot_perm_scores(adata, perm_scores, gene, color_label, figsize=(6, 4)):
    df = pd.DataFrame(perm_scores.loc[gene])
    df["Cell type"] = df.index
    order = adata.obs[color_label].cat.categories.tolist()
    
    with mplscience.style_context():
        sns.set_style(style="whitegrid")
        fig, ax = plt.subplots(figsize=figsize)
        sns.barplot(
            data=df,
            y=gene,
            x="Cell type",
            palette=adata.uns[f"{color_label}_colors"],
            order=order,
            ax=ax,
        )

    if SAVE_FIGURES:
        fig.savefig(
            FIG_DIR / 'permutation' / 'old_brain' / f'permutation_score_{gene}.svg',
            format="svg",
            transparent=True,
            bbox_inches='tight'
        )

Data loading#

adata = sc.read(DATA_DIR / "old_brain" / "old_brain.h5ad")
adata

AnnData object with n_obs × n_vars = 1823 × 54143
    obs: 'cell_index', 'clusters_coarse', 'clusters', 'NAME', 'nGene', 'nUMI', 'animal_type', 'cell_type_age'
    layers: 'spliced', 'unspliced'

Data preprocessing#

scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)

scaler = MinMaxScaler()
adata.layers["Mu"] = scaler.fit_transform(adata.layers["Mu"])

scaler = MinMaxScaler()
adata.layers["Ms"] = scaler.fit_transform(adata.layers["Ms"])

Filtered out 49315 genes that are detected 20 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:00:05) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:00) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)

Model fitting#

velovi_vae = fit_velovi(adata)

/home/icb/philipp.weiler/miniconda3/envs/velovi-py39/lib/python3.9/site-packages/torch/distributed/_sharded_tensor/__init__.py:8: DeprecationWarning: torch.distributed._sharded_tensor will be deprecated, use torch.distributed._shard.sharded_tensor instead
  warnings.warn(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.

Epoch 500/500: 100%|██████████| 500/500 [01:12<00:00,  6.89it/s, loss=-7.27e+03, v_num=1]

../_images/4b236bb48b66f69e52f7e97c29b1ff3d431d09fe36a014d2b0f1c3870ea4fef5.png

Permutation score evaluation#

perm_scores, permuted_adata = velovi_vae.get_permutation_scores(labels_key='clusters_coarse')

INFO     Input AnnData not setup with scvi-tools. attempting to transfer AnnData setup       
INFO     Input AnnData not setup with scvi-tools. attempting to transfer AnnData setup       

full_perm_df = pd.DataFrame(columns=["Score", "Dataset"])

max_ratio = np.nanmax(perm_scores.values, axis=1)
scores = max_ratio.tolist()
dataset = ['Old brain'] * len(max_ratio)

full_perm_df["Score"] = scores
full_perm_df["Dataset"] = dataset

with mplscience.style_context():
    df = pd.melt(
        perm_scores.loc[
            :,
            adata.obs['clusters_coarse'].cat.categories[(adata.obs['clusters_coarse'].value_counts() > 1).sort_index()]
        ]
    ).rename(columns={'variable': 'Cell type', 'value': 'Test statistic'})
    df.head()

with mplscience.style_context():
    fig, ax = plt.subplots(figsize=(25, 3))
    sns.violinplot(data=df, x='Cell type', y='Test statistic', ax=ax)
    ax.tick_params(axis='x', rotation=45)

if SAVE_FIGURES:
    fig.savefig(
        FIG_DIR / 'permutation' / 'old_brain' / 'permutation_score_density_per_cell_type.svg',
        format="svg",
        transparent=True,
        bbox_inches='tight',
    )

../_images/ca4e50a1c1ade1230c0c0ce60f66ea425ee9eabaea0cdc7c2d0361f3da1720c4.png

adata.obs['clusters'].c

AACTCCCCAAGCCCAC       ASC_Lin
AAATGCCTCGGTTAAC    NEURON_Lin
AAACCTGTCGGTTCGG    IMMUNE_Lin
AAAGCAACAACACCCG       OLG_Lin
AAATGCCAGGCGATAC    IMMUNE_Lin
                       ...    
TTTGCGCTCGCTGATA       ASC_Lin
TTTGCGCAGAAGGACA       ASC_Lin
TTTGGTTGTCAAAGAT      VASC_Lin
TTTGGTTAGTAGGTGC       OLG_Lin
TTTCCTCCATCTCCCA       OLG_Lin
Name: clusters, Length: 1823, dtype: category
Categories (6, object): ['ASC_Lin', 'EPC_Lin', 'IMMUNE_Lin', 'NEURON_Lin', 'OLG_Lin', 'VASC_Lin']

df = pd.DataFrame(
    perm_scores.idxmax(axis=1).value_counts(normalize=True),
    columns=['Percentage']
).reset_index().rename(columns={'index': 'Cell type'}).sort_values(by='Cell type')

with mplscience.style_context():
    sns.set_style(style="whitegrid")
    fig, ax = plt.subplots(figsize=(25, 3))
    sns.barplot(data=df, x='Cell type', y='Percentage', ax=ax)
    ax.tick_params(axis='x', rotation=45)

if SAVE_FIGURES:
    fig.savefig(
        FIG_DIR / 'permutation' / 'old_brain' / 'percentage_of_max_statistic.svg',
        format="svg",
        transparent=True,
        bbox_inches='tight',
    )

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Input In [24], in <cell line: 5>()
      1 df = pd.DataFrame(
      2     perm_scores.idxmax(axis=1).value_counts(normalize=True),
      3     columns=['Percentage']
      4 ).reset_index().rename(columns={'index': 'Cell type'}).sort_values(by='Cell type')
----> 5 palette = dict(zip(adata.obs['clusters'].cat.categories, adata.uns['clusters_colors']))
      7 with mplscience.style_context():
      8     sns.set_style(style="whitegrid")

File ~/miniconda3/envs/velovi-py39/lib/python3.9/site-packages/anndata/compat/_overloaded_dict.py:100, in OverloadedDict.__getitem__(self, key)
     98     return self.overloaded[key].get()
     99 else:
--> 100     return self.data[key]

KeyError: 'clusters_colors'

adata.obs['mNEUR'] = False
adata.obs.loc[adata.obs['clusters_coarse'].isin(['mNEUR']), 'mNEUR'] = True

sc.tl.umap(adata)

fig, ax = plt.subplots(figsize=(6, 4))
scv.pl.scatter(adata, basis='umap', c='mNEUR', palette=['#0173b2', '#de8f05'], title='', legend_loc=False, s=25, dpi=200, ax=ax)

if SAVE_FIGURES:
    fig.savefig(
        FIG_DIR / 'permutation' / 'old_brain' / 'umap.svg',
        format="svg",
        transparent=True,
        bbox_inches='tight'
    )

../_images/0969fdd4aa115e98958b327caa2eca7816017c937b59bf9ff7518359ab19c2be.png

for gene in adata.var_names[np.argsort(scores)[-5:]]:
    with mplscience.style_context():
        fig, ax = plt.subplots(figsize=(6, 6))
        scv.pl.scatter(adata, basis=gene, c='mNEUR', frameon=False, ncols=5, palette=['#0173b2', '#de8f05'], ax=ax)

    if SAVE_FIGURES:
        fig.savefig(
            FIG_DIR / 'permutation' / 'old_brain' / f'phase_portrait_{gene}.svg',
            format="svg",
            transparent=True,
            bbox_inches='tight',
        )

../_images/da0b7550db0ab12ed97673f505b91da57a20e6189200e022daca255ab46709cc.png

../_images/4aecc53f10cd5ac4aa357959cd9bcc32464c245a4fdce09c050061fa68f3e87e.png

../_images/770f8734ccec5115b1c168eccf9d7a650b34e0843c0ade2c9fab60f640c9c093.png

../_images/415c62e65cf0af95460030cc2992a473173004494cdc7fe674851d3f24c20a45.png

../_images/cee69dbeafc41359b4d3737c66e99142429db1a66c3d4b7676fb632fb221b9e4.png

perm_score_wo_mNEUR = perm_scores.loc[:, ~perm_scores.columns.isin(['mNEUR'])]

_full_perm_df = pd.DataFrame(columns=["Score", "Dataset"])

_max_ratio = np.nanmax(perm_score_wo_mNEUR.values, axis=1)
_scores = _max_ratio.tolist()
_dataset = ['Old brain w/o mNEUR'] * len(_max_ratio)

_full_perm_df["Score"] = _scores
_full_perm_df["Dataset"] = _dataset

df = pd.concat([full_perm_df, _full_perm_df], axis=0)
datasets = ['Old brain', 'Old brain w/o mNEUR']
palette = dict(zip(datasets, sns.color_palette('colorblind').as_hex()[5:7]))

with mplscience.style_context():
    fig, axes = joypy.joyplot(
        df,
        by='Dataset',
        color=list(palette.values()),
        alpha=0.75,
        labels=datasets,
        x_range=[0, 18],
        figsize=(6, 4),
        grid='both'
    )
    
if SAVE_FIGURES:
    fig.savefig(
        FIG_DIR / 'permutation' / 'old_brain' / 'permutation_score_densities_clipped_joypy.svg',
        format="svg",
        transparent=True,
        bbox_inches='tight'
    )

../_images/67334d367e4c18c9cb487a0f83108ad05317cc173cdce13fe43ca506df26c4c8.png

features = ["Skew", "Kurtosis"]

data_stats_df = pd.DataFrame(
    index=datasets,
    columns=features,
    data=np.zeros(
        (len(datasets), len(features))
    )
)

for d in datasets:
    score1 = df.query("Dataset == @d")["Score"]
    score1 = score1[score1 <= np.percentile(score1, 99)]
    data_stats_df.loc[d, "Skew"] = stats.skew(score1)
    data_stats_df.loc[d, "Kurtosis"] = stats.kurtosis(score1)

data_stats_df

	Skew	Kurtosis
Old brain	0.949882	0.331188
Old brain w/o mNEUR	1.220034	1.219945

_df = data_stats_df.reset_index().rename(columns={'index': 'Dataset'})
g = sns.PairGrid(
    _df,
    x_vars=['Kurtosis', 'Skew'],
    y_vars=["Dataset"],
    height=4,
    # aspect=1.5
)

g.map(
    sns.stripplot,
    size=20,
    orient="h",
    jitter=False,
    palette=palette,
    linewidth=1,
    edgecolor="w"
)

titles = ['Kurtosis', 'Skew']
xlims = [(-0.5, 4.75), (0.25, 2)]

for ax, title, xlim in zip(g.axes.flat, titles, xlims):

    # Set a different title for each axes
    ax.set(title=title)
    ax.set_xlim(xlim)

    # Make the grid horizontal instead of vertical
    ax.xaxis.grid(True)
    ax.yaxis.grid(True)

sns.despine(left=True, bottom=True)

if SAVE_FIGURES:
    g.savefig(
        FIG_DIR / 'permutation' / 'old_brain' / 'permutation_density_statistics.svg',
        format="svg",
        transparent=True,
        bbox_inches='tight',
    )

../_images/291ec0bcbffca385dae2fae9e26e496379f03cd4bfd5b4438fad959991ab9e47.png