Data generation for RNA velocity analysis in pre-frontal cortex#

Generates .h5ad files to analyze RNA velocity in the pre-frontal cortex using different pre-processing protocols.

Preliminaries#

Requires

  • DATA_DIR/pfc/PFC_adata_quantifications_combined.h5ad

Output

  • DATA_DIR/pfc/alevin_prepref_isocollapse_cdna_introns_decoy_gentrome.h5ad

  • DATA_DIR/pfc/alevin_prepref_isocollapse_cdna_introns_gentrome.h5ad

  • DATA_DIR/pfc/alevin_prepref_isoseparate_cdna_introns_decoy_gentrome.h5ad

  • DATA_DIR/pfc/alevin_prepref_isoseparate_cdna_introns_gentrome.h5ad

  • DATA_DIR/pfc/alevin_spliced_unspliced_gentrome.h5ad

  • DATA_DIR/pfc/dropest.h5ad

  • DATA_DIR/pfc/kallisto_bustools_prepref_isocollapse_exclude.h5ad

  • DATA_DIR/pfc/kallisto_bustools_prepref_isocollapse_include.h5ad

  • DATA_DIR/pfc/kallisto_bustools_prepref_isoseparate_exclude.h5ad

  • DATA_DIR/pfc/kallisto_bustools_prepref_isoseparate_include.h5ad

  • DATA_DIR/pfc/starsolo.h5ad

  • DATA_DIR/pfc/starsolo_subtr.h5ad

  • DATA_DIR/pfc/velocyto.h5ad

Library imports#

from pathlib import Path
import sys

# import standard packages
import pandas as pd

# import single-cell packages
from anndata import AnnData
import scanpy as sc

sys.path.append("../../..")
from paths import DATA_DIR

Data loading#

# Load the combined quantification object from DATA_DIR/pfc/. The cells below
# read per-protocol `<name>_spliced` / `<name>_unspliced` layers from it.
adata = sc.read(DATA_DIR / "pfc" / "PFC_adata_quantifications_combined.h5ad")
# Bare expression so the notebook displays the object's summary.
adata
AnnData object with n_obs × n_vars = 1267 × 54144
    obs: 'cell_index', 'clusters_coarse', 'clusters', 'X', 'nGene', 'nUMI', 'percent.mito', 'Sample', 'treatment', 'Period', 'stage', 'DevStage'
    uns: 'X_name', 'dataset'
    obsm: 'PCA_alevin_spliced_gentrome', 'TSNE_alevin_spliced_gentrome', 'UMAP_alevin_spliced_gentrome'
    layers: 'alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_spliced', 'alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_unspliced', 'alevin_prepref_isocollapse_cdna_introns_gentrome_spliced', 'alevin_prepref_isocollapse_cdna_introns_gentrome_unspliced', 'alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_spliced', 'alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_unspliced', 'alevin_prepref_isoseparate_cdna_introns_gentrome_spliced', 'alevin_prepref_isoseparate_cdna_introns_gentrome_unspliced', 'alevin_spliced_gentrome_counts', 'alevin_spliced_unspliced_gentrome_spliced', 'alevin_spliced_unspliced_gentrome_unspliced', 'dropest_spliced', 'dropest_unspliced', 'kallisto_bustools_prepref_isocollapse_exclude_spliced', 'kallisto_bustools_prepref_isocollapse_exclude_unspliced', 'kallisto_bustools_prepref_isocollapse_include_spliced', 'kallisto_bustools_prepref_isocollapse_include_unspliced', 'kallisto_bustools_prepref_isoseparate_exclude_spliced', 'kallisto_bustools_prepref_isoseparate_exclude_unspliced', 'kallisto_bustools_prepref_isoseparate_include_spliced', 'kallisto_bustools_prepref_isoseparate_include_unspliced', 'starsolo_spliced', 'starsolo_subtr_spliced', 'starsolo_subtr_unspliced', 'starsolo_unspliced', 'velocyto_unspliced'

Data generation#

# Build a standalone AnnData object from the
# `alevin_prepref_isocollapse_cdna_introns_decoy_gentrome` layers: the spliced
# counts are used both as `X` and as the "spliced" layer, the unspliced counts
# as the "unspliced" layer. Cell annotations are copied from `adata.obs`; `var`
# carries only the variable names.
alevin_coll_decoy_gtr = AnnData(
    X=adata.layers["alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_spliced"],
    obs=adata.obs[
        [
            "cell_index",
            "clusters_coarse",
            "clusters",
            "X",
            "nGene",
            "nUMI",
            "percent.mito",
            "Sample",
            "treatment",
            "Period",
            "stage",
            "DevStage",
        ]
    ],
    var=pd.DataFrame(index=adata.var_names),
    layers=dict(
        unspliced=adata.layers["alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_unspliced"],
        spliced=adata.layers["alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_spliced"],
    ),
)
# Write the object to DATA_DIR/pfc/ as an .h5ad file.
alevin_coll_decoy_gtr.write(
    DATA_DIR / "pfc" / "alevin_prepref_isocollapse_cdna_introns_decoy_gentrome.h5ad"
)
/tmp/ipykernel_212169/2180130167.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  alevin_coll_decoy_gtr = AnnData(
# Build a standalone AnnData object from the
# `alevin_prepref_isocollapse_cdna_introns_gentrome` layers: the spliced counts
# are used both as `X` and as the "spliced" layer, the unspliced counts as the
# "unspliced" layer. Cell annotations are copied from `adata.obs`; `var`
# carries only the variable names.
alevin_coll_gtr = AnnData(
    X=adata.layers["alevin_prepref_isocollapse_cdna_introns_gentrome_spliced"],
    obs=adata.obs[
        [
            "cell_index",
            "clusters_coarse",
            "clusters",
            "X",
            "nGene",
            "nUMI",
            "percent.mito",
            "Sample",
            "treatment",
            "Period",
            "stage",
            "DevStage",
        ]
    ],
    var=pd.DataFrame(index=adata.var_names),
    layers=dict(
        unspliced=adata.layers["alevin_prepref_isocollapse_cdna_introns_gentrome_unspliced"],
        spliced=adata.layers["alevin_prepref_isocollapse_cdna_introns_gentrome_spliced"],
    ),
)
# Write the object to DATA_DIR/pfc/ as an .h5ad file.
alevin_coll_gtr.write(
    DATA_DIR / "pfc" / "alevin_prepref_isocollapse_cdna_introns_gentrome.h5ad"
)
/tmp/ipykernel_212169/2727677742.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  alevin_coll_gtr = AnnData(
# Build a standalone AnnData object from the
# `alevin_prepref_isoseparate_cdna_introns_decoy_gentrome` layers: the spliced
# counts are used both as `X` and as the "spliced" layer, the unspliced counts
# as the "unspliced" layer. Cell annotations are copied from `adata.obs`; `var`
# carries only the variable names.
alevin_sep_decoy_gtr = AnnData(
    X=adata.layers["alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_spliced"],
    obs=adata.obs[
        [
            "cell_index",
            "clusters_coarse",
            "clusters",
            "X",
            "nGene",
            "nUMI",
            "percent.mito",
            "Sample",
            "treatment",
            "Period",
            "stage",
            "DevStage",
        ]
    ],
    var=pd.DataFrame(index=adata.var_names),
    layers=dict(
        unspliced=adata.layers["alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_unspliced"],
        spliced=adata.layers["alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_spliced"],
    ),
)
# Write the object to DATA_DIR/pfc/ as an .h5ad file.
alevin_sep_decoy_gtr.write(
    DATA_DIR / "pfc" / "alevin_prepref_isoseparate_cdna_introns_decoy_gentrome.h5ad"
)
/tmp/ipykernel_212169/1874051280.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  alevin_sep_decoy_gtr = AnnData(
# Build a standalone AnnData object from the
# `alevin_prepref_isoseparate_cdna_introns_gentrome` layers: the spliced counts
# are used both as `X` and as the "spliced" layer, the unspliced counts as the
# "unspliced" layer. Cell annotations are copied from `adata.obs`; `var`
# carries only the variable names.
alevin_sep_gtr = AnnData(
    X=adata.layers["alevin_prepref_isoseparate_cdna_introns_gentrome_spliced"],
    obs=adata.obs[
        [
            "cell_index",
            "clusters_coarse",
            "clusters",
            "X",
            "nGene",
            "nUMI",
            "percent.mito",
            "Sample",
            "treatment",
            "Period",
            "stage",
            "DevStage",
        ]
    ],
    var=pd.DataFrame(index=adata.var_names),
    layers=dict(
        unspliced=adata.layers["alevin_prepref_isoseparate_cdna_introns_gentrome_unspliced"],
        spliced=adata.layers["alevin_prepref_isoseparate_cdna_introns_gentrome_spliced"],
    ),
)
# Write the object to DATA_DIR/pfc/ as an .h5ad file.
alevin_sep_gtr.write(
    DATA_DIR / "pfc" / "alevin_prepref_isoseparate_cdna_introns_gentrome.h5ad"
)
/tmp/ipykernel_212169/2798175907.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  alevin_sep_gtr = AnnData(
# Build a standalone AnnData object from the
# `alevin_spliced_unspliced_gentrome` layers: the spliced counts are used both
# as `X` and as the "spliced" layer, the unspliced counts as the "unspliced"
# layer. Cell annotations are copied from `adata.obs`; `var` carries only the
# variable names.
alevin_spliced_unspliced_gtr = AnnData(
    X=adata.layers["alevin_spliced_unspliced_gentrome_spliced"],
    obs=adata.obs[
        [
            "cell_index",
            "clusters_coarse",
            "clusters",
            "X",
            "nGene",
            "nUMI",
            "percent.mito",
            "Sample",
            "treatment",
            "Period",
            "stage",
            "DevStage",
        ]
    ],
    var=pd.DataFrame(index=adata.var_names),
    layers=dict(
        unspliced=adata.layers["alevin_spliced_unspliced_gentrome_unspliced"],
        spliced=adata.layers["alevin_spliced_unspliced_gentrome_spliced"],
    ),
)
# Write the object to DATA_DIR/pfc/ as an .h5ad file.
alevin_spliced_unspliced_gtr.write(
    DATA_DIR / "pfc" / "alevin_spliced_unspliced_gentrome.h5ad"
)
/tmp/ipykernel_212169/61586144.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  alevin_spliced_unspliced_gtr = AnnData(
# Build a standalone AnnData object from the `dropest` layers: the spliced
# counts are used both as `X` and as the "spliced" layer, the unspliced counts
# as the "unspliced" layer. Cell annotations are copied from `adata.obs`; `var`
# carries only the variable names.
dropest = AnnData(
    X=adata.layers["dropest_spliced"],
    obs=adata.obs[
        [
            "cell_index",
            "clusters_coarse",
            "clusters",
            "X",
            "nGene",
            "nUMI",
            "percent.mito",
            "Sample",
            "treatment",
            "Period",
            "stage",
            "DevStage",
        ]
    ],
    var=pd.DataFrame(index=adata.var_names),
    layers=dict(
        unspliced=adata.layers["dropest_unspliced"],
        spliced=adata.layers["dropest_spliced"],
    ),
)
# Write the object to DATA_DIR/pfc/ as an .h5ad file.
dropest.write(DATA_DIR / "pfc" / "dropest.h5ad")
/tmp/ipykernel_212169/3754588795.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  dropest = AnnData(
# Build a standalone AnnData object from the
# `kallisto_bustools_prepref_isocollapse_exclude` layers: the spliced counts
# are used both as `X` and as the "spliced" layer, the unspliced counts as the
# "unspliced" layer. Cell annotations are copied from `adata.obs`; `var`
# carries only the variable names.
kalisto_bustools_coll_excl = AnnData(
    X=adata.layers["kallisto_bustools_prepref_isocollapse_exclude_spliced"],
    obs=adata.obs[
        [
            "cell_index",
            "clusters_coarse",
            "clusters",
            "X",
            "nGene",
            "nUMI",
            "percent.mito",
            "Sample",
            "treatment",
            "Period",
            "stage",
            "DevStage",
        ]
    ],
    var=pd.DataFrame(index=adata.var_names),
    layers=dict(
        unspliced=adata.layers["kallisto_bustools_prepref_isocollapse_exclude_unspliced"],
        spliced=adata.layers["kallisto_bustools_prepref_isocollapse_exclude_spliced"],
    ),
)
# Write the object to DATA_DIR/pfc/ as an .h5ad file.
kalisto_bustools_coll_excl.write(
    DATA_DIR / "pfc" / "kallisto_bustools_prepref_isocollapse_exclude.h5ad"
)
/tmp/ipykernel_212169/234732962.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  kalisto_bustools_coll_excl = AnnData(
# Build a standalone AnnData object from the
# `kallisto_bustools_prepref_isocollapse_include` layers: the spliced counts
# are used both as `X` and as the "spliced" layer, the unspliced counts as the
# "unspliced" layer. Cell annotations are copied from `adata.obs`; `var`
# carries only the variable names.
kalisto_bustools_coll_incl = AnnData(
    X=adata.layers["kallisto_bustools_prepref_isocollapse_include_spliced"],
    obs=adata.obs[
        [
            "cell_index",
            "clusters_coarse",
            "clusters",
            "X",
            "nGene",
            "nUMI",
            "percent.mito",
            "Sample",
            "treatment",
            "Period",
            "stage",
            "DevStage",
        ]
    ],
    var=pd.DataFrame(index=adata.var_names),
    layers=dict(
        unspliced=adata.layers["kallisto_bustools_prepref_isocollapse_include_unspliced"],
        spliced=adata.layers["kallisto_bustools_prepref_isocollapse_include_spliced"],
    ),
)
# Write the object to DATA_DIR/pfc/ as an .h5ad file.
kalisto_bustools_coll_incl.write(
    DATA_DIR / "pfc" / "kallisto_bustools_prepref_isocollapse_include.h5ad"
)
/tmp/ipykernel_212169/2381467909.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  kalisto_bustools_coll_incl = AnnData(
# Build a standalone AnnData object from the
# `kallisto_bustools_prepref_isoseparate_exclude` layers: the spliced counts
# are used both as `X` and as the "spliced" layer, the unspliced counts as the
# "unspliced" layer. Cell annotations are copied from `adata.obs`; `var`
# carries only the variable names.
kalisto_bustools_sep_excl = AnnData(
    X=adata.layers["kallisto_bustools_prepref_isoseparate_exclude_spliced"],
    obs=adata.obs[
        [
            "cell_index",
            "clusters_coarse",
            "clusters",
            "X",
            "nGene",
            "nUMI",
            "percent.mito",
            "Sample",
            "treatment",
            "Period",
            "stage",
            "DevStage",
        ]
    ],
    var=pd.DataFrame(index=adata.var_names),
    layers=dict(
        unspliced=adata.layers["kallisto_bustools_prepref_isoseparate_exclude_unspliced"],
        spliced=adata.layers["kallisto_bustools_prepref_isoseparate_exclude_spliced"],
    ),
)
# Write the object to DATA_DIR/pfc/ as an .h5ad file.
kalisto_bustools_sep_excl.write(
    DATA_DIR / "pfc" / "kallisto_bustools_prepref_isoseparate_exclude.h5ad"
)
/tmp/ipykernel_212169/2403671092.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  kalisto_bustools_sep_excl = AnnData(
# Build a standalone AnnData object from the
# `kallisto_bustools_prepref_isoseparate_include` layers: the spliced counts
# are used both as `X` and as the "spliced" layer, the unspliced counts as the
# "unspliced" layer. Cell annotations are copied from `adata.obs`; `var`
# carries only the variable names.
kalisto_bustools_sep_incl = AnnData(
    X=adata.layers["kallisto_bustools_prepref_isoseparate_include_spliced"],
    obs=adata.obs[
        [
            "cell_index",
            "clusters_coarse",
            "clusters",
            "X",
            "nGene",
            "nUMI",
            "percent.mito",
            "Sample",
            "treatment",
            "Period",
            "stage",
            "DevStage",
        ]
    ],
    var=pd.DataFrame(index=adata.var_names),
    layers=dict(
        unspliced=adata.layers["kallisto_bustools_prepref_isoseparate_include_unspliced"],
        spliced=adata.layers["kallisto_bustools_prepref_isoseparate_include_spliced"],
    ),
)
# Write the object to DATA_DIR/pfc/ as an .h5ad file.
kalisto_bustools_sep_incl.write(
    DATA_DIR / "pfc" / "kallisto_bustools_prepref_isoseparate_include.h5ad"
)
/tmp/ipykernel_212169/3114816111.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  kalisto_bustools_sep_incl = AnnData(
# Build a standalone AnnData object from the `starsolo` layers: the spliced
# counts are used both as `X` and as the "spliced" layer, the unspliced counts
# as the "unspliced" layer. Cell annotations are copied from `adata.obs`; `var`
# carries only the variable names.
starsolo = AnnData(
    X=adata.layers["starsolo_spliced"],
    obs=adata.obs[
        [
            "cell_index",
            "clusters_coarse",
            "clusters",
            "X",
            "nGene",
            "nUMI",
            "percent.mito",
            "Sample",
            "treatment",
            "Period",
            "stage",
            "DevStage",
        ]
    ],
    var=pd.DataFrame(index=adata.var_names),
    layers=dict(
        unspliced=adata.layers["starsolo_unspliced"],
        spliced=adata.layers["starsolo_spliced"],
    ),
)
# Write the object to DATA_DIR/pfc/ as an .h5ad file.
starsolo.write(DATA_DIR / "pfc" / "starsolo.h5ad")
/tmp/ipykernel_212169/2454536947.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  starsolo = AnnData(
# Build a standalone AnnData object from the `starsolo_subtr` layers: the
# spliced counts are used both as `X` and as the "spliced" layer, the unspliced
# counts as the "unspliced" layer. Cell annotations are copied from
# `adata.obs`; `var` carries only the variable names.
starsolo_diff = AnnData(
    X=adata.layers["starsolo_subtr_spliced"],
    obs=adata.obs[
        [
            "cell_index",
            "clusters_coarse",
            "clusters",
            "X",
            "nGene",
            "nUMI",
            "percent.mito",
            "Sample",
            "treatment",
            "Period",
            "stage",
            "DevStage",
        ]
    ],
    var=pd.DataFrame(index=adata.var_names),
    layers=dict(
        unspliced=adata.layers["starsolo_subtr_unspliced"],
        spliced=adata.layers["starsolo_subtr_spliced"],
    ),
)
# Write the object to DATA_DIR/pfc/ as an .h5ad file.
starsolo_diff.write(DATA_DIR / "pfc" / "starsolo_subtr.h5ad")
/tmp/ipykernel_212169/843566959.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  starsolo_diff = AnnData(
# Build a standalone AnnData object for the velocyto quantification. Unlike the
# other cells, the spliced counts come from `adata.X` (used both as `X` and as
# the "spliced" layer); only the unspliced counts come from a layer,
# `velocyto_unspliced`. Cell annotations are copied from `adata.obs`; `var`
# carries only the variable names.
# NOTE(review): presumably `adata.X` holds the velocyto spliced counts, since
# the loaded object lists no `velocyto_spliced` layer - confirm.
velocyto = AnnData(
    X=adata.X,
    obs=adata.obs[
        [
            "cell_index",
            "clusters_coarse",
            "clusters",
            "X",
            "nGene",
            "nUMI",
            "percent.mito",
            "Sample",
            "treatment",
            "Period",
            "stage",
            "DevStage",
        ]
    ],
    var=pd.DataFrame(index=adata.var_names),
    layers=dict(
        unspliced=adata.layers["velocyto_unspliced"],
        spliced=adata.X,
    ),
)
# Write the object to DATA_DIR/pfc/ as an .h5ad file.
velocyto.write(DATA_DIR / "pfc" / "velocyto.h5ad")