Data generation for RNA velocity analysis in mouse brain#

Generates .h5ad files to analyze RNA velocity in mouse brain using different pre-processing protocols.

Preliminaries#

Requires

  • DATA_DIR/old_brain/OldBrain_adata_quantifications_combined.h5ad

Output

  • DATA_DIR/old_brain/alevin_prepref_isocollapse_cdna_introns_decoy_gentrome.h5ad

  • DATA_DIR/old_brain/alevin_prepref_isocollapse_cdna_introns_gentrome.h5ad

  • DATA_DIR/old_brain/alevin_prepref_isoseparate_cdna_introns_decoy_gentrome.h5ad

  • DATA_DIR/old_brain/alevin_prepref_isoseparate_cdna_introns_gentrome.h5ad

  • DATA_DIR/old_brain/alevin_spliced_unspliced_gentrome.h5ad

  • DATA_DIR/old_brain/dropest.h5ad

  • DATA_DIR/old_brain/kallisto_bustools_prepref_isocollapse_exclude.h5ad

  • DATA_DIR/old_brain/kallisto_bustools_prepref_isocollapse_include.h5ad

  • DATA_DIR/old_brain/kallisto_bustools_prepref_isoseparate_exclude.h5ad

  • DATA_DIR/old_brain/kallisto_bustools_prepref_isoseparate_include.h5ad

  • DATA_DIR/old_brain/starsolo.h5ad

  • DATA_DIR/old_brain/starsolo_subtr.h5ad

  • DATA_DIR/old_brain/velocyto.h5ad

Library imports#

from pathlib import Path
import sys

# import standard packages
import pandas as pd

# import single-cell packages
from anndata import AnnData
import scanpy as sc

sys.path.append("../../..")
from paths import DATA_DIR

Data loading#

# Read the combined quantification object (one spliced/unspliced layer pair per
# pre-processing protocol) and display its summary.
combined_h5ad_path = DATA_DIR / "old_brain" / "OldBrain_adata_quantifications_combined.h5ad"
adata = sc.read(combined_h5ad_path)
adata
AnnData object with n_obs × n_vars = 1823 × 54143
    obs: 'cell_index', 'clusters_coarse', 'clusters', 'NAME', 'nGene', 'nUMI', 'animal_type', 'cell_type_age'
    uns: 'X_name', 'dataset'
    obsm: 'PCA_alevin_spliced_gentrome', 'TSNE_alevin_spliced_gentrome', 'UMAP_alevin_spliced_gentrome'
    layers: 'alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_spliced', 'alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_unspliced', 'alevin_prepref_isocollapse_cdna_introns_gentrome_spliced', 'alevin_prepref_isocollapse_cdna_introns_gentrome_unspliced', 'alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_spliced', 'alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_unspliced', 'alevin_prepref_isoseparate_cdna_introns_gentrome_spliced', 'alevin_prepref_isoseparate_cdna_introns_gentrome_unspliced', 'alevin_spliced_gentrome_counts', 'alevin_spliced_unspliced_gentrome_spliced', 'alevin_spliced_unspliced_gentrome_unspliced', 'dropest_spliced', 'dropest_unspliced', 'kallisto_bustools_prepref_isocollapse_exclude_spliced', 'kallisto_bustools_prepref_isocollapse_exclude_unspliced', 'kallisto_bustools_prepref_isocollapse_include_spliced', 'kallisto_bustools_prepref_isocollapse_include_unspliced', 'kallisto_bustools_prepref_isoseparate_exclude_spliced', 'kallisto_bustools_prepref_isoseparate_exclude_unspliced', 'kallisto_bustools_prepref_isoseparate_include_spliced', 'kallisto_bustools_prepref_isoseparate_include_unspliced', 'starsolo_spliced', 'starsolo_subtr_spliced', 'starsolo_subtr_unspliced', 'starsolo_unspliced', 'velocyto_unspliced'

Data generation#

# Extract the alevin "prepref_isocollapse_cdna_introns_decoy_gentrome" counts into
# their own AnnData (spliced counts as X plus unspliced/spliced layers) and save it.
#
# X is cast to float32 explicitly: anndata 0.8 performs this conversion implicitly
# (with a FutureWarning) and anndata 0.9 no longer converts, so the explicit cast
# keeps the written file independent of the installed anndata version.
# Only X is cast here; the layers are passed through as stored.
alevin_coll_decoy_gtr = AnnData(
    X=adata.layers['alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_unspliced'],
        'spliced': adata.layers['alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters', 'NAME', 'nGene', 'nUMI', 'animal_type', 'cell_type_age']],
    # Keep only the gene names; no per-gene annotations are carried over.
    var=pd.DataFrame(index=adata.var_names),
)
alevin_coll_decoy_gtr.write(DATA_DIR / "old_brain" / "alevin_prepref_isocollapse_cdna_introns_decoy_gentrome.h5ad")
/tmp/ipykernel_91451/2175067153.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  alevin_coll_decoy_gtr = AnnData(
# Extract the alevin "prepref_isocollapse_cdna_introns_gentrome" counts into their
# own AnnData (spliced counts as X plus unspliced/spliced layers) and save it.
#
# X is cast to float32 explicitly: anndata 0.8 performs this conversion implicitly
# (with a FutureWarning) and anndata 0.9 no longer converts, so the explicit cast
# keeps the written file independent of the installed anndata version.
# Only X is cast here; the layers are passed through as stored.
alevin_coll_gtr = AnnData(
    X=adata.layers['alevin_prepref_isocollapse_cdna_introns_gentrome_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['alevin_prepref_isocollapse_cdna_introns_gentrome_unspliced'],
        'spliced': adata.layers['alevin_prepref_isocollapse_cdna_introns_gentrome_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters', 'NAME', 'nGene', 'nUMI', 'animal_type', 'cell_type_age']],
    # Keep only the gene names; no per-gene annotations are carried over.
    var=pd.DataFrame(index=adata.var_names),
)
alevin_coll_gtr.write(DATA_DIR / "old_brain" / "alevin_prepref_isocollapse_cdna_introns_gentrome.h5ad")
/tmp/ipykernel_91451/2574400100.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  alevin_coll_gtr = AnnData(
# Extract the alevin "prepref_isoseparate_cdna_introns_decoy_gentrome" counts into
# their own AnnData (spliced counts as X plus unspliced/spliced layers) and save it.
#
# X is cast to float32 explicitly: anndata 0.8 performs this conversion implicitly
# (with a FutureWarning) and anndata 0.9 no longer converts, so the explicit cast
# keeps the written file independent of the installed anndata version.
# Only X is cast here; the layers are passed through as stored.
alevin_sep_decoy_gtr = AnnData(
    X=adata.layers['alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_unspliced'],
        'spliced': adata.layers['alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters', 'NAME', 'nGene', 'nUMI', 'animal_type', 'cell_type_age']],
    # Keep only the gene names; no per-gene annotations are carried over.
    var=pd.DataFrame(index=adata.var_names),
)
alevin_sep_decoy_gtr.write(DATA_DIR / "old_brain" / "alevin_prepref_isoseparate_cdna_introns_decoy_gentrome.h5ad")
/tmp/ipykernel_91451/2620880788.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  alevin_sep_decoy_gtr = AnnData(
# Extract the alevin "prepref_isoseparate_cdna_introns_gentrome" counts into their
# own AnnData (spliced counts as X plus unspliced/spliced layers) and save it.
#
# X is cast to float32 explicitly: anndata 0.8 performs this conversion implicitly
# (with a FutureWarning) and anndata 0.9 no longer converts, so the explicit cast
# keeps the written file independent of the installed anndata version.
# Only X is cast here; the layers are passed through as stored.
alevin_sep_gtr = AnnData(
    X=adata.layers['alevin_prepref_isoseparate_cdna_introns_gentrome_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['alevin_prepref_isoseparate_cdna_introns_gentrome_unspliced'],
        'spliced': adata.layers['alevin_prepref_isoseparate_cdna_introns_gentrome_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters', 'NAME', 'nGene', 'nUMI', 'animal_type', 'cell_type_age']],
    # Keep only the gene names; no per-gene annotations are carried over.
    var=pd.DataFrame(index=adata.var_names),
)
alevin_sep_gtr.write(DATA_DIR / "old_brain" / "alevin_prepref_isoseparate_cdna_introns_gentrome.h5ad")
/tmp/ipykernel_91451/2290063912.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  alevin_sep_gtr = AnnData(
# Extract the alevin "spliced_unspliced_gentrome" counts into their own AnnData
# (spliced counts as X plus unspliced/spliced layers) and save it.
#
# X is cast to float32 explicitly: anndata 0.8 performs this conversion implicitly
# (with a FutureWarning) and anndata 0.9 no longer converts, so the explicit cast
# keeps the written file independent of the installed anndata version.
# Only X is cast here; the layers are passed through as stored.
alevin_spliced_unspliced_gtr = AnnData(
    X=adata.layers['alevin_spliced_unspliced_gentrome_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['alevin_spliced_unspliced_gentrome_unspliced'],
        'spliced': adata.layers['alevin_spliced_unspliced_gentrome_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters', 'NAME', 'nGene', 'nUMI', 'animal_type', 'cell_type_age']],
    # Keep only the gene names; no per-gene annotations are carried over.
    var=pd.DataFrame(index=adata.var_names),
)
alevin_spliced_unspliced_gtr.write(DATA_DIR / "old_brain" / "alevin_spliced_unspliced_gentrome.h5ad")
/tmp/ipykernel_91451/3661593580.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  alevin_spliced_unspliced_gtr = AnnData(
# Extract the dropEst counts into their own AnnData (spliced counts as X plus
# unspliced/spliced layers) and save it.
#
# X is cast to float32 explicitly: anndata 0.8 performs this conversion implicitly
# (with a FutureWarning) and anndata 0.9 no longer converts, so the explicit cast
# keeps the written file independent of the installed anndata version.
# Only X is cast here; the layers are passed through as stored.
dropest = AnnData(
    X=adata.layers['dropest_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['dropest_unspliced'],
        'spliced': adata.layers['dropest_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters', 'NAME', 'nGene', 'nUMI', 'animal_type', 'cell_type_age']],
    # Keep only the gene names; no per-gene annotations are carried over.
    var=pd.DataFrame(index=adata.var_names),
)
dropest.write(DATA_DIR / "old_brain" / "dropest.h5ad")
/tmp/ipykernel_91451/823519977.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  dropest = AnnData(
# Extract the kallisto|bustools "prepref_isocollapse_exclude" counts into their own
# AnnData (spliced counts as X plus unspliced/spliced layers) and save it.
#
# X is cast to float32 explicitly: anndata 0.8 performs this conversion implicitly
# (with a FutureWarning) and anndata 0.9 no longer converts, so the explicit cast
# keeps the written file independent of the installed anndata version.
# Only X is cast here; the layers are passed through as stored.
kalisto_bustools_coll_excl = AnnData(
    X=adata.layers['kallisto_bustools_prepref_isocollapse_exclude_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['kallisto_bustools_prepref_isocollapse_exclude_unspliced'],
        'spliced': adata.layers['kallisto_bustools_prepref_isocollapse_exclude_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters', 'NAME', 'nGene', 'nUMI', 'animal_type', 'cell_type_age']],
    # Keep only the gene names; no per-gene annotations are carried over.
    var=pd.DataFrame(index=adata.var_names),
)
kalisto_bustools_coll_excl.write(DATA_DIR / "old_brain" / "kallisto_bustools_prepref_isocollapse_exclude.h5ad")
/tmp/ipykernel_91451/1570495297.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  kalisto_bustools_coll_excl = AnnData(
# Extract the kallisto|bustools "prepref_isocollapse_include" counts into their own
# AnnData (spliced counts as X plus unspliced/spliced layers) and save it.
#
# X is cast to float32 explicitly: anndata 0.8 performs this conversion implicitly
# (with a FutureWarning) and anndata 0.9 no longer converts, so the explicit cast
# keeps the written file independent of the installed anndata version.
# Only X is cast here; the layers are passed through as stored.
kalisto_bustools_coll_incl = AnnData(
    X=adata.layers['kallisto_bustools_prepref_isocollapse_include_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['kallisto_bustools_prepref_isocollapse_include_unspliced'],
        'spliced': adata.layers['kallisto_bustools_prepref_isocollapse_include_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters', 'NAME', 'nGene', 'nUMI', 'animal_type', 'cell_type_age']],
    # Keep only the gene names; no per-gene annotations are carried over.
    var=pd.DataFrame(index=adata.var_names),
)
kalisto_bustools_coll_incl.write(DATA_DIR / "old_brain" / "kallisto_bustools_prepref_isocollapse_include.h5ad")
/tmp/ipykernel_91451/1103478739.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  kalisto_bustools_coll_incl = AnnData(
# Extract the kallisto|bustools "prepref_isoseparate_exclude" counts into their own
# AnnData (spliced counts as X plus unspliced/spliced layers) and save it.
#
# X is cast to float32 explicitly: anndata 0.8 performs this conversion implicitly
# (with a FutureWarning) and anndata 0.9 no longer converts, so the explicit cast
# keeps the written file independent of the installed anndata version.
# Only X is cast here; the layers are passed through as stored.
kalisto_bustools_sep_excl = AnnData(
    X=adata.layers['kallisto_bustools_prepref_isoseparate_exclude_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['kallisto_bustools_prepref_isoseparate_exclude_unspliced'],
        'spliced': adata.layers['kallisto_bustools_prepref_isoseparate_exclude_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters', 'NAME', 'nGene', 'nUMI', 'animal_type', 'cell_type_age']],
    # Keep only the gene names; no per-gene annotations are carried over.
    var=pd.DataFrame(index=adata.var_names),
)
kalisto_bustools_sep_excl.write(DATA_DIR / "old_brain" / "kallisto_bustools_prepref_isoseparate_exclude.h5ad")
/tmp/ipykernel_91451/3931455348.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  kalisto_bustools_sep_excl = AnnData(
# Extract the kallisto|bustools "prepref_isoseparate_include" counts into their own
# AnnData (spliced counts as X plus unspliced/spliced layers) and save it.
#
# X is cast to float32 explicitly: anndata 0.8 performs this conversion implicitly
# (with a FutureWarning) and anndata 0.9 no longer converts, so the explicit cast
# keeps the written file independent of the installed anndata version.
# Only X is cast here; the layers are passed through as stored.
kalisto_bustools_sep_incl = AnnData(
    X=adata.layers['kallisto_bustools_prepref_isoseparate_include_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['kallisto_bustools_prepref_isoseparate_include_unspliced'],
        'spliced': adata.layers['kallisto_bustools_prepref_isoseparate_include_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters', 'NAME', 'nGene', 'nUMI', 'animal_type', 'cell_type_age']],
    # Keep only the gene names; no per-gene annotations are carried over.
    var=pd.DataFrame(index=adata.var_names),
)
kalisto_bustools_sep_incl.write(DATA_DIR / "old_brain" / "kallisto_bustools_prepref_isoseparate_include.h5ad")
/tmp/ipykernel_91451/901589679.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  kalisto_bustools_sep_incl = AnnData(
# Extract the STARsolo counts into their own AnnData (spliced counts as X plus
# unspliced/spliced layers) and save it.
#
# X is cast to float32 explicitly: anndata 0.8 performs this conversion implicitly
# (with a FutureWarning) and anndata 0.9 no longer converts, so the explicit cast
# keeps the written file independent of the installed anndata version.
# Only X is cast here; the layers are passed through as stored.
starsolo = AnnData(
    X=adata.layers['starsolo_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['starsolo_unspliced'],
        'spliced': adata.layers['starsolo_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters', 'NAME', 'nGene', 'nUMI', 'animal_type', 'cell_type_age']],
    # Keep only the gene names; no per-gene annotations are carried over.
    var=pd.DataFrame(index=adata.var_names),
)
starsolo.write(DATA_DIR / "old_brain" / "starsolo.h5ad")
/tmp/ipykernel_91451/1188938678.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  starsolo = AnnData(
# Extract the "starsolo_subtr" counts into their own AnnData (spliced counts as X
# plus unspliced/spliced layers) and save it.
#
# X is cast to float32 explicitly: anndata 0.8 performs this conversion implicitly
# (with a FutureWarning) and anndata 0.9 no longer converts, so the explicit cast
# keeps the written file independent of the installed anndata version.
# Only X is cast here; the layers are passed through as stored.
starsolo_diff = AnnData(
    X=adata.layers['starsolo_subtr_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['starsolo_subtr_unspliced'],
        'spliced': adata.layers['starsolo_subtr_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters', 'NAME', 'nGene', 'nUMI', 'animal_type', 'cell_type_age']],
    # Keep only the gene names; no per-gene annotations are carried over.
    var=pd.DataFrame(index=adata.var_names),
)
starsolo_diff.write(DATA_DIR / "old_brain" / "starsolo_subtr.h5ad")
/tmp/ipykernel_91451/1821660359.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  starsolo_diff = AnnData(
# Build the velocyto AnnData and save it.
# NOTE(review): the loaded object has no 'velocyto_spliced' layer, so adata.X is
# used as the spliced counts here -- confirm that X holds velocyto's spliced matrix.
velocyto_layers = {
    'unspliced': adata.layers['velocyto_unspliced'],
    'spliced': adata.X,
}
velocyto_obs_columns = [
    'cell_index',
    'clusters_coarse',
    'clusters',
    'NAME',
    'nGene',
    'nUMI',
    'animal_type',
    'cell_type_age',
]
velocyto = AnnData(
    X=adata.X,
    layers=velocyto_layers,
    obs=adata.obs[velocyto_obs_columns],
    # Keep only the gene names; no per-gene annotations are carried over.
    var=pd.DataFrame(index=adata.var_names),
)
velocyto_h5ad_path = DATA_DIR / "old_brain" / "velocyto.h5ad"
velocyto.write(velocyto_h5ad_path)