Data generation for RNA velocity analysis in mouse brain#
Generates .h5ad
files to analyze RNA velocity in mouse brain using different pre-processing protocols.
Preliminaries#
Requires
DATA_DIR/old_brain/OldBrain_adata_quantifications_combined.h5ad
Output
DATA_DIR/old_brain/alevin_prepref_isocollapse_cdna_introns_decoy_gentrome.h5ad
DATA_DIR/old_brain/alevin_prepref_isocollapse_cdna_introns_gentrome.h5ad
DATA_DIR/old_brain/alevin_prepref_isoseparate_cdna_introns_decoy_gentrome.h5ad
DATA_DIR/old_brain/alevin_prepref_isoseparate_cdna_introns_gentrome.h5ad
DATA_DIR/old_brain/alevin_spliced_unspliced_gentrome.h5ad
DATA_DIR/old_brain/dropest.h5ad
DATA_DIR/old_brain/kallisto_bustools_prepref_isocollapse_exclude.h5ad
DATA_DIR/old_brain/kallisto_bustools_prepref_isocollapse_include.h5ad
DATA_DIR/old_brain/kallisto_bustools_prepref_isoseparate_exclude.h5ad
DATA_DIR/old_brain/kallisto_bustools_prepref_isoseparate_include.h5ad
DATA_DIR/old_brain/starsolo.h5ad
DATA_DIR/old_brain/starsolo_subtr.h5ad
DATA_DIR/old_brain/velocyto.h5ad
Library imports#
from pathlib import Path
import sys
# import standard packages
import pandas as pd
# import single-cell packages
from anndata import AnnData
import scanpy as sc
sys.path.append("../../..")
from paths import DATA_DIR
Data loading#
# Load the combined AnnData file that holds one spliced/unspliced layer pair
# per quantification protocol, then display its summary.
adata = sc.read(
    DATA_DIR / "old_brain" / "OldBrain_adata_quantifications_combined.h5ad"
)
adata
AnnData object with n_obs × n_vars = 1823 × 54143
obs: 'cell_index', 'clusters_coarse', 'clusters', 'NAME', 'nGene', 'nUMI', 'animal_type', 'cell_type_age'
uns: 'X_name', 'dataset'
obsm: 'PCA_alevin_spliced_gentrome', 'TSNE_alevin_spliced_gentrome', 'UMAP_alevin_spliced_gentrome'
layers: 'alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_spliced', 'alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_unspliced', 'alevin_prepref_isocollapse_cdna_introns_gentrome_spliced', 'alevin_prepref_isocollapse_cdna_introns_gentrome_unspliced', 'alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_spliced', 'alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_unspliced', 'alevin_prepref_isoseparate_cdna_introns_gentrome_spliced', 'alevin_prepref_isoseparate_cdna_introns_gentrome_unspliced', 'alevin_spliced_gentrome_counts', 'alevin_spliced_unspliced_gentrome_spliced', 'alevin_spliced_unspliced_gentrome_unspliced', 'dropest_spliced', 'dropest_unspliced', 'kallisto_bustools_prepref_isocollapse_exclude_spliced', 'kallisto_bustools_prepref_isocollapse_exclude_unspliced', 'kallisto_bustools_prepref_isocollapse_include_spliced', 'kallisto_bustools_prepref_isocollapse_include_unspliced', 'kallisto_bustools_prepref_isoseparate_exclude_spliced', 'kallisto_bustools_prepref_isoseparate_exclude_unspliced', 'kallisto_bustools_prepref_isoseparate_include_spliced', 'kallisto_bustools_prepref_isoseparate_include_unspliced', 'starsolo_spliced', 'starsolo_subtr_spliced', 'starsolo_subtr_unspliced', 'starsolo_unspliced', 'velocyto_unspliced'
Data generation#
# Build a standalone AnnData for the
# `alevin_prepref_isocollapse_cdna_introns_decoy_gentrome` quantification
# (spliced counts as X, plus `spliced`/`unspliced` layers) and write it to disk.
alevin_coll_decoy_gtr = AnnData(
    # Cast X explicitly: this constructor call triggered anndata's
    # "X.dtype being converted to np.float32 from float64" FutureWarning, and
    # the implicit conversion is removed in anndata 0.9. The explicit cast
    # keeps X float32 on every anndata version and avoids the warning.
    X=adata.layers['alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_unspliced'],
        'spliced': adata.layers['alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_spliced'],
    },
    # Carry over the cell-level annotations; per-gene metadata is reduced to
    # the gene names only.
    obs=adata.obs[[
        'cell_index', 'clusters_coarse', 'clusters', 'NAME',
        'nGene', 'nUMI', 'animal_type', 'cell_type_age',
    ]],
    var=pd.DataFrame(index=adata.var_names),
)
alevin_coll_decoy_gtr.write(DATA_DIR / "old_brain" / "alevin_prepref_isocollapse_cdna_introns_decoy_gentrome.h5ad")
/tmp/ipykernel_91451/2175067153.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
alevin_coll_decoy_gtr = AnnData(
# Build a standalone AnnData for the
# `alevin_prepref_isocollapse_cdna_introns_gentrome` quantification
# (spliced counts as X, plus `spliced`/`unspliced` layers) and write it to disk.
alevin_coll_gtr = AnnData(
    # Cast X explicitly: this constructor call triggered anndata's
    # "X.dtype being converted to np.float32 from float64" FutureWarning, and
    # the implicit conversion is removed in anndata 0.9. The explicit cast
    # keeps X float32 on every anndata version and avoids the warning.
    X=adata.layers['alevin_prepref_isocollapse_cdna_introns_gentrome_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['alevin_prepref_isocollapse_cdna_introns_gentrome_unspliced'],
        'spliced': adata.layers['alevin_prepref_isocollapse_cdna_introns_gentrome_spliced'],
    },
    # Carry over the cell-level annotations; per-gene metadata is reduced to
    # the gene names only.
    obs=adata.obs[[
        'cell_index', 'clusters_coarse', 'clusters', 'NAME',
        'nGene', 'nUMI', 'animal_type', 'cell_type_age',
    ]],
    var=pd.DataFrame(index=adata.var_names),
)
alevin_coll_gtr.write(DATA_DIR / "old_brain" / "alevin_prepref_isocollapse_cdna_introns_gentrome.h5ad")
/tmp/ipykernel_91451/2574400100.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
alevin_coll_gtr = AnnData(
# Build a standalone AnnData for the
# `alevin_prepref_isoseparate_cdna_introns_decoy_gentrome` quantification
# (spliced counts as X, plus `spliced`/`unspliced` layers) and write it to disk.
alevin_sep_decoy_gtr = AnnData(
    # Cast X explicitly: this constructor call triggered anndata's
    # "X.dtype being converted to np.float32 from float64" FutureWarning, and
    # the implicit conversion is removed in anndata 0.9. The explicit cast
    # keeps X float32 on every anndata version and avoids the warning.
    X=adata.layers['alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_unspliced'],
        'spliced': adata.layers['alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_spliced'],
    },
    # Carry over the cell-level annotations; per-gene metadata is reduced to
    # the gene names only.
    obs=adata.obs[[
        'cell_index', 'clusters_coarse', 'clusters', 'NAME',
        'nGene', 'nUMI', 'animal_type', 'cell_type_age',
    ]],
    var=pd.DataFrame(index=adata.var_names),
)
alevin_sep_decoy_gtr.write(DATA_DIR / "old_brain" / "alevin_prepref_isoseparate_cdna_introns_decoy_gentrome.h5ad")
/tmp/ipykernel_91451/2620880788.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
alevin_sep_decoy_gtr = AnnData(
# Build a standalone AnnData for the
# `alevin_prepref_isoseparate_cdna_introns_gentrome` quantification
# (spliced counts as X, plus `spliced`/`unspliced` layers) and write it to disk.
alevin_sep_gtr = AnnData(
    # Cast X explicitly: this constructor call triggered anndata's
    # "X.dtype being converted to np.float32 from float64" FutureWarning, and
    # the implicit conversion is removed in anndata 0.9. The explicit cast
    # keeps X float32 on every anndata version and avoids the warning.
    X=adata.layers['alevin_prepref_isoseparate_cdna_introns_gentrome_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['alevin_prepref_isoseparate_cdna_introns_gentrome_unspliced'],
        'spliced': adata.layers['alevin_prepref_isoseparate_cdna_introns_gentrome_spliced'],
    },
    # Carry over the cell-level annotations; per-gene metadata is reduced to
    # the gene names only.
    obs=adata.obs[[
        'cell_index', 'clusters_coarse', 'clusters', 'NAME',
        'nGene', 'nUMI', 'animal_type', 'cell_type_age',
    ]],
    var=pd.DataFrame(index=adata.var_names),
)
alevin_sep_gtr.write(DATA_DIR / "old_brain" / "alevin_prepref_isoseparate_cdna_introns_gentrome.h5ad")
/tmp/ipykernel_91451/2290063912.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
alevin_sep_gtr = AnnData(
# Build a standalone AnnData for the `alevin_spliced_unspliced_gentrome`
# quantification (spliced counts as X, plus `spliced`/`unspliced` layers) and
# write it to disk.
alevin_spliced_unspliced_gtr = AnnData(
    # Cast X explicitly: this constructor call triggered anndata's
    # "X.dtype being converted to np.float32 from float64" FutureWarning, and
    # the implicit conversion is removed in anndata 0.9. The explicit cast
    # keeps X float32 on every anndata version and avoids the warning.
    X=adata.layers['alevin_spliced_unspliced_gentrome_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['alevin_spliced_unspliced_gentrome_unspliced'],
        'spliced': adata.layers['alevin_spliced_unspliced_gentrome_spliced'],
    },
    # Carry over the cell-level annotations; per-gene metadata is reduced to
    # the gene names only.
    obs=adata.obs[[
        'cell_index', 'clusters_coarse', 'clusters', 'NAME',
        'nGene', 'nUMI', 'animal_type', 'cell_type_age',
    ]],
    var=pd.DataFrame(index=adata.var_names),
)
alevin_spliced_unspliced_gtr.write(DATA_DIR / "old_brain" / "alevin_spliced_unspliced_gentrome.h5ad")
/tmp/ipykernel_91451/3661593580.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
alevin_spliced_unspliced_gtr = AnnData(
# Build a standalone AnnData for the `dropest` quantification (spliced counts
# as X, plus `spliced`/`unspliced` layers) and write it to disk.
dropest = AnnData(
    # Cast X explicitly: this constructor call triggered anndata's
    # "X.dtype being converted to np.float32 from float64" FutureWarning, and
    # the implicit conversion is removed in anndata 0.9. The explicit cast
    # keeps X float32 on every anndata version and avoids the warning.
    X=adata.layers['dropest_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['dropest_unspliced'],
        'spliced': adata.layers['dropest_spliced'],
    },
    # Carry over the cell-level annotations; per-gene metadata is reduced to
    # the gene names only.
    obs=adata.obs[[
        'cell_index', 'clusters_coarse', 'clusters', 'NAME',
        'nGene', 'nUMI', 'animal_type', 'cell_type_age',
    ]],
    var=pd.DataFrame(index=adata.var_names),
)
dropest.write(DATA_DIR / "old_brain" / "dropest.h5ad")
/tmp/ipykernel_91451/823519977.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
dropest = AnnData(
# Build a standalone AnnData for the
# `kallisto_bustools_prepref_isocollapse_exclude` quantification (spliced
# counts as X, plus `spliced`/`unspliced` layers) and write it to disk.
kalisto_bustools_coll_excl = AnnData(
    # Cast X explicitly: this constructor call triggered anndata's
    # "X.dtype being converted to np.float32 from float64" FutureWarning, and
    # the implicit conversion is removed in anndata 0.9. The explicit cast
    # keeps X float32 on every anndata version and avoids the warning.
    X=adata.layers['kallisto_bustools_prepref_isocollapse_exclude_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['kallisto_bustools_prepref_isocollapse_exclude_unspliced'],
        'spliced': adata.layers['kallisto_bustools_prepref_isocollapse_exclude_spliced'],
    },
    # Carry over the cell-level annotations; per-gene metadata is reduced to
    # the gene names only.
    obs=adata.obs[[
        'cell_index', 'clusters_coarse', 'clusters', 'NAME',
        'nGene', 'nUMI', 'animal_type', 'cell_type_age',
    ]],
    var=pd.DataFrame(index=adata.var_names),
)
kalisto_bustools_coll_excl.write(DATA_DIR / "old_brain" / "kallisto_bustools_prepref_isocollapse_exclude.h5ad")
/tmp/ipykernel_91451/1570495297.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
kalisto_bustools_coll_excl = AnnData(
# Build a standalone AnnData for the
# `kallisto_bustools_prepref_isocollapse_include` quantification (spliced
# counts as X, plus `spliced`/`unspliced` layers) and write it to disk.
kalisto_bustools_coll_incl = AnnData(
    # Cast X explicitly: this constructor call triggered anndata's
    # "X.dtype being converted to np.float32 from float64" FutureWarning, and
    # the implicit conversion is removed in anndata 0.9. The explicit cast
    # keeps X float32 on every anndata version and avoids the warning.
    X=adata.layers['kallisto_bustools_prepref_isocollapse_include_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['kallisto_bustools_prepref_isocollapse_include_unspliced'],
        'spliced': adata.layers['kallisto_bustools_prepref_isocollapse_include_spliced'],
    },
    # Carry over the cell-level annotations; per-gene metadata is reduced to
    # the gene names only.
    obs=adata.obs[[
        'cell_index', 'clusters_coarse', 'clusters', 'NAME',
        'nGene', 'nUMI', 'animal_type', 'cell_type_age',
    ]],
    var=pd.DataFrame(index=adata.var_names),
)
kalisto_bustools_coll_incl.write(DATA_DIR / "old_brain" / "kallisto_bustools_prepref_isocollapse_include.h5ad")
/tmp/ipykernel_91451/1103478739.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
kalisto_bustools_coll_incl = AnnData(
# Build a standalone AnnData for the
# `kallisto_bustools_prepref_isoseparate_exclude` quantification (spliced
# counts as X, plus `spliced`/`unspliced` layers) and write it to disk.
kalisto_bustools_sep_excl = AnnData(
    # Cast X explicitly: this constructor call triggered anndata's
    # "X.dtype being converted to np.float32 from float64" FutureWarning, and
    # the implicit conversion is removed in anndata 0.9. The explicit cast
    # keeps X float32 on every anndata version and avoids the warning.
    X=adata.layers['kallisto_bustools_prepref_isoseparate_exclude_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['kallisto_bustools_prepref_isoseparate_exclude_unspliced'],
        'spliced': adata.layers['kallisto_bustools_prepref_isoseparate_exclude_spliced'],
    },
    # Carry over the cell-level annotations; per-gene metadata is reduced to
    # the gene names only.
    obs=adata.obs[[
        'cell_index', 'clusters_coarse', 'clusters', 'NAME',
        'nGene', 'nUMI', 'animal_type', 'cell_type_age',
    ]],
    var=pd.DataFrame(index=adata.var_names),
)
kalisto_bustools_sep_excl.write(DATA_DIR / "old_brain" / "kallisto_bustools_prepref_isoseparate_exclude.h5ad")
/tmp/ipykernel_91451/3931455348.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
kalisto_bustools_sep_excl = AnnData(
# Build a standalone AnnData for the
# `kallisto_bustools_prepref_isoseparate_include` quantification (spliced
# counts as X, plus `spliced`/`unspliced` layers) and write it to disk.
kalisto_bustools_sep_incl = AnnData(
    # Cast X explicitly: this constructor call triggered anndata's
    # "X.dtype being converted to np.float32 from float64" FutureWarning, and
    # the implicit conversion is removed in anndata 0.9. The explicit cast
    # keeps X float32 on every anndata version and avoids the warning.
    X=adata.layers['kallisto_bustools_prepref_isoseparate_include_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['kallisto_bustools_prepref_isoseparate_include_unspliced'],
        'spliced': adata.layers['kallisto_bustools_prepref_isoseparate_include_spliced'],
    },
    # Carry over the cell-level annotations; per-gene metadata is reduced to
    # the gene names only.
    obs=adata.obs[[
        'cell_index', 'clusters_coarse', 'clusters', 'NAME',
        'nGene', 'nUMI', 'animal_type', 'cell_type_age',
    ]],
    var=pd.DataFrame(index=adata.var_names),
)
kalisto_bustools_sep_incl.write(DATA_DIR / "old_brain" / "kallisto_bustools_prepref_isoseparate_include.h5ad")
/tmp/ipykernel_91451/901589679.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
kalisto_bustools_sep_incl = AnnData(
# Build a standalone AnnData for the `starsolo` quantification (spliced counts
# as X, plus `spliced`/`unspliced` layers) and write it to disk.
starsolo = AnnData(
    # Cast X explicitly: this constructor call triggered anndata's
    # "X.dtype being converted to np.float32 from float64" FutureWarning, and
    # the implicit conversion is removed in anndata 0.9. The explicit cast
    # keeps X float32 on every anndata version and avoids the warning.
    X=adata.layers['starsolo_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['starsolo_unspliced'],
        'spliced': adata.layers['starsolo_spliced'],
    },
    # Carry over the cell-level annotations; per-gene metadata is reduced to
    # the gene names only.
    obs=adata.obs[[
        'cell_index', 'clusters_coarse', 'clusters', 'NAME',
        'nGene', 'nUMI', 'animal_type', 'cell_type_age',
    ]],
    var=pd.DataFrame(index=adata.var_names),
)
starsolo.write(DATA_DIR / "old_brain" / "starsolo.h5ad")
/tmp/ipykernel_91451/1188938678.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
starsolo = AnnData(
# Build a standalone AnnData for the `starsolo_subtr` quantification (spliced
# counts as X, plus `spliced`/`unspliced` layers) and write it to disk.
starsolo_diff = AnnData(
    # Cast X explicitly: this constructor call triggered anndata's
    # "X.dtype being converted to np.float32 from float64" FutureWarning, and
    # the implicit conversion is removed in anndata 0.9. The explicit cast
    # keeps X float32 on every anndata version and avoids the warning.
    X=adata.layers['starsolo_subtr_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['starsolo_subtr_unspliced'],
        'spliced': adata.layers['starsolo_subtr_spliced'],
    },
    # Carry over the cell-level annotations; per-gene metadata is reduced to
    # the gene names only.
    obs=adata.obs[[
        'cell_index', 'clusters_coarse', 'clusters', 'NAME',
        'nGene', 'nUMI', 'animal_type', 'cell_type_age',
    ]],
    var=pd.DataFrame(index=adata.var_names),
)
starsolo_diff.write(DATA_DIR / "old_brain" / "starsolo_subtr.h5ad")
/tmp/ipykernel_91451/1821660359.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
starsolo_diff = AnnData(
# Build a standalone AnnData for the velocyto quantification and write it to
# disk. The combined file lists a `velocyto_unspliced` layer but no
# `velocyto_spliced` one, so the spliced counts are taken from `adata.X`.
# NOTE(review): presumably `adata.X` holds the velocyto spliced counts --
# confirm against `adata.uns['X_name']`.
velocyto = AnnData(
    X=adata.X,
    layers=dict(
        unspliced=adata.layers['velocyto_unspliced'],
        spliced=adata.X,
    ),
    # Carry over the cell-level annotations; per-gene metadata is reduced to
    # the gene names only.
    obs=adata.obs[[
        'cell_index', 'clusters_coarse', 'clusters', 'NAME',
        'nGene', 'nUMI', 'animal_type', 'cell_type_age',
    ]],
    var=pd.DataFrame(index=adata.var_names),
)
velocyto.write(DATA_DIR / "old_brain" / "velocyto.h5ad")