Data generation for RNA velocity analysis in spermatogenesis#

Generates .h5ad files to analyze RNA velocity in spermatogenesis using different pre-processing protocols.

Preliminaries#

Requires

  • DATA_DIR/spermatogenesis/Spermatogenesis_adata_quantifications_combined.h5ad

Output

  • DATA_DIR/spermatogenesis/alevin_prepref_isocollapse_cdna_introns_decoy_gentrome.h5ad

  • DATA_DIR/spermatogenesis/alevin_prepref_isocollapse_cdna_introns_gentrome.h5ad

  • DATA_DIR/spermatogenesis/alevin_prepref_isoseparate_cdna_introns_decoy_gentrome.h5ad

  • DATA_DIR/spermatogenesis/alevin_prepref_isoseparate_cdna_introns_gentrome.h5ad

  • DATA_DIR/spermatogenesis/alevin_spliced_unspliced_gentrome.h5ad

  • DATA_DIR/spermatogenesis/dropest.h5ad

  • DATA_DIR/spermatogenesis/kallisto_bustools_prepref_isocollapse_exclude.h5ad

  • DATA_DIR/spermatogenesis/kallisto_bustools_prepref_isocollapse_include.h5ad

  • DATA_DIR/spermatogenesis/kallisto_bustools_prepref_isoseparate_exclude.h5ad

  • DATA_DIR/spermatogenesis/kallisto_bustools_prepref_isoseparate_include.h5ad

  • DATA_DIR/spermatogenesis/starsolo.h5ad

  • DATA_DIR/spermatogenesis/starsolo_subtr.h5ad

  • DATA_DIR/spermatogenesis/velocyto.h5ad

Library imports#

from pathlib import Path
import sys

# import standard packages
import pandas as pd

# import single-cell packages
from anndata import AnnData
import scanpy as sc

sys.path.append("../../..")
from paths import DATA_DIR

Data loading#

# Load the combined object that stores every quantification as a pair of
# `<protocol>_spliced` / `<protocol>_unspliced` layers, then display its summary.
adata = sc.read(DATA_DIR / "spermatogenesis" / "Spermatogenesis_adata_quantifications_combined.h5ad")
adata
AnnData object with n_obs × n_vars = 1829 × 54144
    obs: 'cell_index', 'clusters_coarse', 'clusters'
    uns: 'X_name', 'dataset'
    obsm: 'PCA_alevin_spliced_gentrome', 'TSNE_alevin_spliced_gentrome', 'UMAP_alevin_spliced_gentrome'
    layers: 'alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_spliced', 'alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_unspliced', 'alevin_prepref_isocollapse_cdna_introns_gentrome_spliced', 'alevin_prepref_isocollapse_cdna_introns_gentrome_unspliced', 'alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_spliced', 'alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_unspliced', 'alevin_prepref_isoseparate_cdna_introns_gentrome_spliced', 'alevin_prepref_isoseparate_cdna_introns_gentrome_unspliced', 'alevin_spliced_gentrome_counts', 'alevin_spliced_unspliced_gentrome_spliced', 'alevin_spliced_unspliced_gentrome_unspliced', 'dropest_spliced', 'dropest_unspliced', 'kallisto_bustools_prepref_isocollapse_exclude_spliced', 'kallisto_bustools_prepref_isocollapse_exclude_unspliced', 'kallisto_bustools_prepref_isocollapse_include_spliced', 'kallisto_bustools_prepref_isocollapse_include_unspliced', 'kallisto_bustools_prepref_isoseparate_exclude_spliced', 'kallisto_bustools_prepref_isoseparate_exclude_unspliced', 'kallisto_bustools_prepref_isoseparate_include_spliced', 'kallisto_bustools_prepref_isoseparate_include_unspliced', 'starsolo_spliced', 'starsolo_subtr_spliced', 'starsolo_subtr_unspliced', 'starsolo_unspliced', 'velocyto_unspliced'

Data generation#

# Split the `alevin_prepref_isocollapse_cdna_introns_decoy_gentrome` quantification out of
# the combined object: its spliced matrix becomes X, and both matrices are stored under the
# generic layer names 'spliced' / 'unspliced'. The result is written to its own .h5ad file.
alevin_coll_decoy_gtr = AnnData(
    # Explicit float32 cast: anndata < 0.9 implicitly down-casts a float64 X to float32
    # (emitting a FutureWarning), whereas later releases keep float64. Casting here pins
    # the dtype of X in the written file independently of the installed anndata version.
    # NOTE(review): assumes the layer is a NumPy array or SciPy sparse matrix (both
    # provide `.astype`) - confirm.
    X=adata.layers['alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_unspliced'],
        'spliced': adata.layers['alevin_prepref_isocollapse_cdna_introns_decoy_gentrome_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters']],
    # Fresh, column-less var table that only carries over the variable names.
    var=pd.DataFrame(index=adata.var_names),
)
alevin_coll_decoy_gtr.write(
    DATA_DIR / "spermatogenesis" / "alevin_prepref_isocollapse_cdna_introns_decoy_gentrome.h5ad"
)
/tmp/ipykernel_55632/4262589273.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  alevin_coll_decoy_gtr = AnnData(
# Split the `alevin_prepref_isocollapse_cdna_introns_gentrome` quantification out of the
# combined object: its spliced matrix becomes X, and both matrices are stored under the
# generic layer names 'spliced' / 'unspliced'. The result is written to its own .h5ad file.
alevin_coll_gtr = AnnData(
    # Explicit float32 cast: anndata < 0.9 implicitly down-casts a float64 X to float32
    # (emitting a FutureWarning), whereas later releases keep float64. Casting here pins
    # the dtype of X in the written file independently of the installed anndata version.
    # NOTE(review): assumes the layer is a NumPy array or SciPy sparse matrix (both
    # provide `.astype`) - confirm.
    X=adata.layers['alevin_prepref_isocollapse_cdna_introns_gentrome_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['alevin_prepref_isocollapse_cdna_introns_gentrome_unspliced'],
        'spliced': adata.layers['alevin_prepref_isocollapse_cdna_introns_gentrome_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters']],
    # Fresh, column-less var table that only carries over the variable names.
    var=pd.DataFrame(index=adata.var_names),
)
alevin_coll_gtr.write(
    DATA_DIR / "spermatogenesis" / "alevin_prepref_isocollapse_cdna_introns_gentrome.h5ad"
)
/tmp/ipykernel_55632/2193934378.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  alevin_coll_gtr = AnnData(
# Split the `alevin_prepref_isoseparate_cdna_introns_decoy_gentrome` quantification out of
# the combined object: its spliced matrix becomes X, and both matrices are stored under the
# generic layer names 'spliced' / 'unspliced'. The result is written to its own .h5ad file.
alevin_sep_decoy_gtr = AnnData(
    # Explicit float32 cast: anndata < 0.9 implicitly down-casts a float64 X to float32
    # (emitting a FutureWarning), whereas later releases keep float64. Casting here pins
    # the dtype of X in the written file independently of the installed anndata version.
    # NOTE(review): assumes the layer is a NumPy array or SciPy sparse matrix (both
    # provide `.astype`) - confirm.
    X=adata.layers['alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_unspliced'],
        'spliced': adata.layers['alevin_prepref_isoseparate_cdna_introns_decoy_gentrome_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters']],
    # Fresh, column-less var table that only carries over the variable names.
    var=pd.DataFrame(index=adata.var_names),
)
alevin_sep_decoy_gtr.write(
    DATA_DIR / "spermatogenesis" / "alevin_prepref_isoseparate_cdna_introns_decoy_gentrome.h5ad"
)
/tmp/ipykernel_55632/3226528060.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  alevin_sep_decoy_gtr = AnnData(
# Split the `alevin_prepref_isoseparate_cdna_introns_gentrome` quantification out of the
# combined object: its spliced matrix becomes X, and both matrices are stored under the
# generic layer names 'spliced' / 'unspliced'. The result is written to its own .h5ad file.
alevin_sep_gtr = AnnData(
    # Explicit float32 cast: anndata < 0.9 implicitly down-casts a float64 X to float32
    # (emitting a FutureWarning), whereas later releases keep float64. Casting here pins
    # the dtype of X in the written file independently of the installed anndata version.
    # NOTE(review): assumes the layer is a NumPy array or SciPy sparse matrix (both
    # provide `.astype`) - confirm.
    X=adata.layers['alevin_prepref_isoseparate_cdna_introns_gentrome_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['alevin_prepref_isoseparate_cdna_introns_gentrome_unspliced'],
        'spliced': adata.layers['alevin_prepref_isoseparate_cdna_introns_gentrome_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters']],
    # Fresh, column-less var table that only carries over the variable names.
    var=pd.DataFrame(index=adata.var_names),
)
alevin_sep_gtr.write(
    DATA_DIR / "spermatogenesis" / "alevin_prepref_isoseparate_cdna_introns_gentrome.h5ad"
)
/tmp/ipykernel_55632/1316360450.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  alevin_sep_gtr = AnnData(
# Split the `alevin_spliced_unspliced_gentrome` quantification out of the combined object:
# its spliced matrix becomes X, and both matrices are stored under the generic layer names
# 'spliced' / 'unspliced'. The result is written to its own .h5ad file.
alevin_spliced_unspliced_gtr = AnnData(
    # Explicit float32 cast: anndata < 0.9 implicitly down-casts a float64 X to float32
    # (emitting a FutureWarning), whereas later releases keep float64. Casting here pins
    # the dtype of X in the written file independently of the installed anndata version.
    # NOTE(review): assumes the layer is a NumPy array or SciPy sparse matrix (both
    # provide `.astype`) - confirm.
    X=adata.layers['alevin_spliced_unspliced_gentrome_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['alevin_spliced_unspliced_gentrome_unspliced'],
        'spliced': adata.layers['alevin_spliced_unspliced_gentrome_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters']],
    # Fresh, column-less var table that only carries over the variable names.
    var=pd.DataFrame(index=adata.var_names),
)
alevin_spliced_unspliced_gtr.write(
    DATA_DIR / "spermatogenesis" / "alevin_spliced_unspliced_gentrome.h5ad"
)
/tmp/ipykernel_55632/1297943926.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  alevin_spliced_unspliced_gtr = AnnData(
# Split the `dropest` quantification out of the combined object: its spliced matrix becomes
# X, and both matrices are stored under the generic layer names 'spliced' / 'unspliced'.
# The result is written to its own .h5ad file.
dropest = AnnData(
    # Explicit float32 cast: anndata < 0.9 implicitly down-casts a float64 X to float32
    # (emitting a FutureWarning), whereas later releases keep float64. Casting here pins
    # the dtype of X in the written file independently of the installed anndata version.
    # NOTE(review): assumes the layer is a NumPy array or SciPy sparse matrix (both
    # provide `.astype`) - confirm.
    X=adata.layers['dropest_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['dropest_unspliced'],
        'spliced': adata.layers['dropest_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters']],
    # Fresh, column-less var table that only carries over the variable names.
    var=pd.DataFrame(index=adata.var_names),
)
dropest.write(DATA_DIR / "spermatogenesis" / "dropest.h5ad")
/tmp/ipykernel_55632/1095542269.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  dropest = AnnData(
# Split the `kallisto_bustools_prepref_isocollapse_exclude` quantification out of the
# combined object: its spliced matrix becomes X, and both matrices are stored under the
# generic layer names 'spliced' / 'unspliced'. The result is written to its own .h5ad file.
# NOTE: the variable name spells "kalisto" with a single "l"; it is kept unchanged so that
# any reference elsewhere keeps working.
kalisto_bustools_coll_excl = AnnData(
    # Explicit float32 cast: anndata < 0.9 implicitly down-casts a float64 X to float32
    # (emitting a FutureWarning), whereas later releases keep float64. Casting here pins
    # the dtype of X in the written file independently of the installed anndata version.
    # NOTE(review): assumes the layer is a NumPy array or SciPy sparse matrix (both
    # provide `.astype`) - confirm.
    X=adata.layers['kallisto_bustools_prepref_isocollapse_exclude_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['kallisto_bustools_prepref_isocollapse_exclude_unspliced'],
        'spliced': adata.layers['kallisto_bustools_prepref_isocollapse_exclude_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters']],
    # Fresh, column-less var table that only carries over the variable names.
    var=pd.DataFrame(index=adata.var_names),
)
kalisto_bustools_coll_excl.write(
    DATA_DIR / "spermatogenesis" / "kallisto_bustools_prepref_isocollapse_exclude.h5ad"
)
/tmp/ipykernel_55632/2241203077.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  kalisto_bustools_coll_excl = AnnData(
# Split the `kallisto_bustools_prepref_isocollapse_include` quantification out of the
# combined object: its spliced matrix becomes X, and both matrices are stored under the
# generic layer names 'spliced' / 'unspliced'. The result is written to its own .h5ad file.
# NOTE: the variable name spells "kalisto" with a single "l"; it is kept unchanged so that
# any reference elsewhere keeps working.
kalisto_bustools_coll_incl = AnnData(
    # Explicit float32 cast: anndata < 0.9 implicitly down-casts a float64 X to float32
    # (emitting a FutureWarning), whereas later releases keep float64. Casting here pins
    # the dtype of X in the written file independently of the installed anndata version.
    # NOTE(review): assumes the layer is a NumPy array or SciPy sparse matrix (both
    # provide `.astype`) - confirm.
    X=adata.layers['kallisto_bustools_prepref_isocollapse_include_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['kallisto_bustools_prepref_isocollapse_include_unspliced'],
        'spliced': adata.layers['kallisto_bustools_prepref_isocollapse_include_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters']],
    # Fresh, column-less var table that only carries over the variable names.
    var=pd.DataFrame(index=adata.var_names),
)
kalisto_bustools_coll_incl.write(
    DATA_DIR / "spermatogenesis" / "kallisto_bustools_prepref_isocollapse_include.h5ad"
)
/tmp/ipykernel_55632/3440704683.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  kalisto_bustools_coll_incl = AnnData(
# Split the `kallisto_bustools_prepref_isoseparate_exclude` quantification out of the
# combined object: its spliced matrix becomes X, and both matrices are stored under the
# generic layer names 'spliced' / 'unspliced'. The result is written to its own .h5ad file.
# NOTE: the variable name spells "kalisto" with a single "l"; it is kept unchanged so that
# any reference elsewhere keeps working.
kalisto_bustools_sep_excl = AnnData(
    # Explicit float32 cast: anndata < 0.9 implicitly down-casts a float64 X to float32
    # (emitting a FutureWarning), whereas later releases keep float64. Casting here pins
    # the dtype of X in the written file independently of the installed anndata version.
    # NOTE(review): assumes the layer is a NumPy array or SciPy sparse matrix (both
    # provide `.astype`) - confirm.
    X=adata.layers['kallisto_bustools_prepref_isoseparate_exclude_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['kallisto_bustools_prepref_isoseparate_exclude_unspliced'],
        'spliced': adata.layers['kallisto_bustools_prepref_isoseparate_exclude_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters']],
    # Fresh, column-less var table that only carries over the variable names.
    var=pd.DataFrame(index=adata.var_names),
)
kalisto_bustools_sep_excl.write(
    DATA_DIR / "spermatogenesis" / "kallisto_bustools_prepref_isoseparate_exclude.h5ad"
)
/tmp/ipykernel_55632/639120034.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  kalisto_bustools_sep_excl = AnnData(
# Split the `kallisto_bustools_prepref_isoseparate_include` quantification out of the
# combined object: its spliced matrix becomes X, and both matrices are stored under the
# generic layer names 'spliced' / 'unspliced'. The result is written to its own .h5ad file.
# NOTE: the variable name spells "kalisto" with a single "l"; it is kept unchanged so that
# any reference elsewhere keeps working.
kalisto_bustools_sep_incl = AnnData(
    # Explicit float32 cast: anndata < 0.9 implicitly down-casts a float64 X to float32
    # (emitting a FutureWarning), whereas later releases keep float64. Casting here pins
    # the dtype of X in the written file independently of the installed anndata version.
    # NOTE(review): assumes the layer is a NumPy array or SciPy sparse matrix (both
    # provide `.astype`) - confirm.
    X=adata.layers['kallisto_bustools_prepref_isoseparate_include_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['kallisto_bustools_prepref_isoseparate_include_unspliced'],
        'spliced': adata.layers['kallisto_bustools_prepref_isoseparate_include_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters']],
    # Fresh, column-less var table that only carries over the variable names.
    var=pd.DataFrame(index=adata.var_names),
)
kalisto_bustools_sep_incl.write(
    DATA_DIR / "spermatogenesis" / "kallisto_bustools_prepref_isoseparate_include.h5ad"
)
/tmp/ipykernel_55632/4141797636.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  kalisto_bustools_sep_incl = AnnData(
# Split the `starsolo` quantification out of the combined object: its spliced matrix becomes
# X, and both matrices are stored under the generic layer names 'spliced' / 'unspliced'.
# The result is written to its own .h5ad file.
starsolo = AnnData(
    # Explicit float32 cast: anndata < 0.9 implicitly down-casts a float64 X to float32
    # (emitting a FutureWarning), whereas later releases keep float64. Casting here pins
    # the dtype of X in the written file independently of the installed anndata version.
    # NOTE(review): assumes the layer is a NumPy array or SciPy sparse matrix (both
    # provide `.astype`) - confirm.
    X=adata.layers['starsolo_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['starsolo_unspliced'],
        'spliced': adata.layers['starsolo_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters']],
    # Fresh, column-less var table that only carries over the variable names.
    var=pd.DataFrame(index=adata.var_names),
)
starsolo.write(DATA_DIR / "spermatogenesis" / "starsolo.h5ad")
/tmp/ipykernel_55632/1123460196.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  starsolo = AnnData(
# Split the `starsolo_subtr` quantification out of the combined object: its spliced matrix
# becomes X, and both matrices are stored under the generic layer names
# 'spliced' / 'unspliced'. The result is written to its own .h5ad file.
starsolo_diff = AnnData(
    # Explicit float32 cast: anndata < 0.9 implicitly down-casts a float64 X to float32
    # (emitting a FutureWarning), whereas later releases keep float64. Casting here pins
    # the dtype of X in the written file independently of the installed anndata version.
    # NOTE(review): assumes the layer is a NumPy array or SciPy sparse matrix (both
    # provide `.astype`) - confirm.
    X=adata.layers['starsolo_subtr_spliced'].astype('float32'),
    layers={
        'unspliced': adata.layers['starsolo_subtr_unspliced'],
        'spliced': adata.layers['starsolo_subtr_spliced'],
    },
    obs=adata.obs[['cell_index', 'clusters_coarse', 'clusters']],
    # Fresh, column-less var table that only carries over the variable names.
    var=pd.DataFrame(index=adata.var_names),
)
starsolo_diff.write(DATA_DIR / "spermatogenesis" / "starsolo_subtr.h5ad")
/tmp/ipykernel_55632/172827074.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  starsolo_diff = AnnData(
# Build the velocyto dataset and save it as its own .h5ad file. Unlike the other
# quantifications, the spliced matrix is taken from `adata.X` rather than from a layer.
# NOTE(review): presumably `adata.X` holds the velocyto spliced counts - confirm against
# how the combined object was assembled.
velocyto = AnnData(
    X=adata.X,
    obs=adata.obs[["cell_index", "clusters_coarse", "clusters"]],
    # Fresh, column-less var table that only carries over the variable names.
    var=pd.DataFrame(index=adata.var_names),
    layers={
        "unspliced": adata.layers["velocyto_unspliced"],
        "spliced": adata.X,
    },
)
velocyto.write(
    DATA_DIR / "spermatogenesis" / "velocyto.h5ad"
)