Seurat V4数据转化为h5ad格式数据
1、导入(R)
# Step 1 (R): load the required packages and read the Seurat v4 object.

# Start from an empty global environment.
rm(list = ls())

library(Seurat)
library(qs)
library(reticulate)
library(hdf5r)
library(sceasy)
library(BiocParallel)

# Register a 4-worker BiocParallel backend with a progress bar.
register(MulticoreParam(workers = 4, progressbar = TRUE))

# Read the serialized Seurat object and print its summary.
scRNA <- qread("sc_dataset.qs")
scRNA
# An object of class Seurat
# 30269 features across 44651 samples within 2 assays
# Active assay: integrated (2000 features, 2000 variable features)
# 1 other assay present: RNA
# 3 dimensional reductions calculated: pca, umap, tsne
2、配置python环境(终端/linux)
# Step 2 (terminal): create and populate the conda environment used by sceasy.
conda create -n sceasy python=3.9
conda activate sceasy
conda install loompy

# Optional installs
# NOTE(review): the sceasy README lists anndata as required for AnnData
# (h5ad) conversion and loompy as needed only for loom - confirm before
# skipping the anndata install.
conda install anndata
conda install scipy
3、开始转换(R)
# Step 3 (R): convert the Seurat v4 object to AnnData (h5ad) and to
# SingleCellExperiment.

# Point reticulate at the conda environment created for sceasy.
use_condaenv("sceasy")
loompy <- reticulate::import("loompy")

# Seurat to AnnData
sceasy::convertFormat(
  scRNA,
  from = "seurat",
  to = "anndata",
  outFile = "scRNA.h5ad"
)
# NOTE(review): 28269 variables = 30269 total features - 2000 integrated
# features, so presumably the RNA assay is exported rather than the active
# "integrated" assay - confirm against sceasy's `assay` default.
# AnnData object with n_obs × n_vars = 44651 × 28269
# obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'GSE_num', 'Gender', 'Age', 'subsite', 'hpv', 'percent.mt', 'percent.rp', 'percent.hb', 'RNA_snn_res.0.1', 'RNA_snn_res.0.2', 'RNA_snn_res.0.3', 'RNA_snn_res.0.4', 'RNA_snn_res.0.5', 'RNA_snn_res.0.6', 'RNA_snn_res.0.7', 'RNA_snn_res.0.8', 'RNA_snn_res.0.9', 'RNA_snn_res.1', 'RNA_snn_res.1.1', 'RNA_snn_res.1.2', 'RNA_snn_res.1.3', 'RNA_snn_res.1.4', 'RNA_snn_res.1.5', 'RNA_snn_res.1.6', 'RNA_snn_res.1.7', 'RNA_snn_res.1.8', 'RNA_snn_res.1.9', 'RNA_snn_res.2', 'seurat_clusters', 'celltype', 'integrated_snn_res.0.1', 'integrated_snn_res.0.2', 'integrated_snn_res.0.3', 'integrated_snn_res.0.4', 'integrated_snn_res.0.5', 'integrated_snn_res.0.6', 'integrated_snn_res.0.7', 'integrated_snn_res.0.8', 'integrated_snn_res.0.9', 'integrated_snn_res.1', 'integrated_snn_res.1.1', 'integrated_snn_res.1.2', 'integrated_snn_res.1.3', 'integrated_snn_res.1.4', 'integrated_snn_res.1.5', 'integrated_snn_res.1.6', 'integrated_snn_res.1.7', 'integrated_snn_res.1.8', 'integrated_snn_res.1.9', 'integrated_snn_res.2'
# var: 'name'
# obsm: 'X_pca', 'X_umap', 'X_tsne'

# Seurat to SingleCellExperiment
sceasy::convertFormat(
  scRNA,
  from = "seurat",
  to = "sce",
  outFile = "scRNA.rds"
)
4、IDE中确认一下(python)
# Step 4 (Python): confirm the converted h5ad file loads in scanpy.

# Load libraries
import scanpy as sc
import os

# Confirm the working directory
os.getcwd()

# Read the data
adata = sc.read_h5ad('scRNA.h5ad')
adata
# AnnData object with n_obs × n_vars = 44651 × 28269
# obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'GSE_num', 'Gender', 'Age', 'subsite', 'hpv', 'percent.mt', 'percent.rp', 'percent.hb', 'RNA_snn_res.0.1', 'RNA_snn_res.0.2', 'RNA_snn_res.0.3', 'RNA_snn_res.0.4', 'RNA_snn_res.0.5', 'RNA_snn_res.0.6', 'RNA_snn_res.0.7', 'RNA_snn_res.0.8', 'RNA_snn_res.0.9', 'RNA_snn_res.1', 'RNA_snn_res.1.1', 'RNA_snn_res.1.2', 'RNA_snn_res.1.3', 'RNA_snn_res.1.4', 'RNA_snn_res.1.5', 'RNA_snn_res.1.6', 'RNA_snn_res.1.7', 'RNA_snn_res.1.8', 'RNA_snn_res.1.9', 'RNA_snn_res.2', 'seurat_clusters', 'celltype', 'integrated_snn_res.0.1', 'integrated_snn_res.0.2', 'integrated_snn_res.0.3', 'integrated_snn_res.0.4', 'integrated_snn_res.0.5', 'integrated_snn_res.0.6', 'integrated_snn_res.0.7', 'integrated_snn_res.0.8', 'integrated_snn_res.0.9', 'integrated_snn_res.1', 'integrated_snn_res.1.1', 'integrated_snn_res.1.2', 'integrated_snn_res.1.3', 'integrated_snn_res.1.4', 'integrated_snn_res.1.5', 'integrated_snn_res.1.6', 'integrated_snn_res.1.7', 'integrated_snn_res.1.8', 'integrated_snn_res.1.9', 'integrated_snn_res.2'
# var: 'name'
# obsm: 'X_pca', 'X_tsne', 'X_umap'
Seurat V5数据转化为h5ad格式数据
1、导入(R)
# Step 1 (R): switch to the Seurat v5 library, load packages, and read the
# Seurat v5 object.

# Start from an empty global environment.
rm(list = ls())

# Put the directory holding the Seurat v5 installation on the library search
# path, then print the search path to confirm it took effect.
V5_path <- "/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/seurat5/"
.libPaths(V5_path)
.libPaths()

library(Seurat)
library(qs)
library(reticulate)
library(hdf5r)
library(sceasy)
library(BiocParallel)

# Register a 4-worker BiocParallel backend with a progress bar.
register(MulticoreParam(workers = 4, progressbar = TRUE))

# Read the Seurat v5 object and print its summary.
scRNA_V5 <- readRDS("scRNA_tumor.rds")
scRNA_V5
# An object of class Seurat
# 20124 features across 5042 samples within 1 assay
# Active assay: RNA (20124 features, 2000 variable features)
# 3 layers present: counts, data, scale.data
# 3 dimensional reductions calculated: pca, harmony, umap
2、配置python环境(终端/linux)
# Step 2 (terminal): create and populate the conda environment used by sceasy.
conda create -n sceasy python=3.9
conda activate sceasy
conda install loompy

# Optional installs
# NOTE(review): the sceasy README lists anndata as required for AnnData
# (h5ad) conversion and loompy as needed only for loom - confirm before
# skipping the anndata install.
conda install anndata
conda install scipy
3、R语言转换(R)
# Step 3 (R): convert the Seurat v5 object to AnnData (h5ad).

# Point reticulate at the conda environment created for sceasy.
use_condaenv("sceasy")
loompy <- reticulate::import("loompy")

# Seurat to AnnData
# Coerce the v5 "Assay5" RNA assay to the legacy "Assay" class first; the
# conversion is run on the legacy-class assay.
scRNA_V5[["RNA"]] <- as(scRNA_V5[["RNA"]], "Assay")
sceasy::convertFormat(
  scRNA_V5,
  from = "seurat",
  to = "anndata",
  outFile = "scRNA_V5.h5ad"
)
# AnnData object with n_obs × n_vars = 5042 × 20124
# obs: 'nCount_RNA', 'nFeature_RNA', 'Sample', 'Cell.Barcode', 'Type', 'RNA_snn_res.0.1', 'RNA_snn_res.0.2', 'RNA_snn_res.0.3', 'RNA_snn_res.0.4', 'RNA_snn_res.0.5', 'RNA_snn_res.0.6', 'RNA_snn_res.0.7', 'RNA_snn_res.0.8', 'RNA_snn_res.0.9', 'RNA_snn_res.1', 'RNA_snn_res.1.1', 'RNA_snn_res.1.2', 'seurat_clusters', 'celltype', 'seurat_annotation'
# var: 'vf_vst_counts_mean', 'vf_vst_counts_variance', 'vf_vst_counts_variance.expected', 'vf_vst_counts_variance.standardized', 'vf_vst_counts_variable', 'vf_vst_counts_rank', 'var.features', 'var.features.rank'
# obsm: 'X_pca', 'X_harmony', 'X_umap'
# Warning message:
# In .regularise_df(obj@meta.data, drop_single_values = drop_single_values) :
# Dropping single category variables:orig.ident
先将 Seurat V5 对象中的 Assay5 类型转换为 Seurat 旧版本中的 Assay 类型,然后再进行转化
4、IDE中确认一下(python)
# Step 4 (Python): confirm the converted Seurat v5 h5ad file loads in scanpy.

# Load libraries
import scanpy as sc
import os

# Confirm the working directory
os.getcwd()

# Read the data.
# The Seurat v5 conversion step writes 'scRNA_V5.h5ad'; the expected output
# below (5042 × 20124) is that object, so read that file rather than the
# v4 export 'scRNA.h5ad'.
adata = sc.read_h5ad('scRNA_V5.h5ad')
adata
# AnnData object with n_obs × n_vars = 5042 × 20124
# obs: 'nCount_RNA', 'nFeature_RNA', 'Sample', 'Cell.Barcode', 'Type', 'RNA_snn_res.0.1', 'RNA_snn_res.0.2', 'RNA_snn_res.0.3', 'RNA_snn_res.0.4', 'RNA_snn_res.0.5', 'RNA_snn_res.0.6', 'RNA_snn_res.0.7', 'RNA_snn_res.0.8', 'RNA_snn_res.0.9', 'RNA_snn_res.1', 'RNA_snn_res.1.1', 'RNA_snn_res.1.2', 'seurat_clusters', 'celltype', 'seurat_annotation'
# var: 'vf_vst_counts_mean', 'vf_vst_counts_variance', 'vf_vst_counts_variance.expected', 'vf_vst_counts_variance.standardized', 'vf_vst_counts_variable', 'vf_vst_counts_rank', 'var.features', 'var.features.rank'
# obsm: 'X_harmony', 'X_pca', 'X_umap'
h5ad格式数据转化成Seurat对象
1.导入
# Step 1 (R): load the packages needed to convert h5ad back to Seurat.

# Start from an empty global environment.
rm(list = ls())

library(sceasy)
library(reticulate)
library(Seurat)
library(BiocParallel)

# Register a 4-worker BiocParallel backend with a progress bar.
register(MulticoreParam(workers = 4, progressbar = TRUE))
2、R语言转换
# Step 2 (R): convert the h5ad file to a Seurat object saved as an .rds file.

# h5ad to Seurat
sceasy::convertFormat(
  obj = "scRNA.h5ad",
  from = "anndata",
  to = "seurat",
  outFile = "scRNA.rds"
)
# X -> counts
# An object of class Seurat
# 28269 features across 44651 samples within 1 assay
# Active assay: RNA (28269 features, 0 variable features)
# 2 layers present: counts, data
# 3 dimensional reductions calculated: pca, tsne, umap
这种方法得到的数据是 Seurat V4 版本的,所以如果要用于 Seurat V5 的话还需要再转化一下。
还有细胞数很多的话sceasy就不好用了,这个时候可以用dior包。
参考资料:
sceasy: https://github.com/cellgeni/sceasy
dior: https://github.com/JiekaiLab/dior
单细胞天地: https://mp.weixin.qq.com/s/qHBeQnYJdK0ATGlTOROPeA
生信菜鸟团: https://mp.weixin.qq.com/s/8fwJSc9Dnp8h_Suv76oXVA
KS科研分享与服务:https://mp.weixin.qq.com/s/Wt9TU5Qk3yqPDlRlXr6BfQ
注:若对内容有疑惑或者有发现明确错误的朋友,请联系后台(欢迎交流)。更多内容可关注公众号:生信方舟
- END -