Calculating the gene activity matrix from scATAC-seq data
In [1]:
Copied!
import Epiverse as ev
import scanpy as sc
import Epiverse as ev
import scanpy as sc
In [ ]:
Copied!
!wget -p data https://cf.10xgenomics.com/samples/cell-atac/2.1.0/10k_pbmc_ATACv2_nextgem_Chromium_X/10k_pbmc_ATACv2_nextgem_Chromium_X_filtered_peak_bc_matrix.h5
!wget -p data https://cf.10xgenomics.com/samples/cell-atac/2.1.0/10k_pbmc_ATACv2_nextgem_Chromium_X/10k_pbmc_ATACv2_nextgem_Chromium_X_filtered_peak_bc_matrix.h5
In [2]:
Copied!
# Load the filtered peak-by-barcode matrix from the 10x HDF5 file.
# gex_only=False keeps feature types other than "Gene Expression"; the features
# in this file are ATAC peaks, so they must not be filtered out.
adata = sc.read_10x_h5(
    'data/10k_pbmc_ATACv2_nextgem_Chromium_X_filtered_peak_bc_matrix.h5',
    gex_only=False,
)
adata
# Load the filtered peak-by-barcode matrix from the 10x HDF5 file.
# gex_only=False keeps feature types other than "Gene Expression"; the features
# in this file are ATAC peaks, so they must not be filtered out.
adata = sc.read_10x_h5(
    'data/10k_pbmc_ATACv2_nextgem_Chromium_X_filtered_peak_bc_matrix.h5',
    gex_only=False,
)
adata
Out[2]:
AnnData object with n_obs × n_vars = 10273 × 164487
var: 'gene_ids', 'feature_types', 'genome'
In [ ]:
Copied!
# Fetch the reference files used for gene activity scoring.
# NOTE(review): 'refer' looks like the target directory, since the scoring step
# reads 'refer/T2TCHM13_refgenes.txt' — confirm against the Epiverse docs.
ev.utils.download_gene_activity_reference(download_file='refer')
# Fetch the reference files used for gene activity scoring.
# NOTE(review): 'refer' looks like the target directory, since the scoring step
# reads 'refer/T2TCHM13_refgenes.txt' — confirm against the Epiverse docs.
ev.utils.download_gene_activity_reference(download_file='refer')
In [3]:
Copied!
# Compute gene-peak regulatory scores from the peak matrix and the reference
# gene annotation; per the printed log, the maximum score is stored for each
# gene symbol. The result is bound to Gene_activity for the following cells.
Gene_activity = ev.pp.cal_gene_activity(
    adata=adata,
    genebed='refer/T2TCHM13_refgenes.txt',
    # NOTE(review): presumably a distance-decay scale in base pairs — confirm in Epiverse docs.
    decay=10000.0,
)
# Compute gene-peak regulatory scores from the peak matrix and the reference
# gene annotation; per the printed log, the maximum score is stored for each
# gene symbol. The result is bound to Gene_activity for the following cells.
Gene_activity = ev.pp.cal_gene_activity(
    adata=adata,
    genebed='refer/T2TCHM13_refgenes.txt',
    # NOTE(review): presumably a distance-decay scale in base pairs — confirm in Epiverse docs.
    decay=10000.0,
)
......Extract gene information from the provided genebed file ......Calculate gene-peak regulatory scores based on the selected model peaks number: 164487 peaks number in gene promoters and exons: 19322 peaks number out gene promoters and exons: 145165 ......Store the index and total scores for each gene ......Store the maximum score for each gene symbol
In [4]:
Copied!
# Show the AnnData summary of the gene activity result (bare last expression is displayed).
Gene_activity
# Show the AnnData summary of the gene activity result (bare last expression is displayed).
Gene_activity
Out[4]:
AnnData object with n_obs × n_vars = 10273 × 59454
In [5]:
Copied!
# View the gene activity scores as a DataFrame.
# NOTE(review): this materializes the full 10273 x 59454 table; if X is sparse it is
# likely densified — consider slicing (e.g. a subset of genes) before converting.
Gene_activity.to_df()
# View the gene activity scores as a DataFrame.
# NOTE(review): this materializes the full 10273 x 59454 table; if X is sparse it is
# likely densified — consider slicing (e.g. a subset of genes) before converting.
Gene_activity.to_df()
Out[5]:
| 5S_rRNA | 5_8S_rRNA | 7SK | A1BG | A1BG-AS1 | A1CF | A2M | A2M-AS1 | A2ML1 | A2ML1-AS1 | ... | ZWINT | ZXDA | ZXDB | ZXDC | ZYG11A | ZYG11AP1 | ZYXP1 | hsa-mir-1253 | hsa-mir-423 | snoZ196 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| AAACGAAAGACTAGCG-1 | 0.000000 | 0.000000 | 0.565850 | 0.0 | 0.0 | 0.830326 | 0.001135 | 0.007001 | 0.005824 | 0.010367 | ... | 0.0 | 0.000000 | 0.004062 | 1.235636 | 0.006776 | 0.000611 | 0.0 | 0.022758 | 0.000461 | 0.000432 |
| AAACGAAAGCAACGGT-1 | 0.181331 | 0.000085 | 1.722540 | 0.0 | 0.0 | 0.000000 | 0.001917 | 0.011824 | 0.000277 | 0.000573 | ... | 0.0 | 0.004205 | 0.000000 | 1.152631 | 0.000532 | 0.000099 | 0.0 | 0.002319 | 0.000000 | 0.000432 |
| AAACGAAAGCCATTCA-1 | 0.548799 | 0.003705 | 1.874563 | 0.0 | 0.0 | 0.000000 | 0.269398 | 0.043671 | 0.009194 | 0.011922 | ... | 0.0 | 0.000000 | 0.009735 | 2.964916 | 0.844313 | 0.002629 | 0.0 | 1.381189 | 2.111990 | 0.000000 |
| AAACGAAAGGAGGCGA-1 | 0.000000 | 0.000682 | 2.928587 | 0.0 | 0.0 | 0.183455 | 1.128043 | 1.044262 | 0.000535 | 0.000953 | ... | 0.0 | 0.000000 | 0.016656 | 1.501693 | 3.720639 | 0.014950 | 0.0 | 0.013200 | 0.003754 | 0.001296 |
| AAACGAAAGGATTAAC-1 | 1.105243 | 0.000000 | 3.732908 | 0.0 | 0.0 | 0.183455 | 1.146316 | 1.156889 | 0.002376 | 0.001499 | ... | 0.0 | 0.000000 | 0.000000 | 0.821750 | 3.127061 | 0.000084 | 0.0 | 0.000000 | 0.000000 | 0.000000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| TTTGTGTTCCTGGGAC-1 | 0.823199 | 0.000183 | 1.361867 | 0.0 | 0.0 | 0.000000 | 0.000567 | 0.003501 | 0.000139 | 0.000247 | ... | 0.0 | 0.000000 | 0.000000 | 2.323623 | 0.991325 | 0.002804 | 0.0 | 0.000000 | 0.000000 | 0.000432 |
| TTTGTGTTCGAGAAGC-1 | 0.000000 | 0.000870 | 1.363589 | 0.0 | 0.0 | 0.183455 | 0.000000 | 0.000000 | 0.094202 | 0.167682 | ... | 0.0 | 0.000000 | 0.000000 | 1.942213 | 1.688626 | 0.001531 | 0.0 | 0.012265 | 0.001182 | 0.000864 |
| TTTGTGTTCGATATGC-1 | 0.000074 | 0.000000 | 1.165522 | 0.0 | 0.0 | 0.000000 | 0.297744 | 0.758031 | 0.003154 | 0.002537 | ... | 0.0 | 0.000000 | 0.011423 | 2.095139 | 0.990995 | 0.014037 | 0.0 | 0.011715 | 1.053653 | 0.000432 |
| TTTGTGTTCGCTACCT-1 | 1.443001 | 0.000000 | 5.457284 | 0.0 | 0.0 | 0.183455 | 0.000378 | 0.002334 | 0.006760 | 0.010668 | ... | 0.0 | 0.000000 | 0.000000 | 1.459977 | 1.693196 | 0.000768 | 0.0 | 0.000065 | 0.000000 | 0.001204 |
| TTTGTGTTCTTAGTGG-1 | 0.000000 | 0.000183 | 0.367145 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.0 | 0.000000 | 0.000000 | 0.047682 | 0.000438 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000432 |
10273 rows × 59454 columns