Find gene-related regions¶
In [1]:
Copied!
# Third-party toolkits used by the rest of the notebook (aliases ov / sc / ev).
import omicverse as ov
import scanpy as sc
import Epiverse as ev
# NOTE(review): presumably applies omicverse's plotting defaults - confirm against the omicverse docs.
ov.ov_plot_set()
# Third-party toolkits used by the rest of the notebook (aliases ov / sc / ev).
import omicverse as ov
import scanpy as sc
import Epiverse as ev
# NOTE(review): presumably applies omicverse's plotting defaults - confirm against the omicverse docs.
ov.ov_plot_set()
2023-08-20 21:06:05.193053: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 2023-08-20 21:06:05.800191: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory 2023-08-20 21:06:05.800256: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory 2023-08-20 21:06:05.800262: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.
In [2]:
Copied!
# Read the ATAC AnnData file from disk; the bare name on the last line makes
# the notebook display the object's summary as the cell output.
atac_h5ad = 'data/biodata/cd34_multiome_atac.h5ad'
adata = sc.read(atac_h5ad)
adata
# Read the ATAC AnnData file from disk; the bare name on the last line makes
# the notebook display the object's summary as the cell output.
atac_h5ad = 'data/biodata/cd34_multiome_atac.h5ad'
adata = sc.read(atac_h5ad)
adata
Out[2]:
AnnData object with n_obs × n_vars = 6881 × 246113
obs: 'Sample', 'TSSEnrichment', 'ReadsInTSS', 'ReadsInPromoter', 'ReadsInBlacklist', 'PromoterRatio', 'PassQC', 'NucleosomeRatio', 'nMultiFrags', 'nMonoFrags', 'nFrags', 'nDiFrags', 'BlacklistRatio', 'Clusters', 'ReadsInPeaks', 'FRIP', 'leiden', 'phenograph', 'celltype', 'SEACell'
var: 'seqnames', 'start', 'end', 'width', 'strand', 'score', 'replicateScoreQuantile', 'groupScoreQuantile', 'Reproducibility', 'GroupReplicate', 'nearestGene', 'distToGeneStart', 'peakType', 'distToTSS', 'nearestTSS', 'GC', 'idx'
uns: 'celltype_colors'
obsm: 'X_svd', 'X_umap'
In [4]:
Copied!
import os

# The GTF location was hard-coded to one user's home directory, so the cell
# could not run on any other machine. It can now be overridden through the
# GENCODE_GTF environment variable; the original path remains the default so
# existing setups behave exactly as before.
gtf_path = os.environ.get(
    'GENCODE_GTF',
    '/mnt/home/zehuazeng/data/gtf/gencode.v43.basic.annotation.gtf',
)
atac_anno = ev.utils.Annotation(gtf_path)
# Display the chromosome -> length mapping held by the annotation object.
atac_anno.chrom_dict
import os

# The GTF location was hard-coded to one user's home directory, so the cell
# could not run on any other machine. It can now be overridden through the
# GENCODE_GTF environment variable; the original path remains the default so
# existing setups behave exactly as before.
gtf_path = os.environ.get(
    'GENCODE_GTF',
    '/mnt/home/zehuazeng/data/gtf/gencode.v43.basic.annotation.gtf',
)
atac_anno = ev.utils.Annotation(gtf_path)
# Display the chromosome -> length mapping held by the annotation object.
atac_anno.chrom_dict
Out[4]:
{'chr1': 248937043,
'chr2': 242175694,
'chr3': 198228376,
'chr4': 190196190,
'chr5': 181472430,
'chr6': 170745977,
'chr7': 159233377,
'chr8': 145066685,
'chr9': 138320835,
'chr10': 133778699,
'chr11': 135075908,
'chr12': 133238549,
'chr13': 114346637,
'chr14': 106879812,
'chr15': 101979093,
'chr16': 90222673,
'chr17': 83240804,
'chr18': 80247514,
'chr19': 58605223,
'chr20': 64327972,
'chr21': 46691226,
'chr22': 50801309,
'chrX': 156027877,
'chrY': 57214397,
'chrM': 16023}
In [5]:
Copied!
# Initialise the three region sets on the annotation object before querying.
# NOTE(review): presumably these are the promoter (TSS) window, the distal
# windows on either side, and the gene bodies; units look like base pairs -
# confirm against the Epiverse documentation.
atac_anno.tss_init(upstream=1000, downstream=100)
atac_anno.distal_init(
    upstream=[1000, 200000],
    downstream=[1000, 200000],
)
atac_anno.body_init()
# Initialise the three region sets on the annotation object before querying.
# NOTE(review): presumably these are the promoter (TSS) window, the distal
# windows on either side, and the gene bodies; units look like base pairs -
# confirm against the Epiverse documentation.
atac_anno.tss_init(upstream=1000, downstream=100)
atac_anno.distal_init(
    upstream=[1000, 200000],
    downstream=[1000, 200000],
)
atac_anno.body_init()
In [6]:
Copied!
%%time
import pandas as pd
k=0
for chr in adata.var['seqnames'].unique():
if k==0:
merge_pd=atac_anno.query_multi(query_list=adata.var.loc[adata.var['seqnames']==chr].index.tolist(),
chrom=chr,batch=4,ncpus=8)
else:
merge_pd1=atac_anno.query_multi(query_list=adata.var.loc[adata.var['seqnames']==chr].index.tolist(),
chrom=chr,batch=4,ncpus=8)
merge_pd=pd.concat([merge_pd,merge_pd1])
k+=1
%%time
import pandas as pd
k=0
for chr in adata.var['seqnames'].unique():
if k==0:
merge_pd=atac_anno.query_multi(query_list=adata.var.loc[adata.var['seqnames']==chr].index.tolist(),
chrom=chr,batch=4,ncpus=8)
else:
merge_pd1=atac_anno.query_multi(query_list=adata.var.loc[adata.var['seqnames']==chr].index.tolist(),
chrom=chr,batch=4,ncpus=8)
merge_pd=pd.concat([merge_pd,merge_pd1])
k+=1
100%|██████████| 3073/3073 [00:16<00:00, 187.28it/s]
Start process_querychr6...
65%|██████▍ | 2337/3599 [00:09<00:05, 212.21it/s]
Start process_querychr6...
100%|██████████| 3599/3599 [00:14<00:00, 244.08it/s] 59%|█████▉ | 2115/3599 [00:09<00:07, 207.30it/s]
Start process_querychr6...
100%|██████████| 3599/3599 [00:18<00:00, 197.08it/s] 46%|████▌ | 1653/3599 [00:10<00:11, 162.31it/s]
Start process_querychr6...
45%|████▍ | 1605/3599 [00:08<00:12, 160.97it/s]
Start process_querychr6...
100%|██████████| 3/3 [00:00<00:00, 169.65it/s] 100%|██████████| 3599/3599 [00:21<00:00, 170.19it/s] 100%|██████████| 3599/3599 [00:19<00:00, 180.60it/s]
Start process_querychr7...
75%|███████▍ | 2310/3091 [00:09<00:03, 223.19it/s]
Start process_querychr7...
100%|██████████| 3091/3091 [00:12<00:00, 249.46it/s] 69%|██████▉ | 2144/3091 [00:09<00:04, 219.50it/s]
Start process_querychr7...
100%|██████████| 3091/3091 [00:14<00:00, 219.68it/s] 59%|█████▉ | 1833/3091 [00:08<00:06, 201.70it/s]
Start process_querychr7...
100%|██████████| 3091/3091 [00:15<00:00, 200.57it/s] 100%|██████████| 3091/3091 [00:16<00:00, 190.75it/s]
Start process_querychr8...
100%|██████████| 2563/2563 [00:09<00:00, 284.09it/s]
Start process_querychr8...
90%|█████████ | 2307/2563 [00:09<00:01, 211.17it/s]
Start process_querychr8...
100%|██████████| 2563/2563 [00:10<00:00, 247.70it/s] 79%|███████▉ | 2035/2563 [00:09<00:02, 190.05it/s]
Start process_querychr8...
100%|██████████| 2563/2563 [00:11<00:00, 215.26it/s] 71%|███████ | 1824/2563 [00:08<00:03, 214.39it/s]
Start process_querychr8...
100%|██████████| 3/3 [00:00<00:00, 186.47it/s] 100%|██████████| 2563/2563 [00:12<00:00, 207.08it/s]
Start process_querychr9...
100%|██████████| 2594/2594 [00:08<00:00, 296.60it/s]
Start process_querychr9...
88%|████████▊ | 2285/2594 [00:09<00:01, 232.77it/s]
Start process_querychr9...
100%|██████████| 2594/2594 [00:10<00:00, 245.62it/s] 83%|████████▎ | 2158/2594 [00:09<00:02, 216.28it/s]
Start process_querychr9...
100%|██████████| 2594/2594 [00:11<00:00, 234.16it/s] 78%|███████▊ | 2032/2594 [00:08<00:02, 216.52it/s]
Start process_querychr9...
100%|██████████| 3/3 [00:00<00:00, 215.96it/s] 100%|██████████| 2594/2594 [00:11<00:00, 228.00it/s]
Start process_querychrX...
100%|██████████| 1524/1524 [00:05<00:00, 283.53it/s]
Start process_querychrX...
100%|██████████| 1524/1524 [00:05<00:00, 269.44it/s]
Start process_querychrX...
100%|██████████| 1524/1524 [00:06<00:00, 237.86it/s]
Start process_querychrX...
100%|██████████| 1524/1524 [00:06<00:00, 220.91it/s]
Start process_querychrX...
100%|██████████| 1/1 [00:00<00:00, 228.14it/s]
CPU times: user 14min 8s, sys: 2min 21s, total: 16min 30s Wall time: 22min 34s
In [12]:
Copied!
# Persist the query result so the long-running query cell does not have to be re-run.
merge_csv = 'temp_merge_pd.csv'
merge_pd.to_csv(merge_csv)
# Persist the query result so the long-running query cell does not have to be re-run.
merge_csv = 'temp_merge_pd.csv'
merge_pd.to_csv(merge_csv)
In [5]:
Copied!
import pandas as pd

# Reload the cached query result; the first CSV column is restored as the index.
merge_csv = 'temp_merge_pd.csv'
merge_pd = pd.read_csv(merge_csv, index_col=0)
import pandas as pd

# Reload the cached query result; the first CSV column is restored as the index.
merge_csv = 'temp_merge_pd.csv'
merge_pd = pd.read_csv(merge_csv, index_col=0)
In [8]:
Copied!
# Post-process the query table with the annotation object.
# NOTE(review): merge_pd is overwritten with the result, so re-running this cell
# feeds the already-merged table back in - confirm merge_info tolerates that.
merge_pd = atac_anno.merge_info(merge_pd)
# Post-process the query table with the annotation object.
# NOTE(review): merge_pd is overwritten with the result, so re-running this cell
# feeds the already-merged table back in - confirm merge_info tolerates that.
merge_pd = atac_anno.merge_info(merge_pd)
In [9]:
Copied!
# Attach the selected annotation columns to adata; a later cell reads these
# same column names from adata.var.
gene_info_columns = ['peaktype', 'neargene', 'neargene_tss']
atac_anno.add_gene_info(adata, merge_pd, columns=gene_info_columns)
# Attach the selected annotation columns to adata; a later cell reads these
# same column names from adata.var.
gene_info_columns = ['peaktype', 'neargene', 'neargene_tss']
atac_anno.add_gene_info(adata, merge_pd, columns=gene_info_columns)
In [14]:
Copied!
# Preview the first 20 peaks with their newly added annotation columns.
preview_columns = ['peaktype', 'neargene', 'neargene_tss']
adata.var[preview_columns].head(20)
# Preview the first 20 peaks with their newly added annotation columns.
preview_columns = ['peaktype', 'neargene', 'neargene_tss']
adata.var[preview_columns].head(20)
Out[14]:
| | peaktype | neargene | neargene_tss |
|---|---|---|---|
| chr1:816070-816570 | body | ENSG00000290784 | 817712 |
| chr1:817092-817592 | promoter | ENSG00000290784 | 817712 |
| chr1:827266-827766 | promoter | LINC01128 | 826832 |
| chr1:838271-838771 | body | LINC01128 | 826832 |
| chr1:842638-843138 | body | LINC01128 | 827673 |
| chr1:856533-857033 | body | LINC01128 | 827598 |
| chr1:858585-859085 | body | LINC01128 | 827598 |
| chr1:860736-861236 | promoter | ENSG00000288531 | 860227 |
| chr1:869632-870132 | promoter | FAM41C | 870201 |
| chr1:876439-876939 | promoter | ENSG00000283040 | 877234 |
| chr1:890619-891119 | down_distal | ENSG00000285268 | 781937 |
| chr1:897221-897721 | down_distal | LINC01409 | 803934 |
| chr1:898274-898774 | down_distal | LINC01409 | 803934 |
| chr1:904520-905020 | down_distal | LINC01409 | 803934 |
| chr1:905170-905670 | promoter | ENSG00000272438 | 904834 |
| chr1:906630-907130 | body | ENSG00000272438 | 904834 |
| chr1:909981-910481 | body | ENSG00000272438 | 904834 |
| chr1:911150-911650 | body | ENSG00000272438 | 904834 |
| chr1:912768-913268 | body | ENSG00000230699 | 911435 |
| chr1:915682-916182 | down_distal | FAM87B | 817371 |
In [ ]:
Copied!