pzweuj committed on
Commit
4311660
·
verified ·
1 Parent(s): a480e60

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +20 -26
Dockerfile CHANGED
@@ -1,11 +1,10 @@
1
  # TransVar API for HuggingFace Spaces
2
- # 仓库: https://github.com/pzweuj/TransVar2API
3
 
4
  FROM python:3.9-slim
5
 
6
  # 环境变量
7
  ENV DEBIAN_FRONTEND=noninteractive \
8
- TRANSVAR_DB_PATH=/data/transvar_db \
9
  PYTHONUNBUFFERED=1 \
10
  PORT=7860
11
 
@@ -34,36 +33,31 @@ RUN pip3 install --no-cache-dir transvar
34
  # 修补 transvar 的 localdb.py 以修复 KeyError: 'product' 错误
35
  RUN python3 /app/scripts/patch_transvar.py
36
 
37
- # 创建数据目录
38
- RUN mkdir -p /data/transvar_db/ucsc_hg38 /data/transvar_db/ucsc_hg19 \
39
- /data/transvar_db/ncbi_refseq_hg38 /data/transvar_db/ncbi_refseq_hg19
 
40
 
41
- # ========== UCSC 数据库 ==========
42
- WORKDIR /data/transvar_db/ucsc_hg38
43
- RUN wget -q -O hg38.fa.gz https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz && \
44
- gunzip -f hg38.fa.gz && \
45
- samtools faidx hg38.fa
46
- RUN wget -q -O ncbiRefSeq.txt.gz https://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/ncbiRefSeq.txt.gz
47
 
48
- WORKDIR /data/transvar_db/ucsc_hg19
49
- RUN wget -q -O hg19.fa.gz https://hgdownload.soe.ucsc.edu/goldenPath/hg19/bigZips/hg19.fa.gz && \
50
- gunzip -f hg19.fa.gz && \
51
- samtools faidx hg19.fa
52
- RUN wget -q -O ncbiRefSeq.txt.gz https://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/ncbiRefSeq.txt.gz
53
 
54
- # ========== NCBI RefSeq 数据库 (软链接 UCSC 的参考基因组) ==========
55
- WORKDIR /data/transvar_db/ncbi_refseq_hg38
56
- RUN ln -sf ../ucsc_hg38/hg38.fa ./hg38.fa && \
57
- samtools faidx hg38.fa
58
- RUN wget -q -O hg38_refseq.gff.gz https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gff.gz
59
 
60
- WORKDIR /data/transvar_db/ncbi_refseq_hg19
61
- RUN ln -sf ../ucsc_hg19/hg19.fa ./hg19.fa && \
62
- samtools faidx hg19.fa
63
- RUN wget -q -O hg19_refseq.gff.gz https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/annotation/GRCh37_latest/refseq_identifiers/GRCh37_latest_genomic.gff.gz
 
 
 
 
64
 
65
  # 设置启动脚本权限
66
- WORKDIR /app
67
  RUN chmod +x /app/scripts/hf_startup.sh
68
 
69
  EXPOSE 7860
 
1
  # TransVar API for HuggingFace Spaces
2
+ # 使用 transvar 官方数据库
3
 
4
  FROM python:3.9-slim
5
 
6
  # 环境变量
7
  ENV DEBIAN_FRONTEND=noninteractive \
 
8
  PYTHONUNBUFFERED=1 \
9
  PORT=7860
10
 
 
33
  # 修补 transvar 的 localdb.py 以修复 KeyError: 'product' 错误
34
  RUN python3 /app/scripts/patch_transvar.py
35
 
36
+ # ========== 下载 transvar 官方数据库 ==========
37
+ # hg38 - 下载注释数据库和参考基因组
38
+ RUN echo "Downloading hg38 annotation database..." && \
39
+ transvar config --download_anno --refversion hg38
40
 
41
+ RUN echo "Downloading hg38 reference genome..." && \
42
+ transvar config --download_ref --refversion hg38
 
 
 
 
43
 
44
+ # hg19 - 下载注释数据库和参考基因组
45
+ RUN echo "Downloading hg19 annotation database..." && \
46
+ transvar config --download_anno --refversion hg19
 
 
47
 
48
+ RUN echo "Downloading hg19 reference genome..." && \
49
+ transvar config --download_ref --refversion hg19
 
 
 
50
 
51
+ # 验证数据库
52
+ RUN echo "Verifying databases..." && \
53
+ echo "hg38 config:" && transvar config --refversion hg38 && \
54
+ echo "hg19 config:" && transvar config --refversion hg19
55
+
56
+ # 测试 transvar
57
+ RUN echo "Testing transvar..." && \
58
+ transvar panno -i "PIK3CA:p.E545K" --refversion hg38 -o /dev/stdout
59
 
60
  # 设置启动脚本权限
 
61
  RUN chmod +x /app/scripts/hf_startup.sh
62
 
63
  EXPOSE 7860