pzweuj committed on
Commit
32addc0
·
verified ·
1 Parent(s): b44ba53

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +2 -29
Dockerfile CHANGED
@@ -43,35 +43,8 @@ RUN mkdir -p /data/transvar_db/refseq_hg38 /data/transvar_db/refseq_hg19
43
  # 如果设置了 HF_DATASET,从数据集下载
44
  RUN if [ -n "$HF_DATASET" ]; then \
45
  echo "Downloading from HF Dataset: $HF_DATASET" && \
46
- python3 << 'PYEOF'
47
- import os
48
- from huggingface_hub import hf_hub_download, snapshot_download
49
-
50
- dataset = os.environ.get('HF_DATASET', '')
51
- target_dir = '/data/transvar_db'
52
-
53
- # 尝试使用 snapshot_download 下载整个目录
54
- try:
55
- local_path = snapshot_download(repo_id=dataset, repo_type='dataset', cache_dir='/tmp/hf_cache')
56
- # 复制到目标目录
57
- os.system(f'cp -r {local_path}/* {target_dir}/')
58
- print(f'Downloaded from Dataset: {local_path}')
59
- except Exception as e:
60
- print(f'snapshot_download failed: {e}')
61
- # 尝试单独下载文件
62
- files = ['hg38/hg38.fa', 'hg38/ncbiRefSeq.txt.gz', 'hg19/hg19.fa', 'hg19/ncbiRefSeq.txt.gz']
63
- for f in files:
64
- try:
65
- path = hf_hub_download(repo_id=dataset, filename=f, repo_type='dataset')
66
- subdir = f.split('/')[0] # hg38 or hg19
67
- filename = f.split('/')[1]
68
- os.makedirs(f'{target_dir}/refseq_{subdir}', exist_ok=True)
69
- os.system(f'cp {path} {target_dir}/refseq_{subdir}/{filename}')
70
- print(f'Downloaded: {f}')
71
- except Exception as e2:
72
- print(f'Failed to download {f}: {e2}')
73
- PYEOF
74
- fi
75
 
76
  # 如果没有设置 HF_DATASET,从 UCSC 下载(首次构建)
77
  WORKDIR /data/transvar_db/refseq_hg38
 
43
  # 如果设置了 HF_DATASET,从数据集下载
44
  RUN if [ -n "$HF_DATASET" ]; then \
45
  echo "Downloading from HF Dataset: $HF_DATASET" && \
46
+ python3 -c "import os; from huggingface_hub import snapshot_download; dataset=os.environ.get('HF_DATASET',''); local=snapshot_download(repo_id=dataset, repo_type='dataset', cache_dir='/tmp/hf_cache'); os.system(f'cp -r {local}/* /data/transvar_db/')" || true; \
47
+ fi
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  # 如果没有设置 HF_DATASET,从 UCSC 下载(首次构建)
50
  WORKDIR /data/transvar_db/refseq_hg38