Patryk Studzinski committed on
Commit
c53fb02
·
1 Parent(s): 572ea40

Feat: Simplify model download using huggingface-cli

Browse files
Files changed (2) hide show
  1. Dockerfile +9 -10
  2. download_model.py +0 -69
Dockerfile CHANGED
@@ -1,24 +1,23 @@
1
-
2
  FROM python:3.9-slim
3
 
4
  WORKDIR /app
5
 
6
  ENV MODEL_DIR=/app/pretrain_model
7
  ENV HF_HUB_DISABLE_SYMLINKS_WARNING=1
8
- ENV HUGGING_FACE_HUB_TOKEN=""
9
 
10
  COPY requirements.txt .
11
-
12
  RUN pip install --no-cache-dir -r requirements.txt
13
 
14
- COPY download_model.py /app/download_model.py
15
 
16
- RUN --mount=type=secret,id=huggingface_token \
17
- export HUGGING_FACE_HUB_TOKEN=$(cat /run/secrets/huggingface_token) && \
18
- echo "--- Docker RUN: Starting model download script (token is now in ENV)..." && \
19
- python /app/download_model.py && \
20
- echo "--- Docker RUN: Model download script finished." && \
21
- rm /app/download_model.py
 
 
22
 
23
  COPY . .
24
 
 
 
1
  FROM python:3.9-slim
2
 
3
  WORKDIR /app
4
 
5
  ENV MODEL_DIR=/app/pretrain_model
6
  ENV HF_HUB_DISABLE_SYMLINKS_WARNING=1
7
+ ENV HF_TOKEN=""
8
 
9
  COPY requirements.txt .
 
10
  RUN pip install --no-cache-dir -r requirements.txt
11
 
 
12
 
13
+ RUN --mount=type=secret,id=HF_TOKEN \
14
+ export HF_TOKEN=$(cat /run/secrets/HF_TOKEN) && \
15
+ echo "--- Docker RUN: Downloading model using huggingface-cli..." && \
16
+ huggingface-cli download speakleash/Bielik-1.5B-v3.0-Instruct \
17
+ --local-dir ${MODEL_DIR} \
18
+ --local-dir-use-symlinks=False && \
19
+ echo "--- Docker RUN: Model download complete."
20
+
21
 
22
  COPY . .
23
 
download_model.py DELETED
@@ -1,69 +0,0 @@
1
- # This script is intended to be run in a Docker container with the Hugging Face token mounted as a secret.
2
- from huggingface_hub import snapshot_download
3
- from huggingface_hub.errors import HfHubHTTPError
4
- import os
5
- import sys
6
- import traceback
7
-
8
- def main():
9
- token_path = '/run/secrets/huggingface_token'
10
- model_dir_path = os.environ.get('MODEL_DIR')
11
- repo_id_to_download = 'speakleash/Bielik-1.5B-v3.0-Instruct'
12
-
13
- print(f'--- Python SCRIPT DEBUG: Target model directory: {model_dir_path}')
14
- if not model_dir_path:
15
- print('--- Python SCRIPT CRITICAL ERROR: MODEL_DIR environment variable not set!')
16
- sys.exit(1)
17
-
18
- token_value = None
19
- try:
20
- with open(token_path, 'r') as f:
21
- token_value = f.read().strip()
22
- print(f'--- Python SCRIPT DEBUG: Token file {token_path} read successfully.')
23
- if token_value:
24
- masked_token = f"{token_value[:4]}****{token_value[-4:] if len(token_value) > 4 else '(token too short)'}"
25
- print(f'--- Python SCRIPT DEBUG: Token content (masked): {masked_token}')
26
- if not token_value.startswith('hf_'):
27
- print('--- Python SCRIPT WARNING: Token does not appear to start with hf_! Check token file content.')
28
- else:
29
- print('--- Python SCRIPT CRITICAL ERROR: Token file was empty or only whitespace!')
30
- sys.exit(1)
31
- except FileNotFoundError:
32
- print(f'--- Python SCRIPT CRITICAL ERROR: Token secret file {token_path} not found! Ensure --mount is correct.')
33
- sys.exit(1)
34
- except Exception as e:
35
- print(f'--- Python SCRIPT CRITICAL ERROR: Could not read token from {token_path}: {e}')
36
- traceback.print_exc()
37
- sys.exit(1)
38
-
39
- try:
40
- print(f'--- Python SCRIPT INFO: Calling snapshot_download for {repo_id_to_download}...')
41
- snapshot_download(
42
- repo_id=repo_id_to_download,
43
- local_dir=model_dir_path,
44
- token=token_value,
45
- local_dir_use_symlinks=False,
46
- resume_download=True
47
- # Removed ignore_patterns for now to ensure no interference
48
- )
49
- print(f'--- Python SCRIPT INFO: snapshot_download completed for {repo_id_to_download}.')
50
- except HfHubHTTPError as http_e:
51
- print(f'--- Python SCRIPT ERROR: HfHubHTTPError during snapshot_download: {http_e}')
52
- if http_e.response is not None:
53
- print(f'--- Python SCRIPT ERROR: Response status: {http_e.response.status_code}')
54
- print(f'--- Python SCRIPT ERROR: Response headers: {http_e.response.headers}')
55
- try:
56
- response_content = http_e.response.content.decode()
57
- except UnicodeDecodeError:
58
- response_content = str(http_e.response.content)
59
- print(f'--- Python SCRIPT ERROR: Response content: {response_content}')
60
- if http_e.request_id:
61
- print(f'--- Python SCRIPT ERROR: Request ID: {http_e.request_id}')
62
- sys.exit(1)
63
- except Exception as e:
64
- print(f'--- Python SCRIPT ERROR: Other Exception during snapshot_download: {e}')
65
- traceback.print_exc()
66
- sys.exit(1)
67
-
68
- if __name__ == "__main__":
69
- main()