david167 committed on
Commit
bd1b0d6
·
1 Parent(s): 4656a02

Fix permissions error: proper cache directory and HF token auth for Llama 3.1

Browse files
Files changed (2)
  1. Dockerfile +5 -3
  2. app.py +8 -3
Dockerfile CHANGED
@@ -34,9 +34,11 @@ RUN pip install -r requirements.txt
34
  COPY app.py .
35
  COPY README.md .
36
 
37
- # Create HF cache directory
38
- RUN mkdir -p /.cache
39
- ENV HF_HOME=/.cache
 
 
40
 
41
  # Expose port
42
  EXPOSE 7860
 
34
  COPY app.py .
35
  COPY README.md .
36
 
37
+ # Create HF cache directory with proper permissions
38
+ RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache
39
+ ENV HF_HOME=/app/.cache
40
+ ENV TRANSFORMERS_CACHE=/app/.cache
41
+ ENV HF_DATASETS_CACHE=/app/.cache
42
 
43
  # Expose port
44
  EXPOSE 7860
app.py CHANGED
@@ -62,13 +62,17 @@ async def load_model():
62
 
63
  logger.info("Loading model with transformers...")
64
 
65
- # Use the base model instead of GGUF for better compatibility
66
  base_model_name = "meta-llama/Llama-3.1-8B-Instruct"
67
 
 
 
 
68
  tokenizer = AutoTokenizer.from_pretrained(
69
  base_model_name,
70
  use_fast=True,
71
- trust_remote_code=True
 
72
  )
73
 
74
  model = AutoModelForCausalLM.from_pretrained(
@@ -76,7 +80,8 @@ async def load_model():
76
  torch_dtype=torch.float16 if device == "cuda" else torch.float32,
77
  device_map="auto" if device == "cuda" else None,
78
  trust_remote_code=True,
79
- low_cpu_mem_usage=True
 
80
  )
81
 
82
  if device == "cuda":
 
62
 
63
  logger.info("Loading model with transformers...")
64
 
65
+ # Use Llama 3.1 8B Instruct (requires HF token with access)
66
  base_model_name = "meta-llama/Llama-3.1-8B-Instruct"
67
 
68
+ # Get HF token from environment
69
+ hf_token = os.getenv("HF_TOKEN")
70
+
71
  tokenizer = AutoTokenizer.from_pretrained(
72
  base_model_name,
73
  use_fast=True,
74
+ trust_remote_code=True,
75
+ token=hf_token
76
  )
77
 
78
  model = AutoModelForCausalLM.from_pretrained(
 
80
  torch_dtype=torch.float16 if device == "cuda" else torch.float32,
81
  device_map="auto" if device == "cuda" else None,
82
  trust_remote_code=True,
83
+ low_cpu_mem_usage=True,
84
+ token=hf_token
85
  )
86
 
87
  if device == "cuda":