Spaces:
Sleeping
Sleeping
Fix permissions error: proper cache directory and HF token auth for Llama 3.1
Browse files
- Dockerfile +5 -3
- app.py +8 -3
Dockerfile
CHANGED
|
@@ -34,9 +34,11 @@ RUN pip install -r requirements.txt
|
|
| 34 |
COPY app.py .
|
| 35 |
COPY README.md .
|
| 36 |
|
| 37 |
-
# Create HF cache directory
|
| 38 |
-
RUN mkdir -p /.cache
|
| 39 |
-
ENV HF_HOME=/.cache
|
|
|
|
|
|
|
| 40 |
|
| 41 |
# Expose port
|
| 42 |
EXPOSE 7860
|
|
|
|
| 34 |
COPY app.py .
|
| 35 |
COPY README.md .
|
| 36 |
|
| 37 |
+
# Create HF cache directory with proper permissions
|
| 38 |
+
RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache
|
| 39 |
+
ENV HF_HOME=/app/.cache
|
| 40 |
+
ENV TRANSFORMERS_CACHE=/app/.cache
|
| 41 |
+
ENV HF_DATASETS_CACHE=/app/.cache
|
| 42 |
|
| 43 |
# Expose port
|
| 44 |
EXPOSE 7860
|
app.py
CHANGED
|
@@ -62,13 +62,17 @@ async def load_model():
|
|
| 62 |
|
| 63 |
logger.info("Loading model with transformers...")
|
| 64 |
|
| 65 |
-
# Use
|
| 66 |
base_model_name = "meta-llama/Llama-3.1-8B-Instruct"
|
| 67 |
|
|
|
|
|
|
|
|
|
|
| 68 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 69 |
base_model_name,
|
| 70 |
use_fast=True,
|
| 71 |
-
trust_remote_code=True
|
|
|
|
| 72 |
)
|
| 73 |
|
| 74 |
model = AutoModelForCausalLM.from_pretrained(
|
|
@@ -76,7 +80,8 @@ async def load_model():
|
|
| 76 |
torch_dtype=torch.float16 if device == "cuda" else torch.float32,
|
| 77 |
device_map="auto" if device == "cuda" else None,
|
| 78 |
trust_remote_code=True,
|
| 79 |
-
low_cpu_mem_usage=True
|
|
|
|
| 80 |
)
|
| 81 |
|
| 82 |
if device == "cuda":
|
|
|
|
| 62 |
|
| 63 |
logger.info("Loading model with transformers...")
|
| 64 |
|
| 65 |
+
# Use Llama 3.1 8B Instruct (requires HF token with access)
|
| 66 |
base_model_name = "meta-llama/Llama-3.1-8B-Instruct"
|
| 67 |
|
| 68 |
+
# Get HF token from environment
|
| 69 |
+
hf_token = os.getenv("HF_TOKEN")
|
| 70 |
+
|
| 71 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 72 |
base_model_name,
|
| 73 |
use_fast=True,
|
| 74 |
+
trust_remote_code=True,
|
| 75 |
+
token=hf_token
|
| 76 |
)
|
| 77 |
|
| 78 |
model = AutoModelForCausalLM.from_pretrained(
|
|
|
|
| 80 |
torch_dtype=torch.float16 if device == "cuda" else torch.float32,
|
| 81 |
device_map="auto" if device == "cuda" else None,
|
| 82 |
trust_remote_code=True,
|
| 83 |
+
low_cpu_mem_usage=True,
|
| 84 |
+
token=hf_token
|
| 85 |
)
|
| 86 |
|
| 87 |
if device == "cuda":
|