Shreekant Kalwar (Nokia)
committed on
Commit 7a240a4
Parent(s): 1d80ba8
new code
Files changed:
- Dockerfile +11 -4
- app.py +4 -8
Dockerfile
CHANGED

@@ -7,11 +7,18 @@ WORKDIR /app
 # Copy the current directory contents into the container
 COPY . .

-# Install
+# Install dependencies
 RUN pip install --no-cache-dir -r requirements.txt

-#
+# Hugging Face cache fix: set writable cache directory inside /app
+ENV TRANSFORMERS_CACHE=/app/.cache
+ENV HF_HOME=/app/.cache
+
+# Make sure the cache directory exists
+RUN mkdir -p /app/.cache
+
+# Expose FastAPI port
 EXPOSE 7860

-#
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+# Run FastAPI app
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
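Editorial context on the cache fix, not part of the commit: Hugging Face Spaces containers typically run the app as a non-root user, so the default cache under the user's home directory is often not writable and model downloads can fail with a permission error; pointing TRANSFORMERS_CACHE and HF_HOME at /app/.cache keeps the cache inside the writable app directory. Recent transformers releases also warn that TRANSFORMERS_CACHE is deprecated in favor of HF_HOME, so setting both, as this Dockerfile does, covers old and new versions. A minimal Python sketch to confirm the path is usable at startup; the probe file name is a hypothetical choice for this sketch:

# Writability check for the cache path set via ENV above.
# Assumes HF_HOME=/app/.cache from the Dockerfile; the probe file
# name (.write_test) is hypothetical.
import os

cache_dir = os.environ.get("HF_HOME", "/app/.cache")
os.makedirs(cache_dir, exist_ok=True)  # mirrors RUN mkdir -p /app/.cache
probe = os.path.join(cache_dir, ".write_test")
with open(probe, "w") as f:  # raises PermissionError if the directory is read-only
    f.write("ok")
os.remove(probe)
print(f"{cache_dir} is writable")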
app.py
CHANGED

@@ -2,22 +2,21 @@ from fastapi import FastAPI
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+import os

-
+# Ensure Hugging Face cache uses a writable path
+os.environ["TRANSFORMERS_CACHE"] = "/app/.cache"
+os.environ["HF_HOME"] = "/app/.cache"

 app = FastAPI()

 class ChatRequest(BaseModel):
     message: str

-
 # Load DeepSeek model (small one for local use)
-# Try bigger models if you have a GPU with >12GB VRAM
 model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"

 print("Loading model... this may take a minute ⏳")
-global tokenizer
-global model
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
@@ -28,9 +27,6 @@ print("Model loaded ✅")

 @app.get("/")
 def root():
-
-
-
     return {"status": "ok"}

 @app.post("/chat")
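Two editorial notes, not part of the commit. First, transformers generally reads these cache variables when the library is imported, and in app.py the os.environ assignments come after the transformers import, so on their own they are likely too late to take effect; the ENV lines in the Dockerfile, set before Python starts, are what actually redirect the cache. Second, a hypothetical client sketch for exercising the /chat endpoint once the container is running; the host, port, prompt, and timeout are assumptions, and the response shape depends on the /chat handler, which this diff does not show:

# Hypothetical client for the /chat endpoint. The ChatRequest schema
# ({"message": str}) comes from the diff above; host, port, prompt, and
# timeout are assumptions for this sketch.
import requests

resp = requests.post(
    "http://localhost:7860/chat",
    json={"message": "Write a Python function that reverses a string."},
    timeout=300,  # CPU generation with a 1.3B model can take a while
)
resp.raise_for_status()
print(resp.json())  # shape depends on the /chat handler, not shown here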