Shreekant Kalwar (Nokia) committed on
Commit
7a240a4
·
1 Parent(s): 1d80ba8
Files changed (2) hide show
  1. Dockerfile +11 -4
  2. app.py +4 -8
Dockerfile CHANGED
@@ -7,11 +7,18 @@ WORKDIR /app
7
  # Copy the current directory contents into the container
8
  COPY . .
9
 
10
- # Install any needed dependencies
11
  RUN pip install --no-cache-dir -r requirements.txt
12
 
13
- # Expose the API port
 
 
 
 
 
 
 
14
  EXPOSE 7860
15
 
16
- # Command to run the FastAPI app
17
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
7
  # Copy the current directory contents into the container
8
  COPY . .
9
 
10
+ # Install dependencies
11
  RUN pip install --no-cache-dir -r requirements.txt
12
 
13
+ # Hugging Face cache fix: set writable cache directory inside /app
14
+ ENV TRANSFORMERS_CACHE=/app/.cache
15
+ ENV HF_HOME=/app/.cache
16
+
17
+ # Make sure the cache directory exists
18
+ RUN mkdir -p /app/.cache
19
+
20
+ # Expose FastAPI port
21
  EXPOSE 7860
22
 
23
+ # Run FastAPI app
24
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py CHANGED
@@ -2,22 +2,21 @@ from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import torch
 
5
 
6
-
 
 
7
 
8
  app = FastAPI()
9
 
10
  class ChatRequest(BaseModel):
11
  message: str
12
 
13
-
14
  # Load DeepSeek model (small one for local use)
15
- # Try bigger models if you have a GPU with >12GB VRAM
16
  model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"
17
 
18
  print("Loading model... this may take a minute ⏳")
19
- global tokenizer
20
- global model
21
  tokenizer = AutoTokenizer.from_pretrained(model_name)
22
  model = AutoModelForCausalLM.from_pretrained(
23
  model_name,
@@ -28,9 +27,6 @@ print("Model loaded ✅")
28
 
29
  @app.get("/")
30
  def root():
31
-
32
-
33
-
34
  return {"status": "ok"}
35
 
36
  @app.post("/chat")
 
2
  from pydantic import BaseModel
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import torch
5
+ import os
6
 
7
+ # Ensure Hugging Face cache uses a writable path
8
+ os.environ["TRANSFORMERS_CACHE"] = "/app/.cache"
9
+ os.environ["HF_HOME"] = "/app/.cache"
10
 
11
  app = FastAPI()
12
 
13
  class ChatRequest(BaseModel):
14
  message: str
15
 
 
16
  # Load DeepSeek model (small one for local use)
 
17
  model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"
18
 
19
  print("Loading model... this may take a minute ⏳")
 
 
20
  tokenizer = AutoTokenizer.from_pretrained(model_name)
21
  model = AutoModelForCausalLM.from_pretrained(
22
  model_name,
 
27
 
28
  @app.get("/")
29
  def root():
 
 
 
30
  return {"status": "ok"}
31
 
32
  @app.post("/chat")