Shreekant Kalwar (Nokia) committed on
Commit
11ff83b
·
0 Parent(s):

initial commit

Browse files
Files changed (4) hide show
  1. .gitignore +3 -0
  2. Dockerfile +13 -0
  3. app.py +30 -0
  4. requirements.txt +0 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ /venv
2
+ .env
3
+ __pycache__
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9
2
+
3
+ RUN useradd -m -u 1000 user
4
+ USER user
5
+ ENV PATH="/home/user/.local/bin:$PATH"
6
+
7
+ WORKDIR /app
8
+
9
+ COPY --chown=user ./requirements.txt requirements.txt
10
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
11
+
12
+ COPY --chown=user . /app
13
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
app.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ import torch
5
+
6
+ # Load DeepSeek model (small one for local use)
7
+ # Try bigger models if you have a GPU with >12GB VRAM
8
+ model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"
9
+
10
+ print("Loading model... this may take a minute ⏳")
11
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
12
+ model = AutoModelForCausalLM.from_pretrained(
13
+ model_name,
14
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
15
+ device_map="auto"
16
+ )
17
+ print("Model loaded ✅")
18
+
19
+ app = FastAPI()
20
+
21
+ class ChatRequest(BaseModel):
22
+ message: str
23
+
24
+ @app.post("/chat")
25
+ def chat(request: ChatRequest):
26
+ """Chat endpoint using DeepSeek model"""
27
+ inputs = tokenizer(request.message, return_tensors="pt").to(model.device)
28
+ outputs = model.generate(**inputs, max_new_tokens=200)
29
+ reply = tokenizer.decode(outputs[0], skip_special_tokens=True)
30
+ return {"reply": reply}
requirements.txt ADDED
Binary file (1.38 kB). View file