Shreekant Kalwar (Nokia) committed on
Commit 1d80ba8 · 1 Parent(s): 149a73b
Files changed (3)
  1. Dockerfile +1 -1
  2. README.md +1 -2
  3. app.py +18 -14
Dockerfile CHANGED
@@ -1,5 +1,5 @@
 # Use an official Python runtime as a parent image
-FROM python:3.11
+FROM python:3.13
 
 # Set the working directory in the container
 WORKDIR /app
README.md CHANGED
@@ -3,8 +3,7 @@ title: LLM Model
 emoji: 🤖
 colorFrom: indigo
 colorTo: purple
-sdk: gradio
-sdk_version: "4.36.0"
+sdk: docker
 app_file: app.py
 pinned: false
 ---
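Note: with "sdk: docker", Hugging Face Spaces builds and runs the Space from the repository's Dockerfile instead of the managed Gradio runtime, which is why the "sdk_version" pin is dropped along with it.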
app.py CHANGED
@@ -4,28 +4,32 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
 
+
 app = FastAPI()
 
 class ChatRequest(BaseModel):
     message: str
 
+
+# Load DeepSeek model (small one for local use)
+# Try bigger models if you have a GPU with >12GB VRAM
+model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"
+
+print("Loading model... this may take a minute ⏳")
+global tokenizer
+global model
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    device_map="auto"
+)
+print("Model loaded ✅")
+
 @app.get("/")
 def root():
 
-    # Load DeepSeek model (small one for local use)
-    # Try bigger models if you have a GPU with >12GB VRAM
-    model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"
-
-    print("Loading model... this may take a minute ⏳")
-    global tokenizer
-    global model
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-        device_map="auto"
-    )
-    print("Model loaded ✅")
+
 
     return {"status": "ok"}
 
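Two observations on the new module-level block: the two "global" statements are no-ops outside a function body, and the model now downloads at import time, before the server can bind a port. A minimal alternative sketch (not part of this commit; same model and dependencies assumed) that moves the load into a FastAPI lifespan hook instead:

# Sketch only, not part of the commit: defer the heavy load to a FastAPI
# lifespan hook so that importing the module stays cheap.
from contextlib import asynccontextmanager

import torch
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"
tokenizer = None
model = None

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Runs once, just before the server starts accepting requests.
    # "global" is meaningful here because we are inside a function body.
    global tokenizer, model
    print("Loading model... this may take a minute ⏳")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",  # requires the accelerate package
    )
    print("Model loaded ✅")
    yield  # the app serves requests while suspended here

app = FastAPI(lifespan=lifespan)

class ChatRequest(BaseModel):
    message: str

@app.get("/")
def root():
    return {"status": "ok"}

The endpoints are unchanged; the only difference is that the weights are fetched once at startup rather than at import, so the file can be imported (by tests or tooling) without pulling gigabytes of weights.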