Shreekant Kalwar (Nokia) committed on
Commit 55a61ee · 1 Parent(s): 941e116

runtime.txt add

Files changed (1)
1. app.py +14 -12
app.py CHANGED
@@ -3,18 +3,6 @@ from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
-# Load DeepSeek model (small one for local use)
-# Try bigger models if you have a GPU with >12GB VRAM
-model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"
-
-print("Loading model... this may take a minute ⏳")
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    device_map="auto"
-)
-print("Model loaded ✅")
 
 
 app = FastAPI()
@@ -23,6 +11,20 @@ class ChatRequest(BaseModel):
 
 @app.get("/")
 def root():
+
+    # Load DeepSeek model (small one for local use)
+    # Try bigger models if you have a GPU with >12GB VRAM
+    model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"
+
+    print("Loading model... this may take a minute ⏳")
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        device_map="auto"
+    )
+    print("Model loaded ✅")
+
     return {"status": "ok"}
 
 @app.post("/chat")
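
For readability, here is a sketch of app.py as it stands after this commit, reconstructed from the two hunks above. The `from fastapi import FastAPI` line, the ChatRequest fields, and the /chat handler body fall outside the visible hunks, so those parts are illustrative placeholders rather than the author's code.

# Sketch of app.py after commit 55a61ee, reconstructed from the diff above.
# Placeholders (not visible in the hunks): the FastAPI import, the
# ChatRequest fields, and the /chat handler body.
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch


app = FastAPI()

class ChatRequest(BaseModel):
    prompt: str  # placeholder field; the real schema is not shown in this diff

@app.get("/")
def root():

    # Load DeepSeek model (small one for local use)
    # Try bigger models if you have a GPU with >12GB VRAM
    model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"

    print("Loading model... this may take a minute ⏳")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto"
    )
    print("Model loaded ✅")

    return {"status": "ok"}

@app.post("/chat")
def chat(req: ChatRequest):
    # Handler body not included in the visible hunks; elided here.
    ...

Note that after this change tokenizer and model are local variables of root(), so the weights are loaded on every GET / request rather than once at import time.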