Shreekant Kalwar (Nokia) committed
Commit 4815889 · 1 Parent(s): 6707a85

model change

Files changed (1): app.py (+18 -1)
app.py CHANGED
@@ -1,6 +1,7 @@
 from fastapi import FastAPI
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM
+from fastapi.middleware.cors import CORSMiddleware
 import torch
 import os
 
@@ -10,11 +11,24 @@ os.environ["HF_HOME"] = "/app/.cache"
 
 app = FastAPI()
 
+# ✅ Allow all origins
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],       # allow all origins
+    allow_credentials=True,
+    allow_methods=["*"],       # allow all HTTP methods
+    allow_headers=["*"],       # allow all headers
+)
+
+
 class ChatRequest(BaseModel):
     message: str
 
 # Load DeepSeek model (small one for local use)
-model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"
+# model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"
+
+model_name = "Qwen/Qwen2.5-1.5B-Instruct"
+# model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 
 print("Loading model... this may take a minute ⏳")
 tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -34,5 +48,8 @@ def chat(request: ChatRequest):
     """Chat endpoint using DeepSeek model"""
    inputs = tokenizer(request.message, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=200)
+
    reply = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+
    return {"reply": reply}
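
A quick way to exercise the endpoint after this change (a minimal smoke-test sketch, not part of the commit: it assumes the app is served on localhost:8000 and that chat() is mounted at POST /chat, since the route decorator sits outside the hunks above; the request and response shapes follow ChatRequest and the {"reply": ...} return):

# client_test.py -- hypothetical smoke test for the changed endpoint
import requests

resp = requests.post(
    "http://localhost:8000/chat",       # host, port, and path are assumptions
    json={"message": "Say hello in one sentence."},  # matches ChatRequest.message
    timeout=120,                        # generation on CPU can be slow
)
resp.raise_for_status()
print(resp.json()["reply"])             # endpoint returns {"reply": reply}

One note on the model swap: the endpoint still tokenizes request.message as a raw string, while Qwen/Qwen2.5-1.5B-Instruct is a chat-tuned model that generally responds better when the prompt is wrapped in its chat template. A sketch of that adjustment inside chat(), using the standard transformers apply_chat_template API (the rest of the handler stays as committed):

inputs = tokenizer.apply_chat_template(
    [{"role": "user", "content": request.message}],  # single-turn conversation
    add_generation_prompt=True,   # append the assistant turn marker
    return_tensors="pt",
).to(model.device)
outputs = model.generate(inputs, max_new_tokens=200)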