Batrdj committed on
Commit
b60e096
·
verified ·
1 Parent(s): 286d07a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -12
app.py CHANGED
@@ -12,13 +12,14 @@ app = FastAPI()
12
  # πŸ” API KEY (keep same)
13
  API_KEY = "sk-tinyllm-9f3a2c7e8b4d1a6c0e52f91d"
14
 
15
 - # ✅ Lightweight CPU model (NLP engine only)
16
- MODEL_NAME = "Qwen/Qwen1.5-0.5B-Chat"
17
 
18
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
19
  model = AutoModelForCausalLM.from_pretrained(
20
  MODEL_NAME,
21
- dtype=torch.float32
 
22
  )
23
  model.eval()
24
 
@@ -59,9 +60,6 @@ def chat(
59
  ):
60
  check_api_key(authorization)
61
 
62
- # 🚫 IMPORTANT:
63
- # DO NOT inject system identity here.
64
- # Your RAG prompt already contains ALL rules.
65
  messages = [
66
  {
67
  "role": "user",
@@ -78,11 +76,11 @@ def chat(
78
  with torch.no_grad():
79
  output_ids = model.generate(
80
  input_ids,
81
- max_new_tokens=220, # controlled output
82
 - temperature=0.0, # 🔥 NO hallucination
83
  top_p=0.7,
84
  top_k=20,
85
- do_sample=False, # deterministic
86
  repetition_penalty=1.1,
87
  eos_token_id=tokenizer.eos_token_id
88
  )
@@ -92,6 +90,4 @@ def chat(
92
  skip_special_tokens=True
93
  ).strip()
94
 
95
- return {
96
- "response": response
97
- }
 
12
  # πŸ” API KEY (keep same)
13
  API_KEY = "sk-tinyllm-9f3a2c7e8b4d1a6c0e52f91d"
14
 
15
 + # 🔥 BETTER CPU-SAFE MODEL
16
+ MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
17
 
18
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
19
  model = AutoModelForCausalLM.from_pretrained(
20
  MODEL_NAME,
21
+ torch_dtype=torch.float32,
22
+ device_map="cpu"
23
  )
24
  model.eval()
25
 
 
60
  ):
61
  check_api_key(authorization)
62
 
 
 
 
63
  messages = [
64
  {
65
  "role": "user",
 
76
  with torch.no_grad():
77
  output_ids = model.generate(
78
  input_ids,
79
+ max_new_tokens=220,
80
+ temperature=0.0, # deterministic (RAG-safe)
81
  top_p=0.7,
82
  top_k=20,
83
+ do_sample=False,
84
  repetition_penalty=1.1,
85
  eos_token_id=tokenizer.eos_token_id
86
  )
 
90
  skip_special_tokens=True
91
  ).strip()
92
 
93
+ return {"response": response}