Batrdj committed on
Commit
ed3a83e
·
verified ·
1 Parent(s): 433f86f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -8
app.py CHANGED
@@ -12,14 +12,13 @@ app = FastAPI()
12
  # πŸ” API KEY (keep same)
13
  API_KEY = "sk-tinyllm-9f3a2c7e8b4d1a6c0e52f91d"
14
 
15
 - # 🔥 BETTER CPU-SAFE MODEL
16
- MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
17
 
18
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
19
  model = AutoModelForCausalLM.from_pretrained(
20
  MODEL_NAME,
21
- torch_dtype=torch.float32,
22
- device_map="cpu"
23
  )
24
  model.eval()
25
 
@@ -60,6 +59,9 @@ def chat(
60
  ):
61
  check_api_key(authorization)
62
 
 
 
 
63
  messages = [
64
  {
65
  "role": "user",
@@ -76,11 +78,11 @@ def chat(
76
  with torch.no_grad():
77
  output_ids = model.generate(
78
  input_ids,
79
- max_new_tokens=220,
80
- temperature=0.0, # deterministic (RAG-safe)
81
  top_p=0.7,
82
  top_k=20,
83
- do_sample=False,
84
  repetition_penalty=1.1,
85
  eos_token_id=tokenizer.eos_token_id
86
  )
@@ -90,4 +92,6 @@ def chat(
90
  skip_special_tokens=True
91
  ).strip()
92
 
93
- return {"response": response}
 
 
 
12
  # πŸ” API KEY (keep same)
13
  API_KEY = "sk-tinyllm-9f3a2c7e8b4d1a6c0e52f91d"
14
 
15
 + # ✅ Lightweight CPU model (NLP engine only)
16
+ MODEL_NAME = "Qwen/Qwen1.5-0.5B-Chat"
17
 
18
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
19
  model = AutoModelForCausalLM.from_pretrained(
20
  MODEL_NAME,
21
+ dtype=torch.float32
 
22
  )
23
  model.eval()
24
 
 
59
  ):
60
  check_api_key(authorization)
61
 
62
 + # 🚫 IMPORTANT:
63
+ # DO NOT inject system identity here.
64
+ # Your RAG prompt already contains ALL rules.
65
  messages = [
66
  {
67
  "role": "user",
 
78
  with torch.no_grad():
79
  output_ids = model.generate(
80
  input_ids,
81
+ max_new_tokens=220, # controlled output
82
 + temperature=0.0, # 🔥 NO hallucination
83
  top_p=0.7,
84
  top_k=20,
85
+ do_sample=False, # deterministic
86
  repetition_penalty=1.1,
87
  eos_token_id=tokenizer.eos_token_id
88
  )
 
92
  skip_special_tokens=True
93
  ).strip()
94
 
95
+ return {
96
+ "response": response
97
+ }