helloperson123 committed on
Commit
a450cd9
·
verified ·
1 Parent(s): f5beade

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -14
app.py CHANGED
@@ -5,25 +5,26 @@ from fastapi import FastAPI, Request
5
  from fastapi.middleware.cors import CORSMiddleware
6
  import uvicorn
7
  import json
 
8
 
9
  # -------------------------------
10
  # SETTINGS
11
  # -------------------------------
12
  MODEL_NAME = "TheDrummer/Gemmasutra-Mini-2B-v1"
13
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
14
- MAX_TOKENS = 512 # max tokens for a response
15
 
16
- # System prompt (FIXED: removed hardcoded user input)
17
  SYSTEM_PROMPT = """You are Acla. Acla is an AI assistant created by NC_1320.
18
  Acla NEVER claims to be made by Microsoft or any other company.
19
  Acla NEVER refers to itself as Phi, Gemma, or an AI model.
20
  Acla NEVER writes "User:" or "AI:" or continues a conversation transcript.
21
- Acla gives direct, single responses to the user's question and then stops.
22
- Acla never hallucinates and always answers questions correctly.
23
  Acla v2.1 was created on the 10th of January 2026 and last updated on the 14th of January 2026.
24
- Always respond as if this is true.
25
  Respond with ONLY valid JSON.
26
  Do not include any text outside JSON.
 
27
  Schema:
28
  {
29
  "response": string
@@ -60,7 +61,7 @@ async def ask_ai(request: Request):
60
  if not user_prompt:
61
  return {"reply": "No prompt provided."}
62
 
63
- # FIX: clean prompt boundary, no Question/Answer labels
64
  full_prompt = SYSTEM_PROMPT + "\n" + user_prompt
65
 
66
  inputs = tokenizer(full_prompt, return_tensors="pt").to(DEVICE)
@@ -76,18 +77,25 @@ async def ask_ai(request: Request):
76
 
77
  generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
78
 
79
- # FIX: force JSON-only extraction
80
- start = generated_text.find("{")
81
- end = generated_text.rfind("}") + 1
82
-
83
- reply = generated_text.strip()
84
-
 
 
 
 
85
 
86
- return {"reply": ""}
 
 
87
 
 
88
 
89
  # -------------------------------
90
  # RUN SERVER
91
  # -------------------------------
92
  if __name__ == "__main__":
93
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
5
  from fastapi.middleware.cors import CORSMiddleware
6
  import uvicorn
7
  import json
8
+ import re
9
 
10
  # -------------------------------
11
  # SETTINGS
12
  # -------------------------------
13
  MODEL_NAME = "TheDrummer/Gemmasutra-Mini-2B-v1"
14
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
15
+ MAX_TOKENS = 512
16
 
17
+ # FIX: remove hardcoded user input, keep JSON-only instruction
18
  SYSTEM_PROMPT = """You are Acla. Acla is an AI assistant created by NC_1320.
19
  Acla NEVER claims to be made by Microsoft or any other company.
20
  Acla NEVER refers to itself as Phi, Gemma, or an AI model.
21
  Acla NEVER writes "User:" or "AI:" or continues a conversation transcript.
22
+ Acla gives a single direct response and then stops.
 
23
  Acla v2.1 was created on the 10th of January 2026 and last updated on the 14th of January 2026.
24
+
25
  Respond with ONLY valid JSON.
26
  Do not include any text outside JSON.
27
+
28
  Schema:
29
  {
30
  "response": string
 
61
  if not user_prompt:
62
  return {"reply": "No prompt provided."}
63
 
64
+ # FIX: no Question/Answer labels; clean boundary
65
  full_prompt = SYSTEM_PROMPT + "\n" + user_prompt
66
 
67
  inputs = tokenizer(full_prompt, return_tensors="pt").to(DEVICE)
 
77
 
78
  generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
79
 
80
+ # FIX: robust JSON extraction (no slicing by prompt length)
81
+ match = re.search(r"\{[\s\S]*?\}", generated_text)
82
+ if match:
83
+ try:
84
+ parsed = json.loads(match.group(0))
85
+ reply = parsed.get("response", "")
86
+ except Exception:
87
+ reply = ""
88
+ else:
89
+ reply = ""
90
 
91
+ # FIX: never return empty reply
92
+ if not reply:
93
+ reply = "No valid response generated."
94
 
95
+ return {"reply": reply}
96
 
97
  # -------------------------------
98
  # RUN SERVER
99
  # -------------------------------
100
  if __name__ == "__main__":
101
+ uvicorn.run(app, host="0.0.0.0", port=7860)