helloperson123 committed on
Commit
bfc4c59
·
verified ·
1 Parent(s): a450cd9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -24
app.py CHANGED
@@ -12,23 +12,17 @@ import re
12
  # -------------------------------
13
  MODEL_NAME = "TheDrummer/Gemmasutra-Mini-2B-v1"
14
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
15
- MAX_TOKENS = 512
16
-
17
- # FIX: remove hardcoded user input, keep JSON-only instruction
18
- SYSTEM_PROMPT = """You are Acla. Acla is an AI assistant created by NC_1320.
19
- Acla NEVER claims to be made by Microsoft or any other company.
20
- Acla NEVER refers to itself as Phi, Gemma, or an AI model.
21
- Acla NEVER writes "User:" or "AI:" or continues a conversation transcript.
22
- Acla gives a single direct response and then stops.
23
- Acla v2.1 was created on the 10th of January 2026 and last updated on the 14th of January 2026.
24
-
25
- Respond with ONLY valid JSON.
26
- Do not include any text outside JSON.
27
-
28
- Schema:
29
- {
30
- "response": string
31
- }
32
  """
33
 
34
  # -------------------------------
@@ -61,8 +55,8 @@ async def ask_ai(request: Request):
61
  if not user_prompt:
62
  return {"reply": "No prompt provided."}
63
 
64
- # FIX: no Question/Answer labels; clean boundary
65
- full_prompt = SYSTEM_PROMPT + "\n" + user_prompt
66
 
67
  inputs = tokenizer(full_prompt, return_tensors="pt").to(DEVICE)
68
 
@@ -77,20 +71,29 @@ async def ask_ai(request: Request):
77
 
78
  generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
79
 
80
- # FIX: robust JSON extraction (no slicing by prompt length)
81
- match = re.search(r"\{[\s\S]*?\}", generated_text)
 
 
 
82
  if match:
83
  try:
84
  parsed = json.loads(match.group(0))
85
- reply = parsed.get("response", "")
86
  except Exception:
87
  reply = ""
88
  else:
89
  reply = ""
90
 
91
- # FIX: never return empty reply
 
 
 
 
 
 
92
  if not reply:
93
- reply = "No valid response generated."
94
 
95
  return {"reply": reply}
96
 
 
12
  # -------------------------------
13
  MODEL_NAME = "TheDrummer/Gemmasutra-Mini-2B-v1"
14
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
15
+ MAX_TOKENS = 256
16
+
17
+ # FIX: make JSON preferred, not fragile
18
+ SYSTEM_PROMPT = """You are Acla, an AI assistant created by NC_1320.
19
+ You answer the user's question once and stop.
20
+ Do not write User:, AI:, or continue a conversation.
21
+
22
+ Prefer responding in valid JSON exactly like:
23
+ {"response":"your answer here"}
24
+
25
+ If JSON is not possible, respond with plain text only.
 
 
 
 
 
 
26
  """
27
 
28
  # -------------------------------
 
55
  if not user_prompt:
56
  return {"reply": "No prompt provided."}
57
 
58
+ # FIX: explicit answer anchor
59
+ full_prompt = SYSTEM_PROMPT + "\n\nUser input:\n" + user_prompt + "\n\nResponse:\n"
60
 
61
  inputs = tokenizer(full_prompt, return_tensors="pt").to(DEVICE)
62
 
 
71
 
72
  generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
73
 
74
+ # Remove prompt echo
75
+ text = generated_text.split("Response:", 1)[-1].strip()
76
+
77
+ # Try JSON first
78
+ match = re.search(r"\{[\s\S]*?\}", text)
79
  if match:
80
  try:
81
  parsed = json.loads(match.group(0))
82
+ reply = parsed.get("response", "").strip()
83
  except Exception:
84
  reply = ""
85
  else:
86
  reply = ""
87
 
88
+ # FIX: plain-text fallback
89
+ if not reply:
90
+ for stop in ["User:", "AI:", "Assistant:"]:
91
+ text = text.split(stop)[0]
92
+ reply = text.strip()
93
+
94
+ # FIX: never empty
95
  if not reply:
96
+ reply = "I could not generate a response."
97
 
98
  return {"reply": reply}
99