helloperson123 committed on
Commit
bfc4c59
·
verified ·
1 Parent(s): a450cd9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -24
app.py CHANGED
@@ -12,23 +12,17 @@ import re
12
  # -------------------------------
13
  MODEL_NAME = "TheDrummer/Gemmasutra-Mini-2B-v1"
14
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
15
- MAX_TOKENS = 512
16
-
17
- # FIX: remove hardcoded user input, keep JSON-only instruction
18
- SYSTEM_PROMPT = """You are Acla. Acla is an AI assistant created by NC_1320.
19
- Acla NEVER claims to be made by Microsoft or any other company.
20
- Acla NEVER refers to itself as Phi, Gemma, or an AI model.
21
- Acla NEVER writes "User:" or "AI:" or continues a conversation transcript.
22
- Acla gives a single direct response and then stops.
23
- Acla v2.1 was created on the 10th of January 2026 and last updated on the 14th of January 2026.
24
-
25
- Respond with ONLY valid JSON.
26
- Do not include any text outside JSON.
27
-
28
- Schema:
29
- {
30
- "response": string
31
- }
32
  """
33
 
34
  # -------------------------------
@@ -61,8 +55,8 @@ async def ask_ai(request: Request):
61
  if not user_prompt:
62
  return {"reply": "No prompt provided."}
63
 
64
- # FIX: no Question/Answer labels; clean boundary
65
- full_prompt = SYSTEM_PROMPT + "\n" + user_prompt
66
 
67
  inputs = tokenizer(full_prompt, return_tensors="pt").to(DEVICE)
68
 
@@ -77,20 +71,29 @@ async def ask_ai(request: Request):
77
 
78
  generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
79
 
80
- # FIX: robust JSON extraction (no slicing by prompt length)
81
- match = re.search(r"\{[\s\S]*?\}", generated_text)
 
 
 
82
  if match:
83
  try:
84
  parsed = json.loads(match.group(0))
85
- reply = parsed.get("response", "")
86
  except Exception:
87
  reply = ""
88
  else:
89
  reply = ""
90
 
91
- # FIX: never return empty reply
 
 
 
 
 
 
92
  if not reply:
93
- reply = "No valid response generated."
94
 
95
  return {"reply": reply}
96
 
 
12
  # -------------------------------
13
  MODEL_NAME = "TheDrummer/Gemmasutra-Mini-2B-v1"
14
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
15
+ MAX_TOKENS = 256
16
+
17
+ # FIX: make JSON preferred, not fragile
18
+ SYSTEM_PROMPT = """You are Acla, an AI assistant created by NC_1320.
19
+ You answer the user's question once and stop.
20
+ Do not write User:, AI:, or continue a conversation.
21
+
22
+ Prefer responding in valid JSON exactly like:
23
+ {"response":"your answer here"}
24
+
25
+ If JSON is not possible, respond with plain text only.
 
 
 
 
 
 
26
  """
27
 
28
  # -------------------------------
 
55
  if not user_prompt:
56
  return {"reply": "No prompt provided."}
57
 
58
+ # FIX: explicit answer anchor
59
+ full_prompt = SYSTEM_PROMPT + "\n\nUser input:\n" + user_prompt + "\n\nResponse:\n"
60
 
61
  inputs = tokenizer(full_prompt, return_tensors="pt").to(DEVICE)
62
 
 
71
 
72
  generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
73
 
74
+ # Remove prompt echo
75
+ text = generated_text.split("Response:", 1)[-1].strip()
76
+
77
+ # Try JSON first
78
+ match = re.search(r"\{[\s\S]*?\}", text)
79
  if match:
80
  try:
81
  parsed = json.loads(match.group(0))
82
+ reply = parsed.get("response", "").strip()
83
  except Exception:
84
  reply = ""
85
  else:
86
  reply = ""
87
 
88
+ # FIX: plain-text fallback
89
+ if not reply:
90
+ for stop in ["User:", "AI:", "Assistant:"]:
91
+ text = text.split(stop)[0]
92
+ reply = text.strip()
93
+
94
+ # FIX: never empty
95
  if not reply:
96
+ reply = "I could not generate a response."
97
 
98
  return {"reply": reply}
99