Spaces:

faaizashiq
/

codecracker-backend

Running

faaizashiq committed 12 days ago

Commit

ac2a30a

verified ·

1 Parent(s): 0f051e5

Update backend/agents/adventure_agent.py

Files changed (1) hide show

backend/agents/adventure_agent.py CHANGED Viewed

@@ -364,24 +364,36 @@ class AdventureAgent:
             raise RuntimeError("GGUF model unavailable")
         print("[AdventureAgent] Running GGUF inference on CPU...")
-        result = llm.create_chat_completion(
-            messages=[
-                {
-                    "role": "system",
-                    "content": (
-                        "You are the Lead Game Designer for CodeCracker, an educational "
-                        "coding game for ages 10-14. Always respond with valid JSON only. "
-                        "No markdown, no explanation."
-                    ),
-                },
-                {"role": "user", "content": prompt},
-            ],
-            max_tokens=1500,
-            temperature=0.7,
-            top_p=0.9,
-            stop=["<|im_end|>", "<|endoftext|>"],
-        )
-        text = result["choices"][0]["message"]["content"]
         print("[AdventureAgent] GGUF inference complete.")
         return text

             raise RuntimeError("GGUF model unavailable")
         print("[AdventureAgent] Running GGUF inference on CPU...")
+        response_chunks = []
+        try:
+            for chunk in llm.create_chat_completion(
+                messages=[
+                    {
+                        "role": "system",
+                        "content": (
+                            "You are the Lead Game Designer for CodeCracker, an educational "
+                            "coding game for ages 10-14. Always respond with valid JSON only. "
+                            "No markdown, no explanation."
+                        ),
+                    },
+                    {"role": "user", "content": prompt},
+                ],
+                max_tokens=1500,
+                temperature=0.7,
+                top_p=0.9,
+                stop=["<|im_end|>", "<|endoftext|>"],
+                stream=True
+            ):
+                token = chunk["choices"][0]["delta"].get("content", "")
+                if token:
+                    response_chunks.append(token)
+                    print(token, end="", flush=True)
+        except Exception as e:
+            print(f"\n[AdventureAgent] GGUF streaming error: {e}")
+            raise e
+        print()  # New line after streaming completes
+        text = "".join(response_chunks)
         print("[AdventureAgent] GGUF inference complete.")
         return text