Spaces:
Running
Running
Update backend/agents/adventure_agent.py
Browse files
backend/agents/adventure_agent.py
CHANGED
|
@@ -364,24 +364,36 @@ class AdventureAgent:
|
|
| 364 |
raise RuntimeError("GGUF model unavailable")
|
| 365 |
|
| 366 |
print("[AdventureAgent] Running GGUF inference on CPU...")
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
"
|
| 373 |
-
"
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
print("[AdventureAgent] GGUF inference complete.")
|
| 386 |
return text
|
| 387 |
|
|
|
|
| 364 |
raise RuntimeError("GGUF model unavailable")
|
| 365 |
|
| 366 |
print("[AdventureAgent] Running GGUF inference on CPU...")
|
| 367 |
+
response_chunks = []
|
| 368 |
+
try:
|
| 369 |
+
for chunk in llm.create_chat_completion(
|
| 370 |
+
messages=[
|
| 371 |
+
{
|
| 372 |
+
"role": "system",
|
| 373 |
+
"content": (
|
| 374 |
+
"You are the Lead Game Designer for CodeCracker, an educational "
|
| 375 |
+
"coding game for ages 10-14. Always respond with valid JSON only. "
|
| 376 |
+
"No markdown, no explanation."
|
| 377 |
+
),
|
| 378 |
+
},
|
| 379 |
+
{"role": "user", "content": prompt},
|
| 380 |
+
],
|
| 381 |
+
max_tokens=1500,
|
| 382 |
+
temperature=0.7,
|
| 383 |
+
top_p=0.9,
|
| 384 |
+
stop=["<|im_end|>", "<|endoftext|>"],
|
| 385 |
+
stream=True
|
| 386 |
+
):
|
| 387 |
+
token = chunk["choices"][0]["delta"].get("content", "")
|
| 388 |
+
if token:
|
| 389 |
+
response_chunks.append(token)
|
| 390 |
+
print(token, end="", flush=True)
|
| 391 |
+
except Exception as e:
|
| 392 |
+
print(f"\n[AdventureAgent] GGUF streaming error: {e}")
|
| 393 |
+
raise e
|
| 394 |
+
|
| 395 |
+
print() # New line after streaming completes
|
| 396 |
+
text = "".join(response_chunks)
|
| 397 |
print("[AdventureAgent] GGUF inference complete.")
|
| 398 |
return text
|
| 399 |
|