faaizashiq committed on
Commit
ac2a30a
·
verified ·
1 Parent(s): 0f051e5

Update backend/agents/adventure_agent.py

Browse files
Files changed (1) hide show
  1. backend/agents/adventure_agent.py +30 -18
backend/agents/adventure_agent.py CHANGED
@@ -364,24 +364,36 @@ class AdventureAgent:
364
  raise RuntimeError("GGUF model unavailable")
365
 
366
  print("[AdventureAgent] Running GGUF inference on CPU...")
367
- result = llm.create_chat_completion(
368
- messages=[
369
- {
370
- "role": "system",
371
- "content": (
372
- "You are the Lead Game Designer for CodeCracker, an educational "
373
- "coding game for ages 10-14. Always respond with valid JSON only. "
374
- "No markdown, no explanation."
375
- ),
376
- },
377
- {"role": "user", "content": prompt},
378
- ],
379
- max_tokens=1500,
380
- temperature=0.7,
381
- top_p=0.9,
382
- stop=["<|im_end|>", "<|endoftext|>"],
383
- )
384
- text = result["choices"][0]["message"]["content"]
 
 
 
 
 
 
 
 
 
 
 
 
385
  print("[AdventureAgent] GGUF inference complete.")
386
  return text
387
 
 
364
  raise RuntimeError("GGUF model unavailable")
365
 
366
  print("[AdventureAgent] Running GGUF inference on CPU...")
367
+ response_chunks = []
368
+ try:
369
+ for chunk in llm.create_chat_completion(
370
+ messages=[
371
+ {
372
+ "role": "system",
373
+ "content": (
374
+ "You are the Lead Game Designer for CodeCracker, an educational "
375
+ "coding game for ages 10-14. Always respond with valid JSON only. "
376
+ "No markdown, no explanation."
377
+ ),
378
+ },
379
+ {"role": "user", "content": prompt},
380
+ ],
381
+ max_tokens=1500,
382
+ temperature=0.7,
383
+ top_p=0.9,
384
+ stop=["<|im_end|>", "<|endoftext|>"],
385
+ stream=True
386
+ ):
387
+ token = chunk["choices"][0]["delta"].get("content", "")
388
+ if token:
389
+ response_chunks.append(token)
390
+ print(token, end="", flush=True)
391
+ except Exception as e:
392
+ print(f"\n[AdventureAgent] GGUF streaming error: {e}")
393
+ raise e
394
+
395
+ print() # New line after streaming completes
396
+ text = "".join(response_chunks)
397
  print("[AdventureAgent] GGUF inference complete.")
398
  return text
399