Spaces:

faaizashiq
/

codecracker-backend

Running

App Files Files Community

codecracker-backend / backend /agents /adventure_agent.py

faaizashiq

Update backend/agents/adventure_agent.py

ac2a30a verified 13 days ago

raw

history blame contribute delete

27.3 kB

	"""
	AdventureAgent — generates Blockly / Time Challenge story arcs.

	Model priority chain (tries each in order, falls to next on failure):
	1. YOUR GGUF fine-tune (faaizashiq/adventure-agent-qwen-gguf)
	→ llama-cpp-python on HF Spaces free CPU (16GB RAM)
	→ Slow first load (~60s), fast after warm (~30s)
	→ FREE FOREVER, uses your actual fine-tuned weights

	2. Gemini 2.0 Flash fallback
	→ Used automatically if GGUF fails or is still loading
	→ Fast (~3-5s), free tier (1500 req/day)
	→ Requires GEMINI_API_KEY

	3. Static fallback levels
	→ Used if both AI paths fail (no internet, no keys, etc.)

	Environment variables (set as HF Space Secrets):
	GEMINI_API_KEY — for fallback path 2
	ADVENTURE_TUNED_MODEL — your HF repo: faaizashiq/adventure-agent-qwen-gguf
	ADVENTURE_GGUF_FILE — specific .gguf filename (auto-detected if blank)
	HF_TOKEN — only needed if your GGUF repo is private
	"""

	import google.generativeai as genai
	import json
	import os
	import random
	import time
	import csv
	import sys
	import threading
	from typing import Dict, Any, Optional, List

	# Raise the csv per-field size cap as far as the platform allows.
	# Passing sys.maxsize directly raises OverflowError where a C long is narrower
	# than Py_ssize_t (e.g. 64-bit Windows), so halve the request until accepted.
	_csv_field_limit = sys.maxsize
	while True:
	    try:
	        csv.field_size_limit(_csv_field_limit)
	        break
	    except OverflowError:
	        _csv_field_limit //= 2

	# Hugging Face dataset id passed to `datasets.load_dataset` by AdventureAgent.
	STUDENTS_DATASET_ID = "majeedkazemi/students-coding-questions-from-ai-assistant"

	# ─────────────────────────────────────────────────────────────────────────────
	# GGUF SINGLETON LOADER
	# Loads once on first request, stays in memory for all subsequent requests.
	# Thread-safe: uses a lock so concurrent requests don't double-load.
	# ─────────────────────────────────────────────────────────────────────────────
	# Cached Llama instance; stays None until a load has succeeded.
	_gguf_llm = None
	# Serialises the one-time load so concurrent callers cannot start it twice.
	_gguf_load_lock = threading.Lock()
	# Flipped to True as soon as a load starts; later calls return the cache as-is.
	_gguf_load_attempted = False
	# Flipped to True when a load attempt ends in an error.
	_gguf_load_failed = False


	# Quantisation tags in order of preference (Q4_K_M first: best quality/speed
	# balance on CPU). Files matching none of them rank last, in listing order.
	_GGUF_QUANT_PREFERENCE = ("q4_k_m", "q4_0", "q5_k_m", "q3_k_m")


	def _pick_gguf_file(gguf_files: List[str]) -> str:
	    """Return the most preferred file from a non-empty list of .gguf names."""

	    def _rank(name: str) -> int:
	        lowered = name.lower()
	        for position, tag in enumerate(_GGUF_QUANT_PREFERENCE):
	            if tag in lowered:
	                return position
	        return len(_GGUF_QUANT_PREFERENCE)

	    # min() keeps the first of several equally ranked names, exactly like a
	    # stable sort followed by [0].
	    return min(gguf_files, key=_rank)


	def _load_gguf_model(repo_id: str, gguf_filename: Optional[str] = None):
	    """
	    Download and load a GGUF model from HF Hub via llama-cpp-python.

	    Args:
	        repo_id: Hugging Face repo that hosts the .gguf file(s).
	        gguf_filename: Exact file to load; when falsy the repo is scanned and
	            the preferred quantisation is chosen automatically.

	    Returns:
	        The cached Llama instance, or None when loading failed. Only the first
	        call performs a load. Every later call returns the cached value
	        immediately, which is still None while that first load is running.
	        Failures are printed, never raised.
	    """
	    global _gguf_llm, _gguf_load_attempted, _gguf_load_failed

	    # Fast path — already loaded, already failed, or load in progress
	    if _gguf_load_attempted:
	        return _gguf_llm

	    with _gguf_load_lock:
	        # Re-check under the lock: another thread may have started meanwhile
	        if _gguf_load_attempted:
	            return _gguf_llm

	        _gguf_load_attempted = True
	        try:
	            # Probe each optional dependency separately so the log names the
	            # package that is actually missing.
	            try:
	                from llama_cpp import Llama  # type: ignore
	            except ImportError:
	                print("[AdventureAgent] ❌ llama-cpp-python not installed — GGUF unavailable. Using Gemini fallback.")
	                _gguf_load_failed = True
	                return None

	            try:
	                from huggingface_hub import hf_hub_download, list_repo_files
	            except ImportError:
	                print("[AdventureAgent] ❌ huggingface_hub not installed — GGUF unavailable. Using Gemini fallback.")
	                _gguf_load_failed = True
	                return None

	            hf_token = os.getenv("HF_TOKEN", "").strip() or None

	            # Auto-detect the best GGUF file if not pinned
	            if not gguf_filename:
	                print(f"[AdventureAgent] Scanning {repo_id} for GGUF files...")
	                gguf_files = [
	                    name
	                    for name in list_repo_files(repo_id, token=hf_token)
	                    if name.endswith(".gguf")
	                ]
	                if not gguf_files:
	                    raise FileNotFoundError(f"No .gguf files found in {repo_id}")

	                gguf_filename = _pick_gguf_file(gguf_files)
	                print(f"[AdventureAgent] Auto-selected GGUF: {gguf_filename}")

	            cache_dir = os.path.join(os.getenv("HF_HOME", "/tmp"), "gguf_models")
	            print(f"[AdventureAgent] Downloading {repo_id}/{gguf_filename} ...")
	            model_path = hf_hub_download(
	                repo_id=repo_id,
	                filename=gguf_filename,
	                token=hf_token,
	                local_dir=cache_dir,
	            )
	            print(f"[AdventureAgent] Loading GGUF from {model_path} ...")

	            _gguf_llm = Llama(
	                model_path=model_path,
	                n_ctx=2048,       # context window
	                n_threads=2,      # HF Spaces free tier: 2 vCPU
	                n_gpu_layers=0,   # CPU only — no GPU on free tier
	                verbose=False,
	            )
	            print("[AdventureAgent] ✅ GGUF model loaded and ready on CPU.")

	        except Exception as e:
	            # Best-effort: record the failure so callers fall back
	            print(f"[AdventureAgent] ❌ GGUF load failed: {e}")
	            _gguf_load_failed = True

	    return _gguf_llm


	def _data_dir() -> str:
	    """Return the (un-normalised) path of the `data` folder beside this module's parent."""
	    module_dir = os.path.dirname(__file__)
	    return os.path.join(module_dir, "..", "data")


	# ─────────────────────────────────────────────────────────────────────────────
	# AGENT
	# ─────────────────────────────────────────────────────────────────────────────
	class AdventureAgent:
	    """
	    Generate five-level story arcs for the "blockly" and "time_challenge" modes.

	    Backends are tried in order: the GGUF fine-tune (when ADVENTURE_TUNED_MODEL
	    names an HF repo), then Gemini (when an API key is available), then the
	    built-in static levels. A backend counts as failed when its call raises
	    or when its reply cannot be parsed as a JSON object.
	    """

	    # Class-level caches: filled once and shared by every instance.
	    _dataset_cache: List[Dict[str, str]] = []
	    _students_cache: List[Dict[str, Any]] = []
	    _few_shot_cache: Optional[Dict[str, Any]] = None

	    # Maximum number of remembered high-rated levels.
	    _MAX_HIGH_RATED_LEVELS = 50

	    def __init__(self, api_key: Optional[str] = None):
	        """
	        Read configuration, create the Gemini client and warm the caches.

	        Args:
	            api_key: Gemini API key; falls back to the GEMINI_API_KEY
	                environment variable when omitted.
	        """
	        self.api_key = api_key or os.getenv("GEMINI_API_KEY")
	        self.tuned_model_env = os.getenv("ADVENTURE_TUNED_MODEL", "").strip()
	        self.gguf_file_env = os.getenv("ADVENTURE_GGUF_FILE", "").strip() or None

	        # An "owner/repo" value selects GGUF; "tunedModels/..." is a Gemini model
	        self.use_gguf = (
	            bool(self.tuned_model_env)
	            and "/" in self.tuned_model_env
	            and not self.tuned_model_env.startswith("tunedModels/")
	        )

	        # Gemini client — used as primary (if no GGUF) or fallback (if GGUF fails)
	        self.gemini_client = None
	        gemini_model_id = "gemini-2.0-flash"
	        if self.tuned_model_env.startswith("tunedModels/"):
	            gemini_model_id = self.tuned_model_env  # Gemini tuned model

	        if self.api_key:
	            genai.configure(api_key=self.api_key)
	            self.gemini_client = genai.GenerativeModel(gemini_model_id)

	        if self.use_gguf:
	            print(f"[AdventureAgent] Primary: GGUF ({self.tuned_model_env})")
	            print(f"[AdventureAgent] Fallback: {'Gemini ' + gemini_model_id if self.gemini_client else 'Static levels'}")
	            # Warm up GGUF model in a background thread so construction returns at once
	            threading.Thread(
	                target=_load_gguf_model,
	                args=(self.tuned_model_env, self.gguf_file_env),
	                daemon=True,
	            ).start()
	        elif self.gemini_client:
	            print(f"[AdventureAgent] Primary: Gemini ({gemini_model_id})")
	        else:
	            print("[AdventureAgent] WARNING: No model configured — static fallback only.")

	        # Per-instance response cache: key -> {"data": ..., "timestamp": ...}
	        self.cache: Dict[str, Any] = {}
	        self.cache_duration = 3600  # seconds
	        self.high_rated_levels: List[Dict] = []

	        self._load_dataset_lazy()
	        self._load_students_dataset_lazy()
	        self._load_few_shot_examples()

	    # ─────────────────────────────────────────────────────────────────────────
	    # DATA LOADING
	    # ─────────────────────────────────────────────────────────────────────────
	    def _load_dataset_lazy(self):
	        """Fill the shared train.csv cache once, reading at most 500 rows."""
	        if AdventureAgent._dataset_cache:
	            return
	        file_path = os.path.join(_data_dir(), 'train.csv')
	        if not os.path.exists(file_path):
	            return
	        try:
	            with open(file_path, "r", encoding="utf-8") as f:
	                reader = csv.DictReader(f)
	                for i, row in enumerate(reader):
	                    if i >= 500:
	                        break
	                    # Rows without a question are useless as prompt context
	                    if row.get('question'):
	                        AdventureAgent._dataset_cache.append(row)
	            print(f"[AdventureAgent] Loaded {len(AdventureAgent._dataset_cache)} rows from train.csv")
	        except Exception as e:
	            print(f"[AdventureAgent] train.csv load error: {e}")

	    def _load_students_dataset_lazy(self):
	        """
	        Fill the shared student-question cache once.

	        The local JSONL file is preferred. The HF dataset is downloaded only
	        when ADVENTURE_LOAD_HF_DATASET is "1", "true" or "yes" and the local
	        file is missing or failed to load. At most 800 HF rows are read.
	        """
	        if AdventureAgent._students_cache:
	            return
	        jsonl_path = os.path.join(_data_dir(), 'students_coding_questions.jsonl')
	        if os.path.isfile(jsonl_path):
	            try:
	                with open(jsonl_path, "r", encoding="utf-8") as f:
	                    for line in f:
	                        line = line.strip()
	                        if line:
	                            AdventureAgent._students_cache.append(json.loads(line))
	                print(f"[AdventureAgent] Loaded {len(AdventureAgent._students_cache)} CodeAid rows")
	                return
	            except Exception as e:
	                print(f"[AdventureAgent] students jsonl error: {e}")

	        if os.getenv("ADVENTURE_LOAD_HF_DATASET", "").lower() in ("1", "true", "yes"):
	            try:
	                from datasets import load_dataset
	                ds = load_dataset(STUDENTS_DATASET_ID, split="train")
	                for i, row in enumerate(ds):
	                    if i >= 800:
	                        break
	                    stem = (row.get("input_task_description")
	                            or row.get("input_intention")
	                            or row.get("input_question"))
	                    # Skip placeholder strings as well as genuinely empty values
	                    if stem and str(stem).strip().lower() not in ("null", "none", ""):
	                        AdventureAgent._students_cache.append({
	                            "problem_stem": str(stem)[:500],
	                            "feature_type": row.get("feature_type"),
	                        })
	                print(f"[AdventureAgent] Loaded {len(AdventureAgent._students_cache)} rows from HF")
	            except Exception as e:
	                print(f"[AdventureAgent] HF dataset skip: {e}")

	    def _load_few_shot_examples(self):
	        """Load the few-shot JSON once; a missing or unreadable file yields {}."""
	        if AdventureAgent._few_shot_cache is not None:
	            return
	        path = os.path.join(_data_dir(), 'adventure_few_shot_examples.json')
	        if os.path.isfile(path):
	            try:
	                with open(path, "r", encoding="utf-8") as f:
	                    AdventureAgent._few_shot_cache = json.load(f)
	            except Exception as e:
	                print(f"[AdventureAgent] few-shot load error: {e}")
	                AdventureAgent._few_shot_cache = {}
	        else:
	            AdventureAgent._few_shot_cache = {}

	    # ─────────────────────────────────────────────────────────────────────────
	    # PROMPT HELPERS
	    # ─────────────────────────────────────────────────────────────────────────
	    def _pick_student_problem(self) -> Optional[Dict[str, Any]]:
	        """Return a random cached student problem, or None when the cache is empty."""
	        if not AdventureAgent._students_cache:
	            return None
	        # Rows that carry a task description are preferred when any exist
	        weighted = [r for r in AdventureAgent._students_cache if r.get("task_description")]
	        pool = weighted if weighted else AdventureAgent._students_cache
	        row = random.choice(pool)
	        return {
	            "question": row.get("problem_stem") or row.get("task_description") or row.get("question", ""),
	            "feature_type": row.get("feature_type", ""),
	        }

	    def _few_shot_block(self, mode: str, difficulty: str) -> str:
	        """Return the few-shot prompt section for mode/difficulty, or "" if none."""
	        if not AdventureAgent._few_shot_cache:
	            return ""
	        examples = (AdventureAgent._few_shot_cache.get(mode) or {}).get(difficulty)
	        if not examples:
	            return ""
	        return (
	            "\nQUALITY REFERENCE (match this tone and JSON shape — do not copy verbatim):\n"
	            f"{json.dumps(examples, indent=2)[:3000]}\n"
	        )

	    def learn_from_feedback(self, level_data: Dict[str, Any], rating: int,
	                            developer_feedback: Optional[str] = None):
	        """
	        Remember a level as a quality example for later Gemini prompts.

	        A level is kept when it is rated 5 or comes with non-blank developer
	        feedback. The feedback is stored on `level_data` itself (the caller's
	        dict is modified) under "CRITICAL_DEVELOPER_TUNING_NOTE". Only the
	        newest `_MAX_HIGH_RATED_LEVELS` entries are retained.
	        """
	        note = str(developer_feedback).strip() if developer_feedback else ""
	        if note:
	            level_data["CRITICAL_DEVELOPER_TUNING_NOTE"] = note
	        if rating == 5 or note:
	            # Make room first so the list never exceeds the cap after appending
	            while len(self.high_rated_levels) >= self._MAX_HIGH_RATED_LEVELS:
	                self.high_rated_levels.pop(0)
	            self.high_rated_levels.append(level_data)

	    def _build_prompt(self, mode: str, difficulty: str,
	                      dataset_problem: Optional[Dict[str, str]] = None,
	                      is_gguf: bool = False) -> str:
	        """
	        Assemble the generation prompt.

	        Args:
	            mode: "blockly" or "time_challenge"; other values get no level schema.
	            difficulty: "Easy", "Intermediate", or anything else (treated as expert).
	            dataset_problem: Optional context with "question"/"problem_stem"
	                and "feature_type" keys.
	            is_gguf: When True the few-shot block and developer-approved
	                examples are left out.

	        Returns:
	            The full prompt text.
	        """
	        if difficulty == "Easy":
	            diff_rules = "EASY: Simple If/Else or basic sequencing. No complex loops."
	            concepts = '["if", "print"]'
	        elif difficulty == "Intermediate":
	            diff_rules = "INTERMEDIATE: MUST use a loop (For/While) or basic Array iteration."
	            concepts = '["loop", "variable"]'
	        else:
	            diff_rules = "EXPERT: Nested loops, complex logic, or algorithmic thinking."
	            concepts = '["loop", "nested_loop"]'

	        dataset_block = ""
	        if dataset_problem:
	            q = dataset_problem.get('question') or dataset_problem.get('problem_stem', '')
	            ft = dataset_problem.get('feature_type', '')
	            dataset_block = (
	                f"\nREAL STUDENT PROBLEM (adapt for ages 10-14, sci-fi story):\n"
	                f"Type: {ft}\nContext: {q}\n"
	                "Do NOT use C pointers, malloc, or university-level topics.\n"
	            )

	        prompt = (
	            "You are the Lead Game Designer for 'CodeCracker', an educational coding game for ages 10-14.\n"
	            "Generate a UNIQUE STORY ARC of EXACTLY 5 interconnected game levels.\n\n"
	            f"Mode: {mode} | Difficulty: {difficulty}\n"
	            f"{diff_rules}\n"
	            f"{dataset_block}"
	            "\nSCAFFOLDING RULES:\n"
	            "- Never give the full solution code.\n"
	            "- Teach one concept per level; build across the arc.\n"
	            "- Hints guide structure first, logic second — never paste the answer.\n"
	        )

	        if not is_gguf:
	            prompt += f"{self._few_shot_block(mode, difficulty)}"

	        prompt += (
	            "\nOUTPUT: RAW JSON ONLY. No markdown. No explanation.\n"
	            'Top-level keys: "story_arc_title", "levels" (array of exactly 5).\n'
	        )

	        if mode == "blockly":
	            prompt += (
	                f'\nEach level: {{"type":"blockly","level_id":"id","title":"...","story":"...",'
	                f'"concept_tutorial":"...","problem":"...","toolbox_categories":["Math","Loops","Variables","Text","Logic"],'
	                f'"initial_code":"","validation_rules":{{"required_concepts":{concepts},"expected_output":"..."}},'
	                f'"hint_1":"...","hint_2":"..."}}'
	            )
	        elif mode == "time_challenge":
	            prompt += (
	                '\nEach level: {"type":"time_challenge","level_id":"id","title":"...","story":"...",'
	                '"concept_tutorial":"...","timer_seconds":60,"buggy_code":"...","task":"...","solution_patch":"..."}'
	            )

	        if not is_gguf and self.high_rated_levels:
	            prompt += (
	                f"\n\nDEVELOPER-APPROVED EXAMPLES (match this quality):\n"
	                f"{json.dumps(self.high_rated_levels[-2:], indent=2)[:1500]}"
	            )

	        return prompt

	    # ─────────────────────────────────────────────────────────────────────────
	    # GGUF INFERENCE
	    # ─────────────────────────────────────────────────────────────────────────
	    def _call_gguf(self, prompt: str) -> str:
	        """
	        Run the prompt through the local GGUF model and return the raw reply.

	        Tokens are streamed and echoed to stdout as they arrive.

	        Raises:
	            RuntimeError: when the model is not available.
	            Exception: any streaming error is logged and re-raised.
	        """
	        llm = _load_gguf_model(self.tuned_model_env, self.gguf_file_env)
	        if llm is None:
	            raise RuntimeError("GGUF model unavailable")

	        print("[AdventureAgent] Running GGUF inference on CPU...")
	        response_chunks = []
	        try:
	            for chunk in llm.create_chat_completion(
	                messages=[
	                    {
	                        "role": "system",
	                        "content": (
	                            "You are the Lead Game Designer for CodeCracker, an educational "
	                            "coding game for ages 10-14. Always respond with valid JSON only. "
	                            "No markdown, no explanation."
	                        ),
	                    },
	                    {"role": "user", "content": prompt},
	                ],
	                max_tokens=1500,
	                temperature=0.7,
	                top_p=0.9,
	                # Stop markers must match the tokens literally — no backslashes
	                stop=["<|im_end|>", "<|endoftext|>"],
	                stream=True,
	            ):
	                token = chunk["choices"][0]["delta"].get("content", "")
	                if token:
	                    response_chunks.append(token)
	                    print(token, end="", flush=True)
	        except Exception as e:
	            print(f"\n[AdventureAgent] GGUF streaming error: {e}")
	            raise

	        print()  # New line after streaming completes
	        text = "".join(response_chunks)
	        print("[AdventureAgent] GGUF inference complete.")
	        return text

	    # ─────────────────────────────────────────────────────────────────────────
	    # GEMINI INFERENCE
	    # ─────────────────────────────────────────────────────────────────────────
	    def _call_gemini(self, prompt: str) -> str:
	        """
	        Send the prompt to Gemini and return the reply text ("" when empty).

	        Raises:
	            RuntimeError: when no Gemini client was configured.
	        """
	        if not self.gemini_client:
	            raise RuntimeError("Gemini client not configured (no API key)")
	        print("[AdventureAgent] Calling Gemini API...")
	        response = self.gemini_client.generate_content(prompt)
	        return response.text or ""

	    # ─────────────────────────────────────────────────────────────────────────
	    # JSON EXTRACTION
	    # ─────────────────────────────────────────────────────────────────────────
	    @staticmethod
	    def _extract_json(text: str) -> Dict[str, Any]:
	        """
	        Strip markdown fences and extract the first JSON object.

	        Text before the first "{" and after the end of that object is ignored.

	        Raises:
	            ValueError: when no JSON object is present or it is malformed
	                (json.JSONDecodeError is a ValueError subclass).
	        """
	        if "```json" in text:
	            text = text.split("```json")[1].split("```")[0]
	        elif "```" in text:
	            text = text.split("```")[1].split("```")[0]

	        start = text.find("{")
	        if start == -1:
	            raise ValueError("No JSON object found in model output")
	        # raw_decode stops at the end of the first object instead of failing
	        # on whatever the model appended after it.
	        parsed, _end = json.JSONDecoder().raw_decode(text[start:])
	        return parsed

	    # ─────────────────────────────────────────────────────────────────────────
	    # PUBLIC ENTRY POINT
	    # ─────────────────────────────────────────────────────────────────────────
	    def generate_level(self, mode: str, difficulty: str,
	                       specific_topic: Optional[str] = None) -> Dict[str, Any]:
	        """
	        Return a story arc for the given mode and difficulty.

	        Args:
	            mode: "blockly" or "time_challenge".
	            difficulty: "Tutorial"/"Onboarding" return the fixed onboarding
	                level; any other value goes to the model backends.
	            specific_topic: Optional topic used as prompt context in place of
	                a random dataset problem.

	        Returns:
	            The parsed arc from the first backend that yields valid JSON, or
	            the static fallback arc when every backend fails. Never raises
	            for backend errors; they are printed with a traceback.
	        """
	        import traceback

	        if difficulty in ("Tutorial", "Onboarding"):
	            return self._get_onboarding_level(mode)

	        # Pick a student problem for context
	        dataset_prob: Optional[Dict[str, Any]] = None
	        if specific_topic:
	            dataset_prob = {"question": specific_topic}
	        elif mode == "blockly":
	            dataset_prob = self._pick_student_problem()
	            if not dataset_prob and AdventureAgent._dataset_cache:
	                row = random.choice(AdventureAgent._dataset_cache)
	                dataset_prob = {"question": (row.get("question") or "")[:600]}

	        # Cache key — avoids hammering the model for identical requests.
	        # `or ""` guards against a dataset row whose question is None.
	        topic_part = specific_topic or ((dataset_prob or {}).get("question") or "")[:80]
	        cache_key = f"{mode}_{difficulty}_{topic_part}"

	        entry = self.cache.get(cache_key)
	        if entry is not None:
	            if time.time() - entry['timestamp'] < self.cache_duration:
	                print("[AdventureAgent] Serving from cache.")
	                return entry['data']
	            # Expired: drop it so stale entries do not pile up
	            self.cache.pop(cache_key, None)

	        # ── Try GGUF first, then Gemini, then static fallback ──────────────
	        # Each entry: (label, is_gguf prompt flag, caller, what happens next)
	        backends = []
	        if self.use_gguf:
	            backends.append(("GGUF", True, self._call_gguf, "Trying Gemini fallback..."))
	        backends.append(("Gemini", False, self._call_gemini, "Using static fallback."))

	        for label, is_gguf, call_backend, next_step in backends:
	            try:
	                prompt = self._build_prompt(mode, difficulty, dataset_prob, is_gguf=is_gguf)
	                text = call_backend(prompt)
	            except Exception as e:
	                print(f"[AdventureAgent] {label} failed: {e}. {next_step}")
	                traceback.print_exc()
	                continue

	            try:
	                level_data = self._extract_json(text)
	            except Exception as e:
	                # An unparsable reply counts as a backend failure, so the
	                # next backend still gets its turn.
	                print(f"[AdventureAgent] JSON parse failed ({label}): {e}. {next_step}")
	                traceback.print_exc()
	                continue

	            self.cache[cache_key] = {"data": level_data, "timestamp": time.time()}
	            return level_data

	        return self._get_fallback_level(mode, difficulty)

	    # ─────────────────────────────────────────────────────────────────────────
	    # STATIC LEVELS
	    # ─────────────────────────────────────────────────────────────────────────
	    def _get_onboarding_level(self, mode: str) -> Dict[str, Any]:
	        """Return the fixed single-level tutorial arc (`mode` is not consulted)."""
	        return {
	            "story_arc_title": "Welcome to CodeCracker!",
	            "levels": [{
	                "type": "blockly",
	                "level_id": "tutorial_001",
	                "title": "Welcome to Coding!",
	                "story": "Let's learn how to speak to the computer. We need to say 'Hello'.",
	                "concept_tutorial": "Drag & Drop: You build code by snapping blocks together, just like LEGOs.",
	                "problem": "Print 'Hello World'.",
	                "toolbox_categories": ["Text", "Variables"],
	                "initial_code": "",
	                "validation_rules": {
	                    "required_concepts": ["print"],
	                    "expected_output": "Hello World",
	                },
	                "hint_1": "Find the 'print' block in the Text category.",
	                "hint_2": "Type 'Hello World' exactly — capital H and W.",
	            }],
	        }

	    def _get_fallback_level(self, mode: str, difficulty: str = "Easy") -> Dict[str, Any]:
	        """
	        Return a hard-coded five-level arc for `mode`.

	        Unknown modes get an arc with no levels. `difficulty` is accepted for
	        call-site symmetry but does not change the result.
	        """
	        if mode == "blockly":
	            return {
	                "story_arc_title": "Logic Repair Arc",
	                "levels": [
	                    {
	                        "type": "blockly",
	                        # Id number matches the part number in the title
	                        "level_id": f"fallback_00{i}",
	                        "title": f"Logic Repair Part {i}",
	                        "story": "The automated systems are down. We need manual logic configuration.",
	                        "concept_tutorial": "Loops: Use loops to repeat actions without writing code over and over.",
	                        "problem": "Create a loop that counts to 3.",
	                        "toolbox_categories": ["Loops", "Math", "Variables", "Text"],
	                        "initial_code": "",
	                        "validation_rules": {
	                            "required_concepts": ["loop"],
	                            "expected_output": "1\n2\n3",
	                        },
	                        "hint_1": "Use the 'repeat' block.",
	                        "hint_2": "Set the number to 3.",
	                    }
	                    for i in range(1, 6)
	                ],
	            }
	        if mode == "time_challenge":
	            return {
	                "story_arc_title": "System Reboot Arc",
	                "levels": [
	                    {
	                        "type": "time_challenge",
	                        "level_id": f"fallback_tc_{i}",
	                        "title": f"System Reboot Part {i}",
	                        "story": "Critical Error! Debug the startup sequence immediately.",
	                        "concept_tutorial": "Syntax: Code must follow strict grammar rules.",
	                        "timer_seconds": 45,
	                        "buggy_code": "print('System Ready'\nstart_engine()",
	                        "task": "Fix the syntax error.",
	                        "solution_patch": "print('System Ready')\nstart_engine()",
	                    }
	                    for i in range(1, 6)
	                ],
	            }
	        return {"story_arc_title": "Error Arc", "levels": []}


	if __name__ == "__main__":
	    # Manual smoke test: build an agent, then dump one Easy blockly arc.
	    demo_agent = AdventureAgent()
	    print("Testing Adventure Generation...")
	    sample_arc = demo_agent.generate_level("blockly", "Easy")
	    print(json.dumps(sample_arc, indent=2))