""" AdventureAgent — generates Blockly / Time Challenge story arcs. Model priority chain (tries each in order, falls to next on failure): 1. YOUR GGUF fine-tune (faaizashiq/adventure-agent-qwen-gguf) → llama-cpp-python on HF Spaces free CPU (16GB RAM) → Slow first load (~60s), fast after warm (~30s) → FREE FOREVER, uses your actual fine-tuned weights 2. Gemini 2.0 Flash fallback → Used automatically if GGUF fails or is still loading → Fast (~3-5s), free tier (1500 req/day) → Requires GEMINI_API_KEY 3. Static fallback levels → Used if both AI paths fail (no internet, no keys, etc.) Environment variables (set as HF Space Secrets): GEMINI_API_KEY — for fallback path 2 ADVENTURE_TUNED_MODEL — your HF repo: faaizashiq/adventure-agent-qwen-gguf ADVENTURE_GGUF_FILE — specific .gguf filename (auto-detected if blank) HF_TOKEN — only needed if your GGUF repo is private """ import google.generativeai as genai import json import os import random import time import csv import sys import threading from typing import Dict, Any, Optional, List csv.field_size_limit(sys.maxsize) STUDENTS_DATASET_ID = "majeedkazemi/students-coding-questions-from-ai-assistant" # ───────────────────────────────────────────────────────────────────────────── # GGUF SINGLETON LOADER # Loads once on first request, stays in memory for all subsequent requests. # Thread-safe: uses a lock so concurrent requests don't double-load. # ───────────────────────────────────────────────────────────────────────────── _gguf_llm = None _gguf_load_lock = threading.Lock() _gguf_load_attempted = False _gguf_load_failed = False def _load_gguf_model(repo_id: str, gguf_filename: Optional[str] = None): """ Download and load a GGUF model from HF Hub via llama-cpp-python. Returns the Llama instance, or None if loading fails. Subsequent calls return the cached instance immediately. """ global _gguf_llm, _gguf_load_attempted, _gguf_load_failed # Fast path — already loaded or already failed if _gguf_load_attempted: return _gguf_llm with _gguf_load_lock: # Double-check inside lock if _gguf_load_attempted: return _gguf_llm _gguf_load_attempted = True try: # Check if llama-cpp-python is installed before attempting anything try: from llama_cpp import Llama # type: ignore except ImportError: print("[AdventureAgent] ❌ llama-cpp-python not installed — GGUF unavailable. Using Gemini fallback.") _gguf_load_failed = True return None from huggingface_hub import hf_hub_download, list_repo_files hf_token = os.getenv("HF_TOKEN", "").strip() or None # Auto-detect the best GGUF file if not pinned if not gguf_filename: print(f"[AdventureAgent] Scanning {repo_id} for GGUF files...") all_files = list(list_repo_files(repo_id, token=hf_token)) gguf_files = [f for f in all_files if f.endswith(".gguf")] if not gguf_files: raise FileNotFoundError(f"No .gguf files found in {repo_id}") # Prefer Q4_K_M — best quality/speed balance on CPU # Then Q4_0, then Q5_K_M, then whatever is smallest def _rank(name: str) -> int: n = name.lower() if "q4_k_m" in n: return 0 if "q4_0" in n: return 1 if "q5_k_m" in n: return 2 if "q3_k_m" in n: return 3 return 99 gguf_files.sort(key=_rank) gguf_filename = gguf_files[0] print(f"[AdventureAgent] Auto-selected GGUF: {gguf_filename}") cache_dir = os.path.join(os.getenv("HF_HOME", "/tmp"), "gguf_models") print(f"[AdventureAgent] Downloading {repo_id}/{gguf_filename} ...") model_path = hf_hub_download( repo_id=repo_id, filename=gguf_filename, token=hf_token, local_dir=cache_dir, ) print(f"[AdventureAgent] Loading GGUF from {model_path} ...") _gguf_llm = Llama( model_path=model_path, n_ctx=2048, # context window n_threads=2, # HF Spaces free tier: 2 vCPU n_gpu_layers=0, # CPU only — no GPU on free tier verbose=False, ) print("[AdventureAgent] ✅ GGUF model loaded and ready on CPU.") except ImportError: print("[AdventureAgent] ❌ llama-cpp-python not installed.") _gguf_load_failed = True except Exception as e: print(f"[AdventureAgent] ❌ GGUF load failed: {e}") _gguf_load_failed = True return _gguf_llm def _data_dir() -> str: return os.path.join(os.path.dirname(__file__), '..', 'data') # ───────────────────────────────────────────────────────────────────────────── # AGENT # ───────────────────────────────────────────────────────────────────────────── class AdventureAgent: _dataset_cache: List[Dict[str, str]] = [] _students_cache: List[Dict[str, Any]] = [] _few_shot_cache: Optional[Dict[str, Any]] = None def __init__(self, api_key: Optional[str] = None): self.api_key = api_key or os.getenv("GEMINI_API_KEY") self.tuned_model_env = os.getenv("ADVENTURE_TUNED_MODEL", "").strip() self.gguf_file_env = os.getenv("ADVENTURE_GGUF_FILE", "").strip() or None # Decide which primary path to use self.use_gguf = ( bool(self.tuned_model_env) and "/" in self.tuned_model_env and not self.tuned_model_env.startswith("tunedModels/") ) # Gemini client — used as primary (if no GGUF) or fallback (if GGUF fails) self.gemini_client = None gemini_model_id = "gemini-2.0-flash" if self.tuned_model_env.startswith("tunedModels/"): gemini_model_id = self.tuned_model_env # Gemini tuned model if self.api_key: genai.configure(api_key=self.api_key) self.gemini_client = genai.GenerativeModel(gemini_model_id) if self.use_gguf: print(f"[AdventureAgent] Primary: GGUF ({self.tuned_model_env})") print(f"[AdventureAgent] Fallback: {'Gemini ' + gemini_model_id if self.gemini_client else 'Static levels'}") # Warm up GGUF model in background thread during application startup threading.Thread( target=_load_gguf_model, args=(self.tuned_model_env, self.gguf_file_env), daemon=True ).start() elif self.gemini_client: print(f"[AdventureAgent] Primary: Gemini ({gemini_model_id})") else: print("[AdventureAgent] WARNING: No model configured — static fallback only.") self.cache: Dict[str, Any] = {} self.cache_duration = 3600 self.high_rated_levels: List[Dict] = [] self._load_dataset_lazy() self._load_students_dataset_lazy() self._load_few_shot_examples() # ───────────────────────────────────────────────────────────────────────── # DATA LOADING # ───────────────────────────────────────────────────────────────────────── def _load_dataset_lazy(self): if AdventureAgent._dataset_cache: return file_path = os.path.join(_data_dir(), 'train.csv') if os.path.exists(file_path): try: with open(file_path, "r", encoding="utf-8") as f: reader = csv.DictReader(f) for i, row in enumerate(reader): if i >= 500: break if row.get('question'): AdventureAgent._dataset_cache.append(row) print(f"[AdventureAgent] Loaded {len(AdventureAgent._dataset_cache)} rows from train.csv") except Exception as e: print(f"[AdventureAgent] train.csv load error: {e}") def _load_students_dataset_lazy(self): if AdventureAgent._students_cache: return jsonl_path = os.path.join(_data_dir(), 'students_coding_questions.jsonl') if os.path.isfile(jsonl_path): try: with open(jsonl_path, "r", encoding="utf-8") as f: for line in f: line = line.strip() if line: AdventureAgent._students_cache.append(json.loads(line)) print(f"[AdventureAgent] Loaded {len(AdventureAgent._students_cache)} CodeAid rows") return except Exception as e: print(f"[AdventureAgent] students jsonl error: {e}") if os.getenv("ADVENTURE_LOAD_HF_DATASET", "").lower() in ("1", "true", "yes"): try: from datasets import load_dataset ds = load_dataset(STUDENTS_DATASET_ID, split="train") for i, row in enumerate(ds): if i >= 800: break stem = (row.get("input_task_description") or row.get("input_intention") or row.get("input_question")) if stem and str(stem).strip().lower() not in ("null", "none", ""): AdventureAgent._students_cache.append({ "problem_stem": str(stem)[:500], "feature_type": row.get("feature_type"), }) print(f"[AdventureAgent] Loaded {len(AdventureAgent._students_cache)} rows from HF") except Exception as e: print(f"[AdventureAgent] HF dataset skip: {e}") def _load_few_shot_examples(self): if AdventureAgent._few_shot_cache is not None: return path = os.path.join(_data_dir(), 'adventure_few_shot_examples.json') if os.path.isfile(path): try: with open(path, "r", encoding="utf-8") as f: AdventureAgent._few_shot_cache = json.load(f) except Exception as e: print(f"[AdventureAgent] few-shot load error: {e}") AdventureAgent._few_shot_cache = {} else: AdventureAgent._few_shot_cache = {} # ───────────────────────────────────────────────────────────────────────── # PROMPT HELPERS # ───────────────────────────────────────────────────────────────────────── def _pick_student_problem(self) -> Optional[Dict[str, Any]]: if not AdventureAgent._students_cache: return None weighted = [r for r in AdventureAgent._students_cache if r.get("task_description")] pool = weighted if weighted else AdventureAgent._students_cache row = random.choice(pool) return { "question": row.get("problem_stem") or row.get("task_description") or row.get("question", ""), "feature_type": row.get("feature_type", ""), } def _few_shot_block(self, mode: str, difficulty: str) -> str: if not AdventureAgent._few_shot_cache: return "" mode_key = "blockly" if mode == "blockly" else mode examples = (AdventureAgent._few_shot_cache.get(mode_key) or {}).get(difficulty) if not examples: return "" return ( "\nQUALITY REFERENCE (match this tone and JSON shape — do not copy verbatim):\n" f"{json.dumps(examples, indent=2)[:3000]}\n" ) def learn_from_feedback(self, level_data: Dict[str, Any], rating: int, developer_feedback: Optional[str] = None): if developer_feedback and str(developer_feedback).strip(): level_data["CRITICAL_DEVELOPER_TUNING_NOTE"] = str(developer_feedback).strip() if rating == 5 or (developer_feedback and str(developer_feedback).strip()): if len(self.high_rated_levels) > 50: self.high_rated_levels.pop(0) self.high_rated_levels.append(level_data) def _build_prompt(self, mode: str, difficulty: str, dataset_problem: Optional[Dict[str, str]] = None, is_gguf: bool = False) -> str: if difficulty == "Easy": diff_rules = "EASY: Simple If/Else or basic sequencing. No complex loops." concepts = '["if", "print"]' elif difficulty == "Intermediate": diff_rules = "INTERMEDIATE: MUST use a loop (For/While) or basic Array iteration." concepts = '["loop", "variable"]' else: diff_rules = "EXPERT: Nested loops, complex logic, or algorithmic thinking." concepts = '["loop", "nested_loop"]' dataset_block = "" if dataset_problem: q = dataset_problem.get('question') or dataset_problem.get('problem_stem', '') ft = dataset_problem.get('feature_type', '') dataset_block = ( f"\nREAL STUDENT PROBLEM (adapt for ages 10-14, sci-fi story):\n" f"Type: {ft}\nContext: {q}\n" "Do NOT use C pointers, malloc, or university-level topics.\n" ) prompt = ( "You are the Lead Game Designer for 'CodeCracker', an educational coding game for ages 10-14.\n" "Generate a UNIQUE STORY ARC of EXACTLY 5 interconnected game levels.\n\n" f"Mode: {mode} | Difficulty: {difficulty}\n" f"{diff_rules}\n" f"{dataset_block}" "\nSCAFFOLDING RULES:\n" "- Never give the full solution code.\n" "- Teach one concept per level; build across the arc.\n" "- Hints guide structure first, logic second — never paste the answer.\n" ) if not is_gguf: prompt += f"{self._few_shot_block(mode, difficulty)}" prompt += ( "\nOUTPUT: RAW JSON ONLY. No markdown. No explanation.\n" 'Top-level keys: "story_arc_title", "levels" (array of exactly 5).\n' ) if mode == "blockly": prompt += ( f'\nEach level: {{"type":"blockly","level_id":"id","title":"...","story":"...",' f'"concept_tutorial":"...","problem":"...","toolbox_categories":["Math","Loops","Variables","Text","Logic"],' f'"initial_code":"","validation_rules":{{"required_concepts":{concepts},"expected_output":"..."}},' f'"hint_1":"...","hint_2":"..."}}' ) elif mode == "time_challenge": prompt += ( '\nEach level: {"type":"time_challenge","level_id":"id","title":"...","story":"...",' '"concept_tutorial":"...","timer_seconds":60,"buggy_code":"...","task":"...","solution_patch":"..."}' ) if not is_gguf and self.high_rated_levels: prompt += ( f"\n\nDEVELOPER-APPROVED EXAMPLES (match this quality):\n" f"{json.dumps(self.high_rated_levels[-2:], indent=2)[:1500]}" ) return prompt # ───────────────────────────────────────────────────────────────────────── # GGUF INFERENCE # ───────────────────────────────────────────────────────────────────────── def _call_gguf(self, prompt: str) -> str: llm = _load_gguf_model(self.tuned_model_env, self.gguf_file_env) if llm is None: raise RuntimeError("GGUF model unavailable") print("[AdventureAgent] Running GGUF inference on CPU...") response_chunks = [] try: for chunk in llm.create_chat_completion( messages=[ { "role": "system", "content": ( "You are the Lead Game Designer for CodeCracker, an educational " "coding game for ages 10-14. Always respond with valid JSON only. " "No markdown, no explanation." ), }, {"role": "user", "content": prompt}, ], max_tokens=1500, temperature=0.7, top_p=0.9, stop=["<|im_end|>", "<|endoftext|>"], stream=True ): token = chunk["choices"][0]["delta"].get("content", "") if token: response_chunks.append(token) print(token, end="", flush=True) except Exception as e: print(f"\n[AdventureAgent] GGUF streaming error: {e}") raise e print() # New line after streaming completes text = "".join(response_chunks) print("[AdventureAgent] GGUF inference complete.") return text # ───────────────────────────────────────────────────────────────────────── # GEMINI INFERENCE # ───────────────────────────────────────────────────────────────────────── def _call_gemini(self, prompt: str) -> str: if not self.gemini_client: raise RuntimeError("Gemini client not configured (no API key)") print("[AdventureAgent] Calling Gemini API...") response = self.gemini_client.generate_content(prompt) return response.text or "" # ───────────────────────────────────────────────────────────────────────── # JSON EXTRACTION # ───────────────────────────────────────────────────────────────────────── @staticmethod def _extract_json(text: str) -> Dict[str, Any]: """Strip markdown fences and extract the first JSON object.""" if "```json" in text: text = text.split("```json")[1].split("```")[0] elif "```" in text: text = text.split("```")[1].split("```")[0] # Find the opening brace start = text.find("{") if start != -1: text = text[start:] return json.loads(text.strip()) # ───────────────────────────────────────────────────────────────────────── # PUBLIC ENTRY POINT # ───────────────────────────────────────────────────────────────────────── def generate_level(self, mode: str, difficulty: str, specific_topic: Optional[str] = None) -> Dict[str, Any]: if difficulty in ["Tutorial", "Onboarding"]: return self._get_onboarding_level(mode) # Pick a student problem for context dataset_prob: Optional[Dict[str, Any]] = None if specific_topic: dataset_prob = {"question": specific_topic} elif mode == "blockly": dataset_prob = self._pick_student_problem() if not dataset_prob and AdventureAgent._dataset_cache: row = random.choice(AdventureAgent._dataset_cache) dataset_prob = {"question": (row.get("question") or "")[:600]} # Cache key — avoids hammering the model for identical requests cache_key = ( f"{mode}_{difficulty}_" f"{specific_topic or (dataset_prob or {}).get('question', '')[:80]}" ) if cache_key in self.cache: entry = self.cache[cache_key] if time.time() - entry['timestamp'] < self.cache_duration: print("[AdventureAgent] Serving from cache.") return entry['data'] # ── Try GGUF first, then Gemini, then static fallback ────────────── text = None if self.use_gguf: try: gguf_prompt = self._build_prompt(mode, difficulty, dataset_prob, is_gguf=True) text = self._call_gguf(gguf_prompt) except Exception as e: import traceback print(f"[AdventureAgent] GGUF failed: {e}. Trying Gemini fallback...") traceback.print_exc() if text is None: try: gemini_prompt = self._build_prompt(mode, difficulty, dataset_prob, is_gguf=False) text = self._call_gemini(gemini_prompt) except Exception as e: import traceback print(f"[AdventureAgent] Gemini failed: {e}. Using static fallback.") traceback.print_exc() return self._get_fallback_level(mode, difficulty) try: level_data = self._extract_json(text) self.cache[cache_key] = {"data": level_data, "timestamp": time.time()} return level_data except Exception as e: import traceback print(f"[AdventureAgent] JSON parse failed: {e}. Using static fallback.") traceback.print_exc() return self._get_fallback_level(mode, difficulty) # ───────────────────────────────────────────────────────────────────────── # STATIC LEVELS # ───────────────────────────────────────────────────────────────────────── def _get_onboarding_level(self, mode: str) -> Dict[str, Any]: return { "story_arc_title": "Welcome to CodeCracker!", "levels": [{ "type": "blockly", "level_id": "tutorial_001", "title": "Welcome to Coding!", "story": "Let's learn how to speak to the computer. We need to say 'Hello'.", "concept_tutorial": "Drag & Drop: You build code by snapping blocks together, just like LEGOs.", "problem": "Print 'Hello World'.", "toolbox_categories": ["Text", "Variables"], "initial_code": "", "validation_rules": { "required_concepts": ["print"], "expected_output": "Hello World", }, "hint_1": "Find the 'print' block in the Text category.", "hint_2": "Type 'Hello World' exactly — capital H and W.", }], } def _get_fallback_level(self, mode: str, difficulty: str = "Easy") -> Dict[str, Any]: if mode == "blockly": return { "story_arc_title": "Logic Repair Arc", "levels": [ { "type": "blockly", "level_id": f"fallback_00{i + 1}", "title": f"Logic Repair Part {i}", "story": "The automated systems are down. We need manual logic configuration.", "concept_tutorial": "Loops: Use loops to repeat actions without writing code over and over.", "problem": "Create a loop that counts to 3.", "toolbox_categories": ["Loops", "Math", "Variables", "Text"], "initial_code": "", "validation_rules": { "required_concepts": ["loop"], "expected_output": "1\n2\n3", }, "hint_1": "Use the 'repeat' block.", "hint_2": "Set the number to 3.", } for i in range(1, 6) ], } if mode == "time_challenge": return { "story_arc_title": "System Reboot Arc", "levels": [ { "type": "time_challenge", "level_id": f"fallback_tc_{i}", "title": f"System Reboot Part {i}", "story": "Critical Error! Debug the startup sequence immediately.", "concept_tutorial": "Syntax: Code must follow strict grammar rules.", "timer_seconds": 45, "buggy_code": "print('System Ready'\nstart_engine()", "task": "Fix the syntax error.", "solution_patch": "print('System Ready')\nstart_engine()", } for i in range(1, 6) ], } return {"story_arc_title": "Error Arc", "levels": []} if __name__ == "__main__": agent = AdventureAgent() print("Testing Adventure Generation...") print(json.dumps(agent.generate_level("blockly", "Easy"), indent=2))