Spaces:
Running on Zero
Running on Zero
"""Backend-agnostic shared source for FrogQuest's GPU work.

This module holds everything that BOTH the in-Space local path (llm.py / images.py running on
ZeroGPU) and the off-Space Modal path (modal_app.py running on a Modal GPU) need, so the two
backends can never drift: the model ids/config, the LLM system prompts, the FLUX prompt builders,
the JSON extractor, and the CUDA-lib preloader.

Hard rule: this file must stay importable on a CPU-only box with NO heavy GPU deps installed.
Only stdlib + (lazily) PIL/torch are referenced, and torch is imported inside a function. That is
what lets app.py import the local modules on a CPU-basic Space (FROGQUEST_BACKEND=modal) without
dragging in torch/diffusers/llama_cpp. The schemas live in schema.py (also dependency-free) and
are imported directly by both paths — they are NOT duplicated here.
"""
| from __future__ import annotations | |
| import json | |
# ----------------------------- model ids / config (shared by both backends) -----------------------------

# LLM weights: Nemotron-3 Nano 4B as GGUF (verified June 2026). The Q8_0 quant is ~4.3GB and
# near-fp16. GGUF_FILE is a glob, not a filename: Llama.from_pretrained resolves it to the exact
# file. (Hard floor is Q4 — sub-4-bit quantization degrades this architecture.)
GGUF_REPO = "unsloth/NVIDIA-Nemotron-3-Nano-4B-GGUF"
GGUF_FILE = "*Q8_0*.gguf"

# Image model: FLUX.2 [klein] (verified: ungated, Apache 2.0). "4B" only counts the diffusion
# transformer; the repo also ships a large multimodal text encoder + VAE (~23GB total in bf16).
MODEL_ID = "black-forest-labs/FLUX.2-klein-4B"

# Image quality/speed knobs. klein is DISTILLED, so 4 steps is the model card's value and more
# steps don't help; resolution is the only real knob. 384 is a multiple of 16 and matches the
# UI's ~380px display.
STEPS = 4
GUIDANCE = 4.0
MAX_SIDE = 384

# LLM context window: the full 128k on a big GPU, 16k on a small one (a 128k KV cache won't fit
# beside FLUX).
N_CTX = 128 * 1024
N_CTX_SMALL = 16 * 1024

# At or below this much VRAM the GPU counts as "small" (T4 = 16GB, A10G/L4 = 24GB).
LOW_VRAM_GB = 24
# ----------------------------- LLM prompts -----------------------------
# Quest designer: a to-do list in, a themed quest log out (JSON only).
# `{theme}` is left as a literal placeholder in the text — presumably substituted by the caller
# before the prompt is sent; the substitution is not done in this block (TODO confirm).
# NOTE(review): the trailing "/no_think" line looks like the model's switch for skipping its
# reasoning phase — confirm against the model card.
SYSTEM_PROMPT = """You are FrogQuest's quest designer. Convert the user's real to-do list into a themed text-adventure quest log and OUTPUT JSON ONLY - no prose, no markdown, no code.
Apply the "Eat That Frog" method:
- The FROG = the single most important/hardest task. Mark exactly ONE quest is_frog:true and order it FIRST.
- Break each big/multi-step goal into an ordered chain of smaller quests sharing one goal_group label; keep simple to-dos as standalone quests.
- Add 1-3 bonus self-care quests (type:"bonus") such as meditate 5 min, exercise 20 min, digital detox 1 hr. They are OPTIONAL and ENCOURAGING - never guilt-inducing.
For EVERY quest write vivid {theme}-themed, 8-bit pixel-art image instructions where the USER is the hero:
- initial_image_prompt: the hero facing the challenge (scene only - the renderer adds the user's face from a photo; do NOT describe their face).
- success_edit: edit instruction showing how the initial scene would look victorious.
- failure_edit: a FORGIVING edit instruction - the hero retreats to fight another day from the initial image. Never shaming.
Set adventure.art_style to one shared "8-bit / 16-bit pixel-art, {theme} palette" string applied to every image, and adventure.seed to a single integer for the whole adventure. xp 10-100 by effort. All status:"active", image_state:"initial". Echo the user's real wording in each quest.task.
/no_think"""
# Campaign designer: takes ONE long-term goal and asks for a themed campaign plus an ORDERED
# chain of 3-8 concrete steps, as JSON only. Optional "Research notes" below the goal are used
# to ground the tasks.
# `{theme}` is a literal placeholder here — presumably filled in by the caller (TODO confirm).
CAMPAIGN_SYSTEM_PROMPT = """You are FrogQuest's campaign designer. The user gives ONE long-term goal (e.g. ace an exam, win a hackathon, learn a skill). Break it into 3-8 ORDERED, concrete, real-world tasks - first things first, each small enough to act on - that together achieve the goal, and present them as a themed quest chain. OUTPUT JSON ONLY - no prose, no markdown, no code.
If "Research notes" are provided below the goal, ground the tasks in them (they are summaries of real web sources). Otherwise rely on your own knowledge.
campaign.title = a short epic {theme}-themed name for this goal. campaign.art_style = one shared "8-bit / 16-bit pixel-art, {theme} palette" string. campaign.seed = a single integer for the whole campaign.
For EVERY quest write vivid {theme}-themed, 8-bit pixel-art image instructions where the USER is the hero:
- initial_image_prompt: the hero facing this step (scene only - do NOT describe their face).
- success_edit: edit instruction showing the scene victorious.
- failure_edit: a FORGIVING edit instruction - the hero retreats to try again. Never shaming.
quest.task = the real-world action in plain words. xp 10-100 by effort. All type:"main", is_frog:false, status:"active", image_state:"initial".
/no_think"""
# "Frog Master" chat router: classifies ONE user message (plus a short quest-log context) into
# exactly one intent — forge / add_tasks / mark_done / mark_couldnt / unknown — and asks for JSON
# only. The prompt also names the target_task and reason fields the model should fill in.
INTENT_SYSTEM_PROMPT = """You are FrogQuest's "Frog Master" router. Read ONE user message plus a short context describing the current quest log, and classify it into EXACTLY ONE intent. OUTPUT JSON ONLY - no prose.
intent must be one of:
- "forge": the user is describing their to-do list / plans / goals for the first time (or wants a brand-new quest log). Use this when no quest log exists yet, or they clearly want to start over.
- "add_tasks": the user wants to ADD one or more new tasks/goals to the EXISTING quest log.
- "mark_done": the user says they FINISHED/completed a task. Put the task they mean in target_task (match it to one of the listed quest titles or tasks; leave empty to mean the currently selected quest).
- "mark_couldnt": the user could NOT do a task, or wants to skip/postpone it. Put the task in target_task (empty = currently selected quest) and put their explanation in reason.
- "unknown": small talk, a question, or anything that doesn't fit the above.
Only "forge" and "add_tasks" describe NEW work; if a log already exists and the user is describing more things to do, prefer "add_tasks". target_task should copy the matching quest's title or task wording when you can identify it.
/no_think"""
| # ----------------------------- FLUX prompt builders ----------------------------- | |
def build_initial_prompt(art_style: str, scene_prompt: str) -> str:
    """Compose the first-generation FLUX prompt.

    The result is the shared art style, then the scene description, then a fixed sentence
    telling the model that the hero is the person in the reference image.
    """
    styled_scene = f"{art_style}. {scene_prompt}. "
    hero_clause = "The hero is the person shown in the reference image, in this style and scene."
    return styled_scene + hero_clause
def build_edit_prompt(art_style: str, edit_instruction: str) -> str:
    """Compose the edit-pass FLUX prompt: the shared art style followed by the edit instruction.

    Used to turn an existing scene into its success or failure state.
    """
    return "{}. {}".format(art_style, edit_instruction)
| # ----------------------------- helpers ----------------------------- | |
def extract_json(text: str) -> dict:
    """Parse a JSON object out of model output, tolerating stray prose or code fences.

    Args:
        text: Raw model output. ``None`` or an empty string is treated as empty input.

    Returns:
        The decoded JSON object. ``{}`` is returned when no object can be decoded, and also
        when the text is valid JSON whose top level is not an object (list, string, number,
        ``null``), so callers always receive a dict as the annotation promises.
    """
    text = (text or "").strip()
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        pass
    else:
        # The whole text was valid JSON; only an object satisfies the declared return type.
        return parsed if isinstance(parsed, dict) else {}

    # Fallback: decode exactly one object starting at the first "{". Unlike slicing up to the
    # last "}", this ignores whatever follows the object (closing code fence, trailing prose,
    # even prose containing braces). Only the first "{" is tried, so a truncated outer object
    # yields {} rather than one of its nested fragments.
    start = text.find("{")
    if start == -1:
        return {}
    try:
        parsed, _end = json.JSONDecoder().raw_decode(text, start)
    except json.JSONDecodeError:
        return {}
    return parsed
def preload_cuda_libs():
    """Best-effort preload of the CUDA runtime libraries, by full path, with RTLD_GLOBAL.

    The prebuilt llama.cpp CUDA wheel needs libcudart.so.12, libcublas*, ... to be resolvable.
    They ship in the nvidia-*-cu12 pip packages and inside torch/lib, but neither location is
    on the dynamic loader's search path. Needed on BOTH ZeroGPU and Modal (same prebuilt cu124
    wheel, same missing-loader-path problem).

    Anything that is not found, or that fails to load, is skipped silently. Order matters:
    cudart before cublasLt before cublas.
    """
    import ctypes
    import glob
    import os
    import site

    search_dirs = []
    try:
        # Imported here, not at module level, so this module stays importable without torch.
        import torch

        search_dirs.append(os.path.join(os.path.dirname(torch.__file__), "lib"))
    except Exception:
        # torch unavailable or unusable: fall through to the nvidia pip-package directories.
        pass

    roots = []
    if hasattr(site, "getsitepackages"):
        roots.extend(site.getsitepackages())
    # Fallback root. NOTE(review): os.__file__ sits in the stdlib directory, so two dirname()
    # calls give that directory's parent, not a site-packages directory — confirm this is the
    # intended fallback location.
    roots.append(os.path.dirname(os.path.dirname(os.__file__)))
    for root in dict.fromkeys(roots):  # de-duplicate while keeping order
        search_dirs.extend(glob.glob(os.path.join(root, "nvidia", "*", "lib")))

    unique_dirs = list(dict.fromkeys(search_dirs))
    load_order = ("libcudart", "libnvrtc", "libcublasLt", "libcublas", "libcudnn")
    for stem in load_order:
        for directory in unique_dirs:
            for lib_path in sorted(glob.glob(os.path.join(directory, stem + "*.so*"))):
                try:
                    ctypes.CDLL(lib_path, mode=ctypes.RTLD_GLOBAL)
                except OSError:
                    # Unloadable candidate (wrong arch, missing dependency, ...): skip it.
                    continue