# FrogQuest / gpu_shared.py
# VirusDumb's picture
# Big Leagues Calling
# c6815eb
"""Backend-agnostic shared source for FrogQuest's GPU work.
This module holds everything that BOTH the in-Space local path (llm.py / images.py running on
ZeroGPU) and the off-Space Modal path (modal_app.py running on a Modal GPU) need, so the two
backends can never drift: the model ids/config, the LLM system prompts, the FLUX prompt builders,
the JSON extractor, and the CUDA-lib preloader.
Hard rule: this file must stay importable on a CPU-only box with NO heavy GPU deps installed.
Only stdlib + (lazily) PIL/torch are referenced, and torch is imported inside a function. That is
what lets app.py import the local modules on a CPU-basic Space (FROGQUEST_BACKEND=modal) without
dragging in torch/diffusers/llama_cpp. The schemas live in schema.py (also dependency-free) and
are imported directly by both paths — they are NOT duplicated here.
"""
from __future__ import annotations
import json
# ----------------------------- model ids / config (shared by both backends) -----------------------------
# Nemotron-3 Nano 4B GGUF (verified June 2026). Q8_0 (~4.3GB, near-fp16); filename is a glob that
# Llama.from_pretrained resolves to the exact file. (Hard floor Q4 — sub-4-bit degrades this arch.)
GGUF_REPO = "unsloth/NVIDIA-Nemotron-3-Nano-4B-GGUF"  # repo id that holds the GGUF weights
GGUF_FILE = "*Q8_0*.gguf"  # glob pattern selecting the Q8_0 file, not a literal filename
# FLUX.2 [klein] (verified: ungated, Apache 2.0). The "4B" is just the diffusion transformer; the
# repo also ships a large multimodal text encoder + VAE (~23GB total in bf16).
MODEL_ID = "black-forest-labs/FLUX.2-klein-4B"  # repo id of the image model
# Image quality/speed knobs. klein is DISTILLED -> 4 steps is the model card's value (more doesn't
# help). 384 (a multiple of 16) matches the UI's ~380px display; resolution is the only real knob.
STEPS = 4  # denoising steps per image
# NOTE(review): unlike STEPS, no source is cited here for the guidance value - confirm it against
# the model card before changing either knob.
GUIDANCE = 4.0
MAX_SIDE = 384  # pixels; presumably the cap on the longer image side - verify against the caller
# LLM context: full 128k on a big GPU; 16k on a small one (a 128k KV cache won't fit beside FLUX).
N_CTX = 131072  # 128 * 1024 tokens
N_CTX_SMALL = 16384  # 16 * 1024 tokens
LOW_VRAM_GB = 24 # at/below this, treat the GPU as "small" (T4 = 16GB, A10G/L4 = 24GB)
# ----------------------------- LLM prompts -----------------------------
# Quest designer: the user's to-do list -> a themed quest log, JSON only.
# The text contains literal `{theme}` placeholders; the substitution is not done in this module,
# so whoever consumes the prompt must fill them in (presumably str.format or str.replace - verify
# against the caller before adding any other literal braces to the prompt).
# The final `/no_think` line is part of the prompt text sent to the model.
# NOTE(review): it looks like a model directive to skip reasoning output - confirm.
SYSTEM_PROMPT = """You are FrogQuest's quest designer. Convert the user's real to-do list into a themed text-adventure quest log and OUTPUT JSON ONLY - no prose, no markdown, no code.
Apply the "Eat That Frog" method:
- The FROG = the single most important/hardest task. Mark exactly ONE quest is_frog:true and order it FIRST.
- Break each big/multi-step goal into an ordered chain of smaller quests sharing one goal_group label; keep simple to-dos as standalone quests.
- Add 1-3 bonus self-care quests (type:"bonus") such as meditate 5 min, exercise 20 min, digital detox 1 hr. They are OPTIONAL and ENCOURAGING - never guilt-inducing.
For EVERY quest write vivid {theme}-themed, 8-bit pixel-art image instructions where the USER is the hero:
- initial_image_prompt: the hero facing the challenge (scene only - the renderer adds the user's face from a photo; do NOT describe their face).
- success_edit: edit instruction showing how the initial scene would look victorious.
- failure_edit: a FORGIVING edit instruction - the hero retreats to fight another day from the initial image. Never shaming.
Set adventure.art_style to one shared "8-bit / 16-bit pixel-art, {theme} palette" string applied to every image, and adventure.seed to a single integer for the whole adventure. xp 10-100 by effort. All status:"active", image_state:"initial". Echo the user's real wording in each quest.task.
/no_think"""
# Campaign designer: ONE long-term goal -> a themed campaign + an ORDERED chain of concrete steps.
# The text contains literal `{theme}` placeholders that are filled in outside this module
# (presumably str.format or str.replace - verify against the caller).
# The prompt tells the model that optional "Research notes" may follow the goal in the user
# message; assembling that message is the caller's job.
# The final `/no_think` line is part of the prompt text sent to the model.
CAMPAIGN_SYSTEM_PROMPT = """You are FrogQuest's campaign designer. The user gives ONE long-term goal (e.g. ace an exam, win a hackathon, learn a skill). Break it into 3-8 ORDERED, concrete, real-world tasks - first things first, each small enough to act on - that together achieve the goal, and present them as a themed quest chain. OUTPUT JSON ONLY - no prose, no markdown, no code.
If "Research notes" are provided below the goal, ground the tasks in them (they are summaries of real web sources). Otherwise rely on your own knowledge.
campaign.title = a short epic {theme}-themed name for this goal. campaign.art_style = one shared "8-bit / 16-bit pixel-art, {theme} palette" string. campaign.seed = a single integer for the whole campaign.
For EVERY quest write vivid {theme}-themed, 8-bit pixel-art image instructions where the USER is the hero:
- initial_image_prompt: the hero facing this step (scene only - do NOT describe their face).
- success_edit: edit instruction showing the scene victorious.
- failure_edit: a FORGIVING edit instruction - the hero retreats to try again. Never shaming.
quest.task = the real-world action in plain words. xp 10-100 by effort. All type:"main", is_frog:false, status:"active", image_state:"initial".
/no_think"""
# Frog Master chat router. Classifies one user message into a single intent and OUTPUTS JSON ONLY.
# The intents named in the prompt are: "forge", "add_tasks", "mark_done", "mark_couldnt" and
# "unknown"; the prompt also asks the model for `target_task` and (for "mark_couldnt") `reason`.
# Unlike the two designer prompts, this one has no `{theme}` placeholder.
# The final `/no_think` line is part of the prompt text sent to the model.
INTENT_SYSTEM_PROMPT = """You are FrogQuest's "Frog Master" router. Read ONE user message plus a short context describing the current quest log, and classify it into EXACTLY ONE intent. OUTPUT JSON ONLY - no prose.
intent must be one of:
- "forge": the user is describing their to-do list / plans / goals for the first time (or wants a brand-new quest log). Use this when no quest log exists yet, or they clearly want to start over.
- "add_tasks": the user wants to ADD one or more new tasks/goals to the EXISTING quest log.
- "mark_done": the user says they FINISHED/completed a task. Put the task they mean in target_task (match it to one of the listed quest titles or tasks; leave empty to mean the currently selected quest).
- "mark_couldnt": the user could NOT do a task, or wants to skip/postpone it. Put the task in target_task (empty = currently selected quest) and put their explanation in reason.
- "unknown": small talk, a question, or anything that doesn't fit the above.
Only "forge" and "add_tasks" describe NEW work; if a log already exists and the user is describing more things to do, prefer "add_tasks". target_task should copy the matching quest's title or task wording when you can identify it.
/no_think"""
# ----------------------------- FLUX prompt builders -----------------------------
def build_initial_prompt(art_style: str, scene_prompt: str) -> str:
    """Build the FLUX prompt for a quest's initial image.

    The result is ``"<art_style>. <scene_prompt>. "`` followed by a fixed sentence telling the
    model that the hero is the person shown in the reference image.

    Both fragments are written by the LLM, so they are normalised before joining: surrounding
    whitespace is stripped, a fragment that already ends in ``.``, ``!`` or ``?`` does not get a
    second period, and an empty/``None`` fragment is skipped instead of leaving a stray ``". "``
    (or the literal text ``"None"``) in the prompt. Clean inputs produce exactly the same string
    as before.

    Args:
        art_style: Shared style string applied to every image of the adventure.
        scene_prompt: Scene description for this quest.

    Returns:
        The complete prompt text.
    """
    sentences = []
    for fragment in (art_style, scene_prompt):
        fragment = (fragment or "").strip()
        if not fragment:
            continue
        # Only terminate the fragment if the model did not already do so.
        sentences.append(fragment if fragment.endswith((".", "!", "?")) else fragment + ".")
    sentences.append(
        "The hero is the person shown in the reference image, in this style and scene."
    )
    return " ".join(sentences)
def build_edit_prompt(art_style: str, edit_instruction: str) -> str:
    """Build the FLUX prompt for an edit pass (success/failure state of an existing scene).

    The result is ``"<art_style>. <edit_instruction>"``.

    Both fragments are written by the LLM, so they are normalised before joining: surrounding
    whitespace is stripped, an ``art_style`` that already ends in ``.``, ``!`` or ``?`` does not
    get a second period, and an empty/``None`` ``art_style`` is dropped instead of producing a
    prompt that starts with ``". "`` (or the literal text ``"None"``). Clean inputs produce
    exactly the same string as before.

    Args:
        art_style: Shared style string applied to every image of the adventure.
        edit_instruction: Instruction describing how the scene should change.

    Returns:
        The complete prompt text.
    """
    style = (art_style or "").strip()
    instruction = (edit_instruction or "").strip()
    if not style:
        return instruction
    if not style.endswith((".", "!", "?")):
        style += "."
    # rstrip() avoids a dangling space when the instruction is empty.
    return f"{style} {instruction}".rstrip()
# ----------------------------- helpers -----------------------------
def extract_json(text: str) -> dict:
    """Parse a JSON object out of model output, tolerating stray prose or code fences.

    Strategies, tried in order until one yields a ``dict``:

    1. Parse the whole (stripped) text.
    2. Parse the span from the first ``{`` to the last ``}``.
    3. Try ``JSONDecoder.raw_decode`` at each ``{`` in turn and take the first object that
       parses. This covers prose before or after the object that itself contains braces.

    Args:
        text: Raw model output; ``None`` and empty strings are accepted.

    Returns:
        The decoded object. Always a ``dict``: valid JSON that is not an object (a list, string,
        number, ``null``) is never returned as-is, and ``{}`` is returned when no object is found.
    """
    text = (text or "").strip()
    if not text:
        return {}

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    start, end = text.find("{"), text.rfind("}")
    if start == -1 or end <= start:
        return {}

    # Fallback: the outermost { ... } span (handles code fences and surrounding prose).
    try:
        parsed = json.loads(text[start : end + 1])
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    # Last resort: the first position at which a complete JSON object can be decoded.
    decoder = json.JSONDecoder()
    pos = start
    while pos != -1:
        try:
            parsed, _ = decoder.raw_decode(text, pos)
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict):
            return parsed
        pos = text.find("{", pos + 1)
    return {}
def preload_cuda_libs():
    """Load the CUDA runtime libs (libcudart.so.12, libcublas*, ...) RTLD_GLOBAL by full path so
    the prebuilt llama.cpp CUDA wheel can resolve them. They ship in the nvidia-*-cu12 pip packages
    and inside torch/lib, but neither is on the dynamic loader's search path. No-op for anything not
    found. Order matters: cudart before cublasLt before cublas. Needed on BOTH ZeroGPU and Modal
    (same prebuilt cu124 wheel, same missing-loader-path problem).

    Directories searched, in order:

    * ``<torch package dir>/lib`` when torch can be imported;
    * ``<root>/nvidia/*/lib`` for every package root: ``site.getsitepackages()``, the
      ``site-packages`` directory next to the standard library, and every non-empty ``sys.path``
      entry.

    Returns:
        None. Libraries that fail to load (``OSError``) are skipped silently; this is a
        best-effort preload, not a validation step.
    """
    import ctypes
    import glob
    import os
    import site
    import sys

    lib_dirs = []
    try:
        import torch

        lib_dirs.append(os.path.join(os.path.dirname(torch.__file__), "lib"))
    except Exception:
        # Deliberately broad: torch may be missing or fail to import in several ways, and the
        # nvidia-* wheels below are still worth trying without it.
        pass

    package_roots = []
    if hasattr(site, "getsitepackages"):  # absent from some virtualenv-provided site modules
        package_roots += site.getsitepackages()
    stdlib_dir = os.path.dirname(os.__file__)
    # Fallback: the site-packages directory that sits next to the standard library.
    package_roots.append(os.path.join(stdlib_dir, "site-packages"))
    # The previous fallback pointed here (the parent of the stdlib directory, e.g.
    # <prefix>/lib); kept so no directory that used to be searched is dropped.
    package_roots.append(os.path.dirname(stdlib_dir))
    # Anything importable lives on sys.path, so this covers venvs and user installs too. Empty
    # entries (the current working directory) are skipped on purpose.
    package_roots += [entry for entry in sys.path if isinstance(entry, str) and entry]

    for root in dict.fromkeys(package_roots):
        lib_dirs += glob.glob(os.path.join(root, "nvidia", "*", "lib"))
    lib_dirs = list(dict.fromkeys(lib_dirs))  # de-duplicate once, keeping order

    # Prefix is the OUTER loop so the load order holds across directories. "libcublas*" also
    # matches libcublasLt; re-opening an already loaded library is harmless.
    for prefix in ("libcudart", "libnvrtc", "libcublasLt", "libcublas", "libcudnn"):
        for lib_dir in lib_dirs:
            for lib_path in sorted(glob.glob(os.path.join(lib_dir, prefix + "*.so*"))):
                try:
                    ctypes.CDLL(lib_path, mode=ctypes.RTLD_GLOBAL)
                except OSError:
                    # Unloadable here (wrong arch, missing dependency, ...): skip it.
                    pass