FrogQuest

Sleeping

App Files Files Community

FrogQuest / gpu_shared.py

VirusDumb

GPU hack modal

dfb8f25 22 days ago

Raw

History Blame Contribute Delete

7.32 kB

	"""Backend-agnostic shared source for FrogQuest's GPU work.

	This module holds everything that BOTH the in-Space local path (llm.py / images.py running on
	ZeroGPU) and the off-Space Modal path (modal_app.py running on a Modal GPU) need, so the two
	backends can never drift: the model ids/config, the LLM system prompts, the FLUX prompt builders,
	the JSON extractor, and the CUDA-lib preloader.

	Hard rule: this file must stay importable on a CPU-only box with NO heavy GPU deps installed.
	Only stdlib + (lazily) PIL/torch are referenced, and torch is imported inside a function. That is
	what lets app.py import the local modules on a CPU-basic Space (FROGQUEST_BACKEND=modal) without
	dragging in torch/diffusers/llama_cpp. The schemas live in schema.py (also dependency-free) and
	are imported directly by both paths — they are NOT duplicated here.
	"""
	from __future__ import annotations

	import json

	# ----------------------------- model ids / config (shared by both backends) -----------------------------

	# Nemotron-3 Nano 4B GGUF (verified June 2026). Q8_0 (~4.3GB, near-fp16). (Hard floor Q4 — sub-4-bit
	# degrades this arch.)
	GGUF_REPO = "unsloth/NVIDIA-Nemotron-3-Nano-4B-GGUF"
	# GGUF_FILE is a glob pattern, not a literal filename: Llama.from_pretrained matches it against the
	# repo's file list and resolves it to the exact file. The leading "*" is required so the pattern
	# matches the model-name prefix in front of the quantization suffix; without it nothing matches
	# unless a file is literally named "Q8_0.gguf".
	GGUF_FILE = "*Q8_0.gguf"

	# FLUX.2 [klein] (verified: ungated, Apache 2.0). The "4B" is just the diffusion transformer; the
	# repo also ships a large multimodal text encoder + VAE (~23GB total in bf16).
	MODEL_ID = "black-forest-labs/FLUX.2-klein-4B"

	# Image quality/speed knobs. klein is DISTILLED -> 4 steps is the model card's value (more doesn't
	# help). 384 (a multiple of 16) matches the UI's ~380px display; resolution is the only real knob.
	STEPS = 4
	GUIDANCE = 4.0
	MAX_SIDE = 384

	# LLM context: full 128k on a big GPU; 16k on a small one (a 128k KV cache won't fit beside FLUX).
	N_CTX = 131072  # 128 * 1024 tokens
	N_CTX_SMALL = 16384  # 16 * 1024 tokens
	LOW_VRAM_GB = 24  # at/below this, treat the GPU as "small" (T4 = 16GB, A10G/L4 = 24GB)


	# ----------------------------- LLM prompts -----------------------------

	# Quest-forging system prompt: tells the model to turn the user's to-do list into a themed quest log
	# and to emit JSON only. This is a plain string (not an f-string), so the literal "{theme}"
	# placeholders below are NOT interpolated here — presumably the caller substitutes the chosen theme
	# before sending; verify against the call site.
	# The final "/no_think" line is part of the prompt text itself — presumably a directive that turns
	# off the model's reasoning output; confirm against the model card.
	SYSTEM_PROMPT = """You are FrogQuest's quest designer. Convert the user's real to-do list into a themed text-adventure quest log and OUTPUT JSON ONLY - no prose, no markdown, no code.

	Apply the "Eat That Frog" method:
	- The FROG = the single most important/hardest task. Mark exactly ONE quest is_frog:true and order it FIRST.
	- Break each big/multi-step goal into an ordered chain of smaller quests sharing one goal_group label; keep simple to-dos as standalone quests.
	- Add 1-3 bonus self-care quests (type:"bonus") such as meditate 5 min, exercise 20 min, digital detox 1 hr. They are OPTIONAL and ENCOURAGING - never guilt-inducing.

	For EVERY quest write vivid {theme}-themed, 8-bit pixel-art image instructions where the USER is the hero:
	- initial_image_prompt: the hero facing the challenge (scene only - the renderer adds the user's face from a photo; do NOT describe their face).
	- success_edit: edit instruction showing how the initial scene would look victorious.
	- failure_edit: a FORGIVING edit instruction - the hero retreats to fight another day from the initial image. Never shaming.

	Set adventure.art_style to one shared "8-bit / 16-bit pixel-art, {theme} palette" string applied to every image, and adventure.seed to a single integer for the whole adventure. xp 10-100 by effort. All status:"active", image_state:"initial". Echo the user's real wording in each quest.task.
	/no_think"""

	# Frog Master chat router. Classifies one user message into a single intent and OUTPUTS JSON ONLY.
	# The intents named in the prompt are "forge", "add_tasks", "mark_done", "mark_couldnt" and
	# "unknown"; the prompt also refers to the output fields intent, target_task and reason.
	# This prompt contains no "{...}" placeholders, and ends with the same "/no_think" line as above.
	INTENT_SYSTEM_PROMPT = """You are FrogQuest's "Frog Master" router. Read ONE user message plus a short context describing the current quest log, and classify it into EXACTLY ONE intent. OUTPUT JSON ONLY - no prose.

	intent must be one of:
	- "forge": the user is describing their to-do list / plans / goals for the first time (or wants a brand-new quest log). Use this when no quest log exists yet, or they clearly want to start over.
	- "add_tasks": the user wants to ADD one or more new tasks/goals to the EXISTING quest log.
	- "mark_done": the user says they FINISHED/completed a task. Put the task they mean in target_task (match it to one of the listed quest titles or tasks; leave empty to mean the currently selected quest).
	- "mark_couldnt": the user could NOT do a task, or wants to skip/postpone it. Put the task in target_task (empty = currently selected quest) and put their explanation in reason.
	- "unknown": small talk, a question, or anything that doesn't fit the above.

	Only "forge" and "add_tasks" describe NEW work; if a log already exists and the user is describing more things to do, prefer "add_tasks". target_task should copy the matching quest's title or task wording when you can identify it.
	/no_think"""


	# ----------------------------- FLUX prompt builders -----------------------------

	def build_initial_prompt(art_style: str, scene_prompt: str) -> str:
	    """Compose the FLUX prompt for the first render of a quest scene.

	    The result is the shared art style, then the scene description, then a fixed sentence that
	    tells the model to cast the person from the reference image as the hero.

	    Args:
	        art_style: Style string placed first in the prompt.
	        scene_prompt: Scene description placed second.

	    Returns:
	        The three parts concatenated, with ". " after the style and after the scene.
	    """
	    hero_clause = "The hero is the person shown in the reference image, in this style and scene."
	    style_and_scene = "{}. {}. ".format(art_style, scene_prompt)
	    return style_and_scene + hero_clause


	def build_edit_prompt(art_style: str, edit_instruction: str) -> str:
	    """Compose the FLUX prompt for an edit pass over an existing scene.

	    Args:
	        art_style: Style string placed first in the prompt.
	        edit_instruction: The success/failure edit instruction placed after it.

	    Returns:
	        The style and the instruction joined by ". ".
	    """
	    return "{}. {}".format(art_style, edit_instruction)


	# ----------------------------- helpers -----------------------------

	def extract_json(text: str) -> dict:
	    """Parse a JSON object from model output, tolerating stray prose or code fences.

	    Two candidates are tried in order: the whole (stripped) text, then the span from the first
	    "{" to the last "}". A candidate is accepted only if it decodes to a dict, so valid but
	    non-object JSON (e.g. "null", "[1, 2]", "3") never leaks out to callers that expect a dict.

	    Args:
	        text: Raw model output. None or an empty string is treated as "no JSON".

	    Returns:
	        The decoded JSON object, or an empty dict when no candidate decodes to an object.
	    """
	    text = (text or "").strip()
	    candidates = [text]
	    # Fallback candidate: the outermost { ... } span (skips code fences / surrounding prose).
	    start, end = text.find("{"), text.rfind("}")
	    if start != -1 and end > start:
	        candidates.append(text[start : end + 1])

	    for candidate in candidates:
	        try:
	            parsed = json.loads(candidate)
	        except json.JSONDecodeError:
	            continue
	        # json.loads also returns lists/strings/numbers/None for valid JSON; honor the dict contract.
	        if isinstance(parsed, dict):
	            return parsed
	    return {}


	def preload_cuda_libs():
	    """Load the CUDA runtime libs (libcudart.so.12, libcublas*, ...) RTLD_GLOBAL by full path.

	    This lets the prebuilt llama.cpp CUDA wheel resolve them. They ship in the nvidia-*-cu12 pip
	    packages and inside torch/lib, but neither is on the dynamic loader's search path. No-op for
	    anything not found. Order matters: cudart before cublasLt before cublas. Needed on BOTH
	    ZeroGPU and Modal (same prebuilt cu124 wheel, same missing-loader-path problem).

	    Returns:
	        None. Libraries that are missing or fail to load are skipped silently (best effort).
	    """
	    import ctypes
	    import glob
	    import os
	    import site

	    dirs = []
	    try:
	        # torch is imported lazily so this module stays importable without it.
	        import torch
	        dirs.append(os.path.join(os.path.dirname(torch.__file__), "lib"))
	    except Exception:
	        # Deliberate best effort: torch may be absent or fail to import; the nvidia/* dirs below
	        # are still searched.
	        pass
	    site_dirs = []
	    if hasattr(site, "getsitepackages"):
	        site_dirs += site.getsitepackages()
	    # NOTE(review): this fallback is two levels above os.py, i.e. the directory that contains the
	    # stdlib folder rather than a site-packages directory — confirm that is the intended root.
	    site_dirs.append(os.path.dirname(os.path.dirname(os.__file__)))  # fallback
	    for sp in dict.fromkeys(site_dirs):
	        dirs += glob.glob(os.path.join(sp, "nvidia", "*", "lib"))

	    # De-duplicate once, preserving discovery order (torch/lib first, then nvidia/*/lib).
	    search_dirs = list(dict.fromkeys(dirs))

	    for prefix in ("libcudart", "libnvrtc", "libcublasLt", "libcublas", "libcudnn"):
	        for d in search_dirs:
	            # ".so*" matches versioned sonames such as libcudart.so.12; a bare ".so" pattern would
	            # only match an unversioned file. The "." right after the prefix keeps "libcublas"
	            # from also matching "libcublasLt", which preserves the required load order.
	            for lib in sorted(glob.glob(os.path.join(d, prefix + ".so*"))):
	                try:
	                    ctypes.CDLL(lib, mode=ctypes.RTLD_GLOBAL)
	                except OSError:
	                    # Unloadable candidate (wrong arch, missing dependency, ...): skip it.
	                    pass