File size: 8,684 Bytes
dfb8f25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6815eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dfb8f25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
"""Backend-agnostic shared source for FrogQuest's GPU work.

This module holds everything that BOTH the in-Space local path (llm.py / images.py running on
ZeroGPU) and the off-Space Modal path (modal_app.py running on a Modal GPU) need, so the two
backends can never drift: the model ids/config, the LLM system prompts, the FLUX prompt builders,
the JSON extractor, and the CUDA-lib preloader.

Hard rule: this file must stay importable on a CPU-only box with NO heavy GPU deps installed.
Only the stdlib is imported at module level; torch is imported lazily inside a function, and any
other heavy dependency (e.g. PIL) must be as well. That is what lets app.py import the local
modules on a CPU-basic Space (FROGQUEST_BACKEND=modal) without dragging in
torch/diffusers/llama_cpp. The schemas live in schema.py (also dependency-free) and are imported
directly by both paths — they are NOT duplicated here.
"""
from __future__ import annotations

import json

# ----------------------------- model ids / config (shared by both backends) -----------------------------

# Nemotron-3 Nano 4B GGUF (verified June 2026). Q8_0 (~4.3GB, near-fp16); filename is a glob that
# Llama.from_pretrained resolves to the exact file. (Hard floor Q4 — sub-4-bit degrades this arch.)
GGUF_REPO = "unsloth/NVIDIA-Nemotron-3-Nano-4B-GGUF"
GGUF_FILE = "*Q8_0*.gguf"  # glob pattern, not a literal filename (see the note above)

# FLUX.2 [klein] (verified: ungated, Apache 2.0). The "4B" is just the diffusion transformer; the
# repo also ships a large multimodal text encoder + VAE (~23GB total in bf16).
MODEL_ID = "black-forest-labs/FLUX.2-klein-4B"

# Image quality/speed knobs. klein is DISTILLED -> 4 steps is the model card's value (more doesn't
# help). 384 (a multiple of 16) matches the UI's ~380px display; resolution is the only real knob.
# NOTE(review): these values are only defined here; how each one is passed to the image pipeline
# lives in the callers (not visible in this file) — confirm there before changing them.
STEPS = 4
GUIDANCE = 4.0  # presumably the guidance scale for the FLUX call — TODO confirm in the callers
MAX_SIDE = 384  # 24 * 16

# LLM context: full 128k on a big GPU; 16k on a small one (a 128k KV cache won't fit beside FLUX).
# This module only defines the three numbers; the code that picks between them is elsewhere.
N_CTX = 131072  # 128 * 1024
N_CTX_SMALL = 16384  # 16 * 1024
LOW_VRAM_GB = 24  # at/below this, treat the GPU as "small" (T4 = 16GB, A10G/L4 = 24GB)


# ----------------------------- LLM prompts -----------------------------

# Quest-log designer: the user's to-do list -> a themed quest log, JSON only.
# The literal "{theme}" markers are placeholders; nothing in this module substitutes them.
# NOTE(review): confirm how the callers fill them (str.format vs str.replace) before adding any
# other literal braces to this text — str.format would try to interpret them.
# The trailing "/no_think" line is part of the prompt text sent to the model (presumably a switch
# that disables the model's reasoning output — confirm against the model card).
SYSTEM_PROMPT = """You are FrogQuest's quest designer. Convert the user's real to-do list into a themed text-adventure quest log and OUTPUT JSON ONLY - no prose, no markdown, no code.

Apply the "Eat That Frog" method:
- The FROG = the single most important/hardest task. Mark exactly ONE quest is_frog:true and order it FIRST.
- Break each big/multi-step goal into an ordered chain of smaller quests sharing one goal_group label; keep simple to-dos as standalone quests.
- Add 1-3 bonus self-care quests (type:"bonus") such as meditate 5 min, exercise 20 min, digital detox 1 hr. They are OPTIONAL and ENCOURAGING - never guilt-inducing.

For EVERY quest write vivid {theme}-themed, 8-bit pixel-art image instructions where the USER is the hero:
- initial_image_prompt: the hero facing the challenge (scene only - the renderer adds the user's face from a photo; do NOT describe their face).
- success_edit: edit instruction showing how the initial scene would look victorious.
- failure_edit: a FORGIVING edit instruction - the hero retreats to fight another day from the initial image. Never shaming.

Set adventure.art_style to one shared "8-bit / 16-bit pixel-art, {theme} palette" string applied to every image, and adventure.seed to a single integer for the whole adventure. xp 10-100 by effort. All status:"active", image_state:"initial". Echo the user's real wording in each quest.task.
/no_think"""

# Campaign designer: ONE long-term goal -> a themed campaign + an ORDERED chain of concrete steps.
# Like SYSTEM_PROMPT, the text carries literal "{theme}" placeholders that are filled outside this
# module (NOTE(review): confirm the substitution mechanism in the callers). Unlike SYSTEM_PROMPT,
# it instructs the model to emit every quest as type:"main" with is_frog:false, and it mentions
# optional "Research notes" that are expected below the goal in the user message.
CAMPAIGN_SYSTEM_PROMPT = """You are FrogQuest's campaign designer. The user gives ONE long-term goal (e.g. ace an exam, win a hackathon, learn a skill). Break it into 3-8 ORDERED, concrete, real-world tasks - first things first, each small enough to act on - that together achieve the goal, and present them as a themed quest chain. OUTPUT JSON ONLY - no prose, no markdown, no code.

If "Research notes" are provided below the goal, ground the tasks in them (they are summaries of real web sources). Otherwise rely on your own knowledge.

campaign.title = a short epic {theme}-themed name for this goal. campaign.art_style = one shared "8-bit / 16-bit pixel-art, {theme} palette" string. campaign.seed = a single integer for the whole campaign.

For EVERY quest write vivid {theme}-themed, 8-bit pixel-art image instructions where the USER is the hero:
- initial_image_prompt: the hero facing this step (scene only - do NOT describe their face).
- success_edit: edit instruction showing the scene victorious.
- failure_edit: a FORGIVING edit instruction - the hero retreats to try again. Never shaming.

quest.task = the real-world action in plain words. xp 10-100 by effort. All type:"main", is_frog:false, status:"active", image_state:"initial".
/no_think"""

# Frog Master chat router. Classifies one user message into a single intent and OUTPUTS JSON ONLY.
# The prompt names five intents ("forge", "add_tasks", "mark_done", "mark_couldnt", "unknown") and
# two auxiliary fields (target_task, reason). It contains no "{theme}" placeholder, so it can be
# sent as-is. Keep the intent names in sync with whatever code consumes the router's JSON
# (that consumer is not in this file).
INTENT_SYSTEM_PROMPT = """You are FrogQuest's "Frog Master" router. Read ONE user message plus a short context describing the current quest log, and classify it into EXACTLY ONE intent. OUTPUT JSON ONLY - no prose.

intent must be one of:
- "forge": the user is describing their to-do list / plans / goals for the first time (or wants a brand-new quest log). Use this when no quest log exists yet, or they clearly want to start over.
- "add_tasks": the user wants to ADD one or more new tasks/goals to the EXISTING quest log.
- "mark_done": the user says they FINISHED/completed a task. Put the task they mean in target_task (match it to one of the listed quest titles or tasks; leave empty to mean the currently selected quest).
- "mark_couldnt": the user could NOT do a task, or wants to skip/postpone it. Put the task in target_task (empty = currently selected quest) and put their explanation in reason.
- "unknown": small talk, a question, or anything that doesn't fit the above.

Only "forge" and "add_tasks" describe NEW work; if a log already exists and the user is describing more things to do, prefer "add_tasks". target_task should copy the matching quest's title or task wording when you can identify it.
/no_think"""


# ----------------------------- FLUX prompt builders -----------------------------

def build_initial_prompt(art_style: str, scene_prompt: str) -> str:
    """Compose the prompt for the first render of a quest image.

    The result is the shared art style, then the scene description, then a fixed sentence that
    tells the model to use the person in the reference image as the hero. The three parts are
    joined with ". " separators.
    """
    hero_sentence = "The hero is the person shown in the reference image, in this style and scene."
    return "{}. {}. {}".format(art_style, scene_prompt, hero_sentence)


def build_edit_prompt(art_style: str, edit_instruction: str) -> str:
    """Compose the prompt for an edit pass (success or failure variant of an existing scene).

    The shared art style comes first, followed by ". " and the edit instruction verbatim.
    """
    return "{}. {}".format(art_style, edit_instruction)


# ----------------------------- helpers -----------------------------

def extract_json(text: str) -> dict:
    """Parse a JSON object from model output, tolerating stray prose or code fences.

    Strategy:
      1. Try the whole (stripped) text as JSON.
      2. Otherwise decode the first JSON object that starts at the first "{" and ignore whatever
         follows it (closing code fence, trailing commentary, even text containing more braces).

    Args:
        text: Raw model output. None or an empty string is treated as "no output".

    Returns:
        The decoded JSON object. The result is ALWAYS a dict: {} is returned when nothing can be
        decoded, and also when the text is valid JSON that is not an object (list, string,
        number, null), so callers can safely use dict methods on the result.
    """
    text = (text or "").strip()
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        pass
    else:
        # Valid JSON that is not an object would violate the declared return type; treat it as
        # "no usable object" instead of leaking a list/None/number to the caller.
        return parsed if isinstance(parsed, dict) else {}

    start = text.find("{")
    if start == -1:
        return {}
    try:
        # raw_decode stops at the end of the first complete value, so trailing text is ignored
        # even when it contains "}" (which defeats a first-"{" .. last-"}" slice).
        parsed, _ = json.JSONDecoder().raw_decode(text, start)
    except json.JSONDecodeError:
        return {}
    # A value that starts with "{" can only decode to a dict.
    return parsed


def preload_cuda_libs():
    """Best-effort preload of the CUDA runtime shared libraries with RTLD_GLOBAL.

    The prebuilt llama.cpp CUDA wheel needs libcudart.so.12, libcublas*, etc. to be resolvable.
    They ship in the nvidia-*-cu12 pip packages and inside torch/lib, but neither location is on
    the dynamic loader's search path, so each library is opened here by full path. Needed on BOTH
    ZeroGPU and Modal (same prebuilt cu124 wheel, same missing-loader-path problem).

    Anything that is missing or fails to load is skipped silently; the function returns None.
    Order matters: cudart before cublasLt before cublas.
    """
    import ctypes
    import glob
    import os
    import site

    search_dirs = []

    # torch bundles its own copies under <torch package>/lib; torch is optional, so any failure
    # while importing or locating it is ignored.
    try:
        import torch
        search_dirs.append(os.path.join(os.path.dirname(torch.__file__), "lib"))
    except Exception:
        pass

    # The nvidia-*-cu12 wheels unpack to <root>/nvidia/<package>/lib.
    roots = []
    if hasattr(site, "getsitepackages"):
        roots.extend(site.getsitepackages())
    # NOTE(review): labelled "fallback" in the original; this evaluates to the directory above
    # the stdlib directory (two levels up from os.py), which is not site-packages on a standard
    # layout — confirm the intended path.
    roots.append(os.path.dirname(os.path.dirname(os.__file__)))
    for root in dict.fromkeys(roots):  # de-duplicate while keeping order
        search_dirs.extend(glob.glob(os.path.join(root, "nvidia", "*", "lib")))

    unique_dirs = list(dict.fromkeys(search_dirs))

    # Outer loop is the library family so that the dependency order holds across directories.
    load_order = ("libcudart", "libnvrtc", "libcublasLt", "libcublas", "libcudnn")
    for stem in load_order:
        for directory in unique_dirs:
            pattern = os.path.join(directory, stem + "*.so*")
            for so_path in sorted(glob.glob(pattern)):
                try:
                    ctypes.CDLL(so_path, mode=ctypes.RTLD_GLOBAL)
                except OSError:
                    # Unloadable or incompatible library: skip it, this is best-effort.
                    continue