Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
# app.py
|
| 2 |
# Gradio app exposing full Corpus (coarse) and Capoera (topic/mood) selections
|
| 3 |
-
|
| 4 |
import json
|
| 5 |
import gradio as gr
|
| 6 |
import torch
|
|
|
|
| 7 |
from tokenizers import Tokenizer
|
| 8 |
from huggingface_hub import hf_hub_download
|
| 9 |
from safetensors.torch import load_file as load_safetensors
|
|
@@ -59,7 +60,7 @@ CONFIG = {
|
|
| 59 |
},
|
| 60 |
}
|
| 61 |
|
| 62 |
-
|
| 63 |
infer: BeeperRoseGPT | None = None
|
| 64 |
tok: Tokenizer | None = None
|
| 65 |
current_version: str | None = None
|
|
@@ -70,6 +71,7 @@ CORPUS_INDEX: dict[str, int] = {}
|
|
| 70 |
TOPIC_CHOICES: list[str] = []
|
| 71 |
MOOD_CHOICES: list[str] = []
|
| 72 |
|
|
|
|
| 73 |
def _mood_labels(mood_bins: int) -> list[str]:
|
| 74 |
center = mood_bins // 2
|
| 75 |
labels = []
|
|
@@ -83,7 +85,6 @@ def _mood_labels(mood_bins: int) -> list[str]:
|
|
| 83 |
def _build_choices_from_config(repo_id: str, coarse_C: int, topic_C: int, mood_C: int):
|
| 84 |
global CORPUS_CHOICES, CORPUS_INDEX, TOPIC_CHOICES, MOOD_CHOICES
|
| 85 |
CORPUS_CHOICES, CORPUS_INDEX = [], {}
|
| 86 |
-
# Try to load training config.json (exported alongside weights)
|
| 87 |
names = []
|
| 88 |
try:
|
| 89 |
cfg_path = hf_hub_download(repo_id, "config.json")
|
|
@@ -93,7 +94,6 @@ def _build_choices_from_config(repo_id: str, coarse_C: int, topic_C: int, mood_C
|
|
| 93 |
if isinstance(alive, list) and all(isinstance(e, dict) for e in alive):
|
| 94 |
names = [str(e.get("name", f"Class {i}")) for i, e in enumerate(alive)]
|
| 95 |
elif isinstance(train_cfg.get("corpus"), list):
|
| 96 |
-
# fallback: use corpus list if length matches bank size
|
| 97 |
maybe = [str(e.get("name", f"Class {i}")) for i, e in enumerate(train_cfg["corpus"])]
|
| 98 |
if len(maybe) == coarse_C:
|
| 99 |
names = maybe
|
|
@@ -108,6 +108,7 @@ def _build_choices_from_config(repo_id: str, coarse_C: int, topic_C: int, mood_C
|
|
| 108 |
TOPIC_CHOICES = [str(i) for i in range(topic_C)]
|
| 109 |
MOOD_CHOICES = _mood_labels(mood_C)
|
| 110 |
|
|
|
|
| 111 |
def load_model_version(version_name: str) -> str:
|
| 112 |
global infer, tok, current_version, CORPUS_CHOICES, TOPIC_CHOICES, MOOD_CHOICES
|
| 113 |
if current_version == version_name and infer is not None and tok is not None:
|
|
@@ -119,8 +120,8 @@ def load_model_version(version_name: str) -> str:
|
|
| 119 |
tokenizer_file = hf_hub_download(info["repo_id"], "tokenizer.json")
|
| 120 |
|
| 121 |
state = load_safetensors(model_file, device="cpu")
|
| 122 |
-
m = BeeperRoseGPT(CONFIG)
|
| 123 |
-
prepare_model_for_state_dict(m, state, device=
|
| 124 |
|
| 125 |
try:
|
| 126 |
missing, unexpected = m.load_state_dict(state, strict=True)
|
|
@@ -134,7 +135,6 @@ def load_model_version(version_name: str) -> str:
|
|
| 134 |
|
| 135 |
infer, tok, current_version = m, t, version_name
|
| 136 |
|
| 137 |
-
# Build UI choices from bank sizes + training config (for names)
|
| 138 |
coarse_C = infer.penta_coarse.size(0) if infer.penta_coarse is not None else 0
|
| 139 |
topic_C = infer.penta_medium.size(0) if infer.penta_medium is not None else 512
|
| 140 |
mood_C = infer.penta_fine.size(0) if infer.penta_fine is not None else 7
|
|
@@ -156,12 +156,42 @@ except Exception:
|
|
| 156 |
status = load_model_version("Beeper v3 (Multi-Concept)")
|
| 157 |
print(status)
|
| 158 |
|
|
|
|
| 159 |
def _parse_selected_indices(values: list[str] | None, mapping: dict[str,int] | None = None) -> list[int] | None:
|
| 160 |
if not values: return None
|
| 161 |
if mapping is None:
|
| 162 |
return [int(v.split()[0]) if isinstance(v, str) else int(v) for v in values]
|
| 163 |
return [mapping[v] for v in values if v in mapping]
|
| 164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
def beeper_reply(message, history, model_version, temperature, top_k, top_p, max_new_tokens,
|
| 166 |
corpus_selected, topic_selected, mood_selected):
|
| 167 |
global infer, tok, current_version
|
|
@@ -173,12 +203,14 @@ def beeper_reply(message, history, model_version, temperature, top_k, top_p, max
|
|
| 173 |
if infer is None or tok is None:
|
| 174 |
return "⚠️ Model not loaded. Please select a version and try again."
|
| 175 |
|
| 176 |
-
# Build runtime pull config with user selections
|
| 177 |
rt = dict(CONFIG.get("runtime_pentachora", {}))
|
| 178 |
-
|
| 179 |
-
rt["
|
| 180 |
-
rt["
|
| 181 |
-
rt["
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
m = (message or "").strip()
|
| 184 |
if "?" in m: prompt = f"Q: {m}\nA:"
|
|
@@ -186,21 +218,14 @@ def beeper_reply(message, history, model_version, temperature, top_k, top_p, max
|
|
| 186 |
elif "story" in m.lower(): prompt = "Once upon a time, there was a robot. "
|
| 187 |
else: prompt = m + ". "
|
| 188 |
|
| 189 |
-
out =
|
| 190 |
-
model=infer, tok=tok, cfg=CONFIG, prompt=prompt,
|
| 191 |
-
max_new_tokens=int(max_new_tokens),
|
| 192 |
-
temperature=float(temperature) if temperature is not None else None,
|
| 193 |
-
top_k=int(top_k) if top_k is not None else None,
|
| 194 |
-
top_p=float(top_p) if top_p is not None else None,
|
| 195 |
-
repetition_penalty=1.10, presence_penalty=0.8, frequency_penalty=0.1,
|
| 196 |
-
device=device, detokenize=True, runtime_cfg=rt,
|
| 197 |
-
)
|
| 198 |
|
| 199 |
if out.startswith(prompt): out = out[len(prompt):]
|
| 200 |
out = out.replace("Q:","").replace("A:","").strip()
|
| 201 |
if out and out[-1] not in ".!?”\"'": out += "."
|
| 202 |
return out[:200]
|
| 203 |
|
|
|
|
| 204 |
# ---------------- UI ----------------
|
| 205 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 206 |
gr.Markdown("# 🤖 Beeper — Corpus & Capoera–aware Chat")
|
|
@@ -209,13 +234,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 209 |
with gr.Column(scale=3):
|
| 210 |
model_dropdown = gr.Dropdown(
|
| 211 |
choices=list(MODEL_VERSIONS.keys()),
|
| 212 |
-
value="Beeper
|
| 213 |
label="Select Beeper Version"
|
| 214 |
)
|
| 215 |
with gr.Column(scale=7):
|
| 216 |
-
version_info = gr.Markdown("**Current:** " + MODEL_VERSIONS["Beeper
|
| 217 |
|
| 218 |
-
# Runtime pentachora selectors
|
| 219 |
with gr.Row():
|
| 220 |
with gr.Column():
|
| 221 |
corpus_select = gr.Dropdown(choices=CORPUS_CHOICES, multiselect=True, label="Corpus (Coarse classes)")
|
|
@@ -241,11 +265,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 241 |
submit = gr.Button("Send", variant="primary")
|
| 242 |
clear = gr.Button("Clear")
|
| 243 |
|
| 244 |
-
# On version change: load model + update selectors
|
| 245 |
def on_change_version(version_name: str):
|
| 246 |
status = load_model_version(version_name)
|
| 247 |
info = f"**Current:** {MODEL_VERSIONS[version_name]['description']} \n{status}"
|
| 248 |
-
# refresh selector choices
|
| 249 |
return (
|
| 250 |
info,
|
| 251 |
gr.update(choices=CORPUS_CHOICES, value=[]),
|
|
@@ -271,9 +293,16 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 271 |
corpus_select, topic_select, mood_select]
|
| 272 |
outputs_all = [msg, chatbot]
|
| 273 |
|
| 274 |
-
msg.submit(respond, inputs_all, outputs_all
|
| 275 |
-
|
|
|
|
|
|
|
| 276 |
clear.click(lambda: None, None, chatbot, queue=False)
|
| 277 |
|
| 278 |
if __name__ == "__main__":
|
| 279 |
-
demo.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# app.py
|
| 2 |
# Gradio app exposing full Corpus (coarse) and Capoera (topic/mood) selections
|
| 3 |
+
import os, gc
|
| 4 |
import json
|
| 5 |
import gradio as gr
|
| 6 |
import torch
|
| 7 |
+
import spaces # NEW: for ZeroGPU
|
| 8 |
from tokenizers import Tokenizer
|
| 9 |
from huggingface_hub import hf_hub_download
|
| 10 |
from safetensors.torch import load_file as load_safetensors
|
|
|
|
| 60 |
},
|
| 61 |
}
|
| 62 |
|
| 63 |
+
# no global device pinning — keep model on CPU until ZeroGPU allocates GPU
|
| 64 |
infer: BeeperRoseGPT | None = None
|
| 65 |
tok: Tokenizer | None = None
|
| 66 |
current_version: str | None = None
|
|
|
|
| 71 |
TOPIC_CHOICES: list[str] = []
|
| 72 |
MOOD_CHOICES: list[str] = []
|
| 73 |
|
| 74 |
+
|
| 75 |
def _mood_labels(mood_bins: int) -> list[str]:
|
| 76 |
center = mood_bins // 2
|
| 77 |
labels = []
|
|
|
|
| 85 |
def _build_choices_from_config(repo_id: str, coarse_C: int, topic_C: int, mood_C: int):
|
| 86 |
global CORPUS_CHOICES, CORPUS_INDEX, TOPIC_CHOICES, MOOD_CHOICES
|
| 87 |
CORPUS_CHOICES, CORPUS_INDEX = [], {}
|
|
|
|
| 88 |
names = []
|
| 89 |
try:
|
| 90 |
cfg_path = hf_hub_download(repo_id, "config.json")
|
|
|
|
| 94 |
if isinstance(alive, list) and all(isinstance(e, dict) for e in alive):
|
| 95 |
names = [str(e.get("name", f"Class {i}")) for i, e in enumerate(alive)]
|
| 96 |
elif isinstance(train_cfg.get("corpus"), list):
|
|
|
|
| 97 |
maybe = [str(e.get("name", f"Class {i}")) for i, e in enumerate(train_cfg["corpus"])]
|
| 98 |
if len(maybe) == coarse_C:
|
| 99 |
names = maybe
|
|
|
|
| 108 |
TOPIC_CHOICES = [str(i) for i in range(topic_C)]
|
| 109 |
MOOD_CHOICES = _mood_labels(mood_C)
|
| 110 |
|
| 111 |
+
|
| 112 |
def load_model_version(version_name: str) -> str:
|
| 113 |
global infer, tok, current_version, CORPUS_CHOICES, TOPIC_CHOICES, MOOD_CHOICES
|
| 114 |
if current_version == version_name and infer is not None and tok is not None:
|
|
|
|
| 120 |
tokenizer_file = hf_hub_download(info["repo_id"], "tokenizer.json")
|
| 121 |
|
| 122 |
state = load_safetensors(model_file, device="cpu")
|
| 123 |
+
m = BeeperRoseGPT(CONFIG) # keep on CPU
|
| 124 |
+
prepare_model_for_state_dict(m, state, device="cpu")
|
| 125 |
|
| 126 |
try:
|
| 127 |
missing, unexpected = m.load_state_dict(state, strict=True)
|
|
|
|
| 135 |
|
| 136 |
infer, tok, current_version = m, t, version_name
|
| 137 |
|
|
|
|
| 138 |
coarse_C = infer.penta_coarse.size(0) if infer.penta_coarse is not None else 0
|
| 139 |
topic_C = infer.penta_medium.size(0) if infer.penta_medium is not None else 512
|
| 140 |
mood_C = infer.penta_fine.size(0) if infer.penta_fine is not None else 7
|
|
|
|
| 156 |
status = load_model_version("Beeper v3 (Multi-Concept)")
|
| 157 |
print(status)
|
| 158 |
|
| 159 |
+
|
| 160 |
def _parse_selected_indices(values: list[str] | None, mapping: dict[str,int] | None = None) -> list[int] | None:
|
| 161 |
if not values: return None
|
| 162 |
if mapping is None:
|
| 163 |
return [int(v.split()[0]) if isinstance(v, str) else int(v) for v in values]
|
| 164 |
return [mapping[v] for v in values if v in mapping]
|
| 165 |
|
| 166 |
+
|
| 167 |
+
@spaces.GPU(duration=300)
def beeper_infer(prompt: str, runtime_cfg: dict) -> str:
    """Run one generation call, using the GPU only for the duration of the call.

    The globally loaded model is moved to CUDA when it is available, ``generate``
    is invoked, and the model is moved back to the CPU afterwards even if
    generation raises.

    Args:
        prompt: Fully formatted prompt text handed to ``generate``.
        runtime_cfg: Runtime selection config forwarded to ``generate``. It also
            carries the sampling options under the private keys
            ``_max_new_tokens`` (required), ``_temperature``, ``_top_k`` and
            ``_top_p`` (optional; ``None`` is passed through as ``None``).
            The dict is not modified; the private keys are removed from a copy
            before that copy is forwarded.

    Returns:
        Whatever ``generate`` returns with ``detokenize=True``.

    Raises:
        RuntimeError: If no model/tokenizer has been loaded yet.
        KeyError: If ``_max_new_tokens`` is missing from ``runtime_cfg``.
    """
    global infer, tok
    if infer is None or tok is None:
        raise RuntimeError("beeper_infer called before a model version was loaded")

    dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Work on a copy so the caller's dict survives the call untouched, and pop
    # every private key unconditionally so none of them reaches `generate`
    # inside runtime_cfg (the previous get-then-pop left None-valued keys behind).
    rt = dict(runtime_cfg)
    max_new_tokens = int(rt.pop("_max_new_tokens"))
    temperature = rt.pop("_temperature", None)
    top_k = rt.pop("_top_k", None)
    top_p = rt.pop("_top_p", None)

    if dev.type == "cuda" and next(infer.parameters()).device.type != "cuda":
        infer.to(dev)
        torch.cuda.empty_cache()

    try:
        return generate(
            model=infer, tok=tok, cfg=CONFIG, prompt=prompt,
            max_new_tokens=max_new_tokens,
            temperature=float(temperature) if temperature is not None else None,
            top_k=int(top_k) if top_k is not None else None,
            top_p=float(top_p) if top_p is not None else None,
            repetition_penalty=1.10, presence_penalty=0.8, frequency_penalty=0.1,
            device=dev, detokenize=True, runtime_cfg=rt,
        )
    finally:
        # Always hand the model back to the CPU so nothing stays resident on
        # the GPU after this call returns or fails.
        if dev.type == "cuda":
            infer.to("cpu")
            torch.cuda.empty_cache()
            gc.collect()
|
| 193 |
+
|
| 194 |
+
|
| 195 |
def beeper_reply(message, history, model_version, temperature, top_k, top_p, max_new_tokens,
|
| 196 |
corpus_selected, topic_selected, mood_selected):
|
| 197 |
global infer, tok, current_version
|
|
|
|
| 203 |
if infer is None or tok is None:
|
| 204 |
return "⚠️ Model not loaded. Please select a version and try again."
|
| 205 |
|
|
|
|
| 206 |
rt = dict(CONFIG.get("runtime_pentachora", {}))
|
| 207 |
+
rt["coarse_select"] = _parse_selected_indices(corpus_selected, CORPUS_INDEX)
|
| 208 |
+
rt["topic_select"] = _parse_selected_indices(topic_selected, None)
|
| 209 |
+
rt["mood_select"] = _parse_selected_indices(mood_selected, None)
|
| 210 |
+
rt["_temperature"] = temperature
|
| 211 |
+
rt["_top_k"] = top_k
|
| 212 |
+
rt["_top_p"] = top_p
|
| 213 |
+
rt["_max_new_tokens"]= max_new_tokens
|
| 214 |
|
| 215 |
m = (message or "").strip()
|
| 216 |
if "?" in m: prompt = f"Q: {m}\nA:"
|
|
|
|
| 218 |
elif "story" in m.lower(): prompt = "Once upon a time, there was a robot. "
|
| 219 |
else: prompt = m + ". "
|
| 220 |
|
| 221 |
+
out = beeper_infer(prompt, rt)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
|
| 223 |
if out.startswith(prompt): out = out[len(prompt):]
|
| 224 |
out = out.replace("Q:","").replace("A:","").strip()
|
| 225 |
if out and out[-1] not in ".!?”\"'": out += "."
|
| 226 |
return out[:200]
|
| 227 |
|
| 228 |
+
|
| 229 |
# ---------------- UI ----------------
|
| 230 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 231 |
gr.Markdown("# 🤖 Beeper — Corpus & Capoera–aware Chat")
|
|
|
|
| 234 |
with gr.Column(scale=3):
|
| 235 |
model_dropdown = gr.Dropdown(
|
| 236 |
choices=list(MODEL_VERSIONS.keys()),
|
| 237 |
+
value="Beeper v4 (Advanced)",
|
| 238 |
label="Select Beeper Version"
|
| 239 |
)
|
| 240 |
with gr.Column(scale=7):
|
| 241 |
+
version_info = gr.Markdown("**Current:** " + MODEL_VERSIONS["Beeper v4 (Advanced)"]["description"])
|
| 242 |
|
|
|
|
| 243 |
with gr.Row():
|
| 244 |
with gr.Column():
|
| 245 |
corpus_select = gr.Dropdown(choices=CORPUS_CHOICES, multiselect=True, label="Corpus (Coarse classes)")
|
|
|
|
| 265 |
submit = gr.Button("Send", variant="primary")
|
| 266 |
clear = gr.Button("Clear")
|
| 267 |
|
|
|
|
| 268 |
def on_change_version(version_name: str):
|
| 269 |
status = load_model_version(version_name)
|
| 270 |
info = f"**Current:** {MODEL_VERSIONS[version_name]['description']} \n{status}"
|
|
|
|
| 271 |
return (
|
| 272 |
info,
|
| 273 |
gr.update(choices=CORPUS_CHOICES, value=[]),
|
|
|
|
| 293 |
corpus_select, topic_select, mood_select]
|
| 294 |
outputs_all = [msg, chatbot]
|
| 295 |
|
| 296 |
+
msg.submit(respond, inputs_all, outputs_all,
|
| 297 |
+
concurrency_id="infer", concurrency_limit="default")
|
| 298 |
+
submit.click(respond, inputs_all, outputs_all,
|
| 299 |
+
concurrency_id="infer", concurrency_limit="default")
|
| 300 |
clear.click(lambda: None, None, chatbot, queue=False)
|
| 301 |
|
| 302 |
if __name__ == "__main__":
    # Queue settings for the app; the submit/click listeners are registered
    # with concurrency_limit="default" under the shared "infer" concurrency id.
    # NOTE(review): presumably they inherit default_concurrency_limit below,
    # i.e. one inference at a time -- confirm against the Gradio queue docs.
    queue_options = {
        "max_size": 256,
        "default_concurrency_limit": 1,
        "status_update_rate": "auto",
        "api_open": False,
    }
    demo.queue(**queue_options).launch()
|