# app.py
# Gradio app exposing full Corpus (coarse) and Capoera (topic/mood) selections
import os, gc
import json
import gradio as gr
import torch
import spaces  # NEW: for ZeroGPU
from tokenizers import Tokenizer
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file as load_safetensors

from beeper_model import BeeperRoseGPT, generate, prepare_model_for_state_dict

MODEL_VERSIONS = {
    "Beeper v4 (Advanced)": {
        "repo_id": "AbstractPhil/beeper-rose-v4",
        "model_file": "beeper_final.safetensors",
        "description": "Beeper v4 trained on nearly 40% of the full corpus - the most capable version currently.",
    },
    "Beeper v3 (Multi-Concept)": {
        "repo_id": "AbstractPhil/beeper-rose-v3",
        "model_file": "beeper_final.safetensors",
        "description": "Beeper v3 with 30+ epochs including reasoning, math, and ethics",
    },
    "Beeper v2 (Extended)": {
        "repo_id": "AbstractPhil/beeper-rose-v2",
        "model_file": "beeper_final.safetensors",
        "description": "Beeper v2 with extended training (~15 epochs)",
    },
    "Beeper v1 (Original)": {
        "repo_id": "AbstractPhil/beeper-rose-tinystories-6l-512d-ctx512",
        "model_file": "beeper_rose.safetensors",
        "description": "Original Beeper trained on TinyStories",
    },
}
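
# Model/sampler configuration shared by every version above. The architecture
# fields (context, vocab_size, dim, n_heads, n_layers, mlp_ratio) must match
# the loaded checkpoint; the sampling fields are defaults the UI sliders can
# override per request.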
CONFIG = {
    "context": 512,
    "vocab_size": 8192,
    "dim": 512,
    "n_heads": 8,
    "n_layers": 6,
    "mlp_ratio": 4.0,
    "temperature": 0.9,
    "top_k": 40,
    "top_p": 0.9,
    "repetition_penalty": 1.10,
    "presence_penalty": 0.6,
    "frequency_penalty": 0.0,
    "resid_dropout": 0.1,
    "dropout": 0.0,
    "grad_checkpoint": False,
    "runtime_pentachora": {
        "enable": True,
        "pool": "mean",
        "temp": 0.10,
        "coarse_alpha": 0.25,
        "topic_alpha": 0.15,
        "mood_alpha": 0.10,
    },
}
# No global device pinning: keep the model on CPU until ZeroGPU allocates a GPU.
infer: BeeperRoseGPT | None = None
tok: Tokenizer | None = None
current_version: str | None = None

# Metadata for selectors
CORPUS_CHOICES: list[str] = []
CORPUS_INDEX: dict[str, int] = {}
TOPIC_CHOICES: list[str] = []
MOOD_CHOICES: list[str] = []
def _mood_labels(mood_bins: int) -> list[str]:
    center = mood_bins // 2
    labels = []
    for i in range(mood_bins):
        v = i - center
        name = {-3: "Very Negative", -2: "Negative", -1: "Slightly Negative",
                0: "Neutral", 1: "Slightly Positive", 2: "Positive",
                3: "Very Positive"}.get(v, f"Valence {v:+d}")
        labels.append(f"{i} ({name} {v:+d})")
    return labels
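
# Populate the selector choices for a repo. Coarse class names are read from
# the repo's config.json when available ("_alive_entries" or "corpus" lists);
# otherwise generic "Class i" labels are used. Topics and moods are indices.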
def _build_choices_from_config(repo_id: str, coarse_C: int, topic_C: int, mood_C: int):
    global CORPUS_CHOICES, CORPUS_INDEX, TOPIC_CHOICES, MOOD_CHOICES
    CORPUS_CHOICES, CORPUS_INDEX = [], {}
    names = []
    try:
        cfg_path = hf_hub_download(repo_id, "config.json")
        with open(cfg_path, "r", encoding="utf-8") as f:
            train_cfg = json.load(f)
        alive = train_cfg.get("_alive_entries")
        if isinstance(alive, list) and all(isinstance(e, dict) for e in alive):
            names = [str(e.get("name", f"Class {i}")) for i, e in enumerate(alive)]
        elif isinstance(train_cfg.get("corpus"), list):
            maybe = [str(e.get("name", f"Class {i}")) for i, e in enumerate(train_cfg["corpus"])]
            if len(maybe) == coarse_C:
                names = maybe
    except Exception:
        names = []
    if len(names) != coarse_C:
        names = [f"Class {i}" for i in range(coarse_C)]
    CORPUS_CHOICES = names
    CORPUS_INDEX = {name: i for i, name in enumerate(names)}
    TOPIC_CHOICES = [str(i) for i in range(topic_C)]
    MOOD_CHOICES = _mood_labels(mood_C)
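
# Download weights + tokenizer from the Hub, load on CPU (ZeroGPU-friendly),
# and rebuild the selector metadata. Returns a human-readable status string
# that the UI surfaces next to the version dropdown.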
def load_model_version(version_name: str) -> str:
    global infer, tok, current_version, CORPUS_CHOICES, TOPIC_CHOICES, MOOD_CHOICES
    if current_version == version_name and infer is not None and tok is not None:
        return f"Already loaded: {version_name}"
    info = MODEL_VERSIONS[version_name]
    try:
        model_file = hf_hub_download(info["repo_id"], info["model_file"])
        tokenizer_file = hf_hub_download(info["repo_id"], "tokenizer.json")
        state = load_safetensors(model_file, device="cpu")

        m = BeeperRoseGPT(CONFIG)  # keep on CPU
        prepare_model_for_state_dict(m, state, device="cpu")
        try:
            missing, unexpected = m.load_state_dict(state, strict=True)
            _msg = f"strict load ok | missing={len(missing)} unexpected={len(unexpected)}"
        except Exception as e:
            _msg = f"strict load failed ({e}); non-strict fallback"
            m.load_state_dict(state, strict=False)
        m.eval()

        t = Tokenizer.from_file(tokenizer_file)
        infer, tok, current_version = m, t, version_name

        coarse_C = infer.penta_coarse.size(0) if infer.penta_coarse is not None else 0
        topic_C = infer.penta_medium.size(0) if infer.penta_medium is not None else 512
        mood_C = infer.penta_fine.size(0) if infer.penta_fine is not None else 7
        _build_choices_from_config(info["repo_id"], coarse_C, topic_C, mood_C)

        return f"Successfully loaded: {version_name} ({_msg})"
    except Exception as e:
        infer = None; tok = None; current_version = None
        CORPUS_CHOICES, TOPIC_CHOICES, MOOD_CHOICES = [], [], []
        return f"Error loading {version_name}: {str(e)}"
# Initial load: prefer v4, fall back to v3.
try:
    status = load_model_version("Beeper v4 (Advanced)")
    if "Error" in status:
        print(status)
        status = load_model_version("Beeper v3 (Multi-Concept)")
except Exception:
    status = load_model_version("Beeper v3 (Multi-Concept)")
print(status)
def _parse_selected_indices(values: list[str] | None, mapping: dict[str, int] | None = None) -> list[int] | None:
    if not values: return None
    if mapping is None:
        # Labels like "3 (Neutral +0)" or plain "17": take the leading integer.
        return [int(v.split()[0]) if isinstance(v, str) else int(v) for v in values]
    return [mapping[v] for v in values if v in mapping]
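
# ZeroGPU pattern: the model lives on CPU between requests; each call moves it
# to the GPU (when one is granted), generates, and moves it back in `finally`
# so the shared GPU can be released promptly.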
@spaces.GPU  # request a GPU only for the duration of this call
def beeper_infer(prompt: str, runtime_cfg: dict) -> str:
    """ZeroGPU: allocate GPU only here, move model to GPU for inference."""
    global infer, tok
    dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if dev.type == "cuda" and next(infer.parameters()).device.type != "cuda":
        infer.to(dev)
        torch.cuda.empty_cache()
    # Pop the UI overrides so only pentachora settings remain in runtime_cfg.
    max_new_tokens = int(runtime_cfg.pop("_max_new_tokens"))
    temperature = runtime_cfg.pop("_temperature", None)
    top_k = runtime_cfg.pop("_top_k", None)
    top_p = runtime_cfg.pop("_top_p", None)
    try:
        out = generate(
            model=infer, tok=tok, cfg=CONFIG, prompt=prompt,
            max_new_tokens=max_new_tokens,
            temperature=float(temperature) if temperature is not None else None,
            top_k=int(top_k) if top_k is not None else None,
            top_p=float(top_p) if top_p is not None else None,
            repetition_penalty=1.10, presence_penalty=0.8, frequency_penalty=0.1,
            device=dev, detokenize=True, runtime_cfg=runtime_cfg,
        )
        return out
    finally:
        if dev.type == "cuda":
            infer.to("cpu")
            torch.cuda.empty_cache()
        gc.collect()
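
# Chat-side wrapper: reloads weights if the dropdown changed, packs the
# pentachora runtime config plus sampling overrides, and applies simple prompt
# templates (Q/A for questions, greeting and story openers) before generating.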
def beeper_reply(message, history, model_version, temperature, top_k, top_p, max_new_tokens,
                 corpus_selected, topic_selected, mood_selected):
    global infer, tok, current_version
    if model_version != current_version:
        s = load_model_version(model_version)
        if "Error" in s:
            return f"⚠️ {s}"
    if infer is None or tok is None:
        return "⚠️ Model not loaded. Please select a version and try again."

    rt = dict(CONFIG.get("runtime_pentachora", {}))
    rt["coarse_select"] = _parse_selected_indices(corpus_selected, CORPUS_INDEX)
    rt["topic_select"] = _parse_selected_indices(topic_selected, None)
    rt["mood_select"] = _parse_selected_indices(mood_selected, None)
    rt["_temperature"] = temperature
    rt["_top_k"] = top_k
    rt["_top_p"] = top_p
    rt["_max_new_tokens"] = max_new_tokens

    m = (message or "").strip()
    if "?" in m: prompt = f"Q: {m}\nA:"
    elif m.lower() in {"hi", "hello", "hey"}: prompt = 'The little robot said hello. She said, "'
    elif "story" in m.lower(): prompt = "Once upon a time, there was a robot. "
    else: prompt = m + ". "

    out = beeper_infer(prompt, rt)
    if out.startswith(prompt): out = out[len(prompt):]
    out = out.replace("Q:", "").replace("A:", "").strip()
    if out and out[-1] not in ".!?”\"'": out += "."
    return out[:200]
# ---------------- UI ----------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Beeper — Corpus & Capoera–aware Chat")

    with gr.Row():
        with gr.Column(scale=3):
            model_dropdown = gr.Dropdown(
                choices=list(MODEL_VERSIONS.keys()),
                value="Beeper v4 (Advanced)",
                label="Select Beeper Version",
            )
        with gr.Column(scale=7):
            version_info = gr.Markdown("**Current:** " + MODEL_VERSIONS["Beeper v4 (Advanced)"]["description"])

    with gr.Row():
        with gr.Column():
            corpus_select = gr.Dropdown(choices=CORPUS_CHOICES, multiselect=True, label="Corpus (Coarse classes)")
        with gr.Column():
            topic_select = gr.Dropdown(choices=TOPIC_CHOICES, multiselect=True, label="Capoera Topics (IDs)")
        with gr.Column():
            mood_select = gr.Dropdown(choices=MOOD_CHOICES, multiselect=True, label="Capoera Moods (valence)")

    chatbot = gr.Chatbot(label="Chat with Beeper", height=420)
    msg = gr.Textbox(label="Message", placeholder="Type your message here...")

    with gr.Row():
        with gr.Column(scale=2):
            temperature_slider = gr.Slider(0.1, 1.5, value=0.9, step=0.1, label="Temperature")
        with gr.Column(scale=2):
            top_k_slider = gr.Slider(1, 100, value=40, step=1, label="Top-k")
        with gr.Column(scale=2):
            top_p_slider = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
        with gr.Column(scale=2):
            max_new_tokens_slider = gr.Slider(20, 512, value=128, step=1, label="Max new tokens")

    with gr.Row():
        submit = gr.Button("Send", variant="primary")
        clear = gr.Button("Clear")

    def on_change_version(version_name: str):
        status = load_model_version(version_name)
        # Two trailing spaces force a Markdown line break before the status.
        info = f"**Current:** {MODEL_VERSIONS[version_name]['description']}  \n{status}"
        return (
            info,
            gr.update(choices=CORPUS_CHOICES, value=[]),
            gr.update(choices=TOPIC_CHOICES, value=[]),
            gr.update(choices=MOOD_CHOICES, value=[]),
        )

    model_dropdown.change(
        on_change_version,
        inputs=[model_dropdown],
        outputs=[version_info, corpus_select, topic_select, mood_select],
    )

    def respond(message, chat_history, model_version, temperature, top_k, top_p, max_new_tokens,
                corpus_selected, topic_selected, mood_selected):
        if chat_history is None: chat_history = []
        resp = beeper_reply(message, chat_history, model_version, temperature, top_k, top_p, max_new_tokens,
                            corpus_selected, topic_selected, mood_selected)
        chat_history.append((message, resp))
        return "", chat_history

    inputs_all = [msg, chatbot, model_dropdown, temperature_slider, top_k_slider, top_p_slider, max_new_tokens_slider,
                  corpus_select, topic_select, mood_select]
    outputs_all = [msg, chatbot]

    msg.submit(respond, inputs_all, outputs_all,
               concurrency_id="infer", concurrency_limit="default")
    submit.click(respond, inputs_all, outputs_all,
                 concurrency_id="infer", concurrency_limit="default")
    clear.click(lambda: None, None, chatbot, queue=False)
if __name__ == "__main__":
    demo.queue(
        max_size=256,
        default_concurrency_limit=1,
        status_update_rate="auto",
        api_open=False,
    ).launch()
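
# Local usage note (assumption: running outside HF Spaces): `python app.py`
# serves the queued demo; without CUDA, beeper_infer simply stays on CPU.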