Spaces:
Running on Zero
Running on Zero
| """ | |
Gemma Explorer — Gradio app to explore and chat with the full Gemma model family.
| Powered by Google DeepMind models on ZeroGPU (NVIDIA H200). | |
| """ | |
| import os | |
| import gc | |
| import json | |
| import datetime | |
| import html as _html | |
| import gradio as gr | |
| import torch | |
| import spaces | |
| from transformers import ( | |
| AutoTokenizer, | |
| AutoProcessor, | |
| AutoModelForCausalLM, | |
| TextIteratorStreamer, | |
| ) | |
| from threading import Thread | |
| from PIL import Image | |
| import numpy as np | |
| from models_data import MODELS, FAMILIES, get_models_by_family | |
# Log in to the Hugging Face Hub only when HF_TOKEN is set in the environment.
_HF_TOKEN = os.environ.get("HF_TOKEN")
if _HF_TOKEN:
    from huggingface_hub import login

    login(token=_HF_TOKEN, add_to_git_credential=False)

# torchvision is installed on demand with pip when the import fails.
try:
    import torchvision  # noqa: F401
except ImportError:
    import subprocess
    import sys

    subprocess.run(
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            "torchvision",
            "-q",
            "--no-input",
        ],
        check=True,
    )

# AutoModelForMultimodalLM is optional: when the installed transformers build
# does not export it, the name is bound to None and the flag is set to False.
try:
    from transformers import AutoModelForMultimodalLM
except ImportError:
    AutoModelForMultimodalLM = None
    _HAS_MULTIMODAL = False
else:
    _HAS_MULTIMODAL = True
# ── Model state ───────────────────────────────────────────────────────────────
# Module-level slots describing the single model that is currently loaded.
_model = None        # model object, or None when nothing is loaded
_processor = None    # processor/tokenizer paired with _model
_current_id = None   # MODELS key of the loaded model


def _purge_model():
    """Drop the loaded model, its processor and its id, then run the GC.

    Only Python references are released here; no CUDA call is made in this
    function.
    """
    global _model, _processor, _current_id
    # Rebinding to None releases the previous objects exactly like an
    # explicit ``del`` followed by reassignment would.
    _model, _processor, _current_id = None, None, None
    gc.collect()
def _load_weights(model_id: str):
    """Load the weights and processor for ``model_id`` into the module state.

    The previously loaded model is always purged before the new one is
    loaded. The new processor and model are built in local variables and
    only committed to the module globals once everything succeeded, so a
    failed load leaves ``_model``, ``_processor`` and ``_current_id`` all
    set to ``None`` instead of a half-initialised mix.

    Args:
        model_id: Key into ``MODELS``; also passed to ``from_pretrained``.

    Raises:
        KeyError: If ``model_id`` is not in ``MODELS`` (raised before the
            previous model is purged).
        ImportError: If the entry needs ``AutoModelForMultimodalLM`` and the
            installed transformers build does not provide it.
        Exception: Whatever ``from_pretrained`` raises is propagated.
    """
    global _model, _processor, _current_id

    meta = MODELS[model_id]
    loader = meta["loader_type"]
    dtype = meta["torch_dtype"]

    _purge_model()

    # Pick the processor/model classes for this loader type.
    if loader == "multimodal":
        if not _HAS_MULTIMODAL:
            raise ImportError("AutoModelForMultimodalLM not available. Run: pip install -U transformers")
        processor_cls, model_cls = AutoProcessor, AutoModelForMultimodalLM
    elif loader == "vision_causal":
        processor_cls, model_cls = AutoProcessor, AutoModelForCausalLM
    else:
        processor_cls, model_cls = AutoTokenizer, AutoModelForCausalLM

    processor = processor_cls.from_pretrained(model_id, token=_HF_TOKEN)
    model = model_cls.from_pretrained(model_id, torch_dtype=dtype, token=_HF_TOKEN)
    model.eval()

    # Commit only after every step succeeded.
    _model, _processor, _current_id = model, processor, model_id
def load_model_stream(model_id: str, card_html_list_len: int):
    """Load a model in two visible phases for the Gradio UI.

    The generator first yields a "loading" update so the notice is rendered
    before the blocking ``from_pretrained()`` call starts, then yields either
    the "ready" state or an error panel.

    Every yielded tuple has eight leading outputs followed by one update per
    model card. The placeholder yields use ``card_html_list_len`` entries,
    while the success yield builds one entry per item in ``MODELS``.

    Args:
        model_id: Key into ``MODELS`` of the model to load.
        card_html_list_len: Number of model-card outputs wired to this event.

    Yields:
        Tuples of Gradio updates / values as described above.
    """
    meta = MODELS[model_id]
    untouched_cards = [gr.update()] * card_html_list_len

    # ── Phase 1: show loading notice ──────────────────────────────────────────
    loading = _loading_html(meta["name"])
    yield (
        gr.update(visible=True, value=loading),
        gr.update(visible=True, value=loading),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
        *untouched_cards,
    )

    # ── Phase 2: actual load ──────────────────────────────────────────────────
    try:
        _load_weights(model_id)
        status = _make_status_html(meta, state="ready")
        inline = _chat_inline_status(loaded=True, name=meta["name"])
        cards = [
            gr.update(value=_card_html(mid, m, active=(mid == model_id)))
            for mid, m in MODELS.items()
        ]
    except Exception as exc:
        # Same cleanup respond_dual performs after a failed load: make sure
        # no half-loaded model/processor stays referenced.
        _purge_model()
        error = (f'<div class="status-error">Error loading '
                 f'<strong>{_html.escape(meta["name"])}</strong>: '
                 f'{_html.escape(str(exc))}</div>')
        yield (
            gr.update(visible=False),
            gr.update(visible=False),
            error,
            gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
            *untouched_cards,
        )
        return

    # The success yield lives outside the try block so that only loading and
    # rendering errors are reported as load failures.
    yield (
        gr.update(visible=False),
        gr.update(visible=False),
        status,
        gr.update(selected="single"),
        gr.update(visible=meta["supports_vision"]),
        gr.update(value=model_id),
        [],
        inline,
        *cards,
    )
# ── Inference ─────────────────────────────────────────────────────────────────
def _run_inference(message: str, image, max_new_tokens: int, temperature: float):
    """Stream a reply from the currently loaded model.

    Builds a single-turn chat prompt (optionally with an image), runs
    ``generate`` on a worker thread and yields the accumulated reply text
    each time the streamer produces a new chunk.

    Args:
        message: User text for this turn.
        image: Optional image; ``numpy`` arrays are converted to PIL images,
            anything else is passed through unchanged.
        max_new_tokens: Clamped to the range 64..2048.
        temperature: Clamped to 0.0..1.5; values at or below 0.01 switch to
            greedy decoding.

    Yields:
        The reply text accumulated so far, or a single human-readable error
        string when no model is loaded, the state is inconsistent, input
        preparation fails, or generation raises.
    """
    if _model is None or _current_id is None or _processor is None:
        yield "No model loaded. Go to Explore Models and click Load & Chat."
        return
    if _current_id not in MODELS:
        _purge_model()
        yield "Model state corrupted — please reload the model from Explore Models."
        return

    # Work on local references for the rest of the call.
    model = _model
    processor = _processor

    torch.cuda.empty_cache()
    max_new_tokens = max(64, min(int(max_new_tokens), 2048))
    temperature = max(0.0, min(float(temperature), 1.5))
    meta = MODELS[_current_id]
    loader = meta["loader_type"]
    device = "cuda"
    model.to(device)

    pil_image = None
    if image is not None:
        try:
            pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image
        except Exception:
            # Best effort: an unreadable image is dropped and the turn
            # continues as text-only.
            pil_image = None

    try:
        if loader == "multimodal":
            content = []
            if pil_image is not None:
                content.append({"type": "image", "image": pil_image})
            content.append({"type": "text", "text": message})
            messages = [{"role": "user", "content": content}]
            inputs = processor.apply_chat_template(
                messages, tokenize=True, return_dict=True,
                return_tensors="pt", add_generation_prompt=True,
            ).to(device)
        elif loader == "vision_causal":
            if pil_image is not None:
                content = [{"type": "image"}, {"type": "text", "text": message}]
                messages = [{"role": "user", "content": content}]
            else:
                messages = [{"role": "user", "content": message}]
            text = processor.apply_chat_template(messages, add_generation_prompt=True)
            inputs = processor(text=text, images=pil_image, return_tensors="pt").to(device)
        else:
            if pil_image is not None:
                message = "[This model does not support images] " + message
            messages = [{"role": "user", "content": message}]
            text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
            inputs = processor(text, return_tensors="pt").to(device)
    except Exception as exc:
        yield f"Error preparing inputs: {exc}"
        return

    # Processors expose the underlying tokenizer as ``.tokenizer``; a plain
    # tokenizer is used as-is.
    tok = getattr(processor, "tokenizer", processor)
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True, timeout=60)

    # Only forward the keys listed here to generate().
    allowed_keys = {"input_ids", "attention_mask", "token_type_ids", "pixel_values",
                    "image_sizes", "pixel_attention_mask", "image_position_ids",
                    "pixel_position_ids", "token_type_ids_for_images"}
    filtered_inputs = {k: v for k, v in inputs.items() if k in allowed_keys}

    if "image_position_ids" in filtered_inputs:
        ipi = filtered_inputs["image_position_ids"]
        if not isinstance(ipi, torch.Tensor):
            filtered_inputs["image_position_ids"] = torch.tensor(ipi, device=device)
        elif ipi.dtype == torch.bool:
            filtered_inputs["image_position_ids"] = ipi.long()

    gen_kwargs: dict = {**filtered_inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}
    if temperature > 0.01:
        gen_kwargs.update({"do_sample": True, "temperature": temperature, "top_p": 0.95})
    else:
        gen_kwargs["do_sample"] = False

    generation_errors = []

    def _generate():
        # If generate() raises, nothing ever signals end-of-stream and the
        # consumer below would block until the streamer timeout. Record the
        # error and close the stream explicitly instead.
        try:
            model.generate(**gen_kwargs)
        except Exception as exc:
            generation_errors.append(exc)
            streamer.end()

    thread = Thread(target=_generate)
    thread.start()

    partial = ""
    for token in streamer:
        partial += token
        yield partial
    thread.join()

    if generation_errors:
        prefix = partial + "\n\n" if partial else ""
        yield f"{prefix}Error during generation: {generation_errors[0]}"
def infer_large(message, image, max_new_tokens, temperature):
    """Pass-through generator over ``_run_inference``.

    ``respond`` and ``respond_dual`` select this wrapper for every model
    whose ``gpu_size`` is not ``"xlarge"``.
    """
    # NOTE(review): no GPU decorator is visible on this wrapper in this part
    # of the file — confirm where GPU allocation is attached.
    stream = _run_inference(
        message=message,
        image=image,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
    )
    yield from stream
def infer_xlarge(message, image, max_new_tokens, temperature):
    """Pass-through generator over ``_run_inference``.

    ``respond`` and ``respond_dual`` select this wrapper for models whose
    ``gpu_size`` is ``"xlarge"``.
    """
    # NOTE(review): no GPU decorator is visible on this wrapper in this part
    # of the file — confirm where GPU allocation is attached.
    stream = _run_inference(
        message=message,
        image=image,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
    )
    yield from stream
def respond(message: str, image, max_new_tokens: int, temperature: float, history: list):
    """Handle one turn of the single-model chat.

    Args:
        message: Text typed by the user; ``None`` is treated as empty.
        image: Optional attached image.
        max_new_tokens: Forwarded to the inference wrapper.
        temperature: Forwarded to the inference wrapper.
        history: Chat history as a list of ``{"role", "content"}`` dicts;
            ``None`` is treated as an empty history.

    Yields:
        ``(history, image_update)`` pairs. The image update is a no-op until
        the final yield, which resets the image input to ``None``.
    """
    # A cleared textbox or chatbot may hand over None instead of ""/[].
    message = message or ""
    history = history or []

    if not message.strip() and image is None:
        yield history, gr.update()
        return

    user_turn = {"role": "user", "content": message or "[image attached]"}

    if _current_id is None:
        yield history + [
            user_turn,
            {"role": "assistant", "content": "Please load a model first from the **Explore Models** tab."},
        ], gr.update()
        return

    new_history = history + [user_turn]
    yield new_history, gr.update()

    # Tolerate a stale id here: _run_inference reports that case to the user,
    # which a KeyError at this point would prevent.
    gpu_size = MODELS.get(_current_id, {}).get("gpu_size")
    infer_fn = infer_xlarge if gpu_size == "xlarge" else infer_large

    partial = ""
    for chunk in infer_fn(message, image, max_new_tokens, temperature):
        partial = chunk
        yield new_history + [{"role": "assistant", "content": partial}], gr.update()
    yield new_history + [{"role": "assistant", "content": partial}], gr.update(value=None)
def export_chat(history: list) -> str:
    """Render the chat history as a Markdown transcript.

    Args:
        history: List of ``{"role", "content"}`` dicts; empty or ``None``
            produces an empty string.

    Returns:
        A Markdown document with a short header (title, model name, local
        timestamp) followed by one paragraph per message. Messages with role
        ``"user"`` are labelled ``**You**``; every other role is labelled
        with the model name.
    """
    if not history:
        return ""

    # A missing or stale model id falls back to "unknown" instead of raising.
    model_name = MODELS.get(_current_id, {}).get("name", "unknown") if _current_id else "unknown"

    lines = [
        "# Gemma Explorer — Chat Export",
        f"Model: {model_name}",
        f"Date: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M')}",
        "---", "",
    ]
    for msg in history:
        role = "**You**" if msg["role"] == "user" else f"**{model_name}**"
        lines.append(f"{role}: {msg['content']}")
        lines.append("")
    return "\n".join(lines)
# ── Dual-chat inference ───────────────────────────────────────────────────────
def _stream_dual_side(model_id, system_prompt, message, user_msg, image,
                      max_new_tokens, temperature, history):
    """Load one model and stream its reply for one side of the dual chat.

    Yields ``(history, ok)`` pairs: ``history`` is this side's updated chat
    history and ``ok`` is ``False`` only for the single yield emitted when
    loading the model failed. The last yield always carries the completed
    history for this side.
    """
    user_turn = {"role": "user", "content": user_msg}
    try:
        _load_weights(model_id)
    except Exception as exc:
        _purge_model()
        yield history + [
            user_turn,
            {"role": "assistant", "content": f"Failed to load {model_id}: {exc}"},
        ], False
        return

    pending = history + [user_turn]
    yield pending, True

    infer_fn = infer_xlarge if MODELS[model_id]["gpu_size"] == "xlarge" else infer_large
    # The system prompt, when given, is prepended to the user message.
    prompt = (system_prompt + "\n\n" + message) if system_prompt else message

    partial = ""
    for chunk in infer_fn(prompt, image, max_new_tokens, temperature):
        partial = chunk
        yield pending + [{"role": "assistant", "content": partial}], True
    yield pending + [{"role": "assistant", "content": partial}], True


def respond_dual(message: str, image,
                 max_new_tokens: int, temperature: float,
                 sys_a: str, sys_b: str,
                 model_a_id: str, model_b_id: str,
                 hist_a: list, hist_b: list):
    """Send one user turn to two models, one after the other.

    Model A is loaded and streamed first, then model B; each load goes
    through ``_load_weights``. If loading either model fails, the failure is
    written into that side's history and the generator stops.

    Args:
        message: User text; ``None`` is treated as empty.
        image: Optional attached image, passed to both models.
        max_new_tokens: Forwarded to the inference wrapper.
        temperature: Forwarded to the inference wrapper.
        sys_a: Optional system prompt for model A.
        sys_b: Optional system prompt for model B.
        model_a_id: ``MODELS`` key of model A.
        model_b_id: ``MODELS`` key of model B.
        hist_a: History of panel A; ``None`` is treated as empty.
        hist_b: History of panel B; ``None`` is treated as empty.

    Yields:
        ``(hist_a, hist_b)`` pairs reflecting the current state of both
        panels.
    """
    # Cleared inputs may arrive as None instead of ""/[].
    message = message or ""
    hist_a = hist_a or []
    hist_b = hist_b or []
    sys_a = (sys_a or "").strip()
    sys_b = (sys_b or "").strip()

    if not message.strip() and image is None:
        yield hist_a, hist_b
        return

    user_msg = message or "[image attached]"

    ok = False
    final_a = hist_a
    for final_a, ok in _stream_dual_side(model_a_id, sys_a, message, user_msg, image,
                                         max_new_tokens, temperature, hist_a):
        yield final_a, hist_b
    if not ok:
        return

    for final_b, ok in _stream_dual_side(model_b_id, sys_b, message, user_msg, image,
                                         max_new_tokens, temperature, hist_b):
        yield final_a, final_b
# ── CSS ───────────────────────────────────────────────────────────────────────
# Global stylesheet for the app. The two separate `.model-card-wrap` rules of
# the original sheet are merged into a single rule; the declarations are
# otherwise unchanged.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Space+Grotesk:wght@400;500;600;700&display=swap');
:root {
    --bg: #080d18;
    --surface: #0f1523;
    --surface2: #161e30;
    --border: rgba(80,110,220,0.16);
    --blue: #4f7ef8;
    --blue-dim: #3a5fc4;
    --purple: #8b72f0;
    --purple-dim: #6a55cc;
    --text: #dde4f4;
    --text-dim: #7a86a8;
    --green: #34d399;
    --red: #f87171;
    --amber: #fbbf24;
    --radius: 13px;
}
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
.gradio-container {
    font-family: 'Inter', sans-serif !important;
    background: var(--bg) !important;
    max-width: 1420px !important;
    color: var(--text) !important;
}
footer, .gr-prose { display: none !important; }
.gemma-hero {
    position: relative;
    background: linear-gradient(150deg, #06091566 0%, #0b1225 60%, #0d1530 100%);
    border-radius: 18px; padding: 42px 52px; margin-bottom: 6px;
    overflow: hidden; border: 1px solid var(--border);
}
.hero-bg { position: absolute; inset: 0; pointer-events: none; z-index: 0; overflow: hidden; }
.hero-grid {
    position: absolute; inset: 0;
    background-image:
        linear-gradient(rgba(79,126,248,0.055) 1px, transparent 1px),
        linear-gradient(90deg, rgba(79,126,248,0.055) 1px, transparent 1px);
    background-size: 44px 44px;
    animation: grid-drift 22s linear infinite;
}
@keyframes grid-drift { from { background-position: 0 0; } to { background-position: 44px 44px; } }
.orb {
    position: absolute; border-radius: 50%;
    filter: blur(70px); opacity: 0.16;
    animation: float-orb var(--dur, 13s) ease-in-out infinite var(--delay, 0s);
}
.orb-1 { width: 400px; height: 400px; background: var(--blue); top: -130px; right: -60px; --dur:15s; --delay:0s; }
.orb-2 { width: 280px; height: 280px; background: var(--purple); bottom: -90px; left: 12%; --dur:12s; --delay:-4s; }
.orb-3 { width: 200px; height: 200px; background: #60a5fa; top: 35%; left: 52%; --dur:18s; --delay:-7s; }
@keyframes float-orb {
    0%,100% { transform: translate(0,0) scale(1); }
    33% { transform: translate(16px,-20px) scale(1.05); }
    66% { transform: translate(-10px,12px) scale(0.96); }
}
.hero-stars { position: absolute; inset: 0; }
.star {
    position: absolute; width: 2px; height: 2px;
    background: #fff; border-radius: 50%; opacity: 0;
    animation: twinkle var(--dur,3s) ease-in-out infinite var(--delay,0s);
}
@keyframes twinkle {
    0%,100% { opacity:0; transform:scale(.8); }
    50% { opacity:.65; transform:scale(1.3); }
}
.hero-inner { position: relative; z-index: 1; }
.hero-top-bar { display: flex; align-items: center; margin-bottom: 22px; }
.hero-eyebrow {
    display: inline-flex; align-items: center; gap: 9px;
    background: rgba(79,126,248,0.1); border: 1px solid rgba(79,126,248,0.28);
    border-radius: 20px; padding: 5px 16px;
    font-size: 11px; font-weight: 600; color: #7aabf8; letter-spacing: 1px; text-transform: uppercase;
}
.hero-dot-pulse {
    width: 7px; height: 7px; border-radius: 50%; background: var(--green);
    animation: pulse-ring 2s ease-out infinite;
}
@keyframes pulse-ring {
    0% { box-shadow: 0 0 0 0 rgba(52,211,153,.5); }
    70% { box-shadow: 0 0 0 8px rgba(52,211,153,0); }
    100% { box-shadow: 0 0 0 0 rgba(52,211,153,0); }
}
.hero-title {
    font-family: 'Space Grotesk', sans-serif;
    font-size: 54px; font-weight: 700; color: #fff;
    line-height: 1.05; margin: 0 0 14px; letter-spacing: -2px;
}
.hero-title span {
    background: linear-gradient(120deg, var(--blue) 20%, var(--purple) 80%);
    -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text;
}
.hero-subtitle {
    font-size: 15px; color: rgba(221,228,244,0.55);
    margin: 0 0 28px; max-width: 530px; line-height: 1.7; font-weight: 400;
}
.hero-chips { display: flex; flex-wrap: wrap; gap: 7px; }
.hero-chip {
    background: rgba(255,255,255,0.04); border: 1px solid rgba(255,255,255,0.09);
    border-radius: 7px; padding: 5px 12px;
    font-size: 12px; color: rgba(221,228,244,0.7); font-weight: 500;
}
.hero-chip strong { color: #fff; }
.hero-legend { display: flex; flex-wrap: wrap; gap: 14px; margin-top: 16px; }
.legend-dot { display: inline-flex; align-items: center; gap: 6px; font-size: 11px; color: rgba(221,228,244,0.5); font-weight: 500; }
.legend-swatch { width: 9px; height: 9px; border-radius: 3px; flex-shrink: 0; }
.tab-nav { background: transparent !important; border-bottom: 1px solid var(--border) !important; gap: 2px !important; padding: 0 4px !important; margin-top: 6px !important; }
.tab-nav button { font-family: 'Inter', sans-serif !important; font-size: 13px !important; font-weight: 500 !important; border-radius: 8px 8px 0 0 !important; padding: 10px 22px !important; color: var(--text-dim) !important; border: none !important; background: transparent !important; transition: all 0.2s !important; letter-spacing: 0.2px !important; }
.tab-nav button:hover { color: var(--text) !important; background: rgba(79,126,248,0.07) !important; }
.tab-nav button.selected { color: var(--blue) !important; background: rgba(79,126,248,0.08) !important; border-bottom: 2px solid var(--blue) !important; }
.status-bar {
    display: flex; align-items: center; justify-content: space-between; flex-wrap: wrap;
    background: var(--surface); border: 1px solid var(--border);
    border-left: 3px solid var(--accent, var(--blue));
    border-radius: var(--radius); padding: 11px 18px; gap: 10px; margin-bottom: 14px;
}
.status-left, .status-right { display: flex; align-items: center; gap: 8px; flex-wrap: wrap; }
.status-dot { width: 8px; height: 8px; border-radius: 50%; flex-shrink: 0; }
.dot-ready { background: var(--green); box-shadow: 0 0 6px rgba(52,211,153,.5); }
.dot-loading { background: var(--amber); animation: pulse-ring 1.5s ease-out infinite; }
.status-name { font-family: 'Space Grotesk', sans-serif; font-size: 14px; font-weight: 600; }
.status-chip { font-size: 11px; font-weight: 500; padding: 2px 9px; border-radius: 5px; background: rgba(255,255,255,0.05); border: 1px solid var(--border); color: var(--text-dim); }
.chip-vision { background: rgba(79,126,248,0.13); border-color: rgba(79,126,248,0.28); color: #8ab4f8; }
.chip-text { background: rgba(122,134,168,0.08); border-color: var(--border); color: var(--text-dim); }
.status-ok { font-size: 12px; color: var(--green); font-weight: 500; }
.status-error { background: rgba(248,113,113,0.08); border: 1px solid rgba(248,113,113,0.22); color: var(--red); border-radius: 10px; padding: 12px 16px; font-size: 13px; }
.status-empty { font-size: 13px; color: var(--text-dim); padding: 10px 0; font-style: italic; }
.loading-notice { display: flex; align-items: center; gap: 12px; background: rgba(79,126,248,0.07); border: 1px solid rgba(79,126,248,0.22); border-radius: var(--radius); padding: 13px 18px; margin-bottom: 14px; color: #8ab4f8; font-size: 13px; font-weight: 500; animation: notice-pulse 2s ease-in-out infinite; }
.notice-spinner { width: 15px; height: 15px; border: 2px solid rgba(79,126,248,0.25); border-top-color: var(--blue); border-radius: 50%; animation: spin .75s linear infinite; flex-shrink: 0; }
@keyframes spin { to { transform: rotate(360deg); } }
@keyframes notice-pulse { 0%,100% { border-color: rgba(79,126,248,0.22); } 50% { border-color: rgba(79,126,248,0.5); } }
.family-header { display: flex; align-items: center; gap: 14px; padding: 15px 20px; border-radius: var(--radius); margin-bottom: 12px; background: var(--surface); border: 1px solid var(--border); }
.family-icon { font-family: 'Space Grotesk', sans-serif; font-size: 17px; font-weight: 700; width: 40px; height: 40px; display: flex; align-items: center; justify-content: center; border-radius: 10px; color: #fff; flex-shrink: 0; }
.family-text h3 { font-family: 'Space Grotesk', sans-serif; font-size: 15.5px; font-weight: 600; margin: 0 0 2px; color: var(--text); }
.family-text p { font-size: 12px; color: var(--text-dim); margin: 0; }
.family-year { margin-left: auto; font-size: 11px; color: var(--text-dim); font-weight: 500; opacity: 0.7; }
.family-new-badge { margin-left: 8px; background: linear-gradient(120deg, var(--blue), var(--purple)); color: #fff; border-radius: 7px; padding: 3px 10px; font-size: 10px; font-weight: 700; letter-spacing: 0.6px; text-transform: uppercase; }
.model-card-wrap { padding: 5px !important; display: flex !important; flex-direction: column !important; }
.model-card {
    background: var(--surface); border: 1px solid var(--border);
    border-radius: var(--radius); overflow: hidden; height: 100%;
    display: flex; flex-direction: column;
    transition: border-color 0.25s, transform 0.25s, box-shadow 0.25s;
    min-height: 255px;
}
.model-card:hover { border-color: rgba(79,126,248,0.42); transform: translateY(-3px); box-shadow: 0 10px 32px rgba(0,0,0,0.4); }
.model-card.card-active { border-color: var(--green) !important; box-shadow: 0 0 0 1px rgba(52,211,153,0.18), 0 8px 28px rgba(0,0,0,0.35) !important; }
.card-active-badge { font-size: 9px; font-weight: 700; padding: 2px 8px; border-radius: 5px; background: rgba(52,211,153,0.13); color: var(--green); border: 1px solid rgba(52,211,153,0.28); text-transform: uppercase; letter-spacing: 0.5px; flex-shrink: 0; }
.card-accent { height: 3px; width: 100%; flex-shrink: 0; }
.card-body { padding: 17px 17px 14px; flex: 1; display: flex; flex-direction: column; }
.card-top { display: flex; align-items: flex-start; justify-content: space-between; margin-bottom: 8px; gap: 6px; flex-wrap: wrap; }
.card-name { font-family: 'Space Grotesk', sans-serif; font-size: 15.5px; font-weight: 600; color: var(--text); margin: 0; line-height: 1.2; }
.card-badge { font-size: 9px; font-weight: 700; padding: 2px 7px; border-radius: 5px; text-transform: uppercase; letter-spacing: 0.5px; flex-shrink: 0; }
.badge-NEW { background: rgba(79,126,248,0.15); color: #8ab4f8; border: 1px solid rgba(79,126,248,0.28); }
.badge-FLAGSHIP { background: rgba(139,114,240,0.15); color: #baaaf8; border: 1px solid rgba(139,114,240,0.28); }
.card-desc { font-size: 12px; color: var(--text-dim); line-height: 1.55; margin: 0 0 13px; flex: 1; }
.card-stats { display: grid; grid-template-columns: repeat(3,1fr); gap: 5px; margin-bottom: 11px; }
.card-stat { font-size: 10.5px; font-weight: 500; padding: 5px 4px; border-radius: 6px; background: rgba(255,255,255,0.03); color: var(--text-dim); border: 1px solid var(--border); text-align: center; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
.card-tags { display: flex; flex-wrap: wrap; gap: 5px; margin-bottom: 13px; }
.tag { font-size: 10px; font-weight: 500; padding: 2px 8px; border-radius: 5px; letter-spacing: 0.2px; border: 1px solid transparent; }
.tag-vision { background: rgba(79,126,248,0.1); color: #8ab4f8; border-color: rgba(79,126,248,0.22); }
.tag-text { background: rgba(122,134,168,0.07); color: var(--text-dim); border-color: var(--border); }
.tag-apache { background: rgba(52,211,153,0.09); color: #6ee7b7; border-color: rgba(52,211,153,0.22); }
.tag-gemma { background: rgba(251,191,36,0.09); color: #fcd34d; border-color: rgba(251,191,36,0.22); }
.tag-xlarge { background: rgba(139,114,240,0.1); color: #baaaf8; border-color: rgba(139,114,240,0.25); }
.tag-instruct { background: rgba(167,139,250,0.13); color: #c4b5fd; border-color: rgba(167,139,250,0.3); }
.tag-base { background: rgba(148,163,184,0.08); color: #94a3b8; border-color: rgba(148,163,184,0.2); }
.model-card-wrap > * { width: 100% !important; }
.card-btn { width: 100% !important; margin-top: 6px !important; }
.card-btn > div { width: 100% !important; }
.card-btn button { width: 100% !important; background: linear-gradient(120deg, var(--blue), var(--blue-dim)) !important; color: #fff !important; border-radius: 8px !important; font-family: 'Inter', sans-serif !important; font-size: 12.5px !important; font-weight: 500 !important; padding: 9px !important; border: none !important; cursor: pointer !important; letter-spacing: 0.2px !important; transition: opacity 0.2s, transform 0.15s !important; }
.card-btn button:hover { opacity: 0.85 !important; transform: translateY(-1px) !important; }
.card-btn button:disabled { opacity: 0.45 !important; cursor: not-allowed !important; transform: none !important; }
.card-btn-xlarge button { background: linear-gradient(120deg, var(--purple), var(--purple-dim)) !important; }
.thinking-wrap {
    display: flex; align-items: center; gap: 10px;
    padding: 8px 14px 10px;
    border-bottom: 1px solid var(--border);
    background: var(--surface2);
    font-size: 12px; color: var(--text-dim); font-style: italic;
}
.thinking-dots { display: flex; gap: 5px; align-items: center; }
.thinking-dots span {
    width: 7px; height: 7px; border-radius: 50%;
    background: var(--blue); opacity: 0.3;
    animation: dot-bounce 1.1s ease-in-out infinite;
}
.thinking-dots span:nth-child(2) { animation-delay: 0.18s; }
.thinking-dots span:nth-child(3) { animation-delay: 0.36s; }
@keyframes dot-bounce {
    0%, 100% { opacity: 0.2; transform: translateY(0px); }
    50% { opacity: 1; transform: translateY(-5px); }
}
.dual-loading-notice {
    display: flex; align-items: flex-start; gap: 12px;
    background: rgba(79,126,248,0.07); border: 1px solid rgba(79,126,248,0.28);
    border-radius: var(--radius); padding: 13px 18px; margin-bottom: 12px;
    animation: notice-pulse 2s ease-in-out infinite;
}
.dual-loading-notice .notice-spinner { margin-top: 2px; }
.dual-loading-body { display: flex; flex-direction: column; gap: 3px; }
.dual-loading-title { font-size: 13px; font-weight: 600; color: #8ab4f8; }
.dual-loading-sub { font-size: 11px; color: var(--text-dim); }
.chat-panel { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); overflow: hidden; }
.gradio-chatbot { background: var(--surface) !important; border: none !important; color: var(--text) !important; }
.settings-panel { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); padding: 18px; }
.zerogpu-notice { background: rgba(245,158,11,0.08); border: 1px solid rgba(245,158,11,0.25); color: #fcd34d; border-radius: 8px; padding: 12px 14px; font-size: 11px; margin-bottom: 16px; line-height: 1.6; font-weight: 500; }
.zgn-title { font-weight: 700; font-size: 11.5px; margin-bottom: 3px; }
.zgn-divider { border-top: 1px solid rgba(245,158,11,0.2); margin: 9px 0; }
.zerogpu-notice strong { color: #fde68a; }
.settings-title { font-family: 'Space Grotesk', sans-serif; font-size: 10.5px; font-weight: 700; color: var(--text-dim); text-transform: uppercase; letter-spacing: 1.2px; margin-bottom: 14px; padding-bottom: 10px; border-bottom: 1px solid var(--border); }
.settings-hint { font-size: 11.5px; color: var(--text-dim); line-height: 1.8; margin-top: 14px; padding-top: 14px; border-top: 1px solid var(--border); }
.settings-hint strong { color: rgba(221,228,244,0.55); font-weight: 500; display: block; margin-bottom: 2px; }
.system-prompt textarea { font-size: 12px !important; min-height: 72px !important; background: var(--surface2) !important; color: var(--text) !important; border-color: var(--border) !important; border-radius: 8px !important; resize: vertical !important; }
.send-btn button { background: linear-gradient(120deg, var(--blue), var(--blue-dim)) !important; color: #fff !important; border-radius: 10px !important; font-family: 'Inter', sans-serif !important; font-weight: 500 !important; padding: 10px 20px !important; border: none !important; letter-spacing: 0.2px !important; transition: opacity 0.2s !important; }
.send-btn button:hover { opacity: 0.85 !important; }
.clear-btn button, .export-btn button, .reset-btn button { background: transparent !important; border: 1px solid var(--border) !important; color: var(--text-dim) !important; border-radius: 10px !important; font-family: 'Inter', sans-serif !important; font-weight: 500 !important; font-size: 12.5px !important; width: 100% !important; transition: all 0.2s !important; }
.clear-btn button:hover { border-color: rgba(248,113,113,0.4) !important; color: var(--red) !important; background: rgba(248,113,113,0.06) !important; }
.export-btn button:hover { border-color: rgba(79,126,248,0.4) !important; color: var(--blue) !important; background: rgba(79,126,248,0.06) !important; }
.reset-btn button:hover { border-color: rgba(251,191,36,0.4) !important; color: var(--amber) !important; background: rgba(251,191,36,0.06) !important; }
.chat-inline-status { display: flex; align-items: center; gap: 8px; padding: 6px 14px; font-size: 12px; color: var(--text-dim); border-bottom: 1px solid var(--border); background: var(--surface2); min-height: 32px; }
.csi-dot { width: 7px; height: 7px; border-radius: 50%; flex-shrink: 0; }
.csi-dot-idle { background: var(--text-dim); opacity: 0.35; }
.csi-dot-ready { background: var(--green); box-shadow: 0 0 6px rgba(52,211,153,.5); }
.csi-name { font-weight: 600; color: var(--text); }
.csi-label { color: var(--green); font-weight: 500; }
.csi-idle { font-style: italic; }
.image-upload-wrap { width: 100% !important; margin-top: 6px !important; }
.image-upload-wrap > div,
.image-upload-wrap .wrap { min-height: 260px !important; border-radius: 10px !important; }
.dual-panel { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); overflow: hidden; }
.dual-header { display: flex; align-items: center; padding: 10px 14px; background: var(--surface2); border-bottom: 1px solid var(--border); font-size: 12px; font-weight: 600; }
.dual-label-a { color: var(--blue); }
.dual-label-b { color: var(--purple); }
.dual-send-btn button { background: linear-gradient(120deg, var(--blue), var(--purple)) !important; color: #fff !important; border-radius: 10px !important; font-family: 'Inter', sans-serif !important; font-weight: 500 !important; padding: 10px 20px !important; border: none !important; letter-spacing: 0.2px !important; transition: opacity 0.2s !important; }
.dual-send-btn button:hover { opacity: 0.85 !important; }
.dual-img-wrap { width: 100% !important; margin-top: 6px !important; }
.dual-img-wrap > div,
.dual-img-wrap .wrap { min-height: 140px !important; border-radius: 10px !important; }
input, textarea, .gr-input { background: var(--surface2) !important; color: var(--text) !important; border-color: var(--border) !important; border-radius: 10px !important; }
label { color: var(--text-dim) !important; font-size: 12px !important; }
.gr-slider input[type=range] { accent-color: var(--blue); }
.gr-box, .gr-form { background: transparent !important; }
"""
# ── Static HTML fragments ─────────────────────────────────────────────────────
# (left %, top %) positions of the decorative stars in the hero banner.
_STAR_COORDS = [
    (8, 12), (15, 78), (23, 45), (31, 67), (42, 23), (48, 89), (55, 34), (63, 56),
    (71, 14), (78, 72), (85, 41), (92, 88), (5, 55), (18, 33), (27, 91), (36, 8),
    (44, 62), (52, 27), (59, 79), (67, 48), (74, 19), (82, 64), (89, 35), (96, 82),
    (11, 95), (20, 7), (29, 53), (38, 74), (47, 16), (56, 39),
]

# One <div class="star"> per coordinate. Duration cycles through five values
# and delay through eight, both derived from the star's index. The values are
# formatted to one decimal so float artefacts such as 2.0999999999999996
# never end up in the inline style.
_STARS_HTML = "".join(
    f'<div class="star" style="left:{x}%;top:{y}%;'
    f'--dur:{2.4 + (i % 5) * 0.7:.1f}s;--delay:{-(i % 8) * 0.6:.1f}s"></div>'
    for i, (x, y) in enumerate(_STAR_COORDS)
)
# Registry sizes shown in the hero chips.
_N_MODELS = len(MODELS)
_N_FAMILIES = len(FAMILIES)

# Colour legend: one swatch + escaped family name per entry in FAMILIES,
# in the registry's iteration order.
_LEGEND_HTML = "".join(
    (
        '<span class="legend-dot">'
        f'<span class="legend-swatch" style="background:{family["color"]}"></span>'
        f"{_html.escape(label)}</span>"
    )
    for label, family in FAMILIES.items()
)
# Hero banner markup. The template is filled once at import time with the
# pre-rendered star field, the registry counts and the family legend.
_HERO_HTML = """
<div class="gemma-hero">
<div class="hero-bg">
<div class="hero-grid"></div>
<div class="orb orb-1"></div><div class="orb orb-2"></div><div class="orb orb-3"></div>
<div class="hero-stars">{stars}</div>
</div>
<div class="hero-inner">
<div class="hero-top-bar">
<div class="hero-eyebrow">
<span class="hero-dot-pulse"></span>
Google DeepMind · Open Models
</div>
</div>
<h1 class="hero-title">Gemma <span>Explorer</span></h1>
<p class="hero-subtitle">Explore, compare, and chat with the full Gemma open model family — from the compact 1B to the powerful 31B multimodal.</p>
<div class="hero-chips">
<span class="hero-chip"><strong>{n_models}</strong> models</span>
<span class="hero-chip"><strong>{n_families}</strong> generations</span>
<span class="hero-chip">Vision — Gemma 3 & 4</span>
<span class="hero-chip">ZeroGPU · NVIDIA H200</span>
</div>
<div class="hero-legend">{legend}</div>
</div>
</div>
""".format(
    stars=_STARS_HTML,
    n_models=_N_MODELS,
    n_families=_N_FAMILIES,
    legend=_LEGEND_HTML,
)
# Sidebar notice explaining the two ZeroGPU limitations of this Space.
# Non-ASCII glyphs are written as HTML entities (&#9889; is the lightning
# bolt) so the text cannot be corrupted by a wrong file encoding.
_ZEROGPU_NOTICE = """
<div class="zerogpu-notice">
<div class="zgn-title">&#9889; ZeroGPU Latency</div>
GPU allocation happens on every message in this serverless Space &mdash; expect a brief wait before the first token.
<div class="zgn-divider"></div>
<div class="zgn-title">No Memory</div>
Due to ZeroGPU constraints, each message is processed independently.
The model has <strong>no conversation history</strong> &mdash; it starts fresh on every reply.
</div>
"""
# Cheat-sheet rendered under the generation parameters: what the temperature
# values mean and which GPU allocation each size label refers to.
_SETTINGS_HINT = (
    "\n"
    '<div class="settings-hint">\n'
    "<strong>Temperature</strong>\n"
    "0 = deterministic<br>0.7 = balanced<br>1.5 = creative\n"
    "<br><br>\n"
    "<strong>GPU allocation</strong>\n"
    "large = 70 GB H200<br>xlarge = 141 GB H200<br>(Gemma 4 31B only)\n"
    "</div>\n"
)
# ── Card / header HTML ────────────────────────────────────────────────────────
def _card_html(model_id: str, meta: dict, active: bool = False) -> str:
    """Render the HTML card for one model in the Explore tab.

    Args:
        model_id: Model identifier; an ``-it`` substring (case-insensitive)
            selects the "Instruct" tag, anything else gets "Base".
        meta: Registry entry. Reads ``family_color``, ``supports_vision``,
            ``license_open``, ``gpu_size``, ``name``, ``description``,
            ``params_short``, ``context``, ``vram`` and the optional ``badge``.
        active: When true the card gets the ``card-active`` class and a
            "Loaded" badge.

    Returns:
        The card markup as a string.
    """

    def esc(value) -> str:
        # str() first: html.escape() only accepts strings, and registry values
        # such as the context length may be plain ints.
        return _html.escape(str(value))

    color = esc(meta["family_color"])
    badge = meta.get("badge")
    # The badge lands in a class attribute as well as in text, so escape both.
    badge_html = (
        f'<span class="card-badge badge-{esc(badge)}">{esc(badge)}</span>'
        if badge else ""
    )
    # &#10003; is a check mark; written as an entity to stay encoding-proof.
    active_html = '<span class="card-active-badge">&#10003; Loaded</span>' if active else ""
    vision_tag = (
        '<span class="tag tag-vision">Vision</span>'
        if meta["supports_vision"]
        else '<span class="tag tag-text">Text only</span>'
    )
    license_tag = (
        '<span class="tag tag-apache">Apache 2.0</span>'
        if meta["license_open"]
        else '<span class="tag tag-gemma">Gemma License</span>'
    )
    gpu_tag = '<span class="tag tag-xlarge">xlarge GPU</span>' if meta["gpu_size"] == "xlarge" else ""
    instruct_tag = (
        '<span class="tag tag-instruct">Instruct</span>'
        if "-it" in model_id.lower()
        else '<span class="tag tag-base">Base</span>'
    )
    active_cls = " card-active" if active else ""
    return f"""
<div class="model-card{active_cls}">
<div class="card-accent" style="background:linear-gradient(90deg,{color},{color}66)"></div>
<div class="card-body">
<div class="card-top">
<p class="card-name">{esc(meta['name'])}</p>
<span style="display:flex;gap:4px;flex-shrink:0">{badge_html}{active_html}</span>
</div>
<p class="card-desc">{esc(meta['description'])}</p>
<div class="card-stats">
<span class="card-stat">{esc(meta['params_short'])}</span>
<span class="card-stat">{esc(meta['context'])} ctx</span>
<span class="card-stat">{esc(meta['vram'])}</span>
</div>
<div class="card-tags">{vision_tag}{instruct_tag}{license_tag}{gpu_tag}</div>
</div>
</div>
"""
def _family_header_html(name: str, info: dict) -> str:
    """Render the header strip shown above one model family's cards.

    ``info`` supplies ``color``, ``icon``, ``description`` and ``year``; a
    truthy optional ``new`` key adds the "New" badge. Only the name and the
    description are HTML-escaped; the other values are inserted as-is.
    """
    tint = info['color']
    badge = ""
    if info.get("new"):
        badge = '<span class="family-new-badge">New</span>'
    rows = [
        "",
        '<div class="family-header">',
        f'<div class="family-icon" style="background:linear-gradient(135deg,{tint},{tint}88)">{info["icon"]}</div>',
        f'<div class="family-text"><h3>{_html.escape(name)}</h3><p>{_html.escape(info["description"])}</p></div>',
        f'<span class="family-year">{info["year"]}</span>',
        badge,
        "</div>",
        "",
    ]
    return "\n".join(rows)
def _make_status_html(meta: dict, state: str = "ready") -> str:
    """Render the status bar describing a model and its load state.

    ``state`` may be ``"ready"`` or ``"already"`` (both show the ready dot,
    with different labels); every other value renders the loading variant.
    """
    if state == "ready":
        dot_class, label = "dot-ready", "Ready"
    elif state == "already":
        dot_class, label = "dot-ready", "Already loaded"
    else:
        dot_class, label = "dot-loading", "Loading\u2026"

    if meta["supports_vision"]:
        vision_tag = '<span class="status-chip chip-vision">Vision</span>'
    else:
        vision_tag = '<span class="status-chip chip-text">Text only</span>'

    accent = meta["family_color"]
    title = _html.escape(meta["name"])
    params = _html.escape(meta['params_short'])
    context = _html.escape(str(meta['context']))
    vram = _html.escape(meta['vram'])

    rows = (
        "",
        f'<div class="status-bar" style="--accent:{accent}">',
        '<div class="status-left">',
        f'<span class="status-dot {dot_class}"></span>',
        f'<span class="status-name" style="color:{accent}">{title}</span>',
        vision_tag,
        "</div>",
        '<div class="status-right">',
        f'<span class="status-chip">{params}</span>',
        f'<span class="status-chip">{context} ctx</span>',
        f'<span class="status-chip">{vram}</span>',
        f'<span class="status-ok">{label}</span>',
        "</div>",
        "</div>",
        "",
    )
    return "\n".join(rows)
def _chat_inline_status(loaded: bool = False, name: str = "") -> str:
    """Return the one-line status shown above the chat window.

    With ``loaded`` false an idle hint pointing at the Explore tab is
    returned; otherwise the escaped model name with a "Ready to chat" label.
    """
    if not loaded:
        idle_parts = [
            '<div class="chat-inline-status">',
            '<span class="csi-dot csi-dot-idle"></span>',
            '<span class="csi-idle">No model loaded — go to <strong>Explore Models</strong> and click <em>Load & Chat</em>.</span>',
            '</div>',
        ]
        return "".join(idle_parts)

    ready_parts = [
        '<div class="chat-inline-status">',
        '<span class="csi-dot csi-dot-ready"></span>',
        f'<span class="csi-name">{_html.escape(name)}</span>',
        '<span class="csi-label"> · Ready to chat</span>',
        '</div>',
    ]
    return "".join(ready_parts)
def _empty_status() -> str:
    """Return the placeholder status bar shown while no model is loaded."""
    hint = "No model loaded — select one in <strong>Explore Models</strong>."
    return '<div class="status-empty">' + hint + "</div>"
def _loading_html(model_name: str = "") -> str:
    """Return the spinner banner displayed while a model is being loaded.

    Args:
        model_name: Optional display name; when given it is HTML-escaped and
            shown in bold after "Loading".

    Returns:
        The banner markup as a string.
    """
    name_part = f" <strong>{_html.escape(model_name)}</strong>" if model_name else ""
    return (
        '<div class="loading-notice">'
        '<div class="notice-spinner"></div>'
        '<div>'
        f'<div>Loading{name_part}, please wait…</div>'
        '<div style="font-size:11px;opacity:0.7;margin-top:3px;font-weight:400">'
        # &#9201; is the stopwatch glyph, kept as an entity so a wrong file
        # encoding cannot turn it into garbage in the UI.
        '&#9201; Large models (27B, 31B) can take 1–3 min. Please be patient.'
        '</div>'
        '</div>'
        '</div>'
    )
# Animated "Thinking…" indicator: three empty spans that the stylesheet turns
# into dots, followed by the label.
_THINKING_HTML = "".join([
    '<div class="thinking-wrap">',
    '<div class="thinking-dots"><span></span><span></span><span></span></div>',
    'Thinking…',
    '</div>',
])
def _dual_loading_html(label: str, color: str, model_name: str) -> str:
    """Return the loading banner used by the Dual Chat tab.

    Args:
        label: Which side is loading (for example "Model A").
        color: CSS colour expression applied to the title.
        model_name: Display name of the model being loaded.

    Returns:
        The banner markup; all three arguments are HTML-escaped.
    """
    return (
        '<div class="dual-loading-notice">'
        '<div class="notice-spinner"></div>'
        '<div class="dual-loading-body">'
        # The colour sits inside a quoted attribute, so escape it as well.
        f'<span class="dual-loading-title" style="color:{_html.escape(color)}">'
        f'Loading {_html.escape(label)}: {_html.escape(model_name)}…'
        '</span>'
        '<span class="dual-loading-sub">'
        # &#9201; is the stopwatch glyph, written as an entity so it survives
        # any file-encoding mishap.
        '&#9201; Large models (27B, 31B) may take 1–3 min. Please be patient.'
        '</span>'
        '</div>'
        '</div>'
    )
# ── Build Gradio UI ───────────────────────────────────────────────────────────
# Dropdown entries for the Dual Chat tab: (display name, model id) pairs.
_MODEL_CHOICES = [(meta["name"], mid) for mid, meta in MODELS.items()]

# Default Dual Chat pairing: the first registered model against the fourth
# (or the last one when fewer than four exist). Both are None when the
# registry is empty, instead of raising IndexError at import time.
_model_ids = list(MODELS)
_init_a = _model_ids[0] if _model_ids else None
_init_b = _model_ids[min(3, len(_model_ids) - 1)] if _model_ids else None


def _both_support_vision(model_a, model_b) -> bool:
    """Return True only if both ids are registered and flagged vision-capable."""
    return all(
        MODELS.get(mid, {}).get("supports_vision", False)
        for mid in (model_a, model_b)
    )


def _prepend_system_prompt(sys_prompt, message):
    """Return ``message`` prefixed by the stripped system prompt, if any.

    A missing or blank system prompt returns ``message`` unchanged.
    """
    prefix = (sys_prompt or "").strip()
    if not prefix:
        return message
    return prefix + "\n\n" + (message or "")


_both_vision_init = _both_support_vision(_init_a, _init_b)

# Intro banner of the Dual Chat tab. Dashes are HTML entities so the text is
# independent of the file encoding.
_DUAL_NOTICE_HTML = """
<div class="zerogpu-notice" style="margin-bottom:16px">
<div class="zgn-title">Dual Chat &mdash; Side-by-Side Comparison</div>
Send the same prompt to two models and compare their responses.
Models are loaded and run sequentially &mdash; Model A first, then Model B.
<div class="zgn-divider"></div>
<div class="zgn-title">No Memory</div>
Same ZeroGPU constraints apply &mdash; each turn is processed independently with no context history.
</div>
"""

# NOTE(review): the container nesting below was reconstructed from a copy of
# this file whose indentation had been stripped. Confirm the placement of
# image_input, the settings-panel buttons and dual_clear against the intended
# layout before merging.
with gr.Blocks(title="Gemma Explorer") as demo:
    current_model_state = gr.State(value=None)
    gr.HTML(value=_HERO_HTML)

    with gr.Tabs() as main_tabs:
        with gr.Tab("Explore Models", id="explore"):
            status_html = gr.HTML(value=_empty_status())
            loading_notice = gr.HTML(value=_loading_html(), visible=False)
            card_html_components: dict[str, gr.HTML] = {}
            load_btns: list[gr.Button] = []
            # (model id, button) pairs used for wiring further down; this
            # replaces stashing a private attribute on the Button objects.
            _load_targets: list[tuple[str, gr.Button]] = []
            for family_name, family_info in FAMILIES.items():
                gr.HTML(_family_header_html(family_name, family_info))
                family_models = list(get_models_by_family(family_name).items())
                # Cards are laid out four per row.
                for row_start in range(0, len(family_models), 4):
                    with gr.Row(equal_height=True):
                        for model_id, meta in family_models[row_start:row_start + 4]:
                            with gr.Column(min_width=200, elem_classes=["model-card-wrap"]):
                                card_comp = gr.HTML(_card_html(model_id, meta, active=False))
                                card_html_components[model_id] = card_comp
                                btn_cls = (
                                    ["card-btn", "card-btn-xlarge"]
                                    if meta["gpu_size"] == "xlarge"
                                    else ["card-btn"]
                                )
                                load_btn = gr.Button("Load & Chat", elem_classes=btn_cls)
                                load_btns.append(load_btn)
                                _load_targets.append((model_id, load_btn))

        with gr.Tab("Single Chat", id="single"):
            chat_status_html = gr.HTML(value=_empty_status())
            chat_loading_notice = gr.HTML(value=_loading_html(), visible=False)
            with gr.Row(equal_height=False):
                with gr.Column(scale=4, elem_classes=["chat-panel"]):
                    chat_inline = gr.HTML(value=_chat_inline_status(loaded=False))
                    thinking_html = gr.HTML(value="", visible=False)
                    chatbot = gr.Chatbot(value=[], height=480, show_label=False, placeholder="")
                    with gr.Row():
                        msg_input = gr.Textbox(
                            placeholder="Type your message here\u2026",
                            show_label=False, scale=5, lines=1, max_lines=5, autofocus=True,
                        )
                        send_btn = gr.Button("Send", variant="primary", elem_classes=["send-btn"], scale=1)
                    # Hidden at start; it is one of the outputs of the load
                    # buttons wired below.
                    image_input = gr.Image(
                        type="numpy", label="Attach image (optional)",
                        show_label=True, visible=False,
                        elem_classes=["image-upload-wrap"], height=260,
                    )
                with gr.Column(scale=1, elem_classes=["settings-panel"]):
                    gr.HTML(_ZEROGPU_NOTICE)
                    gr.HTML('<div class="settings-title">Parameters</div>')
                    max_tokens = gr.Slider(64, 2048, value=512, step=64, label="Max new tokens")
                    temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature")
                    system_prompt = gr.Textbox(
                        label="System prompt (optional)",
                        placeholder="e.g. You are a helpful assistant\u2026",
                        lines=3, max_lines=6,
                        elem_classes=["system-prompt"],
                    )
                    gr.HTML(_SETTINGS_HINT)
                    reset_btn = gr.Button("\u21ba Reset params", elem_classes=["reset-btn"])
                    clear_btn = gr.Button("Clear Chat", elem_classes=["clear-btn"])
                    export_btn = gr.Button("\u2b07 Export .md", elem_classes=["export-btn"])
                    export_file = gr.File(label="Download chat", visible=False)

        with gr.Tab("Dual Chat", id="dual"):
            gr.HTML(_DUAL_NOTICE_HTML)
            dual_loading_html = gr.HTML(value="", visible=False)
            with gr.Row():
                with gr.Column(scale=1):
                    dual_model_a = gr.Dropdown(choices=_MODEL_CHOICES, value=_init_a, label="Model A")
                    dual_sys_a = gr.Textbox(
                        label="System prompt A (optional)", lines=2,
                        placeholder="e.g. Answer concisely.",
                        elem_classes=["system-prompt"],
                    )
                with gr.Column(scale=1):
                    dual_model_b = gr.Dropdown(choices=_MODEL_CHOICES, value=_init_b, label="Model B")
                    dual_sys_b = gr.Textbox(
                        label="System prompt B (optional)", lines=2,
                        placeholder="e.g. Answer in detail.",
                        elem_classes=["system-prompt"],
                    )
            with gr.Row(equal_height=True):
                with gr.Column(scale=1, elem_classes=["dual-panel"]):
                    gr.HTML('<div class="dual-header"><span class="dual-label-a">&#9650; Model A</span></div>')
                    dual_bot_a = gr.Chatbot(value=[], height=400, show_label=False)
                with gr.Column(scale=1, elem_classes=["dual-panel"]):
                    gr.HTML('<div class="dual-header"><span class="dual-label-b">&#9650; Model B</span></div>')
                    dual_bot_b = gr.Chatbot(value=[], height=400, show_label=False)
            dual_img = gr.Image(
                type="numpy",
                label="Attach image \u2014 sent to both models (only available when both models support vision)",
                show_label=True,
                elem_classes=["dual-img-wrap"],
                height=160,
                visible=_both_vision_init,
            )
            with gr.Row():
                dual_msg = gr.Textbox(
                    placeholder="Type a prompt \u2014 it will be sent to both models\u2026",
                    show_label=False, scale=5, lines=1, max_lines=4,
                )
                dual_send = gr.Button("Send to Both", variant="primary",
                                      elem_classes=["dual-send-btn"], scale=1)
            with gr.Row():
                dual_max_tokens = gr.Slider(64, 2048, value=512, step=64, label="Max new tokens")
                dual_temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature")
            dual_clear = gr.Button("Clear Both", elem_classes=["clear-btn"])

    # ── Event wiring ──────────────────────────────────────────────────────────
    # Card components in registry order; their count is handed to
    # load_model_stream together with the model id.
    card_html_list = [card_html_components[mid] for mid in MODELS]
    _n_cards = len(card_html_list)

    def make_load_fn(model_id):
        """Bind ``model_id`` into a zero-argument generator usable as a click handler."""
        def load_fn():
            yield from load_model_stream(model_id, _n_cards)
        return load_fn

    for _target_id, _target_btn in _load_targets:
        _target_btn.click(
            fn=make_load_fn(_target_id),
            inputs=[],
            outputs=[
                loading_notice, chat_loading_notice,
                status_html, main_tabs, image_input, current_model_state,
                chatbot, chat_inline,
                *card_html_list,
            ],
        )

    # ── Single chat ───────────────────────────────────────────────────────────
    def _do_respond(message, image, max_toks, temp, sys_prompt, history):
        """Stream one single-chat turn.

        Yields 6-tuples matching ``_single_outputs``: chat history, textbox
        value, image update, send-button state, textbox state and the
        "thinking" indicator. Input controls are disabled for the duration of
        the turn and re-enabled on both the success and the failure path.
        """
        full_msg = _prepend_system_prompt(sys_prompt, message)
        last_hist = history
        _lock = gr.update(interactive=False)
        _unlock = gr.update(interactive=True)
        _thinking = gr.update(visible=True, value=_THINKING_HTML)
        _done = gr.update(visible=False, value="")
        yield last_hist, gr.update(value=""), gr.update(), _lock, _lock, _thinking
        try:
            for hist_update, img_update in respond(full_msg, image, max_toks, temp, history):
                last_hist = hist_update
                yield last_hist, gr.update(), img_update, gr.update(), gr.update(), _done
        except Exception:
            # Without this the textbox and button would stay disabled until
            # the page is reloaded. Unlock, keep the attached image so the
            # user can retry, then let Gradio report the error.
            yield last_hist, gr.update(), gr.update(), _unlock, _unlock, _done
            raise
        yield last_hist, gr.update(), gr.update(value=None), _unlock, _unlock, _done

    _single_inputs = [msg_input, image_input, max_tokens, temperature, system_prompt, chatbot]
    _single_outputs = [chatbot, msg_input, image_input, send_btn, msg_input, thinking_html]
    msg_input.submit(fn=_do_respond, inputs=_single_inputs, outputs=_single_outputs)
    send_btn.click(fn=_do_respond, inputs=_single_inputs, outputs=_single_outputs)
    clear_btn.click(fn=lambda: [], outputs=[chatbot])
    reset_btn.click(fn=lambda: (512, 0.7, ""), outputs=[max_tokens, temperature, system_prompt])

    def _do_export(history):
        """Write the chat to a Markdown file and reveal the download widget.

        Returns an update that hides the widget when there is nothing to
        export.
        """
        if not history:
            return gr.update(visible=False)
        import tempfile  # local import: only needed by this handler

        content = export_chat(history)
        stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
        # A unique file in the system temp dir avoids two exports in the same
        # second overwriting each other; explicit UTF-8 keeps non-ASCII chat
        # text from failing on platforms with a different default encoding.
        with tempfile.NamedTemporaryFile(
            mode="w", encoding="utf-8",
            prefix=f"gemma_chat_{stamp}_", suffix=".md", delete=False,
        ) as fh:
            fh.write(content)
        return gr.update(value=fh.name, visible=True)

    export_btn.click(fn=_do_export, inputs=[chatbot], outputs=[export_file])

    # ── Dual chat ─────────────────────────────────────────────────────────────
    def _dual_img_visibility(model_a, model_b):
        """Show the shared image input only when both selected models accept images."""
        return gr.update(visible=_both_support_vision(model_a, model_b))

    for _dropdown in (dual_model_a, dual_model_b):
        _dropdown.change(
            fn=_dual_img_visibility,
            inputs=[dual_model_a, dual_model_b],
            outputs=[dual_img],
        )

    def _do_dual(message, image, max_toks, temp, sys_a, sys_b, model_a, model_b, hist_a, hist_b):
        """Send one prompt to Model A, then Model B, streaming each answer.

        Yields 7-tuples matching ``_dual_outputs``: history A, history B,
        textbox value, image update, send-button state, textbox state and the
        loading banner. A load failure on either side is written into that
        side's chat and ends the turn. Any other exception re-enables the
        inputs, hides the banner and is then re-raised.
        """
        message = message or ""
        if not message.strip() and image is None:
            yield hist_a, hist_b, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(visible=False)
            return

        _lock = gr.update(interactive=False)
        _unlock = gr.update(interactive=True)
        user_msg = message or "[image attached]"
        sides = (
            ("Model A", "var(--blue)", model_a, sys_a),
            ("Model B", "var(--purple)", model_b, sys_b),
        )
        hists = [hist_a, hist_b]

        try:
            for idx, (label, color, model_id, sys_prompt) in enumerate(sides):
                name = MODELS.get(model_id, {}).get("name", model_id)
                banner = gr.update(visible=True, value=_dual_loading_html(label, color, name))
                if idx == 0:
                    # First step of the turn: clear the textbox and lock inputs.
                    yield hists[0], hists[1], gr.update(value=""), gr.update(), _lock, _lock, banner
                else:
                    yield (hists[0], hists[1], gr.update(), gr.update(),
                           gr.update(), gr.update(), banner)

                try:
                    _load_weights(model_id)
                except Exception as exc:
                    _purge_model()
                    hists[idx] = hists[idx] + [
                        {"role": "user", "content": user_msg},
                        {"role": "assistant", "content": f"Failed to load {name}: {exc}"},
                    ]
                    yield (hists[0], hists[1], gr.update(), gr.update(value=None),
                           _unlock, _unlock, gr.update(visible=False))
                    return

                pending = hists[idx] + [{"role": "user", "content": user_msg}]
                infer_fn = infer_xlarge if MODELS[model_id]["gpu_size"] == "xlarge" else infer_large
                prompt = _prepend_system_prompt(sys_prompt, message)
                reply = ""
                for chunk in infer_fn(prompt, image, max_toks, temp):
                    # Each chunk replaces the previous partial reply.
                    reply = chunk
                    hists[idx] = pending + [{"role": "assistant", "content": reply}]
                    yield (hists[0], hists[1], gr.update(), gr.update(),
                           gr.update(), gr.update(), gr.update())
                # Also record the turn when the model produced no chunks.
                hists[idx] = pending + [{"role": "assistant", "content": reply}]
        except Exception:
            # Never leave the tab locked with the banner showing: restore the
            # controls, keep whatever was streamed so far, then surface the error.
            yield (hists[0], hists[1], gr.update(), gr.update(),
                   _unlock, _unlock, gr.update(visible=False))
            raise

        yield (hists[0], hists[1], gr.update(), gr.update(value=None),
               _unlock, _unlock, gr.update(visible=False))

    _dual_inputs = [dual_msg, dual_img, dual_max_tokens, dual_temperature,
                    dual_sys_a, dual_sys_b, dual_model_a, dual_model_b, dual_bot_a, dual_bot_b]
    _dual_outputs = [dual_bot_a, dual_bot_b, dual_msg, dual_img,
                     dual_send, dual_msg, dual_loading_html]
    dual_send.click(fn=_do_dual, inputs=_dual_inputs, outputs=_dual_outputs)
    dual_msg.submit(fn=_do_dual, inputs=_dual_inputs, outputs=_dual_outputs)
    dual_clear.click(fn=lambda: ([], []), outputs=[dual_bot_a, dual_bot_b])
if __name__ == "__main__":
    # Debug mode is opt-in: only GRADIO_DEBUG=1 enables it.
    demo.launch(
        debug=os.environ.get("GRADIO_DEBUG", "0") == "1",
        css=CSS,
        theme=gr.themes.Base(),
    )