""" ZeroGPU-friendly Gradio entrypoint for OMada demo. - Downloads checkpoint + assets + style centroids from Hugging Face Hub - Instantiates OmadaDemo once (global) - Exposes 10 modalities via Gradio tabs - Uses @spaces.GPU only on inference handlers so GPU is allocated per request """ import os import sys import subprocess import importlib import base64 import html import io import re import wave import tempfile import shutil import threading import random from urllib.parse import quote from pathlib import Path from typing import List import numpy as np import torch from PIL import Image import gradio as gr import spaces from packaging.version import parse as parse_version GRADIO_VERSION = parse_version(gr.__version__) GRADIO_V6_PLUS = GRADIO_VERSION >= parse_version("6.0.0") # --------------------------- # Project roots & sys.path # --------------------------- PROJECT_ROOT = Path(__file__).resolve().parent os.environ.setdefault("FORCE_EVAL_SETTINGS", "0") GLOBAL_SEED = int(os.getenv("GLOBAL_SEED", "42")) PREVIEW_DIR = PROJECT_ROOT / "_preview_cache" PREVIEW_DIR.mkdir(parents=True, exist_ok=True) MMADA_ROOT = PROJECT_ROOT / "MMaDA" if str(MMADA_ROOT) not in sys.path: sys.path.insert(0, str(MMADA_ROOT)) EMOVA_ROOT = PROJECT_ROOT / "EMOVA_speech_tokenizer" if str(EMOVA_ROOT) not in sys.path: sys.path.insert(0, str(EMOVA_ROOT)) # --------------------------- # HuggingFace Hub helper # --------------------------- def ensure_hf_hub(target: str = "0.36.0"): """ Make sure huggingface_hub stays <1.0 to satisfy transformers/tokenizers. 
""" try: import huggingface_hub as hub except ImportError: subprocess.check_call( [sys.executable, "-m", "pip", "install", f"huggingface-hub=={target}", "--no-cache-dir"] ) import huggingface_hub as hub if parse_version(hub.__version__) >= parse_version("1.0.0"): subprocess.check_call( [sys.executable, "-m", "pip", "install", f"huggingface-hub=={target}", "--no-cache-dir"] ) hub = importlib.reload(hub) # Backfill missing constants in older hub versions to avoid AttributeError. try: import huggingface_hub.constants as hub_consts # type: ignore except Exception: hub_consts = None if hub_consts and not hasattr(hub_consts, "HF_HUB_ENABLE_HF_TRANSFER"): setattr(hub_consts, "HF_HUB_ENABLE_HF_TRANSFER", False) return hub snapshot_download = ensure_hf_hub().snapshot_download # --------------------------- # OMada demo imports # --------------------------- from inference.gradio_multimodal_demo_inst import ( # noqa: E402 OmadaDemo, CUSTOM_CSS, FORCE_LIGHT_MODE_JS, ) # --------------------------- # HF download helpers # --------------------------- def download_assets() -> Path: """Download demo assets (logo + sample prompts/media) and return the root path.""" repo_id = os.getenv("ASSET_REPO_ID", "snu-aidas/Dynin-Omni-Demo-Assets") revision = os.getenv("ASSET_REVISION", "main") token = os.getenv("HF_TOKEN") cache_dir = PROJECT_ROOT / "_asset_cache" cache_dir.mkdir(parents=True, exist_ok=True) return Path( snapshot_download( repo_id=repo_id, revision=revision, repo_type="dataset", local_dir=cache_dir, local_dir_use_symlinks=False, token=token, ) ) def download_style() -> Path: """Download style centroid dataset and return the root path.""" repo_id = os.getenv("STYLE_REPO_ID", "snu-aidas/aidas-style-centroid") revision = os.getenv("STYLE_REVISION", "main") token = os.getenv("HF_TOKEN") cache_dir = PROJECT_ROOT / "_style_cache" cache_dir.mkdir(parents=True, exist_ok=True) preferred_repo_type = os.getenv("STYLE_REPO_TYPE", "dataset").strip().lower() repo_type_candidates = 
def download_checkpoint() -> Path:
    """Return a directory named ``unwrapped_model`` that holds the checkpoint.

    Resolution order:

    1. ``MODEL_CHECKPOINT_PATH`` (env): a local override. If the given path is
       not itself called ``unwrapped_model`` but contains such a sub-directory,
       the sub-directory is used. No download happens in this case.
    2. Otherwise the repo ``MODEL_REPO_ID`` (default ``snu-aidas/Dynin-Omni``)
       at ``MODEL_REVISION`` (default ``main``) is fetched with
       ``snapshot_download`` into ``PROJECT_ROOT / "_ckpt_cache"``, using
       ``HF_TOKEN`` when set. If the snapshot is not already an
       ``unwrapped_model`` directory and does not contain one, a sibling
       symlink called ``unwrapped_model`` is created that points at it.

    Returns:
        Path to the ``unwrapped_model`` directory (or the override path).

    Raises:
        FileNotFoundError: ``MODEL_CHECKPOINT_PATH`` is set but does not exist.
    """
    local_override = os.getenv("MODEL_CHECKPOINT_PATH")
    if local_override:
        override_path = Path(local_override).expanduser()
        if override_path.name != "unwrapped_model":
            nested = override_path / "unwrapped_model"
            if nested.is_dir():
                override_path = nested
        if not override_path.exists():
            raise FileNotFoundError(f"MODEL_CHECKPOINT_PATH does not exist: {override_path}")
        return override_path

    repo_id = os.getenv("MODEL_REPO_ID", "snu-aidas/Dynin-Omni")
    revision = os.getenv("MODEL_REVISION", "main")
    token = os.getenv("HF_TOKEN")
    cache_dir = PROJECT_ROOT / "_ckpt_cache"
    cache_dir.mkdir(parents=True, exist_ok=True)

    snapshot_path = Path(
        snapshot_download(
            repo_id=repo_id,
            revision=revision,
            repo_type="model",
            local_dir=cache_dir,
            local_dir_use_symlinks=False,
            token=token,
        )
    )

    if snapshot_path.name == "unwrapped_model":
        return snapshot_path

    nested = snapshot_path / "unwrapped_model"
    if nested.is_dir():
        return nested

    aliased = snapshot_path.parent / "unwrapped_model"
    # Path.exists() follows symlinks, so a dangling link left over from an
    # earlier run reports False here and symlink_to() would then fail with
    # FileExistsError. Drop the dangling link before recreating it.
    if aliased.is_symlink() and not aliased.exists():
        aliased.unlink(missing_ok=True)
    if not aliased.exists():
        aliased.symlink_to(snapshot_path, target_is_directory=True)
    return aliased
path.exists(): return [] lines = [ ln.strip() for ln in path.read_text(encoding="utf-8").splitlines() if ln.strip() ] return [[ln] for ln in lines] def _load_media_examples(subdir: str, suffixes): d = ASSET_ROOT / subdir if not d.exists(): return [] ex = [] for p in sorted(d.iterdir()): if p.is_file() and p.suffix.lower() in suffixes: ex.append([str(p)]) return ex def _load_i2i_examples(): d = ASSET_ROOT / "i2i" if not d.exists(): return [] # 이미지 파일들 (image1.jpeg, image2.png, ...) image_files = sorted( [p for p in d.iterdir() if p.suffix.lower() in {".png", ".jpg", ".jpeg", ".webp"}] ) # 텍스트 파일들 (text1.txt, text2.txt, ...) text_files = sorted( [p for p in d.iterdir() if p.suffix.lower() == ".txt"] ) n = min(len(image_files), len(text_files)) if n == 0: return [] examples = [] for i in range(n): img_path = image_files[i] txt_path = text_files[i] instruction = txt_path.read_text(encoding="utf-8").strip() if not instruction: continue # Gradio Examples 형식: [image, instruction_text] examples.append([str(img_path), instruction]) return examples # text-based examples T2S_EXAMPLES = _load_text_examples(ASSET_ROOT / "t2s" / "text.txt") CHAT_EXAMPLES = _load_text_examples(ASSET_ROOT / "chat" / "text.txt") T2I_EXAMPLES = _load_text_examples(ASSET_ROOT / "t2i" / "text.txt") I2I_EXAMPLES = _load_i2i_examples() def _get_example_value(examples, idx: int, pos: int = 0, default=None): try: if len(examples) > idx and len(examples[idx]) > pos: val = examples[idx][pos] if val is not None and str(val) != "": return val except Exception: pass try: if examples and len(examples[0]) > pos: val = examples[0][pos] if val is not None and str(val) != "": return val except Exception: pass return default def _sample_preview_label(item: dict, idx: int) -> str: base = f"sample {idx + 1}" if not isinstance(item, dict): return base text = str(item.get("text") or "").strip() image = item.get("image") audio = item.get("audio") video = item.get("video") preview = "" if text: preview = 
text.replace("\n", " ").strip() elif image: preview = f"image: {Path(str(image)).name}" elif audio: preview = f"audio: {Path(str(audio)).name}" elif video: preview = f"video: {Path(str(video)).name}" if not preview: return base if len(preview) > 34: preview = preview[:31] + "..." return f"{base} - {preview}" def _image_to_data_uri(path: str) -> str: p = str(path or "") if not p or not os.path.exists(p): return "" try: with Image.open(p).convert("RGB") as pil_img: buf = io.BytesIO() pil_img.save(buf, format="PNG") encoded = base64.b64encode(buf.getvalue()).decode("ascii") return f"data:image/png;base64,{encoded}" except Exception: return "" def _render_sample_preview_card(item: dict, idx: int) -> str: title = f"sample {idx + 1}" if not isinstance(item, dict): return ( "
" f"
{title}
" "
No preview available
" "
" ) text = str(item.get("text") or "").strip() image = item.get("image") audio = item.get("audio") video = item.get("video") media_html = "
🧩
" desc = "No preview available" if image: img_path = str(image) src = _image_to_data_uri(img_path) if src: media_html = f"sample image" desc = text if text else f"image: {Path(img_path).name}" else: # Fallback for browsers/Gradio sanitization cases where data URI is blocked. file_src = f"/file={quote(img_path)}" media_html = f"sample image" desc = text if text else f"image: {Path(img_path).name}" elif video: vpath = str(video) thumb = _video_thumb_data_uri(vpath) if thumb: media_html = f"sample video" else: media_html = "
🎬
" desc = text if text else f"video: {Path(vpath).name}" elif audio: apath = str(audio) media_html = "
🎤
" desc = text if text else f"audio: {Path(apath).name}" elif text: media_html = "
💬
" desc = text desc = desc.replace("\n", " ").strip() if len(desc) > 120: desc = desc[:117] + "..." return ( "
" f"
{media_html}
" "
" f"
{html.escape(title)}
" f"
{html.escape(desc)}
" "
" "
" ) def _render_response(status: str, body_html: str = "") -> str: safe_status = html.escape(status or "") parts = [] if safe_status: parts.append(f"

{safe_status}

") if body_html: parts.append(body_html) content = "".join(parts) return f"
{content}
" def _render_text_message(status: str, content: str) -> str: content = (content or "").strip() if not content: return _render_response(status) safe_content = _format_tokenized_text(content) body = f"
{safe_content}
" return _render_response(status, body) def _is_mask_like_token(token: str) -> bool: t = token.strip() if not t: return False upper = t.upper() return ( upper in {"[MASK]", "", "<|MASK|>", "", "<|MASK_TOKEN|>"} or upper in {"", "MDM_MASK", "<|MDM_MASK|>"} or "MASK" in upper ) def _is_special_token(token: str) -> bool: t = token.strip() return bool(t) and t.startswith("<|") and t.endswith("|>") def _format_tokenized_text(text: str) -> str: if not text: return "" # Handle both complete and partially-streamed mask tokens. mask_pat = r"(<[^>\n]*MASK[^>\n]*>?|\[MASK\]|MASK_TOKEN)" chunks = re.split(mask_pat, text, flags=re.IGNORECASE) out = [] for chunk in chunks: if not chunk: continue if re.fullmatch(mask_pat, chunk, flags=re.IGNORECASE) or _is_mask_like_token(chunk): out.append("MASK") continue if chunk.isspace(): out.append(chunk.replace("\n", "
")) continue safe = html.escape(chunk) if _is_special_token(chunk): out.append(f"{safe}") else: out.append(safe) return "".join(out).replace("\n", "
") def _render_audio_message(status: str, audio): if not audio: return _render_response(status) sample_rate, data = audio if data is None: return _render_response(status) waveform = np.asarray(data, dtype=np.float32) if waveform.size == 0: return _render_response(status) if waveform.ndim == 1: waveform = waveform[:, None] channels = waveform.shape[1] clipped = np.clip(waveform, -1.0, 1.0) pcm16 = (clipped * 32767.0).astype(np.int16) buffer = io.BytesIO() with wave.open(buffer, "wb") as wav_writer: wav_writer.setnchannels(channels) wav_writer.setsampwidth(2) wav_writer.setframerate(int(sample_rate)) wav_writer.writeframes(pcm16.tobytes()) encoded = base64.b64encode(buffer.getvalue()).decode("ascii") audio_tag = ( "
" "" "
" ) return _render_response(status, audio_tag) def _render_image_message(status: str, image: Image.Image): if image is None: return _render_response(status) buffer = io.BytesIO() try: image.save(buffer, format="PNG") except Exception: return _render_response(status) encoded = base64.b64encode(buffer.getvalue()).decode("ascii") safe_status = html.escape(status or "") return ( "
" f"

{safe_status}

" "Generated image" "
" ) def _render_user_message(mode: str, message: str, image_in, audio_in, video_in, defer_video: bool = False) -> str: def _cache_media_copy(src_path: str) -> str: path = str(src_path or "") if not path or not os.path.exists(path): return path try: suffix = Path(path).suffix or "" fd, dst = tempfile.mkstemp(prefix="omada_media_", suffix=suffix, dir=str(PREVIEW_DIR)) os.close(fd) shutil.copy2(path, dst) return dst except Exception: return path def _to_browser_mp4(video_path: str) -> str: path = str(video_path or "") if not path: return path try: fd, out_path = tempfile.mkstemp(prefix="omada_preview_", suffix=".mp4", dir=str(PREVIEW_DIR)) os.close(fd) cmd = [ "ffmpeg", "-y", "-i", path, "-an", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-movflags", "+faststart", out_path, ] proc = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) if proc.returncode == 0 and os.path.exists(out_path): return out_path if os.path.exists(out_path): os.remove(out_path) except Exception: pass return path def _video_data_uri(video_path: str, mime: str, max_bytes: int = 25 * 1024 * 1024) -> str: try: size = os.path.getsize(video_path) if size <= 0 or size > max_bytes: return "" with open(video_path, "rb") as f: encoded = base64.b64encode(f.read()).decode("ascii") return f"data:{mime};base64,{encoded}" except Exception: return "" def _video_poster_data_uri(video_path: str) -> str: try: import cv2 # type: ignore cap = cv2.VideoCapture(video_path) ok, frame = cap.read() cap.release() if not ok or frame is None: return "" ok, buf = cv2.imencode(".jpg", frame) if not ok: return "" encoded = base64.b64encode(buf.tobytes()).decode("ascii") return f"data:image/jpeg;base64,{encoded}" except Exception: return "" parts = [] text = (message or "").strip() if image_in is not None: try: if isinstance(image_in, Image.Image): buffer = io.BytesIO() image_in.save(buffer, format="PNG") encoded = base64.b64encode(buffer.getvalue()).decode("ascii") parts.append( "
" f"Input image" "
" ) elif isinstance(image_in, str) and image_in: try: with Image.open(image_in).convert("RGB") as pil_img: buf = io.BytesIO() pil_img.save(buf, format="PNG") encoded = base64.b64encode(buf.getvalue()).decode("ascii") parts.append( "
" f"Input image" "
" ) except Exception: image_path = _cache_media_copy(image_in) parts.append( "
" f"Input image" "
" ) except Exception: pass if mode == "MMU (Video → Text)" and video_in: if defer_video: parts.append("
Video loading...
") if text: parts.append(f"
{html.escape(text)}
") return "".join(parts) video_path = None if isinstance(video_in, str): video_path = video_in elif isinstance(video_in, dict): video_path = video_in.get("path") or video_in.get("name") if video_path: cached_original = _cache_media_copy(video_path) preview_path = _to_browser_mp4(cached_original) poster = _video_poster_data_uri(cached_original) poster_attr = f" poster='{poster}'" if poster else "" source_path = str(preview_path or cached_original) fallback_path = str(cached_original) def _video_mime(path: str) -> str: ext = os.path.splitext(path.lower())[1] return { ".mp4": "video/mp4", ".webm": "video/webm", ".mov": "video/quicktime", ".m4v": "video/mp4", ".avi": "video/x-msvideo", ".mkv": "video/x-matroska", }.get(ext, "video/mp4") parts.append( "
" f"" "
" ) if audio_in is not None: audio_path = "" if isinstance(audio_in, str): audio_path = audio_in elif isinstance(audio_in, dict): audio_path = audio_in.get("path") or audio_in.get("name") or "" elif isinstance(audio_in, (tuple, list)) and len(audio_in) == 2: try: sample_rate, data = audio_in waveform = np.asarray(data, dtype=np.float32) if waveform.ndim == 1: waveform = waveform[:, None] waveform = np.clip(waveform, -1.0, 1.0) pcm16 = (waveform * 32767.0).astype(np.int16) fd, temp_audio = tempfile.mkstemp(prefix="omada_user_audio_", suffix=".wav", dir=str(PREVIEW_DIR)) os.close(fd) with wave.open(temp_audio, "wb") as wav_writer: wav_writer.setnchannels(pcm16.shape[1]) wav_writer.setsampwidth(2) wav_writer.setframerate(int(sample_rate)) wav_writer.writeframes(pcm16.tobytes()) audio_path = temp_audio except Exception: audio_path = "" if audio_path: ext = os.path.splitext(audio_path.lower())[1] mime = { ".wav": "audio/wav", ".mp3": "audio/mpeg", ".flac": "audio/flac", ".ogg": "audio/ogg", ".m4a": "audio/mp4", }.get(ext, "audio/wav") src = "" try: with open(audio_path, "rb") as f: encoded_audio = base64.b64encode(f.read()).decode("ascii") src = f"data:{mime};base64,{encoded_audio}" except Exception: audio_path = _cache_media_copy(audio_path) src = f"/file={quote(audio_path)}" parts.append( "
" f"" f"
Open audio
" "
" ) if text: parts.append(f"
{html.escape(text)}
") if not parts: parts.append(f"
[{html.escape(mode)}]
") return "".join(parts) def _extract_video_path(video_in) -> str: if isinstance(video_in, str): return video_in if isinstance(video_in, dict): return str(video_in.get("path") or video_in.get("name") or "") return "" def _video_thumb_data_uri(video_path: str) -> str: if not video_path or not os.path.exists(video_path): return "" try: import cv2 # type: ignore cap = cv2.VideoCapture(video_path) ok, frame = cap.read() cap.release() if not ok or frame is None: return "" ok, buf = cv2.imencode(".jpg", frame) if not ok: return "" encoded = base64.b64encode(buf.tobytes()).decode("ascii") return f"data:image/jpeg;base64,{encoded}" except Exception: return "" def _render_attachment_preview(image_in, audio_in, video_in) -> str: items = [] if image_in is not None: try: if isinstance(image_in, Image.Image): buf = io.BytesIO() image_in.save(buf, format="PNG") encoded = base64.b64encode(buf.getvalue()).decode("ascii") items.append( "
" f"image attachment" "Image" "
" ) elif isinstance(image_in, str) and image_in: with Image.open(image_in).convert("RGB") as pil_img: buf = io.BytesIO() pil_img.save(buf, format="PNG") encoded = base64.b64encode(buf.getvalue()).decode("ascii") items.append( "
" f"image attachment" "Image" "
" ) except Exception: pass video_path = _extract_video_path(video_in) if video_path: thumb = _video_thumb_data_uri(video_path) if thumb: items.append( "
" f"video attachment" "Video" "
" ) else: items.append( "
" "🎬 Video" "
" ) audio_path = "" if isinstance(audio_in, str): audio_path = audio_in elif isinstance(audio_in, dict): audio_path = str(audio_in.get("path") or audio_in.get("name") or "") if audio_path: filename = html.escape(Path(audio_path).name or "speech.wav") items.append( "
" "🎤" f"{filename}" "
" ) if not items: return "" return "
" + "".join(items) + "
" def _render_task_chip(mode: str) -> str: icon_map = { "Chat": "💬", "MMU (Image → Text)": "🖼️", "MMU (Video → Text)": "🎬", "Image Generation": "🎨", "Image Editing": "🛠️", "ASR": "🎙️", "TTS": "🔊", } icon = icon_map.get(mode, "🧩") safe_mode = html.escape(mode or "Task") return f"
{icon} {safe_mode}
" # audio / video / image examples S2T_EXAMPLES = _load_media_examples("s2t", {".wav", ".mp3", ".flac", ".ogg"}) V2T_EXAMPLES = _load_media_examples("v2t", {".mp4", ".mov", ".avi", ".webm"}) # MMU images MMU_DIR = ASSET_ROOT / "mmu" MMU_EXAMPLES: List[List[str]] = [] DEFAULT_MMU_PROMPT = "Describe the given image in detail." if MMU_DIR.exists(): for path in sorted( [ p for p in MMU_DIR.iterdir() if p.suffix.lower() in {".png", ".jpg", ".jpeg", ".webp"} ] ): MMU_EXAMPLES.append([ str(path), DEFAULT_MMU_PROMPT, ]) # --------------------------- # Global OmadaDemo instance # --------------------------- APP = None # type: ignore APP_LOCK = threading.Lock() def get_app() -> OmadaDemo: global APP if APP is not None: return APP with APP_LOCK: if APP is not None: return APP ckpt_dir = download_checkpoint() # Wire style centroids to expected locations style_targets = [ MMADA_ROOT / "models" / "speech_tokenization" / "condition_style_centroid", PROJECT_ROOT / "EMOVA_speech_tokenizer" / "emova_speech_tokenizer" / "speech_tokenization" / "condition_style_centroid", ] style_root_resolved = STYLE_ROOT.resolve() for starget in style_targets: starget.parent.mkdir(parents=True, exist_ok=True) try: if starget.is_symlink(): try: current = starget.resolve() except Exception: current = None if current != style_root_resolved: starget.unlink(missing_ok=True) starget.symlink_to(STYLE_ROOT, target_is_directory=True) elif starget.exists(): # Existing concrete dir/file: keep as-is. pass else: starget.symlink_to(STYLE_ROOT, target_is_directory=True) except FileExistsError: # Concurrent init race (warmup vs request): safe to ignore. pass # Prefer a repo-local Space config first, then fall back to demo configs. 
def warmup_model_status() -> str:
    """Report the model load state as a one-line, user-facing status string.

    When the ``SPACE_ID`` environment variable is set, the model is not loaded
    here and a "lazy load" message is returned. Otherwise ``get_app()`` is
    called to load the model now; any exception it raises is folded into a
    "Load failed" message instead of propagating.
    """
    # ZeroGPU Spaces forbids CUDA init in the main process.
    running_on_space = bool(os.getenv("SPACE_ID"))
    if running_on_space:
        return "Model status: Ready (lazy load on first request)."

    try:
        get_app()
    except Exception as load_error:
        return f"Model status: Load failed ({load_error})."
    return "Model status: Loaded. Inference is ready."
@spaces.GPU
def chat_handler(message, max_tokens, steps, block_len, temperature):
    """Stream chat replies for ``message`` as ``(reply_html, status)`` pairs.

    The numeric UI values are cast to ``int`` / ``float`` and forwarded to
    ``run_chat_stream`` on the shared app instance, with ``update_every=32``.
    Each item produced by that stream is a 3-tuple; its third element is
    dropped and the first two are yielded.
    """
    demo = get_app()
    stream = demo.run_chat_stream(
        message=message,
        max_new_tokens=int(max_tokens),
        steps=int(steps),
        block_length=int(block_len),
        temperature=float(temperature),
        update_every=32,
    )
    for update in stream:
        reply_html, status, _finished = update
        yield reply_html, status
.gradio-button { max-width: 280px !important; } .omada-hero { text-align: center; margin: 40px 0 24px 0; } .omada-hero h2 { font-size: 2.2rem; margin: 0; color: var(--omada-dark-text); } .omada-hero p { margin: 10px 0 0 0; color: var(--omada-dark-muted); } .omada-input-row { gap: 6px !important; align-items: center !important; display: flex !important; flex-direction: row !important; justify-content: center !important; position: relative !important; inset: auto !important; top: auto !important; right: auto !important; bottom: auto !important; left: auto !important; transform: none !important; background: var(--omada-surface-alt); padding: 6px 14px; border-radius: 999px; z-index: 5; width: min(980px, calc(100vw - 24px)); margin: 4px auto 8px; box-shadow: 0 8px 24px rgba(0,0,0,0.08); box-sizing: border-box; } .omada-input-row > * { min-width: 0 !important; margin: 0 !important; align-self: center !important; background: transparent !important; box-shadow: none !important; border: none !important; } .omada-input-row .gradio-textbox textarea { background: var(--omada-surface) !important; color: var(--omada-text-primary) !important; border-radius: 999px !important; border: 1px solid var(--omada-border) !important; padding: 6px 10px !important; min-height: 36px !important; } .omada-plus-btn button, .omada-send-btn button { border-radius: 999px !important; width: 36px !important; min-width: 36px !important; height: 36px !important; background: var(--omada-surface) !important; color: var(--omada-text-primary) !important; border: 1px solid var(--omada-border) !important; padding: 0 !important; font-size: 1.2rem !important; line-height: 1 !important; } .omada-plus-btn, .omada-send-btn { flex: 0 0 36px !important; display: flex !important; align-items: center !important; justify-content: center !important; } .omada-auto { width: 132px !important; flex: 0 0 132px !important; display: flex !important; align-items: center !important; position: relative !important; border-radius: 
999px !important; overflow: visible !important; } .omada-auto button { height: 36px !important; min-height: 36px !important; width: 100% !important; font-size: 0.9rem !important; padding: 0 12px !important; background: rgba(255, 255, 255, 0.24) !important; border: 1px solid rgba(171, 188, 214, 0.42) !important; color: var(--omada-text-primary) !important; border-radius: 999px !important; appearance: none !important; -webkit-appearance: none !important; -moz-appearance: none !important; text-align: left !important; } .omada-auto svg, .omada-auto .wrap > svg, .omada-auto .dropdown-arrow { display: none !important; } .omada-plus-btn button, .omada-send-btn button { flex: 0 0 auto !important; } .omada-input-row .gradio-textbox { width: 100% !important; flex: 1 1 auto !important; min-width: 0 !important; opacity: 1 !important; pointer-events: auto !important; background: transparent !important; border: none !important; box-shadow: none !important; } .omada-input-row .gradio-textbox > div, .omada-input-row .gradio-dropdown, .omada-input-row .gradio-dropdown > div, .omada-plus-btn, .omada-send-btn, .omada-auto { background: transparent !important; border: none !important; box-shadow: none !important; } .omada-send-btn { margin-left: -2px !important; } .omada-input-row .gradio-textbox textarea { width: 100% !important; display: block !important; pointer-events: auto !important; opacity: 1 !important; cursor: text !important; } .omada-panel-backdrop { position: fixed !important; inset: 0 !important; background: rgba(255, 255, 255, 0.22) !important; backdrop-filter: blur(12px) saturate(120%) !important; -webkit-backdrop-filter: blur(12px) saturate(120%) !important; z-index: 1100 !important; pointer-events: auto !important; } .omada-panel { position: relative !important; top: auto !important; left: auto !important; transform: none !important; max-height: none !important; overflow: visible !important; width: min(980px, calc(100vw - 24px)); margin: 0 auto 14px auto; box-shadow: 
0 20px 60px rgba(0,0,0,0.12); z-index: 9999; pointer-events: auto !important; isolation: isolate; } .omada-controls-safe { position: fixed !important; left: 50% !important; top: 50% !important; transform: translate(-50%, -50%) !important; width: min(980px, calc(100vw - 36px)) !important; max-height: min(82vh, 900px) !important; overflow: auto !important; margin: 0 !important; z-index: 1200 !important; border-radius: 34px !important; } .omada-panel * { pointer-events: auto; } .omada-panel input, .omada-panel select, .omada-panel textarea, .omada-panel button, .omada-panel .gradio-slider, .omada-panel .gradio-slider * { pointer-events: auto !important; } .omada-panel .gradio-radio, .omada-panel .gradio-radio label, .omada-panel .gradio-radio input { pointer-events: auto !important; cursor: pointer !important; } .omada-panel .gradio-radio { position: relative !important; z-index: 300 !important; } .omada-panel .gradio-slider, .omada-panel .gradio-slider .wrap, .omada-panel .gradio-slider .wrap-inner, .omada-panel .gradio-slider input[type="range"], .omada-panel .gradio-slider input[type="number"], .omada-panel .gradio-dropdown, .omada-panel .gradio-dropdown select, .omada-panel .gradio-textbox textarea { pointer-events: auto !important; position: relative !important; z-index: 400 !important; } .omada-panel .gradio-slider input[type="range"] { touch-action: pan-x !important; } .omada-panel .gradio-dropdown, .omada-panel .gradio-dropdown .wrap { z-index: 1000 !important; } .gradio-dropdown .options, .gradio-dropdown .wrap .options { z-index: 2000 !important; } .gradio-container .input-status, .gradio-container .status, .gradio-container .status-dot, .gradio-container .status-indicator, .gradio-container .label-wrap .status, .gradio-container .label-wrap .status-dot { display: none !important; } .omada-chatbot { background: transparent !important; border: none !important; position: relative !important; z-index: 1 !important; } .gradio-chatbot .message { border-radius: 
18px !important; } .gradio-chatbot .message.user { margin-left: auto !important; background: #2e3037 !important; color: var(--omada-text-primary) !important; pointer-events: auto !important; } .gradio-chatbot .message.bot { margin-right: auto !important; background: #22242a !important; color: var(--omada-text-primary) !important; pointer-events: auto !important; } .gradio-chatbot .message.user *, .gradio-chatbot .message.bot * { pointer-events: auto !important; } .omada-panel { background: var(--omada-dark-panel); border: 1px solid var(--omada-dark-border); border-radius: 16px; padding: 16px; } .omada-chip button { border-radius: 999px !important; background: linear-gradient(160deg, rgba(255,255,255,0.62), rgba(255,255,255,0.36)) !important; color: #22324a !important; border: 1px solid rgba(255,255,255,0.72) !important; font-size: 0.68rem !important; line-height: 1.2 !important; padding: 6px 10px !important; backdrop-filter: blur(14px) saturate(165%); -webkit-backdrop-filter: blur(14px) saturate(165%); box-shadow: 0 8px 20px rgba(36, 56, 92, 0.16) !important; } .omada-sample-row .gradio-button, .omada-sample-row .gradio-button > div, .omada-sample-row .gradio-button > button { background: transparent !important; } .omada-chip button:hover { transform: translateY(-1px); background: linear-gradient(160deg, rgba(255,255,255,0.74), rgba(255,255,255,0.44)) !important; } .omada-video-loading { width: 360px; max-width: min(42vw, 360px); min-height: 64px; border-radius: 12px; border: 1px solid var(--omada-glass-border); background: rgba(255,255,255,0.35); display: flex; align-items: center; justify-content: center; font-size: 0.9rem; color: #304463; backdrop-filter: blur(10px) saturate(150%); -webkit-backdrop-filter: blur(10px) saturate(150%); } .omada-user-media { margin-bottom: 6px; } .omada-user-media img, .omada-user-media video { max-width: 240px; width: 240px; max-height: 180px; object-fit: contain; border-radius: 10px; border: 1px solid var(--omada-border); display: 
block; } .omada-user-media .omada-user-video { width: 360px; max-width: min(42vw, 360px); max-height: 240px; } .omada-user-media audio { width: 360px; max-width: min(42vw, 360px); display: block; } .omada-response-status { color: var(--omada-dark-muted) !important; } .omada-token-pill { display: inline-block; padding: 1px 8px; margin: 1px 2px; border-radius: 999px; border: 1px solid var(--omada-border); font-size: 0.82em; line-height: 1.6; vertical-align: baseline; background: #f7f8fa; } .omada-token-mask { border-color: #8da2c6; background: #eef3ff; color: #1f3d7a; font-weight: 600; } .omada-token-special { border-color: #c5ccd8; background: #f3f4f7; color: #4b5563; } /* Apple-like glass look */ :root { --omada-surface: #f7faff; --omada-surface-alt: #f3f8ff; --omada-glass-bg: rgba(255, 255, 255, 0.62); --omada-glass-strong: rgba(255, 255, 255, 0.72); --omada-glass-border: rgba(221, 232, 248, 0.92); --omada-glass-shadow: 0 14px 34px rgba(136, 162, 196, 0.16); } html, body, .gradio-container { background: radial-gradient(1200px 520px at 10% -10%, rgba(255,255,255,0.96), rgba(255,255,255,0.78) 48%, rgba(247,251,255,0.96) 100%), linear-gradient(135deg, #f8fbff 0%, #f3f8ff 45%, #f7fbff 100%) !important; } .omada-input-row, .omada-controls-safe, .omada-panel, .gradio-chatbot .message, .omada-chip button, .omada-input-row .gradio-textbox textarea, .omada-plus-btn button, .omada-send-btn button, .omada-auto select { background: var(--omada-glass-bg) !important; border: 1px solid var(--omada-glass-border) !important; box-shadow: var(--omada-glass-shadow) !important; backdrop-filter: blur(22px) saturate(175%); -webkit-backdrop-filter: blur(22px) saturate(175%); } .omada-controls-safe { padding: 14px 16px !important; border-radius: 28px !important; margin: 10px auto 10px auto !important; } .omada-controls-safe > div { padding: 10px 12px !important; border-radius: 22px !important; } .omada-controls-safe .gradio-button, .omada-controls-safe button, .omada-controls-safe 
.gradio-dropdown, .omada-controls-safe .gradio-textbox, .omada-controls-safe .gradio-slider { border-radius: 16px !important; } .omada-controls-safe .gradio-button { border: 1px solid var(--omada-glass-border) !important; } .gradio-chatbot .message.user { background: var(--omada-glass-strong) !important; color: #1f2937 !important; } .gradio-chatbot .message.bot { background: rgba(255, 255, 255, 0.50) !important; color: #1f2937 !important; } /* Keep generated images crisp (no frosted overlay on image replies) */ .gradio-chatbot .message { backdrop-filter: none !important; -webkit-backdrop-filter: none !important; } .gradio-chatbot .message.bot:has(.omada-image-only) { background: transparent !important; border: none !important; box-shadow: none !important; padding: 0 !important; margin: 0 !important; } .omada-image-only { display: inline-block; background: transparent !important; border: 0 !important; box-shadow: none !important; padding: 0 !important; margin: 0 !important; opacity: 1 !important; filter: none !important; } .gradio-chatbot .message.bot:has(.omada-image-only) *, .omada-image-only * { background: transparent !important; box-shadow: none !important; filter: none !important; opacity: 1 !important; } .omada-image-status { margin: 0 0 6px 0 !important; font-size: 0.85rem !important; color: #42526b !important; font-weight: 600 !important; } .omada-chip button { color: #273247 !important; } .omada-panel { border-radius: 28px !important; padding: 20px !important; } .omada-input-row { border-radius: 999px !important; } .omada-main-input, .omada-main-input * { pointer-events: auto !important; } .omada-main-input textarea, .omada-main-input input { pointer-events: auto !important; position: relative !important; z-index: 40 !important; } .omada-sample-row, .omada-sample-row * { pointer-events: auto !important; } .omada-sample-row, .omada-input-row { position: relative !important; z-index: 25 !important; } .omada-sample-row .gradio-button, .omada-input-row 
.gradio-button, .omada-input-row button { pointer-events: auto !important; } /* Compact controls (keep chat bubbles unchanged) */ .omada-shell, .omada-controls-safe, .omada-input-row, .omada-sample-row { font-size: 0.88rem !important; } .omada-sample-row { width: min(980px, calc(100vw - 24px)) !important; margin: 0 auto 4px auto !important; gap: 6px !important; } .omada-sample-row .gradio-button { flex: 1 1 0 !important; max-width: none !important; } .omada-chip button { min-height: 14px !important; height: 14px !important; font-size: 0.34rem !important; line-height: 1.0 !important; padding: 0 3px !important; border-radius: 999px !important; } .omada-chip button * { font-size: 0.34rem !important; line-height: 1.0 !important; } .omada-sample-row .omada-chip button, .omada-sample-row .gradio-button button, .omada-sample-row .omada-chip button span, .omada-sample-row .gradio-button button span, .omada-sample-row .omada-chip button p, .omada-sample-row .gradio-button button p, .omada-sample-row .omada-chip button div { font-size: 0.34rem !important; line-height: 1.05 !important; } /* Force sample chip size against Gradio theme defaults */ .omada-sample-row .omada-chip, .omada-sample-row .omada-chip .gradio-button, .omada-sample-row .omada-chip .gradio-button > div, .omada-sample-row .omada-chip .gradio-button > button, .omada-sample-row .omada-chip button { min-height: 16px !important; height: 16px !important; max-height: 16px !important; padding-top: 0 !important; padding-bottom: 0 !important; } .omada-sample-row .omada-chip button, .omada-sample-row .omada-chip button span, .omada-sample-row .omada-chip button p, .omada-sample-row .omada-chip button div { font-size: 0.42rem !important; line-height: 1 !important; padding: 0 3px !important; } .omada-input-row { padding: 4px 10px !important; margin: 2px auto 6px !important; } .omada-plus-btn button, .omada-send-btn button { width: 30px !important; min-width: 30px !important; height: 30px !important; font-size: 1rem 
!important; } .omada-plus-btn, .omada-send-btn { flex: 0 0 30px !important; } .omada-auto { width: 104px !important; flex: 0 0 104px !important; border-radius: 999px !important; } .omada-auto button { height: 30px !important; min-height: 30px !important; font-size: 0.9rem !important; padding: 0 8px !important; border-radius: 999px !important; text-align: left !important; background: rgba(255, 255, 255, 0.24) !important; border: 1px solid rgba(171, 188, 214, 0.42) !important; box-shadow: inset 0 0 0 0.5px rgba(255, 255, 255, 0.45) !important; } /* Gradio dropdown text (new/old DOM variants) */ .omada-auto, .omada-auto *, .omada-auto .wrap, .omada-auto .wrap-inner, .omada-auto .wrap-inner input, .omada-auto input, .omada-auto button, .omada-auto button span { font-size: 0.9rem !important; line-height: 1.0 !important; } .omada-input-row .gradio-textbox textarea { min-height: 30px !important; padding: 4px 9px !important; font-size: 0.9rem !important; background: transparent !important; border: none !important; box-shadow: none !important; outline: none !important; } .omada-input-row .gradio-textbox > div, .omada-input-row .gradio-textbox .wrap, .omada-input-row .gradio-textbox label { background: transparent !important; border: none !important; box-shadow: none !important; } .omada-main-input, .omada-main-input > div, .omada-main-input .wrap, .omada-main-input .wrap-inner, .omada-main-input .block, .omada-main-input .container, .omada-main-input .scroll-hide, .omada-main-input .scroll-hide > div, .omada-main-input [data-testid="textbox"] { background: transparent !important; border: none !important; box-shadow: none !important; outline: none !important; } .omada-main-input:focus, .omada-main-input:focus-within, .omada-main-input > div:focus, .omada-main-input > div:focus-within, .omada-main-input .wrap:focus, .omada-main-input .wrap:focus-within, .omada-main-input .wrap-inner:focus, .omada-main-input .wrap-inner:focus-within, .omada-main-input textarea:focus, 
.omada-main-input textarea:focus-visible { border: none !important; box-shadow: none !important; outline: none !important; } .omada-main-input textarea, .omada-main-input textarea::placeholder { background: transparent !important; } /* hide textbox secondary footer/counter text (e.g., "seconds") */ .omada-main-input .footer, .omada-main-input [data-testid="textbox-footer"], .omada-main-input .char-counter, .omada-main-input small, .omada-main-input .secondary-text { display: none !important; } .omada-controls-safe { padding: 10px 12px !important; } .omada-controls-safe > div { padding: 8px 10px !important; } .omada-controls-safe .gradio-button, .omada-controls-safe button, .omada-controls-safe .gradio-dropdown, .omada-controls-safe .gradio-textbox, .omada-controls-safe .gradio-slider, .omada-controls-safe label, .omada-controls-safe p, .omada-controls-safe span { font-size: 0.88rem !important; } /* modal transparency: outer is whiter, inner is more transparent */ .omada-controls-safe { background: linear-gradient(165deg, rgba(255, 255, 255, 0.72), rgba(245, 250, 255, 0.60)) !important; border: 1px solid rgba(218, 231, 248, 0.90) !important; box-shadow: 0 18px 42px rgba(123, 150, 188, 0.16) !important; } .omada-controls-safe > div, .omada-controls-safe .gr-box, .omada-controls-safe .gr-form, .omada-controls-safe .gr-block, .omada-controls-safe .gradio-row, .omada-controls-safe .gradio-column { background: rgba(255, 255, 255, 0.44) !important; border-color: rgba(225, 237, 252, 0.86) !important; box-shadow: none !important; } .omada-controls-safe .gradio-button, .omada-controls-safe button, .omada-controls-safe .gradio-dropdown, .omada-controls-safe .gradio-dropdown > div, .omada-controls-safe .gradio-textbox, .omada-controls-safe .gradio-textbox > div, .omada-controls-safe .gradio-slider { background: rgba(255, 255, 255, 0.56) !important; border: 1px solid rgba(221, 234, 251, 0.90) !important; box-shadow: none !important; } .omada-controls-safe .gradio-button:hover, 
.omada-controls-safe button:hover { background: rgba(255, 255, 255, 0.72) !important; } .omada-controls-safe .primary, .omada-controls-safe .primary button { background: linear-gradient(165deg, #4d8ef7, #3f7ee8) !important; color: #ffffff !important; border: 1px solid #3d75d8 !important; box-shadow: 0 8px 20px rgba(69, 126, 233, 0.35) !important; } .omada-controls-safe .primary:hover, .omada-controls-safe .primary button:hover { background: linear-gradient(165deg, #5b99fb, #4b87ed) !important; } .omada-controls-safe .primary:disabled, .omada-controls-safe .primary button:disabled, .omada-controls-safe .primary[disabled], .omada-controls-safe .primary button[disabled] { opacity: 1 !important; color: #ffffff !important; background: linear-gradient(165deg, #4d8ef7, #3f7ee8) !important; border: 1px solid #3d75d8 !important; } .omada-selected-task-chip { display: inline-flex; align-items: center; padding: 6px 12px; border-radius: 999px; color: #fff; background: linear-gradient(165deg, #4d8ef7, #3f7ee8); border: 1px solid #3d75d8; font-weight: 700; box-shadow: 0 8px 18px rgba(69, 126, 233, 0.30); } .omada-ready-hero { position: sticky; top: clamp(8px, 12vh, 90px); margin: 0 auto clamp(56px, 10vh, 140px) auto; width: min(760px, calc(100vw - 48px)); z-index: 15; pointer-events: none; font-size: clamp(1.2rem, 2.2vw, 2rem); font-weight: 700; color: #2f3f58; letter-spacing: -0.01em; text-align: center; } .omada-ready-hero .omada-ready-sub { margin-top: 5px; font-size: clamp(0.78rem, 1.2vw, 0.95rem); font-weight: 500; color: #5b6e8d; line-height: 1.4; } /* Sample list container style aligned with main input row */ .omada-sample-row { width: min(900px, calc(100vw - 80px)) !important; margin: 0 auto 8px auto !important; padding: 0 !important; background: transparent !important; border: none !important; box-shadow: none !important; gap: 10px !important; } .omada-sample-row .omada-chip button, .omada-sample-row .omada-chip button span, .omada-sample-row .omada-chip button p, 
.omada-sample-row .omada-chip button div { font-size: 0.9rem !important; line-height: 1.15 !important; font-weight: 700 !important; } .omada-sample-row .omada-chip button { min-height: 34px !important; height: 34px !important; padding: 0 12px !important; border-radius: 999px !important; background: rgba(255, 255, 255, 0.46) !important; border: 1px solid rgba(180, 198, 224, 0.45) !important; box-shadow: 0 6px 14px rgba(120, 148, 186, 0.08) !important; } /* final override: keep selector pill with subtle visible border */ .omada-input-row .omada-auto, .omada-input-row .omada-auto button { border-radius: 999px !important; } .omada-input-row .omada-auto button { border: 1px solid rgba(160, 178, 206, 0.72) !important; box-shadow: inset 0 0 0 1px rgba(255, 255, 255, 0.52), 0 1px 4px rgba(125, 146, 176, 0.14) !important; } /* hard override: keep subtle border visible for selector pill */ .omada-input-row .omada-auto, .omada-input-row .omada-auto > div, .omada-input-row .omada-auto button, .omada-input-row .omada-auto .gradio-button, .omada-input-row .omada-auto .gradio-button > div { border: 1px solid rgba(160, 178, 206, 0.72) !important; border-radius: 999px !important; box-shadow: inset 0 0 0 1px rgba(255, 255, 255, 0.52), 0 1px 4px rgba(125, 146, 176, 0.14) !important; } /* hard override: sample chip vertical size */ .omada-sample-row .gradio-button { flex: 0 0 auto !important; } .omada-sample-row .omada-chip, .omada-sample-row .omada-chip .gradio-button, .omada-sample-row .omada-chip .gradio-button > div, .omada-sample-row .omada-chip button { min-width: 120px !important; width: 120px !important; max-width: 120px !important; min-height: 40px !important; height: 40px !important; max-height: 40px !important; border-radius: 999px !important; } .omada-input-stack { width: min(980px, calc(100vw - 24px)); margin: 0 auto 6px auto; position: sticky; bottom: 6px; z-index: 40; } .omada-input-row { display: flex !important; flex-direction: column !important; justify-content: 
flex-end !important; align-items: stretch !important; gap: 6px !important; padding: 10px 12px !important; border-radius: 36px !important; min-height: 110px; } /* final layout override */ .omada-bottom-row { display: flex !important; flex-wrap: nowrap !important; align-items: flex-end !important; justify-content: flex-start !important; gap: 8px !important; } .omada-bottom-row > * { flex: 0 0 auto !important; margin: 0 !important; } .omada-bottom-row .omada-main-input { flex: 1 1 auto !important; margin: -10px !important; min-width: 0 !important; width: auto !important; max-width: none !important; } .omada-bottom-row .omada-main-input, .omada-bottom-row .omada-main-input > div, .omada-bottom-row .omada-main-input .wrap, .omada-bottom-row .omada-main-input .wrap-inner, .omada-bottom-row .omada-main-input .block, .omada-bottom-row .omada-main-input [data-testid="textbox"], .omada-bottom-row .omada-main-input .scroll-hide, .omada-bottom-row .omada-main-input .scroll-hide > div { background: transparent !important; border: none !important; box-shadow: none !important; width: auto !important; min-width: 0 !important; } .omada-bottom-row .omada-main-input textarea { background: transparent !important; border: none !important; border-radius: 14px !important; box-shadow: none !important; } .omada-bottom-row .omada-send-btn { margin-left: auto !important; flex: 0 0 auto !important; align-self: flex-end !important; } .omada-bottom-row .omada-send-btn > div { width: 42px !important; min-width: 42px !important; } .omada-bottom-row .omada-send-btn button, .omada-bottom-row .omada-send-btn > div > button { width: 42px !important; min-width: 42px !important; height: 42px !important; display: flex !important; align-items: center !important; justify-content: center !important; border-radius: 999px !important; border: 2px solid #4f96ff !important; background: rgba(130, 196, 255, 0.28) !important; color: #1153a6 !important; box-shadow: inset 0 0 0 1px rgba(214, 239, 255, 0.88), 0 0 0 
1px rgba(79, 150, 255, 0.28) !important; } .omada-bottom-row .omada-send-btn button:hover { background: rgba(130, 196, 255, 0.38) !important; } /* scope input-row cleanup to avoid wrapper layout breakage */ .omada-input-row .omada-plus-btn, .omada-input-row .omada-task-btn, .omada-input-row .omada-custom-btn, .omada-input-row .omada-task-chip-inline, .omada-input-row .omada-main-input, .omada-input-row .omada-send-btn { margin: 0 !important; } .omada-attach-preview-wrap { min-height: 0 !important; width: 100% !important; margin: 0 !important; padding: 0 !important; } .omada-attach-preview-wrap .html-container { min-height: 0 !important; height: auto !important; padding: 0 !important; margin: 0 !important; } .omada-attach-preview-wrap .html-container:empty { display: none !important; } .omada-attach-preview { display: flex; align-items: center; gap: 8px; justify-content: flex-start; min-height: 0; flex-wrap: wrap; } .omada-attach-preview.omada-empty { display: none !important; } .omada-attach-preview-wrap:has(.omada-attach-preview.omada-empty) { display: none !important; } .omada-attach-item { display: inline-flex; flex-direction: column; align-items: center; justify-content: center; gap: 4px; padding: 6px; border-radius: 10px; background: rgba(255, 255, 255, 0.46); border: 1px solid rgba(190, 208, 234, 0.65); min-width: 66px; } .omada-attach-item img { width: 54px; height: 42px; border-radius: 8px; object-fit: cover; } .omada-attach-item span { font-size: 0.68rem; color: #2f4568; font-weight: 600; } .omada-attach-audio { flex-direction: row; min-width: 140px; padding: 10px 12px; } .omada-audio-icon { font-size: 0.95rem; } .omada-task-btn, .omada-custom-btn { flex: 0 0 auto !important; } .omada-task-btn button, .omada-custom-btn button { height: 30px !important; min-height: 30px !important; border-radius: 999px !important; font-size: 0.72rem !important; font-weight: 700 !important; padding: 0 10px !important; background: transparent !important; border: none 
!important; box-shadow: none !important; } .omada-plus-btn button { background: transparent !important; border: none !important; box-shadow: none !important; } .omada-task-chip-inline { flex: 0 0 auto !important; min-width: 0 !important; } .omada-task-chip-inline .omada-selected-task-chip { padding: 2px 8px !important; font-size: 0.72rem !important; box-shadow: none; margin-left: -2px !important; } /* keep sample chips/task-custom typography aligned */ .omada-sample-row .omada-chip button, .omada-sample-row .omada-chip button span, .omada-sample-row .omada-chip button p, .omada-sample-row .omada-chip button div, .omada-task-btn button, .omada-custom-btn button, .omada-task-chip-inline .omada-selected-task-chip { font-size: 0.72rem !important; line-height: 1.05 !important; } /* ========================= FIX: composer layout (textbox above buttons) ========================= */ .omada-input-row { width: 100% !important; position: relative !important; justify-content: flex-start !important; align-items: stretch !important; gap: 10px !important; padding-right: 76px !important; /* room for absolute send button */ } .omada-main-input, .omada-main-input > div, .omada-main-input .wrap, .omada-main-input .wrap-inner, .omada-main-input [data-testid="textbox"] { width: 100% !important; max-width: 100% !important; } .omada-main-input textarea { width: 100% !important; min-height: 34px !important; background: transparent !important; border: none !important; box-shadow: none !important; } .omada-bottom-row { width: 100% !important; max-width: 100% !important; display: flex !important; flex-wrap: nowrap !important; justify-content: flex-start !important; align-items: center !important; gap: 8px !important; margin-top: -5px !important; } .omada-bottom-row > * { min-width: 0 !important; flex: 0 0 auto !important; } .omada-send-btn { position: absolute !important; right: 16px !important; top: 50% !important; transform: translateY(-50%) !important; margin: 0 !important; } 
.omada-send-btn > div { width: 46px !important; min-width: 46px !important; } .omada-send-btn button, .omada-send-btn > div > button { width: 46px !important; min-width: 46px !important; height: 46px !important; border-radius: 999px !important; border: 1.8px solid rgba(98, 170, 255, 0.95) !important; background: linear-gradient(160deg, rgba(185, 224, 255, 0.40), rgba(118, 179, 255, 0.26)) !important; box-shadow: inset 0 0 0 1px rgba(229, 245, 255, 0.95), 0 10px 22px rgba(79, 146, 255, 0.24) !important; color: #0d4fa7 !important; font-size: 1.75rem !important; font-weight: 800 !important; line-height: 1 !important; display: flex !important; align-items: center !important; justify-content: center !important; } .omada-send-btn button:hover { background: linear-gradient(160deg, rgba(196, 230, 255, 0.52), rgba(129, 186, 255, 0.34)) !important; } /* ========================= Task / Custom / Chat: same font and borderless ========================= */ .omada-task-btn button, .omada-custom-btn button { font-size: 0.37rem !important; line-height: 1.0 !important; font-weight: 700 !important; height: 30px !important; min-height: 30px !important; padding: 0 10px !important; border-radius: 999px !important; border: none !important; box-shadow: none !important; background: transparent !important; color: #22324a !important; } /* keep selected task chip blue, but match typography */ .omada-task-chip-inline .omada-selected-task-chip { font-size: 0.78rem !important; line-height: 1.0 !important; padding: 5px 11px !important; background: linear-gradient(165deg, #4d8ef7, #3f7ee8) !important; color: #ffffff !important; border: 1px solid #3d75d8 !important; box-shadow: 0 6px 14px rgba(69, 126, 233, 0.28) !important; } .omada-task-btn button *, .omada-custom-btn button * { font-size: 0.37rem !important; line-height: 1.0 !important; } /* hard-fix: keep chat/custom glued together on left */ .omada-bottom-row .omada-task-chip-inline, .omada-bottom-row .omada-task-chip-inline > div, 
.omada-bottom-row .omada-task-chip-inline > div > div, .omada-bottom-row .omada-custom-btn, .omada-bottom-row .omada-custom-btn > div { display: inline-flex !important; width: auto !important; max-width: max-content !important; flex: 0 0 auto !important; margin-left: 0 !important; } /* hard-fix: if legacy send exists in row, hide it */ .omada-bottom-row .omada-send-btn { display: none !important; } /* hard-fix: dedicated send button on right-bottom */ .omada-input-row .omada-send-btn-fix { display: inline-flex !important; position: absolute !important; right: 14px !important; bottom: 14px !important; top: auto !important; left: auto !important; transform: none !important; z-index: 60 !important; width: 44px !important; min-width: 44px !important; max-width: 44px !important; flex: 0 0 44px !important; margin: 0 !important; border-radius: 999px !important; } .omada-input-row .omada-send-btn-fix::before { content: "" !important; position: absolute !important; inset: -3px !important; border-radius: 999px !important; border: 1.6px solid rgba(168, 216, 255, 0.92) !important; background: radial-gradient(circle at 30% 20%, rgba(226, 246, 255, 0.40), rgba(170, 213, 255, 0.18)) !important; box-shadow: 0 8px 20px rgba(78, 143, 236, 0.22) !important; pointer-events: none !important; z-index: -1 !important; } .omada-input-row .omada-send-btn-fix > div { width: 44px !important; min-width: 44px !important; max-width: 44px !important; flex: 0 0 44px !important; } .omada-input-row .omada-send-btn-fix button, .omada-input-row .omada-send-btn-fix > div > button { width: 44px !important; min-width: 44px !important; max-width: 44px !important; height: 44px !important; border-radius: 999px !important; border: 2px solid rgba(98, 170, 255, 0.98) !important; background: linear-gradient(160deg, rgba(185, 224, 255, 0.46), rgba(118, 179, 255, 0.30)) !important; box-shadow: inset 0 0 0 1px rgba(229, 245, 255, 0.95), 0 10px 22px rgba(79, 146, 255, 0.24) !important; color: #0d4fa7 !important; 
display: flex !important; align-items: center !important; justify-content: center !important; padding: 0 !important; } .omada-input-row .omada-send-btn-fix button span, .omada-input-row .omada-send-btn-fix button p { font-size: 1.6rem !important; line-height: 1 !important; font-weight: 800 !important; } /* textbox inner gray action buttons remove */ .omada-main-input [class*="icon"], .omada-main-input button, .omada-main-input [role="button"] { display: none !important; } .omada-main-input textarea { display: block !important; } /* final send border lock */ .omada-input-row .omada-send-btn-fix button, .omada-input-row .omada-send-btn-fix > div > button { border: 1.8px solid rgba(116, 182, 248, 1) !important; outline: 1px solid rgba(198, 229, 255, 0.95) !important; border-radius: 999px !important; } .omada-input-row .omada-send-btn-fix::before { border: 1.2px solid rgba(153, 207, 255, 0.9) !important; } /* compact vertical height */ .omada-input-row { min-height: 62px !important; padding-top: 0 !important; padding-bottom: 1px !important; gap: 0 !important; } .omada-main-input { margin-top: 0 !important; transform: translateY(-10px) !important; } .omada-main-input textarea { min-height: 26px !important; padding-top: 0 !important; padding-bottom: 1px !important; } /* outer container: slightly less rounded */ .omada-input-row { border-radius: 28px !important; } /* bigger plus button */ .omada-bottom-row .omada-plus-btn button { width: 46px !important; min-width: 46px !important; height: 46px !important; font-size: 1.9rem !important; font-weight: 800 !important; } /* final typography lock */ .omada-task-btn button, .omada-custom-btn button, .omada-task-btn button *, .omada-custom-btn button * { font-size: 0.46rem !important; line-height: 1 !important; border: none !important; box-shadow: none !important; } .omada-task-chip-inline .omada-selected-task-chip { font-size: 0.9rem !important; line-height: 1 !important; padding: 8px 14px !important; } /* ultimate final lock */ 
.omada-bottom-row .omada-task-btn, .omada-bottom-row .omada-task-btn > div, .omada-bottom-row .omada-task-btn .gradio-button, .omada-bottom-row .omada-task-btn .gradio-button > div, .omada-bottom-row .omada-custom-btn, .omada-bottom-row .omada-custom-btn > div, .omada-bottom-row .omada-custom-btn .gradio-button, .omada-bottom-row .omada-custom-btn .gradio-button > div { border: none !important; box-shadow: none !important; background: transparent !important; } .omada-bottom-row .omada-task-btn button, .omada-bottom-row .omada-task-btn > div > button, .omada-bottom-row .omada-custom-btn button, .omada-bottom-row .omada-custom-btn > div > button, .omada-bottom-row .omada-task-btn button *, .omada-bottom-row .omada-custom-btn button * { font-size: 0.42rem !important; line-height: 1 !important; border: none !important; box-shadow: none !important; background: transparent !important; } .omada-bottom-row .omada-task-chip-inline .omada-selected-task-chip { font-size: 0.7rem !important; line-height: 1 !important; padding: 9px 15px !important; } .omada-bottom-row .omada-plus-btn button, .omada-bottom-row .omada-plus-btn > div > button { width: 74px !important; min-width: 74px !important; height: 74px !important; font-size: 3rem !important; font-weight: 900 !important; } .omada-input-row .omada-send-btn-fix button, .omada-input-row .omada-send-btn-fix > div > button { border: none !important; outline: none !important; border-radius: 999px !important; background: rgba(167, 214, 255, 0.88) !important; box-shadow: 0 4px 9px rgba(95, 156, 232, 0.15) !important; } .omada-input-row .omada-send-btn-fix::before { content: "" !important; position: absolute !important; inset: -4px !important; border-radius: 999px !important; border: none !important; background: rgba(206, 234, 255, 0.35) !important; box-shadow: 0 4px 10px rgba(104, 165, 236, 0.14) !important; pointer-events: none !important; z-index: -1 !important; } /* bigger placeholder + sample task chip text */ .omada-main-input 
textarea, .omada-main-input textarea::placeholder { font-size: 1.05rem !important; } .omada-sample-row .omada-chip button, .omada-sample-row .omada-chip button span, .omada-sample-row .omada-chip button p, .omada-sample-row .omada-chip button div { font-size: 1.22rem !important; line-height: 1.14 !important; } .omada-sample-row .omada-chip .gradio-button > button, .omada-sample-row .omada-chip .gradio-button > button span, .omada-sample-row .omada-chip .gradio-button > button p, .omada-sample-row .omada-chip .gradio-button > button div { font-size: 1.22rem !important; line-height: 1.14 !important; } /* absolute final lock: plus + sample text size */ .omada-input-row .omada-plus-btn, .omada-input-row .omada-plus-btn > div, .omada-input-row .omada-plus-btn .gradio-button, .omada-input-row .omada-plus-btn .gradio-button > div, .omada-input-row .omada-plus-btn button { width: 82px !important; min-width: 82px !important; max-width: 82px !important; height: 82px !important; min-height: 82px !important; max-height: 82px !important; } .omada-input-row .omada-plus-btn button, .omada-input-row .omada-plus-btn button span, .omada-input-row .omada-plus-btn button p, .omada-input-row .omada-plus-btn button div { font-size: 3.5rem !important; line-height: 1 !important; font-weight: 900 !important; } .omada-sample-row .gradio-button button, .omada-sample-row .gradio-button button span, .omada-sample-row .gradio-button button p, .omada-sample-row .gradio-button button div, .omada-sample-row .omada-chip button, .omada-sample-row .omada-chip button span, .omada-sample-row .omada-chip button p, .omada-sample-row .omada-chip button div { font-size: 1.34rem !important; line-height: 1.16 !important; font-weight: 700 !important; } /* ultra final force: make plus/sample visibly bigger */ .omada-input-row .omada-plus-btn button { transform: scale(1.18) !important; transform-origin: center center !important; } .omada-sample-row .gradio-button > button, .omada-sample-row .gradio-button > 
button * { font-size: 1.42rem !important; line-height: 1.18 !important; font-weight: 700 !important; } /* terminal hard override: controls size + hero position */ .omada-input-row .omada-bottom-row .omada-plus-btn, .omada-input-row .omada-bottom-row .omada-plus-btn > div, .omada-input-row .omada-bottom-row .omada-plus-btn .gradio-button, .omada-input-row .omada-bottom-row .omada-plus-btn .gradio-button > div, .omada-input-row .omada-bottom-row .omada-plus-btn button { width: 40px !important; min-width: 40px !important; max-width: 40px !important; height: 40px !important; min-height: 40px !important; max-height: 40px !important; } .omada-input-row .omada-bottom-row .omada-plus-btn button, .omada-input-row .omada-bottom-row .omada-plus-btn button span, .omada-input-row .omada-bottom-row .omada-plus-btn button p, .omada-input-row .omada-bottom-row .omada-plus-btn button div { font-size: 4.2rem !important; line-height: 1 !important; font-weight: 900 !important; } .omada-input-row .omada-bottom-row .omada-task-btn button, .omada-input-row .omada-bottom-row .omada-custom-btn button, .omada-input-row .omada-bottom-row .omada-task-btn button *, .omada-input-row .omada-bottom-row .omada-custom-btn button * { font-size: 0.6rem !important; line-height: 1.1 !important; height: 42px !important; min-height: 42px !important; padding: 0 14px !important; } .omada-ready-hero { top: 0 !important; margin-top: -36px !important; } /* move bottom controls lower */ .omada-bottom-row { margin-top: 14px !important; } /* absolute terminal lock v2 */ [class*="omada-plus-btn"] button { width: 96px !important; min-width: 96px !important; height: 96px !important; min-height: 96px !important; font-size: 4.2rem !important; line-height: 1 !important; font-weight: 900 !important; } [class*="omada-task-btn"] button, [class*="omada-custom-btn"] button, [class*="omada-task-btn"] button *, [class*="omada-custom-btn"] button * { font-size: 0.8rem !important; line-height: 1.1 !important; height: 42px 
!important; min-height: 42px !important; } .omada-ready-hero { top: -8px !important; margin-top: -56px !important; } /* non-negotiable final override */ .gradio-container .omada-ready-hero { top: -20px !important; margin-top: -88px !important; } .gradio-container .omada-input-row { min-height: 0 !important; padding-top: 0 !important; padding-bottom: 0 !important; gap: 0 !important; } .gradio-container .omada-main-input { margin-top: 0 !important; margin-bottom: -2px !important; transform: translateY(-4px) !important; } .gradio-container .omada-main-input textarea, .gradio-container .omada-main-input textarea::placeholder { min-height: 34px !important; line-height: 1.25 !important; padding-top: 2px !important; padding-bottom: 2px !important; font-size: 1.08rem !important; } .gradio-container .omada-bottom-row { margin-top: -2px !important; gap: 4px !important; align-items: center !important; } .gradio-container .omada-bottom-row .omada-plus-btn button, .gradio-container .omada-bottom-row .omada-plus-btn > div > button { width: 84px !important; min-width: 84px !important; height: 84px !important; min-height: 84px !important; font-size: 3.7rem !important; } .gradio-container .omada-bottom-row .omada-task-btn button, .gradio-container .omada-bottom-row .omada-custom-btn button, .gradio-container .omada-bottom-row .omada-task-btn button *, .gradio-container .omada-bottom-row .omada-custom-btn button * { font-size: 0.6rem !important; height: 46px !important; min-height: 46px !important; line-height: 1.08 !important; padding: 0 15px !important; } /* compact composer height + tighter spacing */ .omada-input-row { min-height: 136px !important; padding-top: 6px !important; padding-bottom: 8px !important; gap: 2px !important; } .omada-main-input { transform: none !important; margin-top: 0 !important; margin-bottom: 0 !important; } .omada-main-input textarea { min-height: 56px !important; padding-top: 6px !important; padding-bottom: 4px !important; line-height: 1.2 !important; 
font-size: 1.16rem !important; } .omada-bottom-row { margin-top: 18px !important; } /* final lock: prevent placeholder/text clipping */ .gradio-container .omada-main-input, .gradio-container .omada-main-input > div, .gradio-container .omada-main-input .wrap, .gradio-container .omada-main-input .wrap-inner { overflow: visible !important; } .gradio-container .omada-main-input textarea, .gradio-container .omada-main-input textarea::placeholder { min-height: 64px !important; height: auto !important; padding: 8px 8px 4px 8px !important; font-size: 1.08rem !important; line-height: 1.25 !important; box-sizing: border-box !important; overflow: visible !important; } .gradio-container .omada-main-input textarea::placeholder { min-height: unset !important; height: auto !important; padding: 0 !important; font-size: 1.08rem !important; line-height: 1.35 !important; } /* final alignment lock: controls and send on one horizontal line */ .gradio-container .omada-input-row { padding-bottom: 0px !important; } .gradio-container .omada-main-input { margin-bottom: 0 !important; } .gradio-container .omada-main-input textarea { padding-bottom: 0 !important; max-height: 72px !important; overflow-y: auto !important; } .gradio-container .omada-bottom-row { position: static !important; left: auto !important; right: auto !important; bottom: auto !important; margin-top: 0 !important; transform: none !important; align-items: flex-end !important; justify-content: flex-start !important; gap: 14px !important; z-index: auto !important; } .gradio-container .omada-bottom-row > * { align-self: flex-end !important; } .gradio-container .omada-send-btn-fix, .gradio-container .omada-send-btn { position: static !important; right: auto !important; bottom: auto !important; margin-left: auto !important; margin-top: 0 !important; align-self: flex-end !important; transform: none !important; z-index: auto !important; } /* final lock: normalize plus button size */ .gradio-container .omada-bottom-row { min-height: 
0 !important; height: auto !important; align-items: center !important; } .gradio-container .omada-bottom-row > * { min-height: 0 !important; height: auto !important; align-self: center !important; } .gradio-container .omada-bottom-row .omada-send-btn, .gradio-container .omada-bottom-row .omada-send-btn-fix { margin-left: auto !important; } .gradio-container .omada-bottom-row [class*="omada-plus-btn"], .gradio-container .omada-bottom-row [class*="omada-plus-btn"] > div, .gradio-container .omada-bottom-row [class*="omada-plus-btn"] .gradio-button, .gradio-container .omada-bottom-row [class*="omada-plus-btn"] .gradio-button > div, .gradio-container .omada-bottom-row [class*="omada-plus-btn"] button { flex: 0 0 48px !important; width: 48px !important; min-width: 48px !important; max-width: 48px !important; height: 48px !important; min-height: 48px !important; max-height: 48px !important; padding: 0 !important; } .gradio-container .omada-bottom-row [class*="omada-plus-btn"] button, .gradio-container .omada-bottom-row [class*="omada-plus-btn"] button span, .gradio-container .omada-bottom-row [class*="omada-plus-btn"] button p, .gradio-container .omada-bottom-row [class*="omada-plus-btn"] button div { font-size: 2rem !important; line-height: 1 !important; font-weight: 800 !important; } /* remove Gradio html wrapper padding inside bottom row */ .gradio-container .omada-bottom-row .html-container { padding: 0 !important; margin: 0 !important; width: auto !important; min-width: 0 !important; } .gradio-container .omada-bottom-row .html-container > div, .gradio-container .omada-bottom-row .html-container .prose { padding: 0 !important; margin: 0 !important; width: auto !important; min-width: 0 !important; } /* final tune: smaller send circle + lighter/smaller task/custom text */ .gradio-container .omada-bottom-row .omada-send-btn-fix, .gradio-container .omada-bottom-row .omada-send-btn-fix > div, .gradio-container .omada-bottom-row .omada-send-btn, .gradio-container 
.omada-bottom-row .omada-send-btn > div, .gradio-container .omada-bottom-row .omada-send-btn button, .gradio-container .omada-bottom-row .omada-send-btn-fix button, .gradio-container .omada-bottom-row .omada-send-btn > div > button, .gradio-container .omada-bottom-row .omada-send-btn-fix > div > button { width: 35px !important; min-width: 35px !important; max-width: 35px !important; height: 35px !important; min-height: 35px !important; max-height: 35px !important; } .gradio-container .omada-bottom-row .omada-send-btn button, .gradio-container .omada-bottom-row .omada-send-btn-fix button, .gradio-container .omada-bottom-row .omada-send-btn > div > button, .gradio-container .omada-bottom-row .omada-send-btn-fix > div > button { font-size: 1.6rem !important; } .gradio-container .omada-bottom-row .omada-task-btn button, .gradio-container .omada-bottom-row .omada-custom-btn button, .gradio-container .omada-bottom-row .omada-task-btn > div > button, .gradio-container .omada-bottom-row .omada-custom-btn > div > button, .gradio-container .omada-bottom-row .omada-task-btn button *, .gradio-container .omada-bottom-row .omada-custom-btn button * { font-size: 0.75rem !important; font-weight: 400 !important; line-height: 1.05 !important; } /* Gradio lg token override (computed 16px -> force smaller) */ .gradio-container .omada-bottom-row button.lg.omada-task-btn, .gradio-container .omada-bottom-row button.lg.omada-custom-btn, .gradio-container .omada-bottom-row .omada-task-btn button.lg, .gradio-container .omada-bottom-row .omada-custom-btn button.lg { --button-large-text-size: 0.7rem !important; font-size: 0.75rem !important; font-weight: 400 !important; line-height: 1.05 !important; } .gradio-container .omada-bottom-row button.lg.omada-task-btn *, .gradio-container .omada-bottom-row button.lg.omada-custom-btn *, .gradio-container .omada-bottom-row .omada-task-btn button.lg *, .gradio-container .omada-bottom-row .omada-custom-btn button.lg * { font-size: 0.75rem !important; 
font-weight: 400 !important; line-height: 1.05 !important; } /* sample preview cards */ .gradio-container .omada-sample-preview-row { width: min(980px, calc(100vw - 24px)) !important; margin: 0 auto 6px auto !important; gap: 10px !important; order: 2 !important; flex: 0 0 auto !important; } .gradio-container .omada-sample-preview-col { flex: 1 1 0 !important; } .gradio-container .omada-sample-preview-card .html-container, .gradio-container .omada-sample-preview-card .html-container > div { padding: 0 !important; margin: 0 !important; } .gradio-container .omada-sample-preview-inner { display: flex; align-items: center; gap: 10px; width: 100%; min-height: 84px; border-radius: 14px; border: 1px solid rgba(186, 204, 232, 0.82); background: rgba(255, 255, 255, 0.72); box-shadow: 0 5px 14px rgba(108, 134, 177, 0.12); padding: 10px 12px; box-sizing: border-box; } .gradio-container .omada-sample-preview-media { width: 76px; min-width: 76px; height: 56px; border-radius: 10px; overflow: hidden; display: flex; align-items: center; justify-content: center; background: rgba(227, 236, 249, 0.8); } .gradio-container .omada-sample-preview-thumb { width: 100%; height: 100%; object-fit: cover; } .gradio-container .omada-sample-preview-icon { font-size: 1.4rem; } .gradio-container .omada-sample-preview-meta { min-width: 0; } .gradio-container .omada-sample-preview-title { font-size: 0.83rem; font-weight: 700; color: #2a3a52; } .gradio-container .omada-sample-preview-desc { margin-top: 4px; font-size: 0.78rem; line-height: 1.2; color: #4a5f80; word-break: break-word; } /* adaptive first-view layout: keep composer visible without page scroll */ html, body { height: 100% !important; overflow: hidden !important; } .gradio-container { height: 100vh !important; overflow: hidden !important; } .gradio-container .omada-shell { height: calc(100vh - 72px) !important; max-height: calc(100vh - 72px) !important; display: flex !important; flex-direction: column !important; min-height: 0 !important; 
position: relative !important; } .gradio-container .omada-ready-hero { position: fixed !important; left: 50% !important; top: 42% !important; transform: translate(-50%, -50%) !important; z-index: 999 !important; } .gradio-container .omada-ready-hero { flex: 0 0 auto !important; margin-top: 0 !important; margin-bottom: 0 !important; align-self: center !important; width: min(820px, calc(100vw - 40px)) !important; text-align: center !important; pointer-events: none !important; } .gradio-container .omada-chatbot { order: 1 !important; } .gradio-container .omada-sample-row { order: 2 !important; } .gradio-container .omada-input-stack { order: 3 !important; } .gradio-container .omada-sample-row { flex: 0 0 auto !important; margin: 0 auto 6px auto !important; } .gradio-container .omada-chatbot { flex: 1 1 auto !important; min-height: 0 !important; height: auto !important; max-height: none !important; overflow: auto !important; } .gradio-container .omada-input-stack { flex: 0 0 auto !important; margin: 0 auto 4px auto !important; } .gradio-container .omada-ready-hero-wrap { position: relative !important; z-index: 60 !important; } .gradio-container .omada-ready-banner { display: block !important; } /* ensure hidden Gradio blocks never intercept clicks */ .gradio-container .hide, .gradio-container .wrap.hide { display: none !important; visibility: hidden !important; pointer-events: none !important; } """ _blocks_kwargs = { "title": "AIDAS Lab @ SNU - Omni-modal Diffusion", } if not GRADIO_V6_PLUS: _blocks_kwargs.update( { "css": CUSTOM_CSS + EXTRA_CSS, "theme": theme, "js": FORCE_LIGHT_MODE_JS, } ) with gr.Blocks(**_blocks_kwargs) as demo: model_status = gr.Markdown("Model status: Loading model...", visible=False) demo.load(warmup_model_status, outputs=[model_status]) MODE_OPTIONS = [ "Chat", "MMU (Image → Text)", "MMU (Video → Text)", "Image Generation", "Image Editing", "ASR", "TTS", ] with gr.Column(elem_classes=["omada-shell"]): _chatbot_kwargs = { "label": None, 
"sanitize_html": False, "elem_classes": ["omada-chatbot"], } if not GRADIO_V6_PLUS: _chatbot_kwargs["bubble_full_width"] = False chatbox = gr.Chatbot(**_chatbot_kwargs) intro_hero = gr.HTML( "
" "
" "Ready to get started?" "
" "
" "Use `+` to attach image/video/speech, `Task` to choose a mode, and `Generation Settings` to adjust generation options." "
" "
", visible=True, elem_classes=["omada-ready-hero-wrap"], ) sample_task_items = [ ("💬 Chat", "Chat"), ("🖼️ Image QA", "MMU (Image → Text)"), ("🎬 Video Captioning", "MMU (Video → Text)"), ("🎨 Image Generation", "Image Generation"), ("🛠️ Image Editing", "Image Editing"), ("🎙️ ASR", "ASR"), ("🔊 TTS", "TTS"), ] sample_payloads = gr.State({ "Chat": [ {"mode": "Chat", "text": _get_example_value(CHAT_EXAMPLES, 0, 0, "Hello! Please introduce yourself."), "image": None, "audio": None, "video": None}, {"mode": "Chat", "text": _get_example_value(CHAT_EXAMPLES, 1, 0, _get_example_value(CHAT_EXAMPLES, 0, 0, "Hello! Please introduce yourself.")), "image": None, "audio": None, "video": None}, ], "MMU (Image → Text)": [ {"mode": "MMU (Image → Text)", "text": _get_example_value(MMU_EXAMPLES, 0, 1, DEFAULT_MMU_PROMPT), "image": _get_example_value(MMU_EXAMPLES, 0, 0, None), "audio": None, "video": None}, {"mode": "MMU (Image → Text)", "text": _get_example_value(MMU_EXAMPLES, 1, 1, _get_example_value(MMU_EXAMPLES, 0, 1, DEFAULT_MMU_PROMPT)), "image": _get_example_value(MMU_EXAMPLES, 1, 0, _get_example_value(MMU_EXAMPLES, 0, 0, None)), "audio": None, "video": None}, ], "MMU (Video → Text)": [ {"mode": "MMU (Video → Text)", "text": "", "image": None, "audio": None, "video": _get_example_value(V2T_EXAMPLES, -2, 0, _get_example_value(V2T_EXAMPLES, 0, 0, None))}, {"mode": "MMU (Video → Text)", "text": "", "image": None, "audio": None, "video": _get_example_value(V2T_EXAMPLES, -1, 0, _get_example_value(V2T_EXAMPLES, 1, 0, _get_example_value(V2T_EXAMPLES, 0, 0, None)))}, ], "Image Generation": [ {"mode": "Image Generation", "text": _get_example_value(T2I_EXAMPLES, 0, 0, "A cinematic mountain landscape at sunrise."), "image": None, "audio": None, "video": None}, {"mode": "Image Generation", "text": _get_example_value(T2I_EXAMPLES, 1, 0, _get_example_value(T2I_EXAMPLES, 0, 0, "A cinematic mountain landscape at sunrise.")), "image": None, "audio": None, "video": None}, ], "Image Editing": [ 
{"mode": "Image Editing", "text": _get_example_value(I2I_EXAMPLES, 0, 1, "Add warm sunset lighting."), "image": _get_example_value(I2I_EXAMPLES, 0, 0, None), "audio": None, "video": None}, {"mode": "Image Editing", "text": _get_example_value(I2I_EXAMPLES, 1, 1, _get_example_value(I2I_EXAMPLES, 0, 1, "Add warm sunset lighting.")), "image": _get_example_value(I2I_EXAMPLES, 1, 0, _get_example_value(I2I_EXAMPLES, 0, 0, None)), "audio": None, "video": None}, ], "ASR": [ {"mode": "ASR", "text": "", "image": None, "audio": _get_example_value(S2T_EXAMPLES, 0, 0, None), "video": None}, {"mode": "ASR", "text": "", "image": None, "audio": _get_example_value(S2T_EXAMPLES, 1, 0, _get_example_value(S2T_EXAMPLES, 0, 0, None)), "video": None}, ], "TTS": [ {"mode": "TTS", "text": _get_example_value(T2S_EXAMPLES, 0, 0, "Hello from Dynin-Omni."), "image": None, "audio": None, "video": None}, {"mode": "TTS", "text": _get_example_value(T2S_EXAMPLES, 1, 0, _get_example_value(T2S_EXAMPLES, 0, 0, "Hello from Dynin-Omni.")), "image": None, "audio": None, "video": None}, ], }) selected_sample_mode = gr.State("Chat") task_sample_buttons = [] with gr.Row(elem_classes=["omada-sample-row"], visible=True) as task_sample_row_1: for i in range(3): task_sample_buttons.append(gr.Button(sample_task_items[i][0], size="sm", elem_classes=["omada-chip"], visible=True)) with gr.Row(elem_classes=["omada-sample-row"], visible=True) as task_sample_row_2: for i in range(3, 7): task_sample_buttons.append(gr.Button(sample_task_items[i][0], size="sm", elem_classes=["omada-chip"], visible=True)) with gr.Row(elem_classes=["omada-sample-preview-row"], visible=False) as sample_choice_row: with gr.Column(elem_classes=["omada-sample-preview-col"]): sample_preview_1 = gr.HTML("", elem_classes=["omada-sample-preview-card"]) sample_choice_1 = gr.Button("Sample 1", size="sm", elem_classes=["omada-chip"], visible=True) with gr.Column(elem_classes=["omada-sample-preview-col"]): sample_preview_2 = gr.HTML("", 
elem_classes=["omada-sample-preview-card"]) sample_choice_2 = gr.Button("Sample 2", size="sm", elem_classes=["omada-chip"], visible=True) with gr.Column(elem_classes=["omada-input-stack"]): with gr.Column(elem_classes=["omada-input-row"]): attachment_preview = gr.HTML( _render_attachment_preview(None, None, None), elem_classes=["omada-attach-preview-wrap"], ) chat_input = gr.Textbox( show_label=False, placeholder="How can I help you today?", lines=1, interactive=True, scale=1, min_width=0, elem_classes=["omada-main-input"], ) with gr.Row(elem_classes=["omada-bottom-row"]): plus_btn = gr.Button("+", elem_classes=["omada-plus-btn"], scale=0, min_width=30) task_btn = gr.Button("🛠 Task", elem_classes=["omada-task-btn"], scale=0, min_width=0) selected_task_badge = gr.HTML( _render_task_chip("Chat"), elem_classes=["omada-task-chip-inline"], ) custom_btn = gr.Button("🧠 Generation Settings", elem_classes=["omada-custom-btn"], scale=0, min_width=0) send_button = gr.Button("↑", elem_classes=["omada-send-btn", "omada-send-btn-fix"], scale=0, min_width=30) auto_mode_state = gr.State("Custom") controls_visible = gr.State(False) panel_mode_state = gr.State("task") backdrop = gr.HTML("
", visible=False, elem_classes=["omada-panel-backdrop"]) controls_panel = gr.Column(visible=False, elem_classes=["omada-controls-safe"]) with controls_panel: panel_title = gr.Markdown("**Task**") mode_selector = gr.State("Chat") with gr.Column(visible=False) as attach_section: media_image = gr.Image(type="pil", label="Image", sources=["upload"], visible=True) media_audio = gr.Audio(type="filepath", label="Speech", sources=["microphone", "upload"], visible=True) media_video = gr.Video(label="Video", sources=["upload", "webcam"], visible=True) with gr.Column(visible=False) as task_section: with gr.Row(): task_buttons = [ gr.Button( option, size="sm", variant="primary" if option == "Chat" else "secondary", ) for option in MODE_OPTIONS ] with gr.Column(visible=False) as custom_section: gr.Markdown("Task-specific generation settings") adv_chat = gr.Column(visible=False) with adv_chat: chat_max_tokens = gr.Slider(2, 512, value=512, step=2, label="Chat max tokens", interactive=True) chat_steps = gr.Slider(2, 512, value=512, step=2, label="Chat steps", interactive=True) chat_block = gr.Slider(2, 512, value=16, step=2, label="Chat block length", interactive=True) chat_temperature_slider = gr.Slider(0.0, 2.0, value=0.0, step=0.05, label="Chat temperature", interactive=True) adv_t2s = gr.Column(visible=False) with adv_t2s: t2s_max_tokens = gr.Slider(2, 512, value=512, step=2, label="Speech token length", interactive=True) t2s_steps = gr.Slider(2, 512, value=256, step=2, label="T2S refinement steps", interactive=True) t2s_block = gr.Slider(2, 512, value=256, step=2, label="T2S block length", interactive=True) t2s_temperature = gr.Slider(0.0, 2.0, value=0.0, step=0.05, label="T2S temperature", interactive=True) t2s_cfg = gr.Slider(0.0, 6.0, value=3.5, step=0.1, label="T2S CFG scale", interactive=True) t2s_gender = gr.Dropdown(["random", "female", "male"], value="random", label="T2S gender", interactive=True) t2s_emotion = gr.Dropdown(["random", "angry", "happy", "neutral", 
"sad"], value="random", label="T2S emotion", interactive=True) t2s_speed = gr.Dropdown(["random", "normal", "fast", "slow"], value="random", label="T2S speed", interactive=True) t2s_pitch = gr.Dropdown(["random", "normal", "high", "low"], value="random", label="T2S pitch", interactive=True) adv_s2t = gr.Column(visible=False) with adv_s2t: s2t_steps = gr.Slider(2, 512, value=128, step=2, label="S2T steps", interactive=True) s2t_block = gr.Slider(2, 512, value=16, step=2, label="S2T block length", interactive=True) s2t_max_tokens = gr.Slider(2, 512, value=128, step=2, label="S2T max tokens", interactive=True) s2t_remasking = gr.Dropdown(["low_confidence", "random"], value="low_confidence", label="S2T remasking", interactive=True) adv_v2t = gr.Column(visible=False) with adv_v2t: v2t_steps = gr.Slider(2, 512, value=256, step=2, label="V2T steps", interactive=True) v2t_block = gr.Slider(2, 512, value=16, step=2, label="V2T block length", interactive=True) v2t_max_tokens = gr.Slider(2, 512, value=256, step=2, label="V2T max tokens", interactive=True) adv_t2i = gr.Column(visible=False) with adv_t2i: t2i_timesteps = gr.Slider(4, 128, value=16, step=2, label="T2I timesteps", interactive=True) t2i_temperature = gr.Slider(0.0, 2.0, value=0.0, step=0.05, label="T2I temperature", interactive=True) t2i_guidance = gr.Slider(0.0, 8.0, value=2.5, step=0.1, label="T2I CFG scale", interactive=True) adv_i2i = gr.Column(visible=False) with adv_i2i: i2i_timesteps = gr.Slider(4, 128, value=32, step=2, label="I2I timesteps", interactive=True) i2i_temperature = gr.Slider(0.0, 2.0, value=0.0, step=0.05, label="I2I temperature", interactive=True) i2i_guidance = gr.Slider(0.0, 8.0, value=2.5, step=0.1, label="I2I CFG scale", interactive=True) adv_mmu = gr.Column(visible=False) with adv_mmu: mmu_max_tokens = gr.Slider(2, 512, value=128, step=2, label="MMU max tokens", interactive=True) mmu_steps = gr.Slider(2, 512, value=128, step=2, label="MMU steps", interactive=True) mmu_block = 
gr.Slider(2, 512, value=16, step=2, label="MMU block length", interactive=True) mmu_temperature = gr.Slider(0.0, 2.0, value=0.0, step=0.05, label="MMU temperature", interactive=True) save_btn = gr.Button("Save", variant="primary") def _task_button_updates(selected_mode: str): return tuple( gr.update(variant="primary" if option == selected_mode else "secondary") for option in MODE_OPTIONS ) def _update_advanced(mode, auto_mode): return ( gr.update(visible=mode == "Chat"), gr.update(visible=mode == "TTS"), gr.update(visible=mode == "ASR"), gr.update(visible=mode == "MMU (Video → Text)"), gr.update(visible=mode == "Image Generation"), gr.update(visible=mode == "Image Editing"), gr.update(visible=mode == "MMU (Image → Text)"), ) def _panel_title(kind: str) -> str: return { "attach": "**Attach**", "task": "**Task**", "custom": "**Custom Config**", }.get(kind, "**Task**") def _open_controls(panel_kind, mode): is_attach = panel_kind == "attach" is_task = panel_kind == "task" is_custom = panel_kind == "custom" adv_updates = _update_advanced(mode, "Custom") if is_custom else (gr.update(visible=False),) * 7 return ( panel_kind, gr.update(value=_panel_title(panel_kind)), gr.update(visible=True), gr.update(visible=True), True, gr.update(visible=is_attach), gr.update(visible=is_task), gr.update(visible=is_custom), *adv_updates, *_task_button_updates(mode), ) plus_btn.click( lambda mode: _open_controls("attach", mode), inputs=[mode_selector], outputs=[ panel_mode_state, panel_title, backdrop, controls_panel, controls_visible, attach_section, task_section, custom_section, adv_chat, adv_t2s, adv_s2t, adv_v2t, adv_t2i, adv_i2i, adv_mmu, *task_buttons, ], ) task_btn.click( lambda mode: _open_controls("task", mode), inputs=[mode_selector], outputs=[ panel_mode_state, panel_title, backdrop, controls_panel, controls_visible, attach_section, task_section, custom_section, adv_chat, adv_t2s, adv_s2t, adv_v2t, adv_t2i, adv_i2i, adv_mmu, *task_buttons, ], ) custom_btn.click( lambda mode: 
_open_controls("custom", mode), inputs=[mode_selector], outputs=[ panel_mode_state, panel_title, backdrop, controls_panel, controls_visible, attach_section, task_section, custom_section, adv_chat, adv_t2s, adv_s2t, adv_v2t, adv_t2i, adv_i2i, adv_mmu, *task_buttons, ], ) def _update_mode(mode): placeholders = { "Chat": "How can I help you today?", "TTS": "Type the speech you want to synthesize...", "ASR": "Upload audio, then add notes here...", "MMU (Video → Text)": "Upload video, then add notes here...", "Image Generation": "Describe the image you want to generate...", "Image Editing": "Describe how you want to edit the image...", "MMU (Image → Text)": "Ask about the uploaded image...", } return gr.update(placeholder=placeholders.get(mode, "How can I help you today?")) _update_mode("Chat") def _pick_mode(choice, panel_mode): show_custom = panel_mode == "custom" adv_updates = _update_advanced(choice, "Custom") if show_custom else (gr.update(visible=False),) * 7 return ( choice, _render_task_chip(choice), _update_mode(choice), *adv_updates, *_task_button_updates(choice), ) for idx, task_choice_btn in enumerate(task_buttons): task_choice_btn.click( lambda panel_mode, choice=MODE_OPTIONS[idx]: _pick_mode(choice, panel_mode), inputs=[panel_mode_state], outputs=[mode_selector, selected_task_badge, chat_input, adv_chat, adv_t2s, adv_s2t, adv_v2t, adv_t2i, adv_i2i, adv_mmu, *task_buttons], ) def _refresh_attachment_preview(image_in, audio_in, video_in): return _render_attachment_preview(image_in, audio_in, video_in) media_image.change( _refresh_attachment_preview, inputs=[media_image, media_audio, media_video], outputs=[attachment_preview], ) media_audio.change( _refresh_attachment_preview, inputs=[media_image, media_audio, media_video], outputs=[attachment_preview], ) media_video.change( _refresh_attachment_preview, inputs=[media_image, media_audio, media_video], outputs=[attachment_preview], ) save_btn.click( lambda: (gr.update(visible=False), gr.update(visible=False), 
False), outputs=[backdrop, controls_panel, controls_visible], )


def _format_user_message(msg: str) -> str:
    """Return ``msg`` stripped, or a single space when ``msg`` is empty/None."""
    return msg.strip() if msg else " "


def _normalize_chat_history(history):
    """Convert chatbot history into a list of ``(user, assistant)`` pairs.

    A list whose first element is a dict is treated as role/content messages
    and folded into pairs; entries whose role is neither "user" nor
    "assistant" are ignored. Any other non-empty value is returned as
    ``list(history)``. Empty or None history yields ``[]``.
    """
    if not history:
        return []
    if isinstance(history, list) and history and isinstance(history[0], dict):
        pairs = []
        # User message still waiting for its assistant reply.
        pending_user = None
        for msg in history:
            role = msg.get("role")
            content = msg.get("content", "")
            if role == "user":
                # Two user messages in a row: close the first with an empty reply.
                if pending_user is not None:
                    pairs.append((pending_user, ""))
                pending_user = content
            elif role == "assistant":
                if pending_user is None:
                    # Assistant message without a preceding user message.
                    pairs.append((" ", content))
                else:
                    pairs.append((pending_user, content))
                    pending_user = None
        # Trailing user message with no reply yet.
        if pending_user is not None:
            pairs.append((pending_user, ""))
        return pairs
    return list(history)


def _serialize_chat_history(pairs):
    """Return ``pairs`` in the shape passed to the chatbot output.

    When ``GRADIO_V6_PLUS`` is false the pairs are returned unchanged;
    otherwise each pair becomes a user dict followed by an assistant dict,
    with None replaced by " " (user) or "" (assistant).
    """
    if not GRADIO_V6_PLUS:
        return pairs
    messages = []
    for user_msg, assistant_msg in pairs:
        messages.append({"role": "user", "content": user_msg if user_msg is not None else " "})
        messages.append({"role": "assistant", "content": assistant_msg if assistant_msg is not None else ""})
    return messages


def _is_identity_query(message: str) -> bool:
    """Return True when ``message`` contains one of the identity trigger phrases.

    The text is lowercased, every character other than a-z, 0-9 and whitespace
    becomes a space, and whitespace runs are collapsed before the substring
    test.
    """
    q = re.sub(r"[^a-z0-9\s]", " ", (message or "").lower())
    q = re.sub(r"\s+", " ", q).strip()
    if not q:
        return False
    # NOTE(review): "your name" already matches anything "what is your name"
    # matches, so the longer phrase is redundant.
    triggers = [
        "who are you",
        "what are you",
        "introduce yourself",
        "what is your name",
        "your name",
        "are you dynin omni",
        "what model are you",
    ]
    return any(t in q for t in triggers)


@spaces.GPU
def _chat_handler(
    history,
    message,
    mode,
    auto_mode,
    image_in,
    audio_in,
    video_in,
    chat_max_tokens,
    chat_steps,
    chat_block,
    chat_temperature,
    t2s_max_tokens,
    t2s_steps,
    t2s_block,
    t2s_temperature,
    t2s_cfg,
    t2s_gender,
    t2s_emotion,
    t2s_speed,
    t2s_pitch,
    s2t_steps,
    s2t_block,
    s2t_max_tokens,
    s2t_remasking,
    v2t_steps,
    v2t_block,
    v2t_max_tokens,
    t2i_timesteps,
    t2i_temperature,
    t2i_guidance,
    i2i_timesteps,
    i2i_temperature,
    i2i_guidance,
    mmu_max_tokens,
    mmu_steps,
    mmu_block,
    mmu_temperature,
):
    """Generator event handler behind the textbox submit and the send button.

    Normalizes the incoming history to ``(user, assistant)`` pairs, appends a
    new turn for this request, then dispatches on ``mode`` and rewrites that
    last turn as results arrive. Every ``yield`` is
    ``(serialized_history, "")``; the handler is wired with
    ``outputs=[chatbox, chat_input]``, so the empty string is the value sent
    to the text box.

    The parameters after ``video_in`` are the per-task generation settings, in
    the same order as ``submit_inputs``.
    """
    # _set_global_seed and get_app are defined elsewhere in this file.
    _set_global_seed()
    history = _normalize_chat_history(history)
    message = (message or
"").strip()
    # In video mode the user bubble is first rendered with defer_video=True and
    # re-rendered with defer_video=False further down.
    defer_video = mode == "MMU (Video → Text)" and bool(video_in)
    display_user = _render_user_message(mode, message, image_in, audio_in, video_in, defer_video=defer_video)
    history.append((display_user, _render_text_message("Model loading...", "")))
    yield _serialize_chat_history(history), ""

    # Identity questions in Chat mode get a fixed reply; get_app() is never
    # called on this path.
    if mode == "Chat" and _is_identity_query(message):
        fixed = (
            "I am Dynin-Omni, an omnimodal unified diffusion language model developed by AIDAS Lab.\n"
            "I can understand and generate text, images, speech, and video within a single architecture."
        )
        history[-1] = (display_user, _render_text_message("Assistant reply generated.", fixed))
        yield _serialize_chat_history(history), ""
        return

    if defer_video:
        # Swap in the non-deferred user bubble, keeping the current reply cell.
        display_user = _render_user_message(mode, message, image_in, audio_in, video_in, defer_video=False)
        history[-1] = (display_user, history[-1][1])
        yield _serialize_chat_history(history), ""

    app = get_app()
    history[-1] = (display_user, _render_text_message("Generating...", ""))
    yield _serialize_chat_history(history), ""

    # Use UI-provided generation settings unless auto_mode is "auto"
    # (case-insensitive); auto_mode_state is created with the value "Custom".
    app.force_eval_settings = str(auto_mode).strip().lower() == "auto"

    if mode == "Chat":
        # `done` is unpacked but not used; the loop ends when the stream does.
        for reply_html, status, done in app.run_chat_stream(
            message,
            chat_max_tokens,
            chat_steps,
            chat_block,
            chat_temperature,
            update_every=64,
        ):
            response = _render_response(status, reply_html)
            history[-1] = (display_user, response)
            yield _serialize_chat_history(history), ""
        return

    if mode == "TTS":
        if not message:
            history[-1] = (display_user, _render_text_message("Please type some text.", ""))
            yield _serialize_chat_history(history), ""
            return
        # Not streamed: a single result replaces the "Generating..." cell.
        audio, status = app.run_t2s(
            message,
            t2s_max_tokens,
            t2s_steps,
            t2s_block,
            t2s_temperature,
            t2s_cfg,
            t2s_gender,
            t2s_emotion,
            t2s_speed,
            t2s_pitch,
        )
        history[-1] = (display_user, _render_audio_message(status, audio))
        yield _serialize_chat_history(history), ""
        return

    if mode == "ASR":
        if not audio_in:
            history[-1] = (display_user, _render_text_message("Please upload audio.", ""))
            yield _serialize_chat_history(history), ""
            return
        for text, status in app.run_s2t_stream(
            audio_in,
            s2t_steps,
            s2t_block,
            s2t_max_tokens,
            s2t_remasking,
            update_every=32,
        ):
            history[-1] = (display_user, _render_text_message(status, text))
            yield _serialize_chat_history(history), ""
        return

    if mode == "MMU (Video → Text)":
        if not video_in:
            history[-1] = (display_user, _render_text_message("Please upload a video.", ""))
            yield _serialize_chat_history(history), ""
            return
        for text, status in app.run_v2t_stream(
            video_in,
            v2t_steps,
            v2t_block,
            v2t_max_tokens,
            update_every=32,
        ):
            history[-1] = (display_user, _render_text_message(status, text))
            yield _serialize_chat_history(history), ""
        return

    if mode == "Image Generation":
        if not message:
            history[-1] = (display_user, _render_text_message("Please provide a prompt.", ""))
            yield _serialize_chat_history(history), ""
            return
        for image, status in app.run_t2i_stream(
            message,
            t2i_timesteps,
            t2i_temperature,
            t2i_guidance,
            update_every=2,
        ):
            history[-1] = (display_user, _render_image_message(status, image))
            yield _serialize_chat_history(history), ""
        return

    if mode == "Image Editing":
        # Both an input image and an instruction are required.
        if not image_in:
            history[-1] = (display_user, _render_text_message("Please upload an image.", ""))
            yield _serialize_chat_history(history), ""
            return
        if not message:
            history[-1] = (display_user, _render_text_message("Please provide an edit instruction.", ""))
            yield _serialize_chat_history(history), ""
            return
        for image, status in app.run_i2i_stream(
            message,
            image_in,
            i2i_timesteps,
            i2i_temperature,
            i2i_guidance,
            update_every=2,
        ):
            history[-1] = (display_user, _render_image_message(status, image))
            yield _serialize_chat_history(history), ""
        return

    if mode == "MMU (Image → Text)":
        if not image_in:
            history[-1] = (display_user, _render_text_message("Please upload an image.", ""))
            yield _serialize_chat_history(history), ""
            return
        # Keep MMU QA consistent with chat mask-pill UX.
        # The placeholder count is mmu_max_tokens clamped to [16, 256], with
        # 128 used when the value is falsy or cannot be converted.
        # NOTE(review): `except Exception` is broader than an int() conversion
        # needs — confirm whether (TypeError, ValueError) would be enough.
        try:
            mmu_mask_count = max(16, min(int(mmu_max_tokens or 128), 256))
        except Exception:
            mmu_mask_count = 128
        # NOTE(review): joining empty strings produces only spaces; presumably
        # a mask placeholder token was intended here — confirm.
        mmu_mask_surface = " ".join([""] * mmu_mask_count)
        history[-1] = (display_user, _render_text_message("Generating...", mmu_mask_surface))
        yield _serialize_chat_history(history), ""
        # Not streamed: one call, one final update.
        reply, status = app.run_mmu(
            images=[image_in],
            message=message,
            max_new_tokens=mmu_max_tokens,
            steps=mmu_steps,
            block_length=mmu_block,
            temperature=mmu_temperature,
        )
        history[-1] = (display_user, _render_text_message(status, reply))
        yield _serialize_chat_history(history), ""
        return

    # No branch matched the requested mode.
    history[-1] = (display_user, _render_text_message("Unsupported mode.", ""))
    yield _serialize_chat_history(history), ""


with demo:
    def _hide_intro():
        """Return an update that hides the component it is wired to."""
        return gr.update(visible=False)

    # Positional inputs for _chat_handler; the order must match its signature.
    submit_inputs = [
        chatbox,
        chat_input,
        mode_selector,
        auto_mode_state,
        media_image,
        media_audio,
        media_video,
        chat_max_tokens,
        chat_steps,
        chat_block,
        chat_temperature_slider,
        t2s_max_tokens,
        t2s_steps,
        t2s_block,
        t2s_temperature,
        t2s_cfg,
        t2s_gender,
        t2s_emotion,
        t2s_speed,
        t2s_pitch,
        s2t_steps,
        s2t_block,
        s2t_max_tokens,
        s2t_remasking,
        v2t_steps,
        v2t_block,
        v2t_max_tokens,
        t2i_timesteps,
t2i_temperature, t2i_guidance, i2i_timesteps, i2i_temperature, i2i_guidance, mmu_max_tokens, mmu_steps, mmu_block, mmu_temperature, ] submit_outputs = [chatbox, chat_input] chat_input.submit(_hide_intro, outputs=[intro_hero], queue=False).then( _chat_handler, inputs=submit_inputs, outputs=submit_outputs ) send_button.click(_hide_intro, outputs=[intro_hero], queue=False).then( _chat_handler, inputs=submit_inputs, outputs=submit_outputs ) def _open_sample_choices(sample_map, mode): items = (sample_map or {}).get(mode, []) has_1 = len(items) >= 1 has_2 = len(items) >= 2 item1 = items[0] if has_1 else {} item2 = items[1] if has_2 else {} return ( mode, _render_task_chip(mode), _update_mode(mode), mode, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), _render_sample_preview_card(item1, 0), _render_sample_preview_card(item2, 1), gr.update(value="Sample 1", visible=has_1, interactive=has_1), gr.update(value="Sample 2", visible=has_2, interactive=has_2), *_task_button_updates(mode), ) def _use_sample(sample_map, mode, sample_idx): items = (sample_map or {}).get(mode, []) if not items: current_mode = "Chat" return ( "", None, None, None, current_mode, _render_task_chip(current_mode), _render_attachment_preview(None, None, None), _update_mode(current_mode), gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), "", "", gr.update(value="Sample 1", visible=True, interactive=True), gr.update(value="Sample 2", visible=True, interactive=True), *_task_button_updates(current_mode), ) idx = max(0, min(int(sample_idx), len(items) - 1)) item = items[idx] or {} sample_mode = item.get("mode", "Chat") sample_text = item.get("text", "") if not sample_text: sample_video = item.get("video") sample_audio = item.get("audio") if sample_video: sample_text = f"[Video] {Path(str(sample_video)).name}" elif sample_audio: sample_text = f"[Audio] {Path(str(sample_audio)).name}" image_item = item.get("image") audio_item = item.get("audio") video_item = 
item.get("video") return ( sample_text, image_item, audio_item, video_item, sample_mode, _render_task_chip(sample_mode), _render_attachment_preview(image_item, audio_item, video_item), _update_mode(sample_mode), gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), "", "", gr.update(value="Sample 1", visible=True, interactive=True), gr.update(value="Sample 2", visible=True, interactive=True), *_task_button_updates(sample_mode), ) for i, btn in enumerate(task_sample_buttons): mode_value = sample_task_items[i][1] btn.click( lambda payloads, m=mode_value: _open_sample_choices(payloads, m), inputs=[sample_payloads], outputs=[ mode_selector, selected_task_badge, chat_input, selected_sample_mode, task_sample_row_1, task_sample_row_2, sample_choice_row, sample_preview_1, sample_preview_2, sample_choice_1, sample_choice_2, *task_buttons, ], ) sample_choice_1.click( lambda payloads, m: _use_sample(payloads, m, 0), inputs=[sample_payloads, selected_sample_mode], outputs=[ chat_input, media_image, media_audio, media_video, mode_selector, selected_task_badge, attachment_preview, chat_input, task_sample_row_1, task_sample_row_2, sample_choice_row, sample_preview_1, sample_preview_2, sample_choice_1, sample_choice_2, *task_buttons, ], ) sample_choice_2.click( lambda payloads, m: _use_sample(payloads, m, 1), inputs=[sample_payloads, selected_sample_mode], outputs=[ chat_input, media_image, media_audio, media_video, mode_selector, selected_task_badge, attachment_preview, chat_input, task_sample_row_1, task_sample_row_2, sample_choice_row, sample_preview_1, sample_preview_2, sample_choice_1, sample_choice_2, *task_buttons, ], ) # Initial: task chips visible, sample choices hidden. 
# On page load, put the sample picker into its starting state.
# NOTE(review): Gradio registers event listeners inside an active Blocks
# context, so in the full file this call presumably sits under the enclosing
# `with` block - confirm the indentation when splicing.
_initial_picker_outputs = [
    task_sample_row_1,
    task_sample_row_2,
    sample_choice_row,
    sample_preview_1,
    sample_preview_2,
    sample_choice_1,
    sample_choice_2,
]
demo.load(
    lambda: (
        gr.update(visible=True),   # task_sample_row_1
        gr.update(visible=True),   # task_sample_row_2
        gr.update(visible=False),  # sample_choice_row
        "",                        # sample_preview_1
        "",                        # sample_preview_2
        gr.update(visible=True),   # sample_choice_1
        gr.update(visible=True),   # sample_choice_2
    ),
    outputs=_initial_picker_outputs,
    queue=False,
)


if __name__ == "__main__":
    # Directories the served app is allowed to read files from.
    _launch_kwargs = {
        "allowed_paths": [
            *(str(root) for root in (PREVIEW_DIR, PROJECT_ROOT, ASSET_ROOT)),
            "/tmp",
        ],
    }
    if GRADIO_V6_PLUS:
        # Styling options are handed to launch() only on Gradio >= 6.
        # NOTE(review): presumably older versions receive them where the
        # Blocks object is constructed - confirm against that code.
        _launch_kwargs["css"] = CUSTOM_CSS + EXTRA_CSS
        _launch_kwargs["theme"] = theme
        _launch_kwargs["js"] = FORCE_LIGHT_MODE_JS
    demo.launch(**_launch_kwargs)