Spaces:
Running on Zero
| """ | |
| ZeroGPU-friendly Gradio entrypoint for OMada demo. | |
| - Downloads checkpoint + assets + style centroids from Hugging Face Hub | |
| - Instantiates OmadaDemo once (global) | |
| - Exposes 10 modalities via Gradio tabs | |
| - Uses @spaces.GPU only on inference handlers so GPU is allocated per request | |
| """ | |
| import os | |
| import sys | |
| import subprocess | |
| import importlib | |
| import base64 | |
| import html | |
| import io | |
| import re | |
| import wave | |
| import tempfile | |
| import shutil | |
| import threading | |
| import random | |
| from urllib.parse import quote | |
| from pathlib import Path | |
| from typing import List | |
| import numpy as np | |
| import torch | |
| from PIL import Image | |
| import gradio as gr | |
| import spaces | |
| from packaging.version import parse as parse_version | |
# Gradio version gate: component/kwarg behavior differs between 5.x and 6.x,
# so downstream UI code can branch on GRADIO_V6_PLUS.
GRADIO_VERSION = parse_version(gr.__version__)
GRADIO_V6_PLUS = GRADIO_VERSION >= parse_version("6.0.0")
# ---------------------------
# Project roots & sys.path
# ---------------------------
PROJECT_ROOT = Path(__file__).resolve().parent
# NOTE(review): presumably toggles eval-time config defaults downstream — confirm.
os.environ.setdefault("FORCE_EVAL_SETTINGS", "0")
GLOBAL_SEED = int(os.getenv("GLOBAL_SEED", "42"))  # seed used by _set_global_seed
# Scratch dir for browser-previewable media copies (served via /file=).
PREVIEW_DIR = PROJECT_ROOT / "_preview_cache"
PREVIEW_DIR.mkdir(parents=True, exist_ok=True)
# Vendored subprojects are imported as top-level packages, so prepend them to sys.path.
MMADA_ROOT = PROJECT_ROOT / "MMaDA"
if str(MMADA_ROOT) not in sys.path:
    sys.path.insert(0, str(MMADA_ROOT))
EMOVA_ROOT = PROJECT_ROOT / "EMOVA_speech_tokenizer"
if str(EMOVA_ROOT) not in sys.path:
    sys.path.insert(0, str(EMOVA_ROOT))
| # --------------------------- | |
| # HuggingFace Hub helper | |
| # --------------------------- | |
def ensure_hf_hub(target: str = "0.36.0"):
    """
    Make sure huggingface_hub stays <1.0 to satisfy transformers/tokenizers.

    Pin-installs ``huggingface-hub==target`` when the package is missing, or
    downgrades (and reloads the already-imported module in place) when the
    installed version is >= 1.0.0.

    Returns:
        The imported (possibly reloaded) ``huggingface_hub`` module.
    """
    try:
        import huggingface_hub as hub
    except ImportError:
        # Not installed at all: pin-install, then import.
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", f"huggingface-hub=={target}", "--no-cache-dir"]
        )
        import huggingface_hub as hub
    if parse_version(hub.__version__) >= parse_version("1.0.0"):
        # Too new: downgrade and reload so held references see the old API.
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", f"huggingface-hub=={target}", "--no-cache-dir"]
        )
        hub = importlib.reload(hub)
    # Backfill missing constants in older hub versions to avoid AttributeError.
    try:
        import huggingface_hub.constants as hub_consts  # type: ignore
    except Exception:
        hub_consts = None
    if hub_consts and not hasattr(hub_consts, "HF_HUB_ENABLE_HF_TRANSFER"):
        setattr(hub_consts, "HF_HUB_ENABLE_HF_TRANSFER", False)
    return hub
# Bind once at import time so all later downloads use the version-checked module.
snapshot_download = ensure_hf_hub().snapshot_download
| # --------------------------- | |
| # OMada demo imports | |
| # --------------------------- | |
| from inference.gradio_multimodal_demo_inst import ( # noqa: E402 | |
| OmadaDemo, | |
| CUSTOM_CSS, | |
| FORCE_LIGHT_MODE_JS, | |
| ) | |
| # --------------------------- | |
| # HF download helpers | |
| # --------------------------- | |
def download_assets() -> Path:
    """Fetch the demo asset snapshot (logo + sample prompts/media) from HF Hub.

    Repo, revision and token come from the ASSET_REPO_ID / ASSET_REVISION /
    HF_TOKEN environment variables. Returns the local snapshot root path.
    """
    asset_cache = PROJECT_ROOT / "_asset_cache"
    asset_cache.mkdir(parents=True, exist_ok=True)
    snapshot = snapshot_download(
        repo_id=os.getenv("ASSET_REPO_ID", "snu-aidas/Dynin-Omni-Demo-Assets"),
        revision=os.getenv("ASSET_REVISION", "main"),
        repo_type="dataset",
        local_dir=asset_cache,
        local_dir_use_symlinks=False,
        token=os.getenv("HF_TOKEN"),
    )
    return Path(snapshot)
def download_style() -> Path:
    """Download the style-centroid dataset, trying several repo types.

    The preferred repo type comes from STYLE_REPO_TYPE (default "dataset"),
    with "dataset" and "model" appended as fallbacks. If every attempt fails,
    the (possibly pre-populated) local cache directory is returned instead.
    """
    repo_id = os.getenv("STYLE_REPO_ID", "snu-aidas/aidas-style-centroid")
    revision = os.getenv("STYLE_REVISION", "main")
    token = os.getenv("HF_TOKEN")
    cache_dir = PROJECT_ROOT / "_style_cache"
    cache_dir.mkdir(parents=True, exist_ok=True)

    preferred = os.getenv("STYLE_REPO_TYPE", "dataset").strip().lower()
    repo_type_candidates = [preferred]
    for fallback in ("dataset", "model"):
        if fallback not in repo_type_candidates:
            repo_type_candidates.append(fallback)

    last_exc = None
    for candidate in repo_type_candidates:
        try:
            snapshot = snapshot_download(
                repo_id=repo_id,
                revision=revision,
                repo_type=candidate,
                local_dir=cache_dir,
                local_dir_use_symlinks=False,
                token=token,
            )
        except Exception as exc:  # try the next repo type
            last_exc = exc
        else:
            return Path(snapshot)

    print(
        f"[Style] Failed to download '{repo_id}' (tried repo_type={repo_type_candidates}). "
        f"Using local cache at {cache_dir}. Last error: {last_exc}",
        flush=True,
    )
    return cache_dir
def download_checkpoint() -> Path:
    """Download checkpoint snapshot and return an `unwrapped_model` directory.

    Resolution order:
      1. MODEL_CHECKPOINT_PATH env override (a checkpoint dir or its parent).
      2. HF Hub snapshot of MODEL_REPO_ID at MODEL_REVISION.
    If the snapshot is not itself named `unwrapped_model` and has no such
    subdirectory, a sibling `unwrapped_model` symlink aliasing the snapshot
    is created.

    Raises:
        FileNotFoundError: if MODEL_CHECKPOINT_PATH is set but does not exist.
    """
    local_override = os.getenv("MODEL_CHECKPOINT_PATH")
    if local_override:
        override_path = Path(local_override).expanduser()
        if override_path.name != "unwrapped_model":
            nested = override_path / "unwrapped_model"
            if nested.is_dir():
                override_path = nested
        if not override_path.exists():
            raise FileNotFoundError(f"MODEL_CHECKPOINT_PATH does not exist: {override_path}")
        return override_path
    repo_id = os.getenv("MODEL_REPO_ID", "snu-aidas/Dynin-Omni")
    revision = os.getenv("MODEL_REVISION", "main")
    token = os.getenv("HF_TOKEN")
    cache_dir = PROJECT_ROOT / "_ckpt_cache"
    cache_dir.mkdir(parents=True, exist_ok=True)
    snapshot_path = Path(
        snapshot_download(
            repo_id=repo_id,
            revision=revision,
            repo_type="model",
            local_dir=cache_dir,
            local_dir_use_symlinks=False,
            token=token,
        )
    )
    if snapshot_path.name == "unwrapped_model":
        return snapshot_path
    nested = snapshot_path / "unwrapped_model"
    if nested.is_dir():
        return nested
    aliased = snapshot_path.parent / "unwrapped_model"
    if not aliased.exists():
        try:
            aliased.symlink_to(snapshot_path, target_is_directory=True)
        except FileExistsError:
            # Concurrent init race (warmup vs request): another caller created
            # the alias between the exists() check and symlink_to(). Same
            # guard the style-centroid wiring already uses — safe to reuse.
            pass
    return aliased
# ---------------------------
# Assets (for examples + logo)
# ---------------------------
# Downloaded eagerly at import time so the example lists below can be built.
ASSET_ROOT = download_assets()
STYLE_ROOT = download_style()
LOGO_PATH = ASSET_ROOT / "logo.png"  # optional
| def _load_text_examples(path: Path): | |
| if not path.exists(): | |
| return [] | |
| lines = [ | |
| ln.strip() | |
| for ln in path.read_text(encoding="utf-8").splitlines() | |
| if ln.strip() | |
| ] | |
| return [[ln] for ln in lines] | |
def _load_media_examples(subdir: str, suffixes):
    """Collect [path] example rows for media files under ASSET_ROOT/subdir.

    Only regular files whose lower-cased suffix is in *suffixes* are kept,
    in sorted order; a missing directory yields an empty list.
    """
    directory = ASSET_ROOT / subdir
    if not directory.exists():
        return []
    return [
        [str(entry)]
        for entry in sorted(directory.iterdir())
        if entry.is_file() and entry.suffix.lower() in suffixes
    ]
def _load_i2i_examples():
    """Pair image files with instruction text files under ASSET_ROOT/i2i.

    Image files (image1.jpeg, image2.png, ...) and text files (text1.txt,
    text2.txt, ...) are matched by sorted order; pairs whose instruction text
    is empty are dropped. Each row has the Gradio Examples shape:
    [image_path, instruction_text].
    """
    root = ASSET_ROOT / "i2i"
    if not root.exists():
        return []
    images = sorted(
        p for p in root.iterdir() if p.suffix.lower() in {".png", ".jpg", ".jpeg", ".webp"}
    )
    texts = sorted(p for p in root.iterdir() if p.suffix.lower() == ".txt")
    rows = []
    # zip truncates to the shorter list, matching the original min-length loop.
    for img_path, txt_path in zip(images, texts):
        instruction = txt_path.read_text(encoding="utf-8").strip()
        if instruction:
            rows.append([str(img_path), instruction])
    return rows
# text-based examples (each entry is a one- or two-item Gradio example row)
T2S_EXAMPLES = _load_text_examples(ASSET_ROOT / "t2s" / "text.txt")
CHAT_EXAMPLES = _load_text_examples(ASSET_ROOT / "chat" / "text.txt")
T2I_EXAMPLES = _load_text_examples(ASSET_ROOT / "t2i" / "text.txt")
I2I_EXAMPLES = _load_i2i_examples()  # [image_path, instruction] pairs
| def _get_example_value(examples, idx: int, pos: int = 0, default=None): | |
| try: | |
| if len(examples) > idx and len(examples[idx]) > pos: | |
| val = examples[idx][pos] | |
| if val is not None and str(val) != "": | |
| return val | |
| except Exception: | |
| pass | |
| try: | |
| if examples and len(examples[0]) > pos: | |
| val = examples[0][pos] | |
| if val is not None and str(val) != "": | |
| return val | |
| except Exception: | |
| pass | |
| return default | |
| def _sample_preview_label(item: dict, idx: int) -> str: | |
| base = f"sample {idx + 1}" | |
| if not isinstance(item, dict): | |
| return base | |
| text = str(item.get("text") or "").strip() | |
| image = item.get("image") | |
| audio = item.get("audio") | |
| video = item.get("video") | |
| preview = "" | |
| if text: | |
| preview = text.replace("\n", " ").strip() | |
| elif image: | |
| preview = f"image: {Path(str(image)).name}" | |
| elif audio: | |
| preview = f"audio: {Path(str(audio)).name}" | |
| elif video: | |
| preview = f"video: {Path(str(video)).name}" | |
| if not preview: | |
| return base | |
| if len(preview) > 34: | |
| preview = preview[:31] + "..." | |
| return f"{base} - {preview}" | |
| def _image_to_data_uri(path: str) -> str: | |
| p = str(path or "") | |
| if not p or not os.path.exists(p): | |
| return "" | |
| try: | |
| with Image.open(p).convert("RGB") as pil_img: | |
| buf = io.BytesIO() | |
| pil_img.save(buf, format="PNG") | |
| encoded = base64.b64encode(buf.getvalue()).decode("ascii") | |
| return f"data:image/png;base64,{encoded}" | |
| except Exception: | |
| return "" | |
| def _render_sample_preview_card(item: dict, idx: int) -> str: | |
| title = f"sample {idx + 1}" | |
| if not isinstance(item, dict): | |
| return ( | |
| "<div class='omada-sample-preview-inner'>" | |
| f"<div class='omada-sample-preview-title'>{title}</div>" | |
| "<div class='omada-sample-preview-desc'>No preview available</div>" | |
| "</div>" | |
| ) | |
| text = str(item.get("text") or "").strip() | |
| image = item.get("image") | |
| audio = item.get("audio") | |
| video = item.get("video") | |
| media_html = "<div class='omada-sample-preview-icon'>🧩</div>" | |
| desc = "No preview available" | |
| if image: | |
| img_path = str(image) | |
| src = _image_to_data_uri(img_path) | |
| if src: | |
| media_html = f"<img src='{src}' alt='sample image' class='omada-sample-preview-thumb' />" | |
| desc = text if text else f"image: {Path(img_path).name}" | |
| else: | |
| # Fallback for browsers/Gradio sanitization cases where data URI is blocked. | |
| file_src = f"/file={quote(img_path)}" | |
| media_html = f"<img src='{file_src}' alt='sample image' class='omada-sample-preview-thumb' />" | |
| desc = text if text else f"image: {Path(img_path).name}" | |
| elif video: | |
| vpath = str(video) | |
| thumb = _video_thumb_data_uri(vpath) | |
| if thumb: | |
| media_html = f"<img src='{thumb}' alt='sample video' class='omada-sample-preview-thumb' />" | |
| else: | |
| media_html = "<div class='omada-sample-preview-icon'>🎬</div>" | |
| desc = text if text else f"video: {Path(vpath).name}" | |
| elif audio: | |
| apath = str(audio) | |
| media_html = "<div class='omada-sample-preview-icon'>🎤</div>" | |
| desc = text if text else f"audio: {Path(apath).name}" | |
| elif text: | |
| media_html = "<div class='omada-sample-preview-icon'>💬</div>" | |
| desc = text | |
| desc = desc.replace("\n", " ").strip() | |
| if len(desc) > 120: | |
| desc = desc[:117] + "..." | |
| return ( | |
| "<div class='omada-sample-preview-inner'>" | |
| f"<div class='omada-sample-preview-media'>{media_html}</div>" | |
| "<div class='omada-sample-preview-meta'>" | |
| f"<div class='omada-sample-preview-title'>{html.escape(title)}</div>" | |
| f"<div class='omada-sample-preview-desc'>{html.escape(desc)}</div>" | |
| "</div>" | |
| "</div>" | |
| ) | |
| def _render_response(status: str, body_html: str = "") -> str: | |
| safe_status = html.escape(status or "") | |
| parts = [] | |
| if safe_status: | |
| parts.append(f"<p class='omada-response-status'>{safe_status}</p>") | |
| if body_html: | |
| parts.append(body_html) | |
| content = "".join(parts) | |
| return f"<div class='omada-response-container'>{content}</div>" | |
def _render_text_message(status: str, content: str) -> str:
    """Render a tokenized text reply inside a response block; status-only when empty."""
    stripped = (content or "").strip()
    if not stripped:
        return _render_response(status)
    block = f"<div class='omada-response-block'>{_format_tokenized_text(stripped)}</div>"
    return _render_response(status, block)
| def _is_mask_like_token(token: str) -> bool: | |
| t = token.strip() | |
| if not t: | |
| return False | |
| upper = t.upper() | |
| return ( | |
| upper in {"[MASK]", "<MASK>", "<|MASK|>", "<MASK_TOKEN>", "<|MASK_TOKEN|>"} | |
| or upper in {"<MDM_MASK>", "MDM_MASK", "<|MDM_MASK|>"} | |
| or "MASK" in upper | |
| ) | |
| def _is_special_token(token: str) -> bool: | |
| t = token.strip() | |
| return bool(t) and t.startswith("<|") and t.endswith("|>") | |
| def _format_tokenized_text(text: str) -> str: | |
| if not text: | |
| return "" | |
| # Handle both complete and partially-streamed mask tokens. | |
| mask_pat = r"(<[^>\n]*MASK[^>\n]*>?|\[MASK\]|MASK_TOKEN)" | |
| chunks = re.split(mask_pat, text, flags=re.IGNORECASE) | |
| out = [] | |
| for chunk in chunks: | |
| if not chunk: | |
| continue | |
| if re.fullmatch(mask_pat, chunk, flags=re.IGNORECASE) or _is_mask_like_token(chunk): | |
| out.append("<span class='omada-token-pill omada-token-mask'>MASK</span>") | |
| continue | |
| if chunk.isspace(): | |
| out.append(chunk.replace("\n", "<br>")) | |
| continue | |
| safe = html.escape(chunk) | |
| if _is_special_token(chunk): | |
| out.append(f"<span class='omada-token-pill omada-token-special'>{safe}</span>") | |
| else: | |
| out.append(safe) | |
| return "".join(out).replace("\n", "<br>") | |
def _render_audio_message(status: str, audio):
    """Render (sample_rate, samples) as an inline base64 WAV player.

    Samples are clipped to [-1, 1] and encoded as 16-bit PCM; missing or
    empty audio degrades to a status-only response.
    """
    if not audio:
        return _render_response(status)
    sample_rate, data = audio
    if data is None:
        return _render_response(status)
    samples = np.asarray(data, dtype=np.float32)
    if samples.size == 0:
        return _render_response(status)
    if samples.ndim == 1:
        samples = samples[:, None]  # mono → (n, 1)
    pcm16 = (np.clip(samples, -1.0, 1.0) * 32767.0).astype(np.int16)
    raw = io.BytesIO()
    with wave.open(raw, "wb") as writer:
        writer.setnchannels(samples.shape[1])
        writer.setsampwidth(2)  # 16-bit PCM
        writer.setframerate(int(sample_rate))
        writer.writeframes(pcm16.tobytes())
    encoded = base64.b64encode(raw.getvalue()).decode("ascii")
    player = (
        "<div class='omada-audio-block'>"
        "<audio controls preload='auto' playsinline>"
        f"<source src='data:audio/wav;base64,{encoded}' type='audio/wav' /></audio>"
        "</div>"
    )
    return _render_response(status, player)
def _render_image_message(status: str, image: Image.Image):
    """Render a PIL image as an inline base64 <img>, preceded by the status line."""
    if image is None:
        return _render_response(status)
    payload = io.BytesIO()
    try:
        image.save(payload, format="PNG")
    except Exception:
        # Unencodable image: degrade to the status-only container.
        return _render_response(status)
    encoded = base64.b64encode(payload.getvalue()).decode("ascii")
    safe_status = html.escape(status or "")
    return (
        "<div class='omada-image-only'>"
        f"<p class='omada-image-status'>{safe_status}</p>"
        "<img src='data:image/png;base64,"
        f"{encoded}"
        "' alt='Generated image' style='display:block;width:auto;height:auto;max-width:min(100%,720px);"
        "border-radius:0;image-rendering:crisp-edges;image-rendering:-webkit-optimize-contrast;filter:none;opacity:1;' />"
        "</div>"
    )
def _render_user_message(mode: str, message: str, image_in, audio_in, video_in, defer_video: bool = False) -> str:
    """Render the user's turn (attached media + text) as chat-bubble HTML.

    Media are inlined as data URIs where possible, falling back to Gradio's
    /file= route serving copies cached in PREVIEW_DIR. With ``defer_video``
    True, the video player is replaced by a "Video loading..." placeholder.
    """
    def _cache_media_copy(src_path: str) -> str:
        # Copy media into PREVIEW_DIR so /file= can still serve it after the
        # original temp upload is cleaned up; returns the original path on
        # any failure.
        path = str(src_path or "")
        if not path or not os.path.exists(path):
            return path
        try:
            suffix = Path(path).suffix or ""
            fd, dst = tempfile.mkstemp(prefix="omada_media_", suffix=suffix, dir=str(PREVIEW_DIR))
            os.close(fd)
            shutil.copy2(path, dst)
            return dst
        except Exception:
            return path
    def _to_browser_mp4(video_path: str) -> str:
        # Transcode to H.264/yuv420p MP4 with faststart for broad browser
        # support; returns the original path when ffmpeg fails or is absent.
        path = str(video_path or "")
        if not path:
            return path
        try:
            fd, out_path = tempfile.mkstemp(prefix="omada_preview_", suffix=".mp4", dir=str(PREVIEW_DIR))
            os.close(fd)
            cmd = [
                "ffmpeg",
                "-y",
                "-i",
                path,
                "-an",  # strip audio from the preview
                "-c:v",
                "libx264",
                "-pix_fmt",
                "yuv420p",
                "-movflags",
                "+faststart",  # moov atom up front so playback starts early
                out_path,
            ]
            proc = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            if proc.returncode == 0 and os.path.exists(out_path):
                return out_path
            if os.path.exists(out_path):
                os.remove(out_path)
        except Exception:
            pass
        return path
    def _video_data_uri(video_path: str, mime: str, max_bytes: int = 25 * 1024 * 1024) -> str:
        # Inline the whole video as a data URI when small enough (<= 25 MiB);
        # "" tells the caller to use a /file= URL instead.
        try:
            size = os.path.getsize(video_path)
            if size <= 0 or size > max_bytes:
                return ""
            with open(video_path, "rb") as f:
                encoded = base64.b64encode(f.read()).decode("ascii")
            return f"data:{mime};base64,{encoded}"
        except Exception:
            return ""
    def _video_poster_data_uri(video_path: str) -> str:
        # First decoded frame as a JPEG data URI for <video poster=...>;
        # "" when cv2 is unavailable or decoding fails.
        try:
            import cv2  # type: ignore
            cap = cv2.VideoCapture(video_path)
            ok, frame = cap.read()
            cap.release()
            if not ok or frame is None:
                return ""
            ok, buf = cv2.imencode(".jpg", frame)
            if not ok:
                return ""
            encoded = base64.b64encode(buf.tobytes()).decode("ascii")
            return f"data:image/jpeg;base64,{encoded}"
        except Exception:
            return ""
    parts = []
    text = (message or "").strip()
    # --- image attachment -------------------------------------------------
    if image_in is not None:
        try:
            if isinstance(image_in, Image.Image):
                buffer = io.BytesIO()
                image_in.save(buffer, format="PNG")
                encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
                parts.append(
                    "<div class='omada-user-media'>"
                    f"<img src='data:image/png;base64,{encoded}' alt='Input image' />"
                    "</div>"
                )
            elif isinstance(image_in, str) and image_in:
                try:
                    with Image.open(image_in).convert("RGB") as pil_img:
                        buf = io.BytesIO()
                        pil_img.save(buf, format="PNG")
                        encoded = base64.b64encode(buf.getvalue()).decode("ascii")
                        parts.append(
                            "<div class='omada-user-media'>"
                            f"<img src='data:image/png;base64,{encoded}' alt='Input image' />"
                            "</div>"
                        )
                except Exception:
                    # Unreadable as an image: serve the raw file via /file=.
                    image_path = _cache_media_copy(image_in)
                    parts.append(
                        "<div class='omada-user-media'>"
                        f"<img src='/file={quote(image_path)}' alt='Input image' />"
                        "</div>"
                    )
        except Exception:
            pass
    # --- video attachment (video-MMU mode only) ---------------------------
    if mode == "MMU (Video → Text)" and video_in:
        if defer_video:
            # Placeholder now; the real player is rendered on a later pass.
            parts.append("<div class='omada-user-media'><div class='omada-video-loading'>Video loading...</div></div>")
            if text:
                parts.append(f"<div>{html.escape(text)}</div>")
            return "".join(parts)
        video_path = None
        if isinstance(video_in, str):
            video_path = video_in
        elif isinstance(video_in, dict):
            video_path = video_in.get("path") or video_in.get("name")
        if video_path:
            cached_original = _cache_media_copy(video_path)
            preview_path = _to_browser_mp4(cached_original)
            poster = _video_poster_data_uri(cached_original)
            poster_attr = f" poster='{poster}'" if poster else ""
            source_path = str(preview_path or cached_original)
            fallback_path = str(cached_original)
            def _video_mime(path: str) -> str:
                # Map file extension to a MIME type (default video/mp4).
                ext = os.path.splitext(path.lower())[1]
                return {
                    ".mp4": "video/mp4",
                    ".webm": "video/webm",
                    ".mov": "video/quicktime",
                    ".m4v": "video/mp4",
                    ".avi": "video/x-msvideo",
                    ".mkv": "video/x-matroska",
                }.get(ext, "video/mp4")
            # Primary <source>: inlined data URI when small, else /file= URL;
            # second <source> and the anchor are fallbacks for the original file.
            parts.append(
                "<div class='omada-user-media'>"
                f"<video class='omada-user-video' controls playsinline preload='metadata'{poster_attr}>"
                f"<source src='{(_video_data_uri(source_path, _video_mime(source_path)) or f'/file={quote(source_path)}')}' type='{_video_mime(source_path)}' />"
                f"<source src='/file={quote(fallback_path)}' type='{_video_mime(fallback_path)}' />"
                f"<a href='/file={quote(fallback_path)}' target='_blank' rel='noopener'>Open video</a>"
                "</video>"
                "</div>"
            )
    # --- audio attachment -------------------------------------------------
    if audio_in is not None:
        audio_path = ""
        if isinstance(audio_in, str):
            audio_path = audio_in
        elif isinstance(audio_in, dict):
            audio_path = audio_in.get("path") or audio_in.get("name") or ""
        elif isinstance(audio_in, (tuple, list)) and len(audio_in) == 2:
            # Raw (sample_rate, samples): write a 16-bit PCM WAV into PREVIEW_DIR.
            try:
                sample_rate, data = audio_in
                waveform = np.asarray(data, dtype=np.float32)
                if waveform.ndim == 1:
                    waveform = waveform[:, None]
                waveform = np.clip(waveform, -1.0, 1.0)
                pcm16 = (waveform * 32767.0).astype(np.int16)
                fd, temp_audio = tempfile.mkstemp(prefix="omada_user_audio_", suffix=".wav", dir=str(PREVIEW_DIR))
                os.close(fd)
                with wave.open(temp_audio, "wb") as wav_writer:
                    wav_writer.setnchannels(pcm16.shape[1])
                    wav_writer.setsampwidth(2)
                    wav_writer.setframerate(int(sample_rate))
                    wav_writer.writeframes(pcm16.tobytes())
                audio_path = temp_audio
            except Exception:
                audio_path = ""
        if audio_path:
            ext = os.path.splitext(audio_path.lower())[1]
            mime = {
                ".wav": "audio/wav",
                ".mp3": "audio/mpeg",
                ".flac": "audio/flac",
                ".ogg": "audio/ogg",
                ".m4a": "audio/mp4",
            }.get(ext, "audio/wav")
            src = ""
            try:
                with open(audio_path, "rb") as f:
                    encoded_audio = base64.b64encode(f.read()).decode("ascii")
                src = f"data:{mime};base64,{encoded_audio}"
            except Exception:
                # Unreadable: serve a cached copy via /file= instead of inlining.
                audio_path = _cache_media_copy(audio_path)
                src = f"/file={quote(audio_path)}"
            parts.append(
                "<div class='omada-user-media'>"
                f"<audio controls preload='metadata'><source src='{src}' type='{mime}' /></audio>"
                f"<div><a href='{src}' target='_blank' rel='noopener'>Open audio</a></div>"
                "</div>"
            )
    # --- text + fallback --------------------------------------------------
    if text:
        parts.append(f"<div>{html.escape(text)}</div>")
    if not parts:
        # Nothing at all: at least label the turn with its task mode.
        parts.append(f"<div>[{html.escape(mode)}]</div>")
    return "".join(parts)
| def _extract_video_path(video_in) -> str: | |
| if isinstance(video_in, str): | |
| return video_in | |
| if isinstance(video_in, dict): | |
| return str(video_in.get("path") or video_in.get("name") or "") | |
| return "" | |
| def _video_thumb_data_uri(video_path: str) -> str: | |
| if not video_path or not os.path.exists(video_path): | |
| return "" | |
| try: | |
| import cv2 # type: ignore | |
| cap = cv2.VideoCapture(video_path) | |
| ok, frame = cap.read() | |
| cap.release() | |
| if not ok or frame is None: | |
| return "" | |
| ok, buf = cv2.imencode(".jpg", frame) | |
| if not ok: | |
| return "" | |
| encoded = base64.b64encode(buf.tobytes()).decode("ascii") | |
| return f"data:image/jpeg;base64,{encoded}" | |
| except Exception: | |
| return "" | |
def _render_attachment_preview(image_in, audio_in, video_in) -> str:
    """Render small thumbnail chips for the currently attached image/video/audio.

    Returns "" when nothing is attached. Each attachment becomes one
    `.omada-attach-item` div; thumbnail failures degrade to icon-only chips.
    """
    items = []
    if image_in is not None:
        try:
            if isinstance(image_in, Image.Image):
                buf = io.BytesIO()
                image_in.save(buf, format="PNG")
                encoded = base64.b64encode(buf.getvalue()).decode("ascii")
                items.append(
                    "<div class='omada-attach-item'>"
                    f"<img src='data:image/png;base64,{encoded}' alt='image attachment' />"
                    "<span>Image</span>"
                    "</div>"
                )
            elif isinstance(image_in, str) and image_in:
                with Image.open(image_in).convert("RGB") as pil_img:
                    buf = io.BytesIO()
                    pil_img.save(buf, format="PNG")
                    encoded = base64.b64encode(buf.getvalue()).decode("ascii")
                    items.append(
                        "<div class='omada-attach-item'>"
                        f"<img src='data:image/png;base64,{encoded}' alt='image attachment' />"
                        "<span>Image</span>"
                        "</div>"
                    )
        except Exception:
            pass
    video_path = _extract_video_path(video_in)
    if video_path:
        thumb = _video_thumb_data_uri(video_path)
        if thumb:
            items.append(
                "<div class='omada-attach-item'>"
                f"<img src='{thumb}' alt='video attachment' />"
                "<span>Video</span>"
                "</div>"
            )
        else:
            items.append(
                "<div class='omada-attach-item omada-attach-item-fallback'>"
                "<span>🎬 Video</span>"
                "</div>"
            )
    audio_path = ""
    if isinstance(audio_in, str):
        audio_path = audio_in
    elif isinstance(audio_in, dict):
        audio_path = str(audio_in.get("path") or audio_in.get("name") or "")
    if audio_path:
        filename = html.escape(Path(audio_path).name or "speech.wav")
        items.append(
            "<div class='omada-attach-item omada-attach-audio'>"
            "<span class='omada-audio-icon'>🎤</span>"
            # BUG FIX: previously rendered the literal "(unknown)" and never
            # used the computed, escaped filename.
            f"<span>{filename}</span>"
            "</div>"
        )
    if not items:
        return ""
    return "<div class='omada-attach-preview'>" + "".join(items) + "</div>"
| def _render_task_chip(mode: str) -> str: | |
| icon_map = { | |
| "Chat": "💬", | |
| "MMU (Image → Text)": "🖼️", | |
| "MMU (Video → Text)": "🎬", | |
| "Image Generation": "🎨", | |
| "Image Editing": "🛠️", | |
| "ASR": "🎙️", | |
| "TTS": "🔊", | |
| } | |
| icon = icon_map.get(mode, "🧩") | |
| safe_mode = html.escape(mode or "Task") | |
| return f"<div class='omada-selected-task-chip'>{icon} {safe_mode}</div>" | |
# audio / video / image examples
S2T_EXAMPLES = _load_media_examples("s2t", {".wav", ".mp3", ".flac", ".ogg"})
V2T_EXAMPLES = _load_media_examples("v2t", {".mp4", ".mov", ".avi", ".webm"})
# MMU images: every image under assets/mmu is paired with the default prompt.
MMU_DIR = ASSET_ROOT / "mmu"
MMU_EXAMPLES: List[List[str]] = []
DEFAULT_MMU_PROMPT = "Describe the given image in detail."
if MMU_DIR.exists():
    for path in sorted(
        [
            p
            for p in MMU_DIR.iterdir()
            if p.suffix.lower() in {".png", ".jpg", ".jpeg", ".webp"}
        ]
    ):
        MMU_EXAMPLES.append([
            str(path),
            DEFAULT_MMU_PROMPT,
        ])
| # --------------------------- | |
| # Global OmadaDemo instance | |
| # --------------------------- | |
# Lazily constructed process-wide singleton, guarded by APP_LOCK (see get_app()).
APP = None  # type: ignore
APP_LOCK = threading.Lock()
def get_app() -> OmadaDemo:
    """Return the process-wide OmadaDemo singleton, building it on first call.

    Thread-safe via double-checked locking on APP_LOCK. First construction:
    downloads the checkpoint, symlinks the style-centroid dataset into both
    locations the speech tokenizers expect, resolves a train config
    (TRAIN_CONFIG_PATH env > space_demo.yaml > demo.yaml > legacy
    mmada_demo.yaml), then instantiates OmadaDemo on DEVICE (default "cuda").
    """
    global APP
    if APP is not None:  # fast path: already built
        return APP
    with APP_LOCK:
        if APP is not None:  # re-check under the lock
            return APP
        ckpt_dir = download_checkpoint()
        # Wire style centroids to expected locations
        style_targets = [
            MMADA_ROOT / "models" / "speech_tokenization" / "condition_style_centroid",
            PROJECT_ROOT
            / "EMOVA_speech_tokenizer"
            / "emova_speech_tokenizer"
            / "speech_tokenization"
            / "condition_style_centroid",
        ]
        style_root_resolved = STYLE_ROOT.resolve()
        for starget in style_targets:
            starget.parent.mkdir(parents=True, exist_ok=True)
            try:
                if starget.is_symlink():
                    # Repoint stale symlinks at the freshly downloaded style root.
                    try:
                        current = starget.resolve()
                    except Exception:
                        current = None
                    if current != style_root_resolved:
                        starget.unlink(missing_ok=True)
                        starget.symlink_to(STYLE_ROOT, target_is_directory=True)
                elif starget.exists():
                    # Existing concrete dir/file: keep as-is.
                    pass
                else:
                    starget.symlink_to(STYLE_ROOT, target_is_directory=True)
            except FileExistsError:
                # Concurrent init race (warmup vs request): safe to ignore.
                pass
        # Prefer a repo-local Space config first, then fall back to demo configs.
        space_demo_cfg = PROJECT_ROOT / "MMaDA" / "inference" / "demo" / "space_demo.yaml"
        default_cfg = PROJECT_ROOT / "MMaDA" / "inference" / "demo" / "demo.yaml"
        legacy_cfg = PROJECT_ROOT / "MMaDA" / "configs" / "mmada_demo.yaml"
        train_config = os.getenv("TRAIN_CONFIG_PATH")
        if not train_config:
            if space_demo_cfg.exists():
                train_config = str(space_demo_cfg)
            else:
                train_config = str(default_cfg if default_cfg.exists() else legacy_cfg)
        device = os.getenv("DEVICE", "cuda")
        APP = OmadaDemo(train_config=train_config, checkpoint=str(ckpt_dir), device=device)
    return APP
def warmup_model_status() -> str:
    """Optionally pre-load the model and return a human-readable status line."""
    try:
        if os.getenv("SPACE_ID"):
            # ZeroGPU Spaces forbids CUDA init in the main process.
            return "Model status: Ready (lazy load on first request)."
        get_app()
    except Exception as exc:
        return f"Model status: Load failed ({exc})."
    return "Model status: Loaded. Inference is ready."
def _set_global_seed(seed: int = GLOBAL_SEED) -> None:
    """Seed python/numpy/torch RNGs and force deterministic cuDNN behavior."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    try:
        # Favor reproducibility over cuDNN autotuning speed.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    except Exception:
        pass
| # --------------------------- | |
| # ZeroGPU-wrapped handlers | |
| # --------------------------- | |
| # (== 그대로, 생략 없이 둔 부분 ==) | |
def t2s_handler(text, max_tokens, steps, block_len, temperature, cfg_scale, gender, emotion, speed, pitch):
    """Text → speech handler; returns (audio, status) from OmadaDemo.run_t2s.

    NOTE(review): the module docstring says handlers carry @spaces.GPU —
    confirm where that decorator is applied.
    """
    demo = get_app()
    return demo.run_t2s(
        text=text,
        max_new_tokens=int(max_tokens),
        steps=int(steps),
        block_length=int(block_len),
        temperature=float(temperature),
        cfg_scale=float(cfg_scale),
        gender_choice=gender,
        emotion_choice=emotion,
        speed_choice=speed,
        pitch_choice=pitch,
    )
def s2t_handler(audio_path, steps, block_len, max_tokens, remasking):
    """Streaming speech → text handler; yields (partial_text, status) pairs."""
    demo = get_app()
    yield from demo.run_s2t_stream(
        audio_path=audio_path,
        steps=int(steps),
        block_length=int(block_len),
        max_new_tokens=int(max_tokens),
        remasking=str(remasking),
        update_every=32,
    )
def v2t_handler(video, steps, block_len, max_tokens):
    """Streaming video → text handler; yields (partial_text, status) pairs."""
    demo = get_app()
    yield from demo.run_v2t_stream(
        video_path=video,
        steps=int(steps),
        block_length=int(block_len),
        max_new_tokens=int(max_tokens),
        update_every=32,
    )
def chat_handler(message, max_tokens, steps, block_len, temperature):
    """Streaming chat handler; drops the trailing `done` flag from run_chat_stream."""
    demo = get_app()
    stream = demo.run_chat_stream(
        message=message,
        max_new_tokens=int(max_tokens),
        steps=int(steps),
        block_length=int(block_len),
        temperature=float(temperature),
        update_every=32,
    )
    for reply_html, status, _done in stream:
        yield reply_html, status
def mmu_handler(image, question, max_tokens, steps, block_len, temperature):
    """Image → text (MMU) handler; returns (text, status) from OmadaDemo.run_mmu."""
    demo = get_app()
    return demo.run_mmu(
        images=image,
        message=question,
        max_new_tokens=int(max_tokens),
        steps=int(steps),
        block_length=int(block_len),
        temperature=float(temperature),
    )
def t2i_handler(prompt, timesteps, temperature, guidance):
    """Streaming text → image handler; yields (image, status) pairs."""
    demo = get_app()
    yield from demo.run_t2i_stream(
        prompt=prompt,
        timesteps=int(timesteps),
        temperature=float(temperature),
        guidance_scale=float(guidance),
        update_every=2,
    )
def i2i_handler(instruction, image, timesteps, temperature, guidance):
    """Streaming image-editing handler; yields (edited_image, status) pairs."""
    demo = get_app()
    yield from demo.run_i2i_stream(
        instruction=instruction,
        source_image=image,
        timesteps=int(timesteps),
        temperature=float(temperature),
        guidance_scale=float(guidance),
        update_every=2,
    )
| # --------------------------- | |
| # Gradio UI (10 tabs + examples) | |
| # --------------------------- | |
# Shared Gradio theme for the whole demo UI (soft look, blue primary,
# gray neutrals); customized further via EXTRA_CSS below.
theme = gr.themes.Soft(primary_hue="blue", neutral_hue="gray")
| EXTRA_CSS = """ | |
| html, body, .gradio-container { | |
| background: var(--omada-surface) !important; | |
| color: var(--omada-text-primary) !important; | |
| } | |
| .omada-shell { | |
| min-height: 0; | |
| display: flex; | |
| flex-direction: column; | |
| padding-bottom: 6px; | |
| } | |
| .omada-sample-row { | |
| gap: 10px !important; | |
| justify-content: center !important; | |
| margin-bottom: 6px; | |
| } | |
| .omada-sample-row .gradio-button { | |
| max-width: 280px !important; | |
| } | |
| .omada-hero { | |
| text-align: center; | |
| margin: 40px 0 24px 0; | |
| } | |
| .omada-hero h2 { | |
| font-size: 2.2rem; | |
| margin: 0; | |
| color: var(--omada-dark-text); | |
| } | |
| .omada-hero p { | |
| margin: 10px 0 0 0; | |
| color: var(--omada-dark-muted); | |
| } | |
| .omada-input-row { | |
| gap: 6px !important; | |
| align-items: center !important; | |
| display: flex !important; | |
| flex-direction: row !important; | |
| justify-content: center !important; | |
| position: relative !important; | |
| inset: auto !important; | |
| top: auto !important; | |
| right: auto !important; | |
| bottom: auto !important; | |
| left: auto !important; | |
| transform: none !important; | |
| background: var(--omada-surface-alt); | |
| padding: 6px 14px; | |
| border-radius: 999px; | |
| z-index: 5; | |
| width: min(980px, calc(100vw - 24px)); | |
| margin: 4px auto 8px; | |
| box-shadow: 0 8px 24px rgba(0,0,0,0.08); | |
| box-sizing: border-box; | |
| } | |
| .omada-input-row > * { | |
| min-width: 0 !important; | |
| margin: 0 !important; | |
| align-self: center !important; | |
| background: transparent !important; | |
| box-shadow: none !important; | |
| border: none !important; | |
| } | |
| .omada-input-row .gradio-textbox textarea { | |
| background: var(--omada-surface) !important; | |
| color: var(--omada-text-primary) !important; | |
| border-radius: 999px !important; | |
| border: 1px solid var(--omada-border) !important; | |
| padding: 6px 10px !important; | |
| min-height: 36px !important; | |
| } | |
| .omada-plus-btn button, | |
| .omada-send-btn button { | |
| border-radius: 999px !important; | |
| width: 36px !important; | |
| min-width: 36px !important; | |
| height: 36px !important; | |
| background: var(--omada-surface) !important; | |
| color: var(--omada-text-primary) !important; | |
| border: 1px solid var(--omada-border) !important; | |
| padding: 0 !important; | |
| font-size: 1.2rem !important; | |
| line-height: 1 !important; | |
| } | |
| .omada-plus-btn, | |
| .omada-send-btn { | |
| flex: 0 0 36px !important; | |
| display: flex !important; | |
| align-items: center !important; | |
| justify-content: center !important; | |
| } | |
| .omada-auto { | |
| width: 132px !important; | |
| flex: 0 0 132px !important; | |
| display: flex !important; | |
| align-items: center !important; | |
| position: relative !important; | |
| border-radius: 999px !important; | |
| overflow: visible !important; | |
| } | |
| .omada-auto button { | |
| height: 36px !important; | |
| min-height: 36px !important; | |
| width: 100% !important; | |
| font-size: 0.9rem !important; | |
| padding: 0 12px !important; | |
| background: rgba(255, 255, 255, 0.24) !important; | |
| border: 1px solid rgba(171, 188, 214, 0.42) !important; | |
| color: var(--omada-text-primary) !important; | |
| border-radius: 999px !important; | |
| appearance: none !important; | |
| -webkit-appearance: none !important; | |
| -moz-appearance: none !important; | |
| text-align: left !important; | |
| } | |
| .omada-auto svg, | |
| .omada-auto .wrap > svg, | |
| .omada-auto .dropdown-arrow { | |
| display: none !important; | |
| } | |
| .omada-plus-btn button, | |
| .omada-send-btn button { | |
| flex: 0 0 auto !important; | |
| } | |
| .omada-input-row .gradio-textbox { | |
| width: 100% !important; | |
| flex: 1 1 auto !important; | |
| min-width: 0 !important; | |
| opacity: 1 !important; | |
| pointer-events: auto !important; | |
| background: transparent !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| } | |
| .omada-input-row .gradio-textbox > div, | |
| .omada-input-row .gradio-dropdown, | |
| .omada-input-row .gradio-dropdown > div, | |
| .omada-plus-btn, | |
| .omada-send-btn, | |
| .omada-auto { | |
| background: transparent !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| } | |
| .omada-send-btn { | |
| margin-left: -2px !important; | |
| } | |
| .omada-input-row .gradio-textbox textarea { | |
| width: 100% !important; | |
| display: block !important; | |
| pointer-events: auto !important; | |
| opacity: 1 !important; | |
| cursor: text !important; | |
| } | |
| .omada-panel-backdrop { | |
| position: fixed !important; | |
| inset: 0 !important; | |
| background: rgba(255, 255, 255, 0.22) !important; | |
| backdrop-filter: blur(12px) saturate(120%) !important; | |
| -webkit-backdrop-filter: blur(12px) saturate(120%) !important; | |
| z-index: 1100 !important; | |
| pointer-events: auto !important; | |
| } | |
| .omada-panel { | |
| position: relative !important; | |
| top: auto !important; | |
| left: auto !important; | |
| transform: none !important; | |
| max-height: none !important; | |
| overflow: visible !important; | |
| width: min(980px, calc(100vw - 24px)); | |
| margin: 0 auto 14px auto; | |
| box-shadow: 0 20px 60px rgba(0,0,0,0.12); | |
| z-index: 9999; | |
| pointer-events: auto !important; | |
| isolation: isolate; | |
| } | |
| .omada-controls-safe { | |
| position: fixed !important; | |
| left: 50% !important; | |
| top: 50% !important; | |
| transform: translate(-50%, -50%) !important; | |
| width: min(980px, calc(100vw - 36px)) !important; | |
| max-height: min(82vh, 900px) !important; | |
| overflow: auto !important; | |
| margin: 0 !important; | |
| z-index: 1200 !important; | |
| border-radius: 34px !important; | |
| } | |
| .omada-panel * { | |
| pointer-events: auto; | |
| } | |
| .omada-panel input, | |
| .omada-panel select, | |
| .omada-panel textarea, | |
| .omada-panel button, | |
| .omada-panel .gradio-slider, | |
| .omada-panel .gradio-slider * { | |
| pointer-events: auto !important; | |
| } | |
| .omada-panel .gradio-radio, | |
| .omada-panel .gradio-radio label, | |
| .omada-panel .gradio-radio input { | |
| pointer-events: auto !important; | |
| cursor: pointer !important; | |
| } | |
| .omada-panel .gradio-radio { | |
| position: relative !important; | |
| z-index: 300 !important; | |
| } | |
| .omada-panel .gradio-slider, | |
| .omada-panel .gradio-slider .wrap, | |
| .omada-panel .gradio-slider .wrap-inner, | |
| .omada-panel .gradio-slider input[type="range"], | |
| .omada-panel .gradio-slider input[type="number"], | |
| .omada-panel .gradio-dropdown, | |
| .omada-panel .gradio-dropdown select, | |
| .omada-panel .gradio-textbox textarea { | |
| pointer-events: auto !important; | |
| position: relative !important; | |
| z-index: 400 !important; | |
| } | |
| .omada-panel .gradio-slider input[type="range"] { | |
| touch-action: pan-x !important; | |
| } | |
| .omada-panel .gradio-dropdown, | |
| .omada-panel .gradio-dropdown .wrap { | |
| z-index: 1000 !important; | |
| } | |
| .gradio-dropdown .options, | |
| .gradio-dropdown .wrap .options { | |
| z-index: 2000 !important; | |
| } | |
| .gradio-container .input-status, | |
| .gradio-container .status, | |
| .gradio-container .status-dot, | |
| .gradio-container .status-indicator, | |
| .gradio-container .label-wrap .status, | |
| .gradio-container .label-wrap .status-dot { | |
| display: none !important; | |
| } | |
| .omada-chatbot { | |
| background: transparent !important; | |
| border: none !important; | |
| position: relative !important; | |
| z-index: 1 !important; | |
| } | |
| .gradio-chatbot .message { | |
| border-radius: 18px !important; | |
| } | |
| .gradio-chatbot .message.user { | |
| margin-left: auto !important; | |
| background: #2e3037 !important; | |
| color: var(--omada-text-primary) !important; | |
| pointer-events: auto !important; | |
| } | |
| .gradio-chatbot .message.bot { | |
| margin-right: auto !important; | |
| background: #22242a !important; | |
| color: var(--omada-text-primary) !important; | |
| pointer-events: auto !important; | |
| } | |
| .gradio-chatbot .message.user *, | |
| .gradio-chatbot .message.bot * { | |
| pointer-events: auto !important; | |
| } | |
| .omada-panel { | |
| background: var(--omada-dark-panel); | |
| border: 1px solid var(--omada-dark-border); | |
| border-radius: 16px; | |
| padding: 16px; | |
| } | |
| .omada-chip button { | |
| border-radius: 999px !important; | |
| background: linear-gradient(160deg, rgba(255,255,255,0.62), rgba(255,255,255,0.36)) !important; | |
| color: #22324a !important; | |
| border: 1px solid rgba(255,255,255,0.72) !important; | |
| font-size: 0.68rem !important; | |
| line-height: 1.2 !important; | |
| padding: 6px 10px !important; | |
| backdrop-filter: blur(14px) saturate(165%); | |
| -webkit-backdrop-filter: blur(14px) saturate(165%); | |
| box-shadow: 0 8px 20px rgba(36, 56, 92, 0.16) !important; | |
| } | |
| .omada-sample-row .gradio-button, | |
| .omada-sample-row .gradio-button > div, | |
| .omada-sample-row .gradio-button > button { | |
| background: transparent !important; | |
| } | |
| .omada-chip button:hover { | |
| transform: translateY(-1px); | |
| background: linear-gradient(160deg, rgba(255,255,255,0.74), rgba(255,255,255,0.44)) !important; | |
| } | |
| .omada-video-loading { | |
| width: 360px; | |
| max-width: min(42vw, 360px); | |
| min-height: 64px; | |
| border-radius: 12px; | |
| border: 1px solid var(--omada-glass-border); | |
| background: rgba(255,255,255,0.35); | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| font-size: 0.9rem; | |
| color: #304463; | |
| backdrop-filter: blur(10px) saturate(150%); | |
| -webkit-backdrop-filter: blur(10px) saturate(150%); | |
| } | |
| .omada-user-media { | |
| margin-bottom: 6px; | |
| } | |
| .omada-user-media img, | |
| .omada-user-media video { | |
| max-width: 240px; | |
| width: 240px; | |
| max-height: 180px; | |
| object-fit: contain; | |
| border-radius: 10px; | |
| border: 1px solid var(--omada-border); | |
| display: block; | |
| } | |
| .omada-user-media .omada-user-video { | |
| width: 360px; | |
| max-width: min(42vw, 360px); | |
| max-height: 240px; | |
| } | |
| .omada-user-media audio { | |
| width: 360px; | |
| max-width: min(42vw, 360px); | |
| display: block; | |
| } | |
| .omada-response-status { | |
| color: var(--omada-dark-muted) !important; | |
| } | |
| .omada-token-pill { | |
| display: inline-block; | |
| padding: 1px 8px; | |
| margin: 1px 2px; | |
| border-radius: 999px; | |
| border: 1px solid var(--omada-border); | |
| font-size: 0.82em; | |
| line-height: 1.6; | |
| vertical-align: baseline; | |
| background: #f7f8fa; | |
| } | |
| .omada-token-mask { | |
| border-color: #8da2c6; | |
| background: #eef3ff; | |
| color: #1f3d7a; | |
| font-weight: 600; | |
| } | |
| .omada-token-special { | |
| border-color: #c5ccd8; | |
| background: #f3f4f7; | |
| color: #4b5563; | |
| } | |
| /* Apple-like glass look */ | |
| :root { | |
| --omada-surface: #f7faff; | |
| --omada-surface-alt: #f3f8ff; | |
| --omada-glass-bg: rgba(255, 255, 255, 0.62); | |
| --omada-glass-strong: rgba(255, 255, 255, 0.72); | |
| --omada-glass-border: rgba(221, 232, 248, 0.92); | |
| --omada-glass-shadow: 0 14px 34px rgba(136, 162, 196, 0.16); | |
| } | |
| html, body, .gradio-container { | |
| background: | |
| radial-gradient(1200px 520px at 10% -10%, rgba(255,255,255,0.96), rgba(255,255,255,0.78) 48%, rgba(247,251,255,0.96) 100%), | |
| linear-gradient(135deg, #f8fbff 0%, #f3f8ff 45%, #f7fbff 100%) !important; | |
| } | |
| .omada-input-row, | |
| .omada-controls-safe, | |
| .omada-panel, | |
| .gradio-chatbot .message, | |
| .omada-chip button, | |
| .omada-input-row .gradio-textbox textarea, | |
| .omada-plus-btn button, | |
| .omada-send-btn button, | |
| .omada-auto select { | |
| background: var(--omada-glass-bg) !important; | |
| border: 1px solid var(--omada-glass-border) !important; | |
| box-shadow: var(--omada-glass-shadow) !important; | |
| backdrop-filter: blur(22px) saturate(175%); | |
| -webkit-backdrop-filter: blur(22px) saturate(175%); | |
| } | |
| .omada-controls-safe { | |
| padding: 14px 16px !important; | |
| border-radius: 28px !important; | |
| margin: 10px auto 10px auto !important; | |
| } | |
| .omada-controls-safe > div { | |
| padding: 10px 12px !important; | |
| border-radius: 22px !important; | |
| } | |
| .omada-controls-safe .gradio-button, | |
| .omada-controls-safe button, | |
| .omada-controls-safe .gradio-dropdown, | |
| .omada-controls-safe .gradio-textbox, | |
| .omada-controls-safe .gradio-slider { | |
| border-radius: 16px !important; | |
| } | |
| .omada-controls-safe .gradio-button { | |
| border: 1px solid var(--omada-glass-border) !important; | |
| } | |
| .gradio-chatbot .message.user { | |
| background: var(--omada-glass-strong) !important; | |
| color: #1f2937 !important; | |
| } | |
| .gradio-chatbot .message.bot { | |
| background: rgba(255, 255, 255, 0.50) !important; | |
| color: #1f2937 !important; | |
| } | |
| /* Keep generated images crisp (no frosted overlay on image replies) */ | |
| .gradio-chatbot .message { | |
| backdrop-filter: none !important; | |
| -webkit-backdrop-filter: none !important; | |
| } | |
| .gradio-chatbot .message.bot:has(.omada-image-only) { | |
| background: transparent !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| padding: 0 !important; | |
| margin: 0 !important; | |
| } | |
| .omada-image-only { | |
| display: inline-block; | |
| background: transparent !important; | |
| border: 0 !important; | |
| box-shadow: none !important; | |
| padding: 0 !important; | |
| margin: 0 !important; | |
| opacity: 1 !important; | |
| filter: none !important; | |
| } | |
| .gradio-chatbot .message.bot:has(.omada-image-only) *, | |
| .omada-image-only * { | |
| background: transparent !important; | |
| box-shadow: none !important; | |
| filter: none !important; | |
| opacity: 1 !important; | |
| } | |
| .omada-image-status { | |
| margin: 0 0 6px 0 !important; | |
| font-size: 0.85rem !important; | |
| color: #42526b !important; | |
| font-weight: 600 !important; | |
| } | |
| .omada-chip button { | |
| color: #273247 !important; | |
| } | |
| .omada-panel { | |
| border-radius: 28px !important; | |
| padding: 20px !important; | |
| } | |
| .omada-input-row { | |
| border-radius: 999px !important; | |
| } | |
| .omada-main-input, | |
| .omada-main-input * { | |
| pointer-events: auto !important; | |
| } | |
| .omada-main-input textarea, | |
| .omada-main-input input { | |
| pointer-events: auto !important; | |
| position: relative !important; | |
| z-index: 40 !important; | |
| } | |
| .omada-sample-row, | |
| .omada-sample-row * { | |
| pointer-events: auto !important; | |
| } | |
| .omada-sample-row, | |
| .omada-input-row { | |
| position: relative !important; | |
| z-index: 25 !important; | |
| } | |
| .omada-sample-row .gradio-button, | |
| .omada-input-row .gradio-button, | |
| .omada-input-row button { | |
| pointer-events: auto !important; | |
| } | |
| /* Compact controls (keep chat bubbles unchanged) */ | |
| .omada-shell, | |
| .omada-controls-safe, | |
| .omada-input-row, | |
| .omada-sample-row { | |
| font-size: 0.88rem !important; | |
| } | |
| .omada-sample-row { | |
| width: min(980px, calc(100vw - 24px)) !important; | |
| margin: 0 auto 4px auto !important; | |
| gap: 6px !important; | |
| } | |
| .omada-sample-row .gradio-button { | |
| flex: 1 1 0 !important; | |
| max-width: none !important; | |
| } | |
| .omada-chip button { | |
| min-height: 14px !important; | |
| height: 14px !important; | |
| font-size: 0.34rem !important; | |
| line-height: 1.0 !important; | |
| padding: 0 3px !important; | |
| border-radius: 999px !important; | |
| } | |
| .omada-chip button * { | |
| font-size: 0.34rem !important; | |
| line-height: 1.0 !important; | |
| } | |
| .omada-sample-row .omada-chip button, | |
| .omada-sample-row .gradio-button button, | |
| .omada-sample-row .omada-chip button span, | |
| .omada-sample-row .gradio-button button span, | |
| .omada-sample-row .omada-chip button p, | |
| .omada-sample-row .gradio-button button p, | |
| .omada-sample-row .omada-chip button div { | |
| font-size: 0.34rem !important; | |
| line-height: 1.05 !important; | |
| } | |
| /* Force sample chip size against Gradio theme defaults */ | |
| .omada-sample-row .omada-chip, | |
| .omada-sample-row .omada-chip .gradio-button, | |
| .omada-sample-row .omada-chip .gradio-button > div, | |
| .omada-sample-row .omada-chip .gradio-button > button, | |
| .omada-sample-row .omada-chip button { | |
| min-height: 16px !important; | |
| height: 16px !important; | |
| max-height: 16px !important; | |
| padding-top: 0 !important; | |
| padding-bottom: 0 !important; | |
| } | |
| .omada-sample-row .omada-chip button, | |
| .omada-sample-row .omada-chip button span, | |
| .omada-sample-row .omada-chip button p, | |
| .omada-sample-row .omada-chip button div { | |
| font-size: 0.42rem !important; | |
| line-height: 1 !important; | |
| padding: 0 3px !important; | |
| } | |
| .omada-input-row { | |
| padding: 4px 10px !important; | |
| margin: 2px auto 6px !important; | |
| } | |
| .omada-plus-btn button, | |
| .omada-send-btn button { | |
| width: 30px !important; | |
| min-width: 30px !important; | |
| height: 30px !important; | |
| font-size: 1rem !important; | |
| } | |
| .omada-plus-btn, | |
| .omada-send-btn { | |
| flex: 0 0 30px !important; | |
| } | |
| .omada-auto { | |
| width: 104px !important; | |
| flex: 0 0 104px !important; | |
| border-radius: 999px !important; | |
| } | |
| .omada-auto button { | |
| height: 30px !important; | |
| min-height: 30px !important; | |
| font-size: 0.9rem !important; | |
| padding: 0 8px !important; | |
| border-radius: 999px !important; | |
| text-align: left !important; | |
| background: rgba(255, 255, 255, 0.24) !important; | |
| border: 1px solid rgba(171, 188, 214, 0.42) !important; | |
| box-shadow: inset 0 0 0 0.5px rgba(255, 255, 255, 0.45) !important; | |
| } | |
| /* Gradio dropdown text (new/old DOM variants) */ | |
| .omada-auto, | |
| .omada-auto *, | |
| .omada-auto .wrap, | |
| .omada-auto .wrap-inner, | |
| .omada-auto .wrap-inner input, | |
| .omada-auto input, | |
| .omada-auto button, | |
| .omada-auto button span { | |
| font-size: 0.9rem !important; | |
| line-height: 1.0 !important; | |
| } | |
| .omada-input-row .gradio-textbox textarea { | |
| min-height: 30px !important; | |
| padding: 4px 9px !important; | |
| font-size: 0.9rem !important; | |
| background: transparent !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| outline: none !important; | |
| } | |
| .omada-input-row .gradio-textbox > div, | |
| .omada-input-row .gradio-textbox .wrap, | |
| .omada-input-row .gradio-textbox label { | |
| background: transparent !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| } | |
| .omada-main-input, | |
| .omada-main-input > div, | |
| .omada-main-input .wrap, | |
| .omada-main-input .wrap-inner, | |
| .omada-main-input .block, | |
| .omada-main-input .container, | |
| .omada-main-input .scroll-hide, | |
| .omada-main-input .scroll-hide > div, | |
| .omada-main-input [data-testid="textbox"] { | |
| background: transparent !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| outline: none !important; | |
| } | |
| .omada-main-input:focus, | |
| .omada-main-input:focus-within, | |
| .omada-main-input > div:focus, | |
| .omada-main-input > div:focus-within, | |
| .omada-main-input .wrap:focus, | |
| .omada-main-input .wrap:focus-within, | |
| .omada-main-input .wrap-inner:focus, | |
| .omada-main-input .wrap-inner:focus-within, | |
| .omada-main-input textarea:focus, | |
| .omada-main-input textarea:focus-visible { | |
| border: none !important; | |
| box-shadow: none !important; | |
| outline: none !important; | |
| } | |
| .omada-main-input textarea, | |
| .omada-main-input textarea::placeholder { | |
| background: transparent !important; | |
| } | |
| /* hide textbox secondary footer/counter text (e.g., "seconds") */ | |
| .omada-main-input .footer, | |
| .omada-main-input [data-testid="textbox-footer"], | |
| .omada-main-input .char-counter, | |
| .omada-main-input small, | |
| .omada-main-input .secondary-text { | |
| display: none !important; | |
| } | |
| .omada-controls-safe { | |
| padding: 10px 12px !important; | |
| } | |
| .omada-controls-safe > div { | |
| padding: 8px 10px !important; | |
| } | |
| .omada-controls-safe .gradio-button, | |
| .omada-controls-safe button, | |
| .omada-controls-safe .gradio-dropdown, | |
| .omada-controls-safe .gradio-textbox, | |
| .omada-controls-safe .gradio-slider, | |
| .omada-controls-safe label, | |
| .omada-controls-safe p, | |
| .omada-controls-safe span { | |
| font-size: 0.88rem !important; | |
| } | |
| /* modal transparency: outer is whiter, inner is more transparent */ | |
| .omada-controls-safe { | |
| background: linear-gradient(165deg, rgba(255, 255, 255, 0.72), rgba(245, 250, 255, 0.60)) !important; | |
| border: 1px solid rgba(218, 231, 248, 0.90) !important; | |
| box-shadow: 0 18px 42px rgba(123, 150, 188, 0.16) !important; | |
| } | |
| .omada-controls-safe > div, | |
| .omada-controls-safe .gr-box, | |
| .omada-controls-safe .gr-form, | |
| .omada-controls-safe .gr-block, | |
| .omada-controls-safe .gradio-row, | |
| .omada-controls-safe .gradio-column { | |
| background: rgba(255, 255, 255, 0.44) !important; | |
| border-color: rgba(225, 237, 252, 0.86) !important; | |
| box-shadow: none !important; | |
| } | |
| .omada-controls-safe .gradio-button, | |
| .omada-controls-safe button, | |
| .omada-controls-safe .gradio-dropdown, | |
| .omada-controls-safe .gradio-dropdown > div, | |
| .omada-controls-safe .gradio-textbox, | |
| .omada-controls-safe .gradio-textbox > div, | |
| .omada-controls-safe .gradio-slider { | |
| background: rgba(255, 255, 255, 0.56) !important; | |
| border: 1px solid rgba(221, 234, 251, 0.90) !important; | |
| box-shadow: none !important; | |
| } | |
| .omada-controls-safe .gradio-button:hover, | |
| .omada-controls-safe button:hover { | |
| background: rgba(255, 255, 255, 0.72) !important; | |
| } | |
| .omada-controls-safe .primary, | |
| .omada-controls-safe .primary button { | |
| background: linear-gradient(165deg, #4d8ef7, #3f7ee8) !important; | |
| color: #ffffff !important; | |
| border: 1px solid #3d75d8 !important; | |
| box-shadow: 0 8px 20px rgba(69, 126, 233, 0.35) !important; | |
| } | |
| .omada-controls-safe .primary:hover, | |
| .omada-controls-safe .primary button:hover { | |
| background: linear-gradient(165deg, #5b99fb, #4b87ed) !important; | |
| } | |
| .omada-controls-safe .primary:disabled, | |
| .omada-controls-safe .primary button:disabled, | |
| .omada-controls-safe .primary[disabled], | |
| .omada-controls-safe .primary button[disabled] { | |
| opacity: 1 !important; | |
| color: #ffffff !important; | |
| background: linear-gradient(165deg, #4d8ef7, #3f7ee8) !important; | |
| border: 1px solid #3d75d8 !important; | |
| } | |
| .omada-selected-task-chip { | |
| display: inline-flex; | |
| align-items: center; | |
| padding: 6px 12px; | |
| border-radius: 999px; | |
| color: #fff; | |
| background: linear-gradient(165deg, #4d8ef7, #3f7ee8); | |
| border: 1px solid #3d75d8; | |
| font-weight: 700; | |
| box-shadow: 0 8px 18px rgba(69, 126, 233, 0.30); | |
| } | |
| .omada-ready-hero { | |
| position: sticky; | |
| top: clamp(8px, 12vh, 90px); | |
| margin: 0 auto clamp(56px, 10vh, 140px) auto; | |
| width: min(760px, calc(100vw - 48px)); | |
| z-index: 15; | |
| pointer-events: none; | |
| font-size: clamp(1.2rem, 2.2vw, 2rem); | |
| font-weight: 700; | |
| color: #2f3f58; | |
| letter-spacing: -0.01em; | |
| text-align: center; | |
| } | |
| .omada-ready-hero .omada-ready-sub { | |
| margin-top: 5px; | |
| font-size: clamp(0.78rem, 1.2vw, 0.95rem); | |
| font-weight: 500; | |
| color: #5b6e8d; | |
| line-height: 1.4; | |
| } | |
| /* Sample list container style aligned with main input row */ | |
| .omada-sample-row { | |
| width: min(900px, calc(100vw - 80px)) !important; | |
| margin: 0 auto 8px auto !important; | |
| padding: 0 !important; | |
| background: transparent !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| gap: 10px !important; | |
| } | |
| .omada-sample-row .omada-chip button, | |
| .omada-sample-row .omada-chip button span, | |
| .omada-sample-row .omada-chip button p, | |
| .omada-sample-row .omada-chip button div { | |
| font-size: 0.9rem !important; | |
| line-height: 1.15 !important; | |
| font-weight: 700 !important; | |
| } | |
| .omada-sample-row .omada-chip button { | |
| min-height: 34px !important; | |
| height: 34px !important; | |
| padding: 0 12px !important; | |
| border-radius: 999px !important; | |
| background: rgba(255, 255, 255, 0.46) !important; | |
| border: 1px solid rgba(180, 198, 224, 0.45) !important; | |
| box-shadow: 0 6px 14px rgba(120, 148, 186, 0.08) !important; | |
| } | |
| /* final override: keep selector pill with subtle visible border */ | |
| .omada-input-row .omada-auto, | |
| .omada-input-row .omada-auto button { | |
| border-radius: 999px !important; | |
| } | |
| .omada-input-row .omada-auto button { | |
| border: 1px solid rgba(160, 178, 206, 0.72) !important; | |
| box-shadow: | |
| inset 0 0 0 1px rgba(255, 255, 255, 0.52), | |
| 0 1px 4px rgba(125, 146, 176, 0.14) !important; | |
| } | |
| /* hard override: keep subtle border visible for selector pill */ | |
| .omada-input-row .omada-auto, | |
| .omada-input-row .omada-auto > div, | |
| .omada-input-row .omada-auto button, | |
| .omada-input-row .omada-auto .gradio-button, | |
| .omada-input-row .omada-auto .gradio-button > div { | |
| border: 1px solid rgba(160, 178, 206, 0.72) !important; | |
| border-radius: 999px !important; | |
| box-shadow: | |
| inset 0 0 0 1px rgba(255, 255, 255, 0.52), | |
| 0 1px 4px rgba(125, 146, 176, 0.14) !important; | |
| } | |
| /* hard override: sample chip vertical size */ | |
| .omada-sample-row .gradio-button { | |
| flex: 0 0 auto !important; | |
| } | |
| .omada-sample-row .omada-chip, | |
| .omada-sample-row .omada-chip .gradio-button, | |
| .omada-sample-row .omada-chip .gradio-button > div, | |
| .omada-sample-row .omada-chip button { | |
| min-width: 120px !important; | |
| width: 120px !important; | |
| max-width: 120px !important; | |
| min-height: 40px !important; | |
| height: 40px !important; | |
| max-height: 40px !important; | |
| border-radius: 999px !important; | |
| } | |
| .omada-input-stack { | |
| width: min(980px, calc(100vw - 24px)); | |
| margin: 0 auto 6px auto; | |
| position: sticky; | |
| bottom: 6px; | |
| z-index: 40; | |
| } | |
| .omada-input-row { | |
| display: flex !important; | |
| flex-direction: column !important; | |
| justify-content: flex-end !important; | |
| align-items: stretch !important; | |
| gap: 6px !important; | |
| padding: 10px 12px !important; | |
| border-radius: 36px !important; | |
| min-height: 110px; | |
| } | |
| /* final layout override */ | |
| .omada-bottom-row { | |
| display: flex !important; | |
| flex-wrap: nowrap !important; | |
| align-items: flex-end !important; | |
| justify-content: flex-start !important; | |
| gap: 8px !important; | |
| } | |
| .omada-bottom-row > * { | |
| flex: 0 0 auto !important; | |
| margin: 0 !important; | |
| } | |
| .omada-bottom-row .omada-main-input { | |
| flex: 1 1 auto !important; | |
| margin: -10px !important; | |
| min-width: 0 !important; | |
| width: auto !important; | |
| max-width: none !important; | |
| } | |
| .omada-bottom-row .omada-main-input, | |
| .omada-bottom-row .omada-main-input > div, | |
| .omada-bottom-row .omada-main-input .wrap, | |
| .omada-bottom-row .omada-main-input .wrap-inner, | |
| .omada-bottom-row .omada-main-input .block, | |
| .omada-bottom-row .omada-main-input [data-testid="textbox"], | |
| .omada-bottom-row .omada-main-input .scroll-hide, | |
| .omada-bottom-row .omada-main-input .scroll-hide > div { | |
| background: transparent !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| width: auto !important; | |
| min-width: 0 !important; | |
| } | |
| .omada-bottom-row .omada-main-input textarea { | |
| background: transparent !important; | |
| border: none !important; | |
| border-radius: 14px !important; | |
| box-shadow: none !important; | |
| } | |
| .omada-bottom-row .omada-send-btn { | |
| margin-left: auto !important; | |
| flex: 0 0 auto !important; | |
| align-self: flex-end !important; | |
| } | |
| .omada-bottom-row .omada-send-btn > div { | |
| width: 42px !important; | |
| min-width: 42px !important; | |
| } | |
| .omada-bottom-row .omada-send-btn button, | |
| .omada-bottom-row .omada-send-btn > div > button { | |
| width: 42px !important; | |
| min-width: 42px !important; | |
| height: 42px !important; | |
| display: flex !important; | |
| align-items: center !important; | |
| justify-content: center !important; | |
| border-radius: 999px !important; | |
| border: 2px solid #4f96ff !important; | |
| background: rgba(130, 196, 255, 0.28) !important; | |
| color: #1153a6 !important; | |
| box-shadow: inset 0 0 0 1px rgba(214, 239, 255, 0.88), 0 0 0 1px rgba(79, 150, 255, 0.28) !important; | |
| } | |
| .omada-bottom-row .omada-send-btn button:hover { | |
| background: rgba(130, 196, 255, 0.38) !important; | |
| } | |
| /* scope input-row cleanup to avoid wrapper layout breakage */ | |
| .omada-input-row .omada-plus-btn, | |
| .omada-input-row .omada-task-btn, | |
| .omada-input-row .omada-custom-btn, | |
| .omada-input-row .omada-task-chip-inline, | |
| .omada-input-row .omada-main-input, | |
| .omada-input-row .omada-send-btn { | |
| margin: 0 !important; | |
| } | |
| .omada-attach-preview-wrap { | |
| min-height: 0 !important; | |
| width: 100% !important; | |
| margin: 0 !important; | |
| padding: 0 !important; | |
| } | |
| .omada-attach-preview-wrap .html-container { | |
| min-height: 0 !important; | |
| height: auto !important; | |
| padding: 0 !important; | |
| margin: 0 !important; | |
| } | |
| .omada-attach-preview-wrap .html-container:empty { | |
| display: none !important; | |
| } | |
| .omada-attach-preview { | |
| display: flex; | |
| align-items: center; | |
| gap: 8px; | |
| justify-content: flex-start; | |
| min-height: 0; | |
| flex-wrap: wrap; | |
| } | |
| .omada-attach-preview.omada-empty { | |
| display: none !important; | |
| } | |
| .omada-attach-preview-wrap:has(.omada-attach-preview.omada-empty) { | |
| display: none !important; | |
| } | |
| .omada-attach-item { | |
| display: inline-flex; | |
| flex-direction: column; | |
| align-items: center; | |
| justify-content: center; | |
| gap: 4px; | |
| padding: 6px; | |
| border-radius: 10px; | |
| background: rgba(255, 255, 255, 0.46); | |
| border: 1px solid rgba(190, 208, 234, 0.65); | |
| min-width: 66px; | |
| } | |
| .omada-attach-item img { | |
| width: 54px; | |
| height: 42px; | |
| border-radius: 8px; | |
| object-fit: cover; | |
| } | |
| .omada-attach-item span { | |
| font-size: 0.68rem; | |
| color: #2f4568; | |
| font-weight: 600; | |
| } | |
| .omada-attach-audio { | |
| flex-direction: row; | |
| min-width: 140px; | |
| padding: 10px 12px; | |
| } | |
| .omada-audio-icon { | |
| font-size: 0.95rem; | |
| } | |
| .omada-task-btn, | |
| .omada-custom-btn { | |
| flex: 0 0 auto !important; | |
| } | |
| .omada-task-btn button, | |
| .omada-custom-btn button { | |
| height: 30px !important; | |
| min-height: 30px !important; | |
| border-radius: 999px !important; | |
| font-size: 0.72rem !important; | |
| font-weight: 700 !important; | |
| padding: 0 10px !important; | |
| background: transparent !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| } | |
| .omada-plus-btn button { | |
| background: transparent !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| } | |
| .omada-task-chip-inline { | |
| flex: 0 0 auto !important; | |
| min-width: 0 !important; | |
| } | |
| .omada-task-chip-inline .omada-selected-task-chip { | |
| padding: 2px 8px !important; | |
| font-size: 0.72rem !important; | |
| box-shadow: none; | |
| margin-left: -2px !important; | |
| } | |
| /* keep sample chips/task-custom typography aligned */ | |
| .omada-sample-row .omada-chip button, | |
| .omada-sample-row .omada-chip button span, | |
| .omada-sample-row .omada-chip button p, | |
| .omada-sample-row .omada-chip button div, | |
| .omada-task-btn button, | |
| .omada-custom-btn button, | |
| .omada-task-chip-inline .omada-selected-task-chip { | |
| font-size: 0.72rem !important; | |
| line-height: 1.05 !important; | |
| } | |
| /* ========================= | |
| FIX: composer layout (textbox above buttons) | |
| ========================= */ | |
| .omada-input-row { | |
| width: 100% !important; | |
| position: relative !important; | |
| justify-content: flex-start !important; | |
| align-items: stretch !important; | |
| gap: 10px !important; | |
| padding-right: 76px !important; /* room for absolute send button */ | |
| } | |
| .omada-main-input, | |
| .omada-main-input > div, | |
| .omada-main-input .wrap, | |
| .omada-main-input .wrap-inner, | |
| .omada-main-input [data-testid="textbox"] { | |
| width: 100% !important; | |
| max-width: 100% !important; | |
| } | |
| .omada-main-input textarea { | |
| width: 100% !important; | |
| min-height: 34px !important; | |
| background: transparent !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| } | |
| .omada-bottom-row { | |
| width: 100% !important; | |
| max-width: 100% !important; | |
| display: flex !important; | |
| flex-wrap: nowrap !important; | |
| justify-content: flex-start !important; | |
| align-items: center !important; | |
| gap: 8px !important; | |
| margin-top: -5px !important; | |
| } | |
| .omada-bottom-row > * { | |
| min-width: 0 !important; | |
| flex: 0 0 auto !important; | |
| } | |
| .omada-send-btn { | |
| position: absolute !important; | |
| right: 16px !important; | |
| top: 50% !important; | |
| transform: translateY(-50%) !important; | |
| margin: 0 !important; | |
| } | |
| .omada-send-btn > div { | |
| width: 46px !important; | |
| min-width: 46px !important; | |
| } | |
| .omada-send-btn button, | |
| .omada-send-btn > div > button { | |
| width: 46px !important; | |
| min-width: 46px !important; | |
| height: 46px !important; | |
| border-radius: 999px !important; | |
| border: 1.8px solid rgba(98, 170, 255, 0.95) !important; | |
| background: linear-gradient(160deg, rgba(185, 224, 255, 0.40), rgba(118, 179, 255, 0.26)) !important; | |
| box-shadow: | |
| inset 0 0 0 1px rgba(229, 245, 255, 0.95), | |
| 0 10px 22px rgba(79, 146, 255, 0.24) !important; | |
| color: #0d4fa7 !important; | |
| font-size: 1.75rem !important; | |
| font-weight: 800 !important; | |
| line-height: 1 !important; | |
| display: flex !important; | |
| align-items: center !important; | |
| justify-content: center !important; | |
| } | |
| .omada-send-btn button:hover { | |
| background: linear-gradient(160deg, rgba(196, 230, 255, 0.52), rgba(129, 186, 255, 0.34)) !important; | |
| } | |
| /* ========================= | |
| Task / Custom / Chat: same font and borderless | |
| ========================= */ | |
| .omada-task-btn button, | |
| .omada-custom-btn button { | |
| font-size: 0.37rem !important; | |
| line-height: 1.0 !important; | |
| font-weight: 700 !important; | |
| height: 30px !important; | |
| min-height: 30px !important; | |
| padding: 0 10px !important; | |
| border-radius: 999px !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| background: transparent !important; | |
| color: #22324a !important; | |
| } | |
| /* keep selected task chip blue, but match typography */ | |
| .omada-task-chip-inline .omada-selected-task-chip { | |
| font-size: 0.78rem !important; | |
| line-height: 1.0 !important; | |
| padding: 5px 11px !important; | |
| background: linear-gradient(165deg, #4d8ef7, #3f7ee8) !important; | |
| color: #ffffff !important; | |
| border: 1px solid #3d75d8 !important; | |
| box-shadow: 0 6px 14px rgba(69, 126, 233, 0.28) !important; | |
| } | |
| .omada-task-btn button *, | |
| .omada-custom-btn button * { | |
| font-size: 0.37rem !important; | |
| line-height: 1.0 !important; | |
| } | |
| /* hard-fix: keep chat/custom glued together on left */ | |
| .omada-bottom-row .omada-task-chip-inline, | |
| .omada-bottom-row .omada-task-chip-inline > div, | |
| .omada-bottom-row .omada-task-chip-inline > div > div, | |
| .omada-bottom-row .omada-custom-btn, | |
| .omada-bottom-row .omada-custom-btn > div { | |
| display: inline-flex !important; | |
| width: auto !important; | |
| max-width: max-content !important; | |
| flex: 0 0 auto !important; | |
| margin-left: 0 !important; | |
| } | |
| /* hard-fix: if legacy send exists in row, hide it */ | |
| .omada-bottom-row .omada-send-btn { | |
| display: none !important; | |
| } | |
| /* hard-fix: dedicated send button on right-bottom */ | |
| .omada-input-row .omada-send-btn-fix { | |
| display: inline-flex !important; | |
| position: absolute !important; | |
| right: 14px !important; | |
| bottom: 14px !important; | |
| top: auto !important; | |
| left: auto !important; | |
| transform: none !important; | |
| z-index: 60 !important; | |
| width: 44px !important; | |
| min-width: 44px !important; | |
| max-width: 44px !important; | |
| flex: 0 0 44px !important; | |
| margin: 0 !important; | |
| border-radius: 999px !important; | |
| } | |
| .omada-input-row .omada-send-btn-fix::before { | |
| content: "" !important; | |
| position: absolute !important; | |
| inset: -3px !important; | |
| border-radius: 999px !important; | |
| border: 1.6px solid rgba(168, 216, 255, 0.92) !important; | |
| background: radial-gradient(circle at 30% 20%, rgba(226, 246, 255, 0.40), rgba(170, 213, 255, 0.18)) !important; | |
| box-shadow: 0 8px 20px rgba(78, 143, 236, 0.22) !important; | |
| pointer-events: none !important; | |
| z-index: -1 !important; | |
| } | |
| .omada-input-row .omada-send-btn-fix > div { | |
| width: 44px !important; | |
| min-width: 44px !important; | |
| max-width: 44px !important; | |
| flex: 0 0 44px !important; | |
| } | |
| .omada-input-row .omada-send-btn-fix button, | |
| .omada-input-row .omada-send-btn-fix > div > button { | |
| width: 44px !important; | |
| min-width: 44px !important; | |
| max-width: 44px !important; | |
| height: 44px !important; | |
| border-radius: 999px !important; | |
| border: 2px solid rgba(98, 170, 255, 0.98) !important; | |
| background: linear-gradient(160deg, rgba(185, 224, 255, 0.46), rgba(118, 179, 255, 0.30)) !important; | |
| box-shadow: inset 0 0 0 1px rgba(229, 245, 255, 0.95), 0 10px 22px rgba(79, 146, 255, 0.24) !important; | |
| color: #0d4fa7 !important; | |
| display: flex !important; | |
| align-items: center !important; | |
| justify-content: center !important; | |
| padding: 0 !important; | |
| } | |
| .omada-input-row .omada-send-btn-fix button span, | |
| .omada-input-row .omada-send-btn-fix button p { | |
| font-size: 1.6rem !important; | |
| line-height: 1 !important; | |
| font-weight: 800 !important; | |
| } | |
| /* remove the textbox's inner gray action buttons */ | |
| .omada-main-input [class*="icon"], | |
| .omada-main-input button, | |
| .omada-main-input [role="button"] { | |
| display: none !important; | |
| } | |
| .omada-main-input textarea { | |
| display: block !important; | |
| } | |
| /* final send border lock */ | |
| .omada-input-row .omada-send-btn-fix button, | |
| .omada-input-row .omada-send-btn-fix > div > button { | |
| border: 1.8px solid rgba(116, 182, 248, 1) !important; | |
| outline: 1px solid rgba(198, 229, 255, 0.95) !important; | |
| border-radius: 999px !important; | |
| } | |
| .omada-input-row .omada-send-btn-fix::before { | |
| border: 1.2px solid rgba(153, 207, 255, 0.9) !important; | |
| } | |
| /* compact vertical height */ | |
| .omada-input-row { | |
| min-height: 62px !important; | |
| padding-top: 0 !important; | |
| padding-bottom: 1px !important; | |
| gap: 0 !important; | |
| } | |
| .omada-main-input { | |
| margin-top: 0 !important; | |
| transform: translateY(-10px) !important; | |
| } | |
| .omada-main-input textarea { | |
| min-height: 26px !important; | |
| padding-top: 0 !important; | |
| padding-bottom: 1px !important; | |
| } | |
| /* outer container: slightly less rounded */ | |
| .omada-input-row { | |
| border-radius: 28px !important; | |
| } | |
| /* bigger plus button */ | |
| .omada-bottom-row .omada-plus-btn button { | |
| width: 46px !important; | |
| min-width: 46px !important; | |
| height: 46px !important; | |
| font-size: 1.9rem !important; | |
| font-weight: 800 !important; | |
| } | |
| /* final typography lock */ | |
| .omada-task-btn button, | |
| .omada-custom-btn button, | |
| .omada-task-btn button *, | |
| .omada-custom-btn button * { | |
| font-size: 0.46rem !important; | |
| line-height: 1 !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| } | |
| .omada-task-chip-inline .omada-selected-task-chip { | |
| font-size: 0.9rem !important; | |
| line-height: 1 !important; | |
| padding: 8px 14px !important; | |
| } | |
| /* ultimate final lock */ | |
| .omada-bottom-row .omada-task-btn, | |
| .omada-bottom-row .omada-task-btn > div, | |
| .omada-bottom-row .omada-task-btn .gradio-button, | |
| .omada-bottom-row .omada-task-btn .gradio-button > div, | |
| .omada-bottom-row .omada-custom-btn, | |
| .omada-bottom-row .omada-custom-btn > div, | |
| .omada-bottom-row .omada-custom-btn .gradio-button, | |
| .omada-bottom-row .omada-custom-btn .gradio-button > div { | |
| border: none !important; | |
| box-shadow: none !important; | |
| background: transparent !important; | |
| } | |
| .omada-bottom-row .omada-task-btn button, | |
| .omada-bottom-row .omada-task-btn > div > button, | |
| .omada-bottom-row .omada-custom-btn button, | |
| .omada-bottom-row .omada-custom-btn > div > button, | |
| .omada-bottom-row .omada-task-btn button *, | |
| .omada-bottom-row .omada-custom-btn button * { | |
| font-size: 0.42rem !important; | |
| line-height: 1 !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| background: transparent !important; | |
| } | |
| .omada-bottom-row .omada-task-chip-inline .omada-selected-task-chip { | |
| font-size: 0.7rem !important; | |
| line-height: 1 !important; | |
| padding: 9px 15px !important; | |
| } | |
| .omada-bottom-row .omada-plus-btn button, | |
| .omada-bottom-row .omada-plus-btn > div > button { | |
| width: 74px !important; | |
| min-width: 74px !important; | |
| height: 74px !important; | |
| font-size: 3rem !important; | |
| font-weight: 900 !important; | |
| } | |
| .omada-input-row .omada-send-btn-fix button, | |
| .omada-input-row .omada-send-btn-fix > div > button { | |
| border: none !important; | |
| outline: none !important; | |
| border-radius: 999px !important; | |
| background: rgba(167, 214, 255, 0.88) !important; | |
| box-shadow: 0 4px 9px rgba(95, 156, 232, 0.15) !important; | |
| } | |
| .omada-input-row .omada-send-btn-fix::before { | |
| content: "" !important; | |
| position: absolute !important; | |
| inset: -4px !important; | |
| border-radius: 999px !important; | |
| border: none !important; | |
| background: rgba(206, 234, 255, 0.35) !important; | |
| box-shadow: 0 4px 10px rgba(104, 165, 236, 0.14) !important; | |
| pointer-events: none !important; | |
| z-index: -1 !important; | |
| } | |
| /* bigger placeholder + sample task chip text */ | |
| .omada-main-input textarea, | |
| .omada-main-input textarea::placeholder { | |
| font-size: 1.05rem !important; | |
| } | |
| .omada-sample-row .omada-chip button, | |
| .omada-sample-row .omada-chip button span, | |
| .omada-sample-row .omada-chip button p, | |
| .omada-sample-row .omada-chip button div { | |
| font-size: 1.22rem !important; | |
| line-height: 1.14 !important; | |
| } | |
| .omada-sample-row .omada-chip .gradio-button > button, | |
| .omada-sample-row .omada-chip .gradio-button > button span, | |
| .omada-sample-row .omada-chip .gradio-button > button p, | |
| .omada-sample-row .omada-chip .gradio-button > button div { | |
| font-size: 1.22rem !important; | |
| line-height: 1.14 !important; | |
| } | |
| /* absolute final lock: plus + sample text size */ | |
| .omada-input-row .omada-plus-btn, | |
| .omada-input-row .omada-plus-btn > div, | |
| .omada-input-row .omada-plus-btn .gradio-button, | |
| .omada-input-row .omada-plus-btn .gradio-button > div, | |
| .omada-input-row .omada-plus-btn button { | |
| width: 82px !important; | |
| min-width: 82px !important; | |
| max-width: 82px !important; | |
| height: 82px !important; | |
| min-height: 82px !important; | |
| max-height: 82px !important; | |
| } | |
| .omada-input-row .omada-plus-btn button, | |
| .omada-input-row .omada-plus-btn button span, | |
| .omada-input-row .omada-plus-btn button p, | |
| .omada-input-row .omada-plus-btn button div { | |
| font-size: 3.5rem !important; | |
| line-height: 1 !important; | |
| font-weight: 900 !important; | |
| } | |
| .omada-sample-row .gradio-button button, | |
| .omada-sample-row .gradio-button button span, | |
| .omada-sample-row .gradio-button button p, | |
| .omada-sample-row .gradio-button button div, | |
| .omada-sample-row .omada-chip button, | |
| .omada-sample-row .omada-chip button span, | |
| .omada-sample-row .omada-chip button p, | |
| .omada-sample-row .omada-chip button div { | |
| font-size: 1.34rem !important; | |
| line-height: 1.16 !important; | |
| font-weight: 700 !important; | |
| } | |
| /* ultra final force: make plus/sample visibly bigger */ | |
| .omada-input-row .omada-plus-btn button { | |
| transform: scale(1.18) !important; | |
| transform-origin: center center !important; | |
| } | |
| .omada-sample-row .gradio-button > button, | |
| .omada-sample-row .gradio-button > button * { | |
| font-size: 1.42rem !important; | |
| line-height: 1.18 !important; | |
| font-weight: 700 !important; | |
| } | |
| /* terminal hard override: controls size + hero position */ | |
| .omada-input-row .omada-bottom-row .omada-plus-btn, | |
| .omada-input-row .omada-bottom-row .omada-plus-btn > div, | |
| .omada-input-row .omada-bottom-row .omada-plus-btn .gradio-button, | |
| .omada-input-row .omada-bottom-row .omada-plus-btn .gradio-button > div, | |
| .omada-input-row .omada-bottom-row .omada-plus-btn button { | |
| width: 40px !important; | |
| min-width: 40px !important; | |
| max-width: 40px !important; | |
| height: 40px !important; | |
| min-height: 40px !important; | |
| max-height: 40px !important; | |
| } | |
| .omada-input-row .omada-bottom-row .omada-plus-btn button, | |
| .omada-input-row .omada-bottom-row .omada-plus-btn button span, | |
| .omada-input-row .omada-bottom-row .omada-plus-btn button p, | |
| .omada-input-row .omada-bottom-row .omada-plus-btn button div { | |
| font-size: 4.2rem !important; | |
| line-height: 1 !important; | |
| font-weight: 900 !important; | |
| } | |
| .omada-input-row .omada-bottom-row .omada-task-btn button, | |
| .omada-input-row .omada-bottom-row .omada-custom-btn button, | |
| .omada-input-row .omada-bottom-row .omada-task-btn button *, | |
| .omada-input-row .omada-bottom-row .omada-custom-btn button * { | |
| font-size: 0.6rem !important; | |
| line-height: 1.1 !important; | |
| height: 42px !important; | |
| min-height: 42px !important; | |
| padding: 0 14px !important; | |
| } | |
| .omada-ready-hero { | |
| top: 0 !important; | |
| margin-top: -36px !important; | |
| } | |
| /* move bottom controls lower */ | |
| .omada-bottom-row { | |
| margin-top: 14px !important; | |
| } | |
| /* absolute terminal lock v2 */ | |
| [class*="omada-plus-btn"] button { | |
| width: 96px !important; | |
| min-width: 96px !important; | |
| height: 96px !important; | |
| min-height: 96px !important; | |
| font-size: 4.2rem !important; | |
| line-height: 1 !important; | |
| font-weight: 900 !important; | |
| } | |
| [class*="omada-task-btn"] button, | |
| [class*="omada-custom-btn"] button, | |
| [class*="omada-task-btn"] button *, | |
| [class*="omada-custom-btn"] button * { | |
| font-size: 0.8rem !important; | |
| line-height: 1.1 !important; | |
| height: 42px !important; | |
| min-height: 42px !important; | |
| } | |
| .omada-ready-hero { | |
| top: -8px !important; | |
| margin-top: -56px !important; | |
| } | |
| /* non-negotiable final override */ | |
| .gradio-container .omada-ready-hero { | |
| top: -20px !important; | |
| margin-top: -88px !important; | |
| } | |
| .gradio-container .omada-input-row { | |
| min-height: 0 !important; | |
| padding-top: 0 !important; | |
| padding-bottom: 0 !important; | |
| gap: 0 !important; | |
| } | |
| .gradio-container .omada-main-input { | |
| margin-top: 0 !important; | |
| margin-bottom: -2px !important; | |
| transform: translateY(-4px) !important; | |
| } | |
| .gradio-container .omada-main-input textarea, | |
| .gradio-container .omada-main-input textarea::placeholder { | |
| min-height: 34px !important; | |
| line-height: 1.25 !important; | |
| padding-top: 2px !important; | |
| padding-bottom: 2px !important; | |
| font-size: 1.08rem !important; | |
| } | |
| .gradio-container .omada-bottom-row { | |
| margin-top: -2px !important; | |
| gap: 4px !important; | |
| align-items: center !important; | |
| } | |
| .gradio-container .omada-bottom-row .omada-plus-btn button, | |
| .gradio-container .omada-bottom-row .omada-plus-btn > div > button { | |
| width: 84px !important; | |
| min-width: 84px !important; | |
| height: 84px !important; | |
| min-height: 84px !important; | |
| font-size: 3.7rem !important; | |
| } | |
| .gradio-container .omada-bottom-row .omada-task-btn button, | |
| .gradio-container .omada-bottom-row .omada-custom-btn button, | |
| .gradio-container .omada-bottom-row .omada-task-btn button *, | |
| .gradio-container .omada-bottom-row .omada-custom-btn button * { | |
| font-size: 0.6rem !important; | |
| height: 46px !important; | |
| min-height: 46px !important; | |
| line-height: 1.08 !important; | |
| padding: 0 15px !important; | |
| } | |
| /* compact composer height + tighter spacing */ | |
| .omada-input-row { | |
| min-height: 136px !important; | |
| padding-top: 6px !important; | |
| padding-bottom: 8px !important; | |
| gap: 2px !important; | |
| } | |
| .omada-main-input { | |
| transform: none !important; | |
| margin-top: 0 !important; | |
| margin-bottom: 0 !important; | |
| } | |
| .omada-main-input textarea { | |
| min-height: 56px !important; | |
| padding-top: 6px !important; | |
| padding-bottom: 4px !important; | |
| line-height: 1.2 !important; | |
| font-size: 1.16rem !important; | |
| } | |
| .omada-bottom-row { | |
| margin-top: 18px !important; | |
| } | |
| /* final lock: prevent placeholder/text clipping */ | |
| .gradio-container .omada-main-input, | |
| .gradio-container .omada-main-input > div, | |
| .gradio-container .omada-main-input .wrap, | |
| .gradio-container .omada-main-input .wrap-inner { | |
| overflow: visible !important; | |
| } | |
| .gradio-container .omada-main-input textarea, | |
| .gradio-container .omada-main-input textarea::placeholder { | |
| min-height: 64px !important; | |
| height: auto !important; | |
| padding: 8px 8px 4px 8px !important; | |
| font-size: 1.08rem !important; | |
| line-height: 1.25 !important; | |
| box-sizing: border-box !important; | |
| overflow: visible !important; | |
| } | |
| .gradio-container .omada-main-input textarea::placeholder { | |
| min-height: unset !important; | |
| height: auto !important; | |
| padding: 0 !important; | |
| font-size: 1.08rem !important; | |
| line-height: 1.35 !important; | |
| } | |
| /* final alignment lock: controls and send on one horizontal line */ | |
| .gradio-container .omada-input-row { | |
| padding-bottom: 0px !important; | |
| } | |
| .gradio-container .omada-main-input { | |
| margin-bottom: 0 !important; | |
| } | |
| .gradio-container .omada-main-input textarea { | |
| padding-bottom: 0 !important; | |
| max-height: 72px !important; | |
| overflow-y: auto !important; | |
| } | |
| .gradio-container .omada-bottom-row { | |
| position: static !important; | |
| left: auto !important; | |
| right: auto !important; | |
| bottom: auto !important; | |
| margin-top: 0 !important; | |
| transform: none !important; | |
| align-items: flex-end !important; | |
| justify-content: flex-start !important; | |
| gap: 14px !important; | |
| z-index: auto !important; | |
| } | |
| .gradio-container .omada-bottom-row > * { | |
| align-self: flex-end !important; | |
| } | |
| .gradio-container .omada-send-btn-fix, | |
| .gradio-container .omada-send-btn { | |
| position: static !important; | |
| right: auto !important; | |
| bottom: auto !important; | |
| margin-left: auto !important; | |
| margin-top: 0 !important; | |
| align-self: flex-end !important; | |
| transform: none !important; | |
| z-index: auto !important; | |
| } | |
| /* final lock: normalize plus button size */ | |
| .gradio-container .omada-bottom-row { | |
| min-height: 0 !important; | |
| height: auto !important; | |
| align-items: center !important; | |
| } | |
| .gradio-container .omada-bottom-row > * { | |
| min-height: 0 !important; | |
| height: auto !important; | |
| align-self: center !important; | |
| } | |
| .gradio-container .omada-bottom-row .omada-send-btn, | |
| .gradio-container .omada-bottom-row .omada-send-btn-fix { | |
| margin-left: auto !important; | |
| } | |
| .gradio-container .omada-bottom-row [class*="omada-plus-btn"], | |
| .gradio-container .omada-bottom-row [class*="omada-plus-btn"] > div, | |
| .gradio-container .omada-bottom-row [class*="omada-plus-btn"] .gradio-button, | |
| .gradio-container .omada-bottom-row [class*="omada-plus-btn"] .gradio-button > div, | |
| .gradio-container .omada-bottom-row [class*="omada-plus-btn"] button { | |
| flex: 0 0 48px !important; | |
| width: 48px !important; | |
| min-width: 48px !important; | |
| max-width: 48px !important; | |
| height: 48px !important; | |
| min-height: 48px !important; | |
| max-height: 48px !important; | |
| padding: 0 !important; | |
| } | |
| .gradio-container .omada-bottom-row [class*="omada-plus-btn"] button, | |
| .gradio-container .omada-bottom-row [class*="omada-plus-btn"] button span, | |
| .gradio-container .omada-bottom-row [class*="omada-plus-btn"] button p, | |
| .gradio-container .omada-bottom-row [class*="omada-plus-btn"] button div { | |
| font-size: 2rem !important; | |
| line-height: 1 !important; | |
| font-weight: 800 !important; | |
| } | |
| /* remove Gradio html wrapper padding inside bottom row */ | |
| .gradio-container .omada-bottom-row .html-container { | |
| padding: 0 !important; | |
| margin: 0 !important; | |
| width: auto !important; | |
| min-width: 0 !important; | |
| } | |
| .gradio-container .omada-bottom-row .html-container > div, | |
| .gradio-container .omada-bottom-row .html-container .prose { | |
| padding: 0 !important; | |
| margin: 0 !important; | |
| width: auto !important; | |
| min-width: 0 !important; | |
| } | |
| /* final tune: smaller send circle + lighter/smaller task/custom text */ | |
| .gradio-container .omada-bottom-row .omada-send-btn-fix, | |
| .gradio-container .omada-bottom-row .omada-send-btn-fix > div, | |
| .gradio-container .omada-bottom-row .omada-send-btn, | |
| .gradio-container .omada-bottom-row .omada-send-btn > div, | |
| .gradio-container .omada-bottom-row .omada-send-btn button, | |
| .gradio-container .omada-bottom-row .omada-send-btn-fix button, | |
| .gradio-container .omada-bottom-row .omada-send-btn > div > button, | |
| .gradio-container .omada-bottom-row .omada-send-btn-fix > div > button { | |
| width: 35px !important; | |
| min-width: 35px !important; | |
| max-width: 35px !important; | |
| height: 35px !important; | |
| min-height: 35px !important; | |
| max-height: 35px !important; | |
| } | |
| .gradio-container .omada-bottom-row .omada-send-btn button, | |
| .gradio-container .omada-bottom-row .omada-send-btn-fix button, | |
| .gradio-container .omada-bottom-row .omada-send-btn > div > button, | |
| .gradio-container .omada-bottom-row .omada-send-btn-fix > div > button { | |
| font-size: 1.6rem !important; | |
| } | |
| .gradio-container .omada-bottom-row .omada-task-btn button, | |
| .gradio-container .omada-bottom-row .omada-custom-btn button, | |
| .gradio-container .omada-bottom-row .omada-task-btn > div > button, | |
| .gradio-container .omada-bottom-row .omada-custom-btn > div > button, | |
| .gradio-container .omada-bottom-row .omada-task-btn button *, | |
| .gradio-container .omada-bottom-row .omada-custom-btn button * { | |
| font-size: 0.75rem !important; | |
| font-weight: 400 !important; | |
| line-height: 1.05 !important; | |
| } | |
| /* Gradio lg token override (computed 16px -> force smaller) */ | |
| .gradio-container .omada-bottom-row button.lg.omada-task-btn, | |
| .gradio-container .omada-bottom-row button.lg.omada-custom-btn, | |
| .gradio-container .omada-bottom-row .omada-task-btn button.lg, | |
| .gradio-container .omada-bottom-row .omada-custom-btn button.lg { | |
| --button-large-text-size: 0.7rem !important; | |
| font-size: 0.75rem !important; | |
| font-weight: 400 !important; | |
| line-height: 1.05 !important; | |
| } | |
| .gradio-container .omada-bottom-row button.lg.omada-task-btn *, | |
| .gradio-container .omada-bottom-row button.lg.omada-custom-btn *, | |
| .gradio-container .omada-bottom-row .omada-task-btn button.lg *, | |
| .gradio-container .omada-bottom-row .omada-custom-btn button.lg * { | |
| font-size: 0.75rem !important; | |
| font-weight: 400 !important; | |
| line-height: 1.05 !important; | |
| } | |
| /* sample preview cards */ | |
| .gradio-container .omada-sample-preview-row { | |
| width: min(980px, calc(100vw - 24px)) !important; | |
| margin: 0 auto 6px auto !important; | |
| gap: 10px !important; | |
| order: 2 !important; | |
| flex: 0 0 auto !important; | |
| } | |
| .gradio-container .omada-sample-preview-col { | |
| flex: 1 1 0 !important; | |
| } | |
| .gradio-container .omada-sample-preview-card .html-container, | |
| .gradio-container .omada-sample-preview-card .html-container > div { | |
| padding: 0 !important; | |
| margin: 0 !important; | |
| } | |
| .gradio-container .omada-sample-preview-inner { | |
| display: flex; | |
| align-items: center; | |
| gap: 10px; | |
| width: 100%; | |
| min-height: 84px; | |
| border-radius: 14px; | |
| border: 1px solid rgba(186, 204, 232, 0.82); | |
| background: rgba(255, 255, 255, 0.72); | |
| box-shadow: 0 5px 14px rgba(108, 134, 177, 0.12); | |
| padding: 10px 12px; | |
| box-sizing: border-box; | |
| } | |
| .gradio-container .omada-sample-preview-media { | |
| width: 76px; | |
| min-width: 76px; | |
| height: 56px; | |
| border-radius: 10px; | |
| overflow: hidden; | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| background: rgba(227, 236, 249, 0.8); | |
| } | |
| .gradio-container .omada-sample-preview-thumb { | |
| width: 100%; | |
| height: 100%; | |
| object-fit: cover; | |
| } | |
| .gradio-container .omada-sample-preview-icon { | |
| font-size: 1.4rem; | |
| } | |
| .gradio-container .omada-sample-preview-meta { | |
| min-width: 0; | |
| } | |
| .gradio-container .omada-sample-preview-title { | |
| font-size: 0.83rem; | |
| font-weight: 700; | |
| color: #2a3a52; | |
| } | |
| .gradio-container .omada-sample-preview-desc { | |
| margin-top: 4px; | |
| font-size: 0.78rem; | |
| line-height: 1.2; | |
| color: #4a5f80; | |
| word-break: break-word; | |
| } | |
| /* adaptive first-view layout: keep composer visible without page scroll */ | |
| html, | |
| body { | |
| height: 100% !important; | |
| overflow: hidden !important; | |
| } | |
| .gradio-container { | |
| height: 100vh !important; | |
| overflow: hidden !important; | |
| } | |
| .gradio-container .omada-shell { | |
| height: calc(100vh - 72px) !important; | |
| max-height: calc(100vh - 72px) !important; | |
| display: flex !important; | |
| flex-direction: column !important; | |
| min-height: 0 !important; | |
| position: relative !important; | |
| } | |
| .gradio-container .omada-ready-hero { | |
| position: fixed !important; | |
| left: 50% !important; | |
| top: 42% !important; | |
| transform: translate(-50%, -50%) !important; | |
| z-index: 999 !important; | |
| } | |
| .gradio-container .omada-ready-hero { | |
| flex: 0 0 auto !important; | |
| margin-top: 0 !important; | |
| margin-bottom: 0 !important; | |
| align-self: center !important; | |
| width: min(820px, calc(100vw - 40px)) !important; | |
| text-align: center !important; | |
| pointer-events: none !important; | |
| } | |
| .gradio-container .omada-chatbot { | |
| order: 1 !important; | |
| } | |
| .gradio-container .omada-sample-row { | |
| order: 2 !important; | |
| } | |
| .gradio-container .omada-input-stack { | |
| order: 3 !important; | |
| } | |
| .gradio-container .omada-sample-row { | |
| flex: 0 0 auto !important; | |
| margin: 0 auto 6px auto !important; | |
| } | |
| .gradio-container .omada-chatbot { | |
| flex: 1 1 auto !important; | |
| min-height: 0 !important; | |
| height: auto !important; | |
| max-height: none !important; | |
| overflow: auto !important; | |
| } | |
| .gradio-container .omada-input-stack { | |
| flex: 0 0 auto !important; | |
| margin: 0 auto 4px auto !important; | |
| } | |
| .gradio-container .omada-ready-hero-wrap { | |
| position: relative !important; | |
| z-index: 60 !important; | |
| } | |
| .gradio-container .omada-ready-banner { | |
| display: block !important; | |
| } | |
| /* ensure hidden Gradio blocks never intercept clicks */ | |
| .gradio-container .hide, | |
| .gradio-container .wrap.hide { | |
| display: none !important; | |
| visibility: hidden !important; | |
| pointer-events: none !important; | |
| } | |
| """ | |
# Keyword arguments for gr.Blocks(). Gradio 6+ dropped/changed the
# `css` / `theme` / `js` constructor parameters, so those are only
# supplied on older Gradio versions.
_blocks_kwargs = {"title": "AIDAS Lab @ SNU - Omni-modal Diffusion"}
if not GRADIO_V6_PLUS:
    _blocks_kwargs["css"] = CUSTOM_CSS + EXTRA_CSS
    _blocks_kwargs["theme"] = theme
    _blocks_kwargs["js"] = FORCE_LIGHT_MODE_JS
with gr.Blocks(**_blocks_kwargs) as demo:
    # Hidden status line; refreshing it on page load triggers model warmup.
    model_status = gr.Markdown("Model status: Loading model...", visible=False)
    demo.load(warmup_model_status, outputs=[model_status])
    # Canonical task names; the task-button wiring relies on this exact order.
    MODE_OPTIONS = [
        "Chat",
        "MMU (Image → Text)",
        "MMU (Video → Text)",
        "Image Generation",
        "Image Editing",
        "ASR",
        "TTS",
    ]
    with gr.Column(elem_classes=["omada-shell"]):
        _chatbot_kwargs = {
            "label": None,
            "sanitize_html": False,
            "elem_classes": ["omada-chatbot"],
        }
        if not GRADIO_V6_PLUS:
            # Only valid on Gradio <6; not passed on 6+.
            _chatbot_kwargs["bubble_full_width"] = False
        chatbox = gr.Chatbot(**_chatbot_kwargs)
        # Fixed-position hero banner, hidden after the first submit/send.
        intro_hero = gr.HTML(
            "<div id='omada-ready-banner' class='omada-ready-banner' "
            "style='position:fixed;left:50%;top:42%;transform:translate(-50%,-50%);"
            "z-index:99999;pointer-events:none;width:min(820px,calc(100vw - 40px));"
            "text-align:center;color:#2f3f58;display:block;'>"
            "<div style='font-size:2rem;font-weight:800;line-height:1.1;'>"
            "Ready to get started?"
            "</div>"
            "<div style='margin-top:10px;font-size:1.02rem;font-weight:500;"
            "line-height:1.35;color:#5b6e8d;'>"
            "Use `+` to attach image/video/speech, `Task` to choose a mode, and `Generation Settings` to adjust generation options."
            "</div>"
            "</div>",
            visible=True,
            elem_classes=["omada-ready-hero-wrap"],
        )
        # (label, mode) pairs for the quick-sample chips: indices 0-2 fill
        # row 1 and 3-6 fill row 2 below.
        sample_task_items = [
            ("💬 Chat", "Chat"),
            ("🖼️ Image QA", "MMU (Image → Text)"),
            ("🎬 Video Captioning", "MMU (Video → Text)"),
            ("🎨 Image Generation", "Image Generation"),
            ("🛠️ Image Editing", "Image Editing"),
            ("🎙️ ASR", "ASR"),
            ("🔊 TTS", "TTS"),
        ]
# Two canned demo payloads per task, consumed by _open_sample_choices and
# _use_sample. Each entry may carry text plus at most one media attachment;
# _get_example_value falls back to an earlier example when a list is short.
sample_payloads = gr.State({
    "Chat": [
        {"mode": "Chat", "text": _get_example_value(CHAT_EXAMPLES, 0, 0, "Hello! Please introduce yourself."), "image": None, "audio": None, "video": None},
        {"mode": "Chat", "text": _get_example_value(CHAT_EXAMPLES, 1, 0, _get_example_value(CHAT_EXAMPLES, 0, 0, "Hello! Please introduce yourself.")), "image": None, "audio": None, "video": None},
    ],
    "MMU (Image → Text)": [
        {"mode": "MMU (Image → Text)", "text": _get_example_value(MMU_EXAMPLES, 0, 1, DEFAULT_MMU_PROMPT), "image": _get_example_value(MMU_EXAMPLES, 0, 0, None), "audio": None, "video": None},
        {"mode": "MMU (Image → Text)", "text": _get_example_value(MMU_EXAMPLES, 1, 1, _get_example_value(MMU_EXAMPLES, 0, 1, DEFAULT_MMU_PROMPT)), "image": _get_example_value(MMU_EXAMPLES, 1, 0, _get_example_value(MMU_EXAMPLES, 0, 0, None)), "audio": None, "video": None},
    ],
    "MMU (Video → Text)": [
        # Negative indices pull from the end of the example list.
        {"mode": "MMU (Video → Text)", "text": "", "image": None, "audio": None, "video": _get_example_value(V2T_EXAMPLES, -2, 0, _get_example_value(V2T_EXAMPLES, 0, 0, None))},
        {"mode": "MMU (Video → Text)", "text": "", "image": None, "audio": None, "video": _get_example_value(V2T_EXAMPLES, -1, 0, _get_example_value(V2T_EXAMPLES, 1, 0, _get_example_value(V2T_EXAMPLES, 0, 0, None)))},
    ],
    "Image Generation": [
        {"mode": "Image Generation", "text": _get_example_value(T2I_EXAMPLES, 0, 0, "A cinematic mountain landscape at sunrise."), "image": None, "audio": None, "video": None},
        {"mode": "Image Generation", "text": _get_example_value(T2I_EXAMPLES, 1, 0, _get_example_value(T2I_EXAMPLES, 0, 0, "A cinematic mountain landscape at sunrise.")), "image": None, "audio": None, "video": None},
    ],
    "Image Editing": [
        {"mode": "Image Editing", "text": _get_example_value(I2I_EXAMPLES, 0, 1, "Add warm sunset lighting."), "image": _get_example_value(I2I_EXAMPLES, 0, 0, None), "audio": None, "video": None},
        {"mode": "Image Editing", "text": _get_example_value(I2I_EXAMPLES, 1, 1, _get_example_value(I2I_EXAMPLES, 0, 1, "Add warm sunset lighting.")), "image": _get_example_value(I2I_EXAMPLES, 1, 0, _get_example_value(I2I_EXAMPLES, 0, 0, None)), "audio": None, "video": None},
    ],
    "ASR": [
        {"mode": "ASR", "text": "", "image": None, "audio": _get_example_value(S2T_EXAMPLES, 0, 0, None), "video": None},
        {"mode": "ASR", "text": "", "image": None, "audio": _get_example_value(S2T_EXAMPLES, 1, 0, _get_example_value(S2T_EXAMPLES, 0, 0, None)), "video": None},
    ],
    "TTS": [
        {"mode": "TTS", "text": _get_example_value(T2S_EXAMPLES, 0, 0, "Hello from Dynin-Omni."), "image": None, "audio": None, "video": None},
        {"mode": "TTS", "text": _get_example_value(T2S_EXAMPLES, 1, 0, _get_example_value(T2S_EXAMPLES, 0, 0, "Hello from Dynin-Omni.")), "image": None, "audio": None, "video": None},
    ],
})
# Mode whose samples are currently offered by the two-sample chooser.
selected_sample_mode = gr.State("Chat")
# Quick-sample chips, split across two rows (3 + 4); wired up further below.
task_sample_buttons = []
with gr.Row(elem_classes=["omada-sample-row"], visible=True) as task_sample_row_1:
    for i in range(3):
        task_sample_buttons.append(gr.Button(sample_task_items[i][0], size="sm", elem_classes=["omada-chip"], visible=True))
with gr.Row(elem_classes=["omada-sample-row"], visible=True) as task_sample_row_2:
    for i in range(3, 7):
        task_sample_buttons.append(gr.Button(sample_task_items[i][0], size="sm", elem_classes=["omada-chip"], visible=True))
# Hidden until a task chip is clicked; shows two sample preview cards.
with gr.Row(elem_classes=["omada-sample-preview-row"], visible=False) as sample_choice_row:
    with gr.Column(elem_classes=["omada-sample-preview-col"]):
        sample_preview_1 = gr.HTML("", elem_classes=["omada-sample-preview-card"])
        sample_choice_1 = gr.Button("Sample 1", size="sm", elem_classes=["omada-chip"], visible=True)
    with gr.Column(elem_classes=["omada-sample-preview-col"]):
        sample_preview_2 = gr.HTML("", elem_classes=["omada-sample-preview-card"])
        sample_choice_2 = gr.Button("Sample 2", size="sm", elem_classes=["omada-chip"], visible=True)
# Main input: attachment preview above the textbox, action buttons below.
with gr.Column(elem_classes=["omada-input-stack"]):
    with gr.Column(elem_classes=["omada-input-row"]):
        attachment_preview = gr.HTML(
            _render_attachment_preview(None, None, None),
            elem_classes=["omada-attach-preview-wrap"],
        )
        chat_input = gr.Textbox(
            show_label=False,
            placeholder="How can I help you today?",
            lines=1,
            interactive=True,
            scale=1,
            min_width=0,
            elem_classes=["omada-main-input"],
        )
    with gr.Row(elem_classes=["omada-bottom-row"]):
        plus_btn = gr.Button("+", elem_classes=["omada-plus-btn"], scale=0, min_width=30)
        task_btn = gr.Button("🛠 Task", elem_classes=["omada-task-btn"], scale=0, min_width=0)
        selected_task_badge = gr.HTML(
            _render_task_chip("Chat"),
            elem_classes=["omada-task-chip-inline"],
        )
        custom_btn = gr.Button("🧠 Generation Settings", elem_classes=["omada-custom-btn"], scale=0, min_width=0)
        send_button = gr.Button("↑", elem_classes=["omada-send-btn", "omada-send-btn-fix"], scale=0, min_width=30)
# Panel state: which section is open ("attach"/"task"/"custom") and whether
# the slide-over panel is visible.
auto_mode_state = gr.State("Custom")
controls_visible = gr.State(False)
panel_mode_state = gr.State("task")
backdrop = gr.HTML("<div></div>", visible=False, elem_classes=["omada-panel-backdrop"])
controls_panel = gr.Column(visible=False, elem_classes=["omada-controls-safe"])
with controls_panel:
    panel_title = gr.Markdown("**Task**")
    # Currently selected task; drives routing in _chat_handler.
    mode_selector = gr.State("Chat")
    with gr.Column(visible=False) as attach_section:
        media_image = gr.Image(type="pil", label="Image", sources=["upload"], visible=True)
        media_audio = gr.Audio(type="filepath", label="Speech", sources=["microphone", "upload"], visible=True)
        media_video = gr.Video(label="Video", sources=["upload", "webcam"], visible=True)
    with gr.Column(visible=False) as task_section:
        with gr.Row():
            task_buttons = [
                gr.Button(
                    option,
                    size="sm",
                    variant="primary" if option == "Chat" else "secondary",
                )
                for option in MODE_OPTIONS
            ]
# Per-task generation settings; only the column matching the active task is
# shown (see _update_advanced).
with gr.Column(visible=False) as custom_section:
    gr.Markdown("Task-specific generation settings")
    adv_chat = gr.Column(visible=False)
    with adv_chat:
        chat_max_tokens = gr.Slider(2, 512, value=512, step=2, label="Chat max tokens", interactive=True)
        chat_steps = gr.Slider(2, 512, value=512, step=2, label="Chat steps", interactive=True)
        chat_block = gr.Slider(2, 512, value=16, step=2, label="Chat block length", interactive=True)
        chat_temperature_slider = gr.Slider(0.0, 2.0, value=0.0, step=0.05, label="Chat temperature", interactive=True)
    adv_t2s = gr.Column(visible=False)
    with adv_t2s:
        t2s_max_tokens = gr.Slider(2, 512, value=512, step=2, label="Speech token length", interactive=True)
        t2s_steps = gr.Slider(2, 512, value=256, step=2, label="T2S refinement steps", interactive=True)
        t2s_block = gr.Slider(2, 512, value=256, step=2, label="T2S block length", interactive=True)
        t2s_temperature = gr.Slider(0.0, 2.0, value=0.0, step=0.05, label="T2S temperature", interactive=True)
        t2s_cfg = gr.Slider(0.0, 6.0, value=3.5, step=0.1, label="T2S CFG scale", interactive=True)
        t2s_gender = gr.Dropdown(["random", "female", "male"], value="random", label="T2S gender", interactive=True)
        t2s_emotion = gr.Dropdown(["random", "angry", "happy", "neutral", "sad"], value="random", label="T2S emotion", interactive=True)
        t2s_speed = gr.Dropdown(["random", "normal", "fast", "slow"], value="random", label="T2S speed", interactive=True)
        t2s_pitch = gr.Dropdown(["random", "normal", "high", "low"], value="random", label="T2S pitch", interactive=True)
    adv_s2t = gr.Column(visible=False)
    with adv_s2t:
        s2t_steps = gr.Slider(2, 512, value=128, step=2, label="S2T steps", interactive=True)
        s2t_block = gr.Slider(2, 512, value=16, step=2, label="S2T block length", interactive=True)
        s2t_max_tokens = gr.Slider(2, 512, value=128, step=2, label="S2T max tokens", interactive=True)
        s2t_remasking = gr.Dropdown(["low_confidence", "random"], value="low_confidence", label="S2T remasking", interactive=True)
    adv_v2t = gr.Column(visible=False)
    with adv_v2t:
        v2t_steps = gr.Slider(2, 512, value=256, step=2, label="V2T steps", interactive=True)
        v2t_block = gr.Slider(2, 512, value=16, step=2, label="V2T block length", interactive=True)
        v2t_max_tokens = gr.Slider(2, 512, value=256, step=2, label="V2T max tokens", interactive=True)
    adv_t2i = gr.Column(visible=False)
    with adv_t2i:
        t2i_timesteps = gr.Slider(4, 128, value=16, step=2, label="T2I timesteps", interactive=True)
        t2i_temperature = gr.Slider(0.0, 2.0, value=0.0, step=0.05, label="T2I temperature", interactive=True)
        t2i_guidance = gr.Slider(0.0, 8.0, value=2.5, step=0.1, label="T2I CFG scale", interactive=True)
    adv_i2i = gr.Column(visible=False)
    with adv_i2i:
        i2i_timesteps = gr.Slider(4, 128, value=32, step=2, label="I2I timesteps", interactive=True)
        i2i_temperature = gr.Slider(0.0, 2.0, value=0.0, step=0.05, label="I2I temperature", interactive=True)
        i2i_guidance = gr.Slider(0.0, 8.0, value=2.5, step=0.1, label="I2I CFG scale", interactive=True)
    adv_mmu = gr.Column(visible=False)
    with adv_mmu:
        mmu_max_tokens = gr.Slider(2, 512, value=128, step=2, label="MMU max tokens", interactive=True)
        mmu_steps = gr.Slider(2, 512, value=128, step=2, label="MMU steps", interactive=True)
        mmu_block = gr.Slider(2, 512, value=16, step=2, label="MMU block length", interactive=True)
        mmu_temperature = gr.Slider(0.0, 2.0, value=0.0, step=0.05, label="MMU temperature", interactive=True)
# Closes the panel; slider values persist in the components themselves.
save_btn = gr.Button("Save", variant="primary")
def _task_button_updates(selected_mode: str):
    """One gr.update per task button, highlighting only the selected mode.

    Order matches MODE_OPTIONS, which is also the order of ``task_buttons``.
    """
    updates = []
    for option in MODE_OPTIONS:
        variant = "primary" if option == selected_mode else "secondary"
        updates.append(gr.update(variant=variant))
    return tuple(updates)
def _update_advanced(mode, auto_mode):
    """Visibility updates for the seven per-task settings columns.

    Returned in the fixed order chat, t2s, s2t, v2t, t2i, i2i, mmu — the
    order callers splice into their ``outputs``. ``auto_mode`` is accepted
    for call-site compatibility but currently unused.
    """
    column_modes = (
        "Chat",
        "TTS",
        "ASR",
        "MMU (Video → Text)",
        "Image Generation",
        "Image Editing",
        "MMU (Image → Text)",
    )
    return tuple(gr.update(visible=(mode == m)) for m in column_modes)
| def _panel_title(kind: str) -> str: | |
| return { | |
| "attach": "**Attach**", | |
| "task": "**Task**", | |
| "custom": "**Custom Config**", | |
| }.get(kind, "**Task**") | |
def _open_controls(panel_kind, mode):
    """Open the slide-over panel with exactly one section visible.

    Returns updates in the exact order of the click handlers' ``outputs``:
    panel state, title, backdrop, panel, visible flag, three section
    visibilities, seven advanced columns, then one update per task button.
    """
    is_attach = panel_kind == "attach"
    is_task = panel_kind == "task"
    is_custom = panel_kind == "custom"
    # Advanced columns are only revealed inside the "custom" panel.
    adv_updates = _update_advanced(mode, "Custom") if is_custom else (gr.update(visible=False),) * 7
    return (
        panel_kind,
        gr.update(value=_panel_title(panel_kind)),
        gr.update(visible=True),
        gr.update(visible=True),
        True,
        gr.update(visible=is_attach),
        gr.update(visible=is_task),
        gr.update(visible=is_custom),
        *adv_updates,
        *_task_button_updates(mode),
    )
# The three launcher buttons (+ / Task / Generation Settings) share identical
# wiring except for the panel kind; register them in one loop instead of
# three copy-pasted click handlers. The panel kind is bound per-iteration
# via a lambda default argument to avoid late-binding capture.
_panel_outputs = [
    panel_mode_state,
    panel_title,
    backdrop,
    controls_panel,
    controls_visible,
    attach_section,
    task_section,
    custom_section,
    adv_chat,
    adv_t2s,
    adv_s2t,
    adv_v2t,
    adv_t2i,
    adv_i2i,
    adv_mmu,
    *task_buttons,
]
for _launcher_btn, _panel_kind in (
    (plus_btn, "attach"),
    (task_btn, "task"),
    (custom_btn, "custom"),
):
    _launcher_btn.click(
        lambda mode, kind=_panel_kind: _open_controls(kind, mode),
        inputs=[mode_selector],
        outputs=_panel_outputs,
    )
def _update_mode(mode):
    """gr.update swapping the chat box placeholder to match *mode*."""
    placeholder_by_mode = {
        "Chat": "How can I help you today?",
        "TTS": "Type the speech you want to synthesize...",
        "ASR": "Upload audio, then add notes here...",
        "MMU (Video → Text)": "Upload video, then add notes here...",
        "Image Generation": "Describe the image you want to generate...",
        "Image Editing": "Describe how you want to edit the image...",
        "MMU (Image → Text)": "Ask about the uploaded image...",
    }
    text = placeholder_by_mode.get(mode, "How can I help you today?")
    return gr.update(placeholder=text)
# NOTE(review): the gr.update returned here is discarded, so this call has
# no effect — the default placeholder is already set on chat_input. Looks
# like leftover initialization; safe to delete.
_update_mode("Chat")
def _pick_mode(choice, panel_mode):
    """Apply a task choice: mode state, chip, placeholder, panels, buttons.

    The advanced settings columns only change visibility while the custom
    panel is open; otherwise all seven stay hidden. Output order matches
    the task-button click wiring below.
    """
    show_custom = panel_mode == "custom"
    adv_updates = _update_advanced(choice, "Custom") if show_custom else (gr.update(visible=False),) * 7
    return (
        choice,
        _render_task_chip(choice),
        _update_mode(choice),
        *adv_updates,
        *_task_button_updates(choice),
    )
# One click handler per task button. The chosen mode is bound per-iteration
# via the lambda's default argument, avoiding the late-binding-closure trap.
for idx, task_choice_btn in enumerate(task_buttons):
    task_choice_btn.click(
        lambda panel_mode, choice=MODE_OPTIONS[idx]: _pick_mode(choice, panel_mode),
        inputs=[panel_mode_state],
        outputs=[mode_selector, selected_task_badge, chat_input, adv_chat, adv_t2s, adv_s2t, adv_v2t, adv_t2i, adv_i2i, adv_mmu, *task_buttons],
    )
def _refresh_attachment_preview(image_in, audio_in, video_in):
    """Re-render the attachment preview HTML from the current media inputs."""
    return _render_attachment_preview(image_in, audio_in, video_in)
# Any media change re-renders the shared attachment preview. The wiring is
# identical for all three inputs, so register it in one loop instead of
# three copy-pasted handlers.
for _media_component in (media_image, media_audio, media_video):
    _media_component.change(
        _refresh_attachment_preview,
        inputs=[media_image, media_audio, media_video],
        outputs=[attachment_preview],
    )
# "Save" just closes the panel; settings persist in the components.
save_btn.click(
    lambda: (gr.update(visible=False), gr.update(visible=False), False),
    outputs=[backdrop, controls_panel, controls_visible],
)
| def _format_user_message(msg: str) -> str: | |
| return msg.strip() if msg else " " | |
| def _normalize_chat_history(history): | |
| if not history: | |
| return [] | |
| if isinstance(history, list) and history and isinstance(history[0], dict): | |
| pairs = [] | |
| pending_user = None | |
| for msg in history: | |
| role = msg.get("role") | |
| content = msg.get("content", "") | |
| if role == "user": | |
| if pending_user is not None: | |
| pairs.append((pending_user, "")) | |
| pending_user = content | |
| elif role == "assistant": | |
| if pending_user is None: | |
| pairs.append((" ", content)) | |
| else: | |
| pairs.append((pending_user, content)) | |
| pending_user = None | |
| if pending_user is not None: | |
| pairs.append((pending_user, "")) | |
| return pairs | |
| return list(history) | |
def _serialize_chat_history(pairs):
    """Render (user, assistant) pairs in the format the active Gradio wants.

    Gradio <6 consumes tuple pairs directly; on 6+ emit a flat list of
    role/content message dicts, substituting " "/"" for missing turns.
    """
    if not GRADIO_V6_PLUS:
        return pairs
    serialized = []
    for user_turn, bot_turn in pairs:
        serialized.append({"role": "user", "content": " " if user_turn is None else user_turn})
        serialized.append({"role": "assistant", "content": "" if bot_turn is None else bot_turn})
    return serialized
| def _is_identity_query(message: str) -> bool: | |
| q = re.sub(r"[^a-z0-9\s]", " ", (message or "").lower()) | |
| q = re.sub(r"\s+", " ", q).strip() | |
| if not q: | |
| return False | |
| triggers = [ | |
| "who are you", | |
| "what are you", | |
| "introduce yourself", | |
| "what is your name", | |
| "your name", | |
| "are you dynin omni", | |
| "what model are you", | |
| ] | |
| return any(t in q for t in triggers) | |
def _chat_handler(
    history,
    message,
    mode,
    auto_mode,
    image_in,
    audio_in,
    video_in,
    # The remaining parameters receive the current values of the
    # identically-named settings components (order must match
    # ``submit_inputs`` in the wiring below).
    chat_max_tokens,
    chat_steps,
    chat_block,
    chat_temperature,
    t2s_max_tokens,
    t2s_steps,
    t2s_block,
    t2s_temperature,
    t2s_cfg,
    t2s_gender,
    t2s_emotion,
    t2s_speed,
    t2s_pitch,
    s2t_steps,
    s2t_block,
    s2t_max_tokens,
    s2t_remasking,
    v2t_steps,
    v2t_block,
    v2t_max_tokens,
    t2i_timesteps,
    t2i_temperature,
    t2i_guidance,
    i2i_timesteps,
    i2i_temperature,
    i2i_guidance,
    mmu_max_tokens,
    mmu_steps,
    mmu_block,
    mmu_temperature,
):
    """Route one user turn to the task-specific backend and stream results.

    Generator: yields (chat history, cleared textbox value) so Gradio can
    render partial progress. ``history`` may arrive as tuple pairs or as
    Gradio-6 message dicts; it is normalized to pairs internally and
    re-serialized on every yield.
    """
    _set_global_seed()
    history = _normalize_chat_history(history)
    message = (message or "").strip()
    # For video QA, first show the user bubble without the heavy video
    # embed, then re-render it with the video once the placeholder is up.
    defer_video = mode == "MMU (Video → Text)" and bool(video_in)
    display_user = _render_user_message(mode, message, image_in, audio_in, video_in, defer_video=defer_video)
    history.append((display_user, _render_text_message("Model loading...", "")))
    yield _serialize_chat_history(history), ""
    # Identity questions get a fixed canned answer without touching the model.
    if mode == "Chat" and _is_identity_query(message):
        fixed = (
            "I am Dynin-Omni, an omnimodal unified diffusion language model developed by AIDAS Lab.\n"
            "I can understand and generate text, images, speech, and video within a single architecture."
        )
        history[-1] = (display_user, _render_text_message("Assistant reply generated.", fixed))
        yield _serialize_chat_history(history), ""
        return
    if defer_video:
        display_user = _render_user_message(mode, message, image_in, audio_in, video_in, defer_video=False)
        history[-1] = (display_user, history[-1][1])
        yield _serialize_chat_history(history), ""
    # get_app() may block while the model loads, hence the earlier yields.
    app = get_app()
    history[-1] = (display_user, _render_text_message("Generating...", ""))
    yield _serialize_chat_history(history), ""
    # Use UI-provided generation settings.
    app.force_eval_settings = str(auto_mode).strip().lower() == "auto"
    if mode == "Chat":
        for reply_html, status, done in app.run_chat_stream(
            message,
            chat_max_tokens,
            chat_steps,
            chat_block,
            chat_temperature,
            update_every=64,
        ):
            response = _render_response(status, reply_html)
            history[-1] = (display_user, response)
            yield _serialize_chat_history(history), ""
        return
    if mode == "TTS":
        if not message:
            history[-1] = (display_user, _render_text_message("Please type some text.", ""))
            yield _serialize_chat_history(history), ""
            return
        # TTS is not streamed: single synchronous call, one final yield.
        audio, status = app.run_t2s(
            message,
            t2s_max_tokens,
            t2s_steps,
            t2s_block,
            t2s_temperature,
            t2s_cfg,
            t2s_gender,
            t2s_emotion,
            t2s_speed,
            t2s_pitch,
        )
        history[-1] = (display_user, _render_audio_message(status, audio))
        yield _serialize_chat_history(history), ""
        return
    if mode == "ASR":
        if not audio_in:
            history[-1] = (display_user, _render_text_message("Please upload audio.", ""))
            yield _serialize_chat_history(history), ""
            return
        for text, status in app.run_s2t_stream(
            audio_in,
            s2t_steps,
            s2t_block,
            s2t_max_tokens,
            s2t_remasking,
            update_every=32,
        ):
            history[-1] = (display_user, _render_text_message(status, text))
            yield _serialize_chat_history(history), ""
        return
    if mode == "MMU (Video → Text)":
        if not video_in:
            history[-1] = (display_user, _render_text_message("Please upload a video.", ""))
            yield _serialize_chat_history(history), ""
            return
        for text, status in app.run_v2t_stream(
            video_in,
            v2t_steps,
            v2t_block,
            v2t_max_tokens,
            update_every=32,
        ):
            history[-1] = (display_user, _render_text_message(status, text))
            yield _serialize_chat_history(history), ""
        return
    if mode == "Image Generation":
        if not message:
            history[-1] = (display_user, _render_text_message("Please provide a prompt.", ""))
            yield _serialize_chat_history(history), ""
            return
        for image, status in app.run_t2i_stream(
            message,
            t2i_timesteps,
            t2i_temperature,
            t2i_guidance,
            update_every=2,
        ):
            history[-1] = (display_user, _render_image_message(status, image))
            yield _serialize_chat_history(history), ""
        return
    if mode == "Image Editing":
        if not image_in:
            history[-1] = (display_user, _render_text_message("Please upload an image.", ""))
            yield _serialize_chat_history(history), ""
            return
        if not message:
            history[-1] = (display_user, _render_text_message("Please provide an edit instruction.", ""))
            yield _serialize_chat_history(history), ""
            return
        for image, status in app.run_i2i_stream(
            message,
            image_in,
            i2i_timesteps,
            i2i_temperature,
            i2i_guidance,
            update_every=2,
        ):
            history[-1] = (display_user, _render_image_message(status, image))
            yield _serialize_chat_history(history), ""
        return
    if mode == "MMU (Image → Text)":
        if not image_in:
            history[-1] = (display_user, _render_text_message("Please upload an image.", ""))
            yield _serialize_chat_history(history), ""
            return
        # Keep MMU QA consistent with chat mask-pill UX.
        try:
            # Clamp the placeholder mask count to [16, 256].
            mmu_mask_count = max(16, min(int(mmu_max_tokens or 128), 256))
        except Exception:
            mmu_mask_count = 128
        mmu_mask_surface = " ".join(["<mdm_mask>"] * mmu_mask_count)
        history[-1] = (display_user, _render_text_message("Generating...", mmu_mask_surface))
        yield _serialize_chat_history(history), ""
        reply, status = app.run_mmu(
            images=[image_in],
            message=message,
            max_new_tokens=mmu_max_tokens,
            steps=mmu_steps,
            block_length=mmu_block,
            temperature=mmu_temperature,
        )
        history[-1] = (display_user, _render_text_message(status, reply))
        yield _serialize_chat_history(history), ""
        return
    # Fallback for any mode not handled above.
    history[-1] = (display_user, _render_text_message("Unsupported mode.", ""))
    yield _serialize_chat_history(history), ""
# Re-enter the Blocks context to register the submit/sample wiring.
with demo:
    def _hide_intro():
        """Hide the hero banner as soon as the user submits anything."""
        return gr.update(visible=False)
# Inputs for _chat_handler; the order must match its positional parameters.
submit_inputs = [
    chatbox,
    chat_input,
    mode_selector,
    auto_mode_state,
    media_image,
    media_audio,
    media_video,
    chat_max_tokens,
    chat_steps,
    chat_block,
    chat_temperature_slider,
    t2s_max_tokens,
    t2s_steps,
    t2s_block,
    t2s_temperature,
    t2s_cfg,
    t2s_gender,
    t2s_emotion,
    t2s_speed,
    t2s_pitch,
    s2t_steps,
    s2t_block,
    s2t_max_tokens,
    s2t_remasking,
    v2t_steps,
    v2t_block,
    v2t_max_tokens,
    t2i_timesteps,
    t2i_temperature,
    t2i_guidance,
    i2i_timesteps,
    i2i_temperature,
    i2i_guidance,
    mmu_max_tokens,
    mmu_steps,
    mmu_block,
    mmu_temperature,
]
submit_outputs = [chatbox, chat_input]
# Enter key and the send button share the same chain: hide the hero
# banner immediately (unqueued), then stream the handler's output.
chat_input.submit(_hide_intro, outputs=[intro_hero], queue=False).then(
    _chat_handler, inputs=submit_inputs, outputs=submit_outputs
)
send_button.click(_hide_intro, outputs=[intro_hero], queue=False).then(
    _chat_handler, inputs=submit_inputs, outputs=submit_outputs
)
def _open_sample_choices(sample_map, mode):
    """Switch to *mode* and swap the task chips for its two sample cards.

    Returns updates in the order of the task_sample_buttons click wiring:
    mode states/chip/placeholder, chip-row and chooser-row visibilities,
    two preview cards, two sample buttons, then per-task-button variants.
    Missing samples render as empty cards with their button hidden.
    """
    items = (sample_map or {}).get(mode, [])
    has_1 = len(items) >= 1
    has_2 = len(items) >= 2
    item1 = items[0] if has_1 else {}
    item2 = items[1] if has_2 else {}
    return (
        mode,
        _render_task_chip(mode),
        _update_mode(mode),
        mode,
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=True),
        _render_sample_preview_card(item1, 0),
        _render_sample_preview_card(item2, 1),
        gr.update(value="Sample 1", visible=has_1, interactive=has_1),
        gr.update(value="Sample 2", visible=has_2, interactive=has_2),
        *_task_button_updates(mode),
    )
def _use_sample(sample_map, mode, sample_idx):
    """Load sample *sample_idx* of *mode* into the input widgets.

    Returns, in wiring order: textbox value, three media values, mode
    state, task chip, attachment preview, placeholder update, chip-row /
    chooser-row visibilities, two cleared preview cards, two reset sample
    buttons, then per-task-button variants. With no samples available it
    resets everything back to the Chat default.
    """
    items = (sample_map or {}).get(mode, [])
    if not items:
        current_mode = "Chat"
        return (
            "",
            None,
            None,
            None,
            current_mode,
            _render_task_chip(current_mode),
            _render_attachment_preview(None, None, None),
            _update_mode(current_mode),
            gr.update(visible=True),
            gr.update(visible=True),
            gr.update(visible=False),
            "",
            "",
            gr.update(value="Sample 1", visible=True, interactive=True),
            gr.update(value="Sample 2", visible=True, interactive=True),
            *_task_button_updates(current_mode),
        )
    # Clamp the requested index into the available range.
    idx = max(0, min(int(sample_idx), len(items) - 1))
    item = items[idx] or {}
    sample_mode = item.get("mode", "Chat")
    sample_text = item.get("text", "")
    if not sample_text:
        # Media-only samples (ASR / video QA): show the filename as text.
        sample_video = item.get("video")
        sample_audio = item.get("audio")
        if sample_video:
            sample_text = f"[Video] {Path(str(sample_video)).name}"
        elif sample_audio:
            sample_text = f"[Audio] {Path(str(sample_audio)).name}"
    image_item = item.get("image")
    audio_item = item.get("audio")
    video_item = item.get("video")
    return (
        sample_text,
        image_item,
        audio_item,
        video_item,
        sample_mode,
        _render_task_chip(sample_mode),
        _render_attachment_preview(image_item, audio_item, video_item),
        _update_mode(sample_mode),
        gr.update(visible=True),
        gr.update(visible=True),
        gr.update(visible=False),
        "",
        "",
        gr.update(value="Sample 1", visible=True, interactive=True),
        gr.update(value="Sample 2", visible=True, interactive=True),
        *_task_button_updates(sample_mode),
    )
# Quick-sample chips: each opens the two-sample chooser for its task. The
# target mode is bound per-iteration via a lambda default argument.
for _chip_btn, (_chip_label, _chip_mode) in zip(task_sample_buttons, sample_task_items):
    _chip_btn.click(
        lambda payloads, m=_chip_mode: _open_sample_choices(payloads, m),
        inputs=[sample_payloads],
        outputs=[
            mode_selector,
            selected_task_badge,
            chat_input,
            selected_sample_mode,
            task_sample_row_1,
            task_sample_row_2,
            sample_choice_row,
            sample_preview_1,
            sample_preview_2,
            sample_choice_1,
            sample_choice_2,
            *task_buttons,
        ],
    )
# The two sample buttons differ only in the sample index; share the wiring.
# NOTE(review): chat_input appears twice in this output list (positions 0
# and 7), matching _use_sample returning both the sample text and a
# placeholder update for the same textbox — confirm the installed Gradio
# accepts duplicate outputs before restructuring.
_sample_use_outputs = [
    chat_input,
    media_image,
    media_audio,
    media_video,
    mode_selector,
    selected_task_badge,
    attachment_preview,
    chat_input,
    task_sample_row_1,
    task_sample_row_2,
    sample_choice_row,
    sample_preview_1,
    sample_preview_2,
    sample_choice_1,
    sample_choice_2,
    *task_buttons,
]
for _use_btn, _sample_idx in ((sample_choice_1, 0), (sample_choice_2, 1)):
    _use_btn.click(
        lambda payloads, m, idx=_sample_idx: _use_sample(payloads, m, idx),
        inputs=[sample_payloads, selected_sample_mode],
        outputs=_sample_use_outputs,
    )
# Initial: task chips visible, sample choices hidden.
demo.load(
    lambda: (
        gr.update(visible=True),
        gr.update(visible=True),
        gr.update(visible=False),
        "",
        "",
        gr.update(visible=True),
        gr.update(visible=True),
    ),
    outputs=[
        task_sample_row_1,
        task_sample_row_2,
        sample_choice_row,
        sample_preview_1,
        sample_preview_2,
        sample_choice_1,
        sample_choice_2,
    ],
    queue=False,
)
if __name__ == "__main__":
    # Directories Gradio is allowed to serve files from (generated
    # previews, bundled assets, and temp media written under /tmp).
    _launch_kwargs = {
        "allowed_paths": [
            str(PREVIEW_DIR),
            str(PROJECT_ROOT),
            str(ASSET_ROOT),
            "/tmp",
        ],
    }
    if GRADIO_V6_PLUS:
        # On Gradio 6+ css/theme/js are passed here; on <6 they were
        # passed to gr.Blocks() at construction time instead.
        _launch_kwargs.update(
            {
                "css": CUSTOM_CSS + EXTRA_CSS,
                "theme": theme,
                "js": FORCE_LIGHT_MODE_JS
            }
        )
    demo.launch(**_launch_kwargs)