"""
ZeroGPU-friendly Gradio entrypoint for OMada demo.
- Downloads checkpoint + assets + style centroids from Hugging Face Hub
- Instantiates OmadaDemo once (global)
- Exposes 10 modalities via Gradio tabs
- Uses @spaces.GPU only on inference handlers so GPU is allocated per request
"""
import os
import sys
import subprocess
import importlib
import base64
import html
import io
import re
import wave
import tempfile
import shutil
import threading
import random
from urllib.parse import quote
from pathlib import Path
from typing import List
import numpy as np
import torch
from PIL import Image
import gradio as gr
import spaces
from packaging.version import parse as parse_version
# Gradio version gate: some UI APIs differ in 6.x, so branch at build time.
GRADIO_VERSION = parse_version(gr.__version__)
GRADIO_V6_PLUS = GRADIO_VERSION >= parse_version("6.0.0")
# ---------------------------
# Project roots & sys.path
# ---------------------------
PROJECT_ROOT = Path(__file__).resolve().parent
# Default to non-eval settings unless the environment already overrides it.
os.environ.setdefault("FORCE_EVAL_SETTINGS", "0")
GLOBAL_SEED = int(os.getenv("GLOBAL_SEED", "42"))
# Cache dir for media copies/re-encodes served to the browser as previews.
PREVIEW_DIR = PROJECT_ROOT / "_preview_cache"
PREVIEW_DIR.mkdir(parents=True, exist_ok=True)
# Make the vendored MMaDA / EMOVA packages importable without installation.
MMADA_ROOT = PROJECT_ROOT / "MMaDA"
if str(MMADA_ROOT) not in sys.path:
    sys.path.insert(0, str(MMADA_ROOT))
EMOVA_ROOT = PROJECT_ROOT / "EMOVA_speech_tokenizer"
if str(EMOVA_ROOT) not in sys.path:
    sys.path.insert(0, str(EMOVA_ROOT))
# ---------------------------
# HuggingFace Hub helper
# ---------------------------
def ensure_hf_hub(target: str = "0.36.0"):
    """
    Make sure huggingface_hub stays <1.0 to satisfy transformers/tokenizers.

    Installs the pinned version when the package is missing or too new, then
    returns the (possibly reloaded) module object.
    """
    pin_cmd = [sys.executable, "-m", "pip", "install", f"huggingface-hub=={target}", "--no-cache-dir"]
    try:
        import huggingface_hub as hub
    except ImportError:
        subprocess.check_call(pin_cmd)
        import huggingface_hub as hub
    if parse_version(hub.__version__) >= parse_version("1.0.0"):
        # Downgrade in place and reload so the pinned code is what callers see.
        subprocess.check_call(pin_cmd)
        hub = importlib.reload(hub)
    # Backfill missing constants in older hub versions to avoid AttributeError.
    try:
        import huggingface_hub.constants as hub_consts  # type: ignore
    except Exception:
        hub_consts = None
    if hub_consts and not hasattr(hub_consts, "HF_HUB_ENABLE_HF_TRANSFER"):
        setattr(hub_consts, "HF_HUB_ENABLE_HF_TRANSFER", False)
    return hub
# Pin huggingface_hub first, then bind snapshot_download from the verified module.
snapshot_download = ensure_hf_hub().snapshot_download
# ---------------------------
# OMada demo imports
# ---------------------------
from inference.gradio_multimodal_demo_inst import ( # noqa: E402
OmadaDemo,
CUSTOM_CSS,
FORCE_LIGHT_MODE_JS,
)
# ---------------------------
# HF download helpers
# ---------------------------
def download_assets() -> Path:
    """Download demo assets (logo + sample prompts/media) and return the root path."""
    target_dir = PROJECT_ROOT / "_asset_cache"
    target_dir.mkdir(parents=True, exist_ok=True)
    # Snapshot the asset dataset into a concrete local directory (no symlinks).
    snapshot = snapshot_download(
        repo_id=os.getenv("ASSET_REPO_ID", "snu-aidas/Dynin-Omni-Demo-Assets"),
        revision=os.getenv("ASSET_REVISION", "main"),
        repo_type="dataset",
        local_dir=target_dir,
        local_dir_use_symlinks=False,
        token=os.getenv("HF_TOKEN"),
    )
    return Path(snapshot)
def download_style() -> Path:
    """Download style centroid dataset and return the root path.

    Tries the configured repo type first, then the remaining known types, and
    falls back to the (possibly empty) local cache directory on total failure.
    """
    repo_id = os.getenv("STYLE_REPO_ID", "snu-aidas/aidas-style-centroid")
    revision = os.getenv("STYLE_REVISION", "main")
    token = os.getenv("HF_TOKEN")
    cache_dir = PROJECT_ROOT / "_style_cache"
    cache_dir.mkdir(parents=True, exist_ok=True)
    preferred_repo_type = os.getenv("STYLE_REPO_TYPE", "dataset").strip().lower()
    repo_type_candidates = [preferred_repo_type]
    repo_type_candidates += [t for t in ("dataset", "model") if t not in repo_type_candidates]
    last_exc = None
    for repo_type in repo_type_candidates:
        try:
            snapshot = snapshot_download(
                repo_id=repo_id,
                revision=revision,
                repo_type=repo_type,
                local_dir=cache_dir,
                local_dir_use_symlinks=False,
                token=token,
            )
        except Exception as exc:
            last_exc = exc
            continue
        return Path(snapshot)
    # Every repo type failed: run from whatever is already cached locally.
    print(
        f"[Style] Failed to download '{repo_id}' (tried repo_type={repo_type_candidates}). "
        f"Using local cache at {cache_dir}. Last error: {last_exc}",
        flush=True,
    )
    return cache_dir
def download_checkpoint() -> Path:
    """Download checkpoint snapshot and return an `unwrapped_model` directory.

    Honors MODEL_CHECKPOINT_PATH as a local override; otherwise snapshots
    MODEL_REPO_ID from the Hub and resolves (or aliases via symlink) the
    `unwrapped_model` directory inside the snapshot.

    Raises:
        FileNotFoundError: when MODEL_CHECKPOINT_PATH is set but missing.
    """
    local_override = os.getenv("MODEL_CHECKPOINT_PATH")
    if local_override:
        override_path = Path(local_override).expanduser()
        if override_path.name != "unwrapped_model":
            nested = override_path / "unwrapped_model"
            if nested.is_dir():
                override_path = nested
        if not override_path.exists():
            raise FileNotFoundError(f"MODEL_CHECKPOINT_PATH does not exist: {override_path}")
        return override_path
    repo_id = os.getenv("MODEL_REPO_ID", "snu-aidas/Dynin-Omni")
    revision = os.getenv("MODEL_REVISION", "main")
    token = os.getenv("HF_TOKEN")
    cache_dir = PROJECT_ROOT / "_ckpt_cache"
    cache_dir.mkdir(parents=True, exist_ok=True)
    snapshot_path = Path(
        snapshot_download(
            repo_id=repo_id,
            revision=revision,
            repo_type="model",
            local_dir=cache_dir,
            local_dir_use_symlinks=False,
            token=token,
        )
    )
    if snapshot_path.name == "unwrapped_model":
        return snapshot_path
    nested = snapshot_path / "unwrapped_model"
    if nested.is_dir():
        return nested
    aliased = snapshot_path.parent / "unwrapped_model"
    if not aliased.exists():
        # Guard against a concurrent warmup/request creating the alias between
        # the exists() check and symlink_to() — same race handling as the
        # style-centroid wiring in get_app().
        try:
            aliased.symlink_to(snapshot_path, target_is_directory=True)
        except FileExistsError:
            pass
    return aliased
# ---------------------------
# Assets (for examples + logo)
# ---------------------------
# Resolve demo assets and style centroids at import time (module-level side
# effect: downloads on first run, then served from the local cache dirs).
ASSET_ROOT = download_assets()
STYLE_ROOT = download_style()
LOGO_PATH = ASSET_ROOT / "logo.png"  # optional
def _load_text_examples(path: Path):
if not path.exists():
return []
lines = [
ln.strip()
for ln in path.read_text(encoding="utf-8").splitlines()
if ln.strip()
]
return [[ln] for ln in lines]
def _load_media_examples(subdir: str, suffixes):
    """Collect files under ASSET_ROOT/<subdir> whose suffix is in *suffixes*."""
    folder = ASSET_ROOT / subdir
    if not folder.exists():
        return []
    return [
        [str(entry)]
        for entry in sorted(folder.iterdir())
        if entry.is_file() and entry.suffix.lower() in suffixes
    ]
def _load_i2i_examples():
    """Pair i2i source images with their instruction text files as example rows."""
    folder = ASSET_ROOT / "i2i"
    if not folder.exists():
        return []
    # Image files (image1.jpeg, image2.png, ...)
    images = sorted(
        p for p in folder.iterdir()
        if p.suffix.lower() in {".png", ".jpg", ".jpeg", ".webp"}
    )
    # Instruction files (text1.txt, text2.txt, ...)
    texts = sorted(p for p in folder.iterdir() if p.suffix.lower() == ".txt")
    pairs = []
    # zip truncates to the shorter list, matching images to instructions by order.
    for img_path, txt_path in zip(images, texts):
        instruction = txt_path.read_text(encoding="utf-8").strip()
        if not instruction:
            continue
        # Gradio Examples row format: [image, instruction_text]
        pairs.append([str(img_path), instruction])
    return pairs
# text-based examples loaded from the asset snapshot (empty lists when missing)
T2S_EXAMPLES = _load_text_examples(ASSET_ROOT / "t2s" / "text.txt")
CHAT_EXAMPLES = _load_text_examples(ASSET_ROOT / "chat" / "text.txt")
T2I_EXAMPLES = _load_text_examples(ASSET_ROOT / "t2i" / "text.txt")
I2I_EXAMPLES = _load_i2i_examples()
def _get_example_value(examples, idx: int, pos: int = 0, default=None):
try:
if len(examples) > idx and len(examples[idx]) > pos:
val = examples[idx][pos]
if val is not None and str(val) != "":
return val
except Exception:
pass
try:
if examples and len(examples[0]) > pos:
val = examples[0][pos]
if val is not None and str(val) != "":
return val
except Exception:
pass
return default
def _sample_preview_label(item: dict, idx: int) -> str:
base = f"sample {idx + 1}"
if not isinstance(item, dict):
return base
text = str(item.get("text") or "").strip()
image = item.get("image")
audio = item.get("audio")
video = item.get("video")
preview = ""
if text:
preview = text.replace("\n", " ").strip()
elif image:
preview = f"image: {Path(str(image)).name}"
elif audio:
preview = f"audio: {Path(str(audio)).name}"
elif video:
preview = f"video: {Path(str(video)).name}"
if not preview:
return base
if len(preview) > 34:
preview = preview[:31] + "..."
return f"{base} - {preview}"
def _image_to_data_uri(path: str) -> str:
p = str(path or "")
if not p or not os.path.exists(p):
return ""
try:
with Image.open(p).convert("RGB") as pil_img:
buf = io.BytesIO()
pil_img.save(buf, format="PNG")
encoded = base64.b64encode(buf.getvalue()).decode("ascii")
return f"data:image/png;base64,{encoded}"
except Exception:
return ""
def _render_sample_preview_card(item: dict, idx: int) -> str:
    # Render one sample as an HTML preview card (thumbnail/emoji + description).
    # NOTE(review): the HTML template fragments in this function appear to have
    # been stripped/corrupted (empty and truncated string literals below) —
    # restore the original markup; code is otherwise left byte-identical.
    title = f"sample {idx + 1}"
    if not isinstance(item, dict):
        return (
            "
            "
            f"
            {title}
            "
            "
            No preview available
            "
            "
            "
        )
    text = str(item.get("text") or "").strip()
    image = item.get("image")
    audio = item.get("audio")
    video = item.get("video")
    media_html = "🧩
    "
    desc = "No preview available"
    if image:
        img_path = str(image)
        src = _image_to_data_uri(img_path)
        if src:
            media_html = f"
            "
            desc = text if text else f"image: {Path(img_path).name}"
        else:
            # Fallback for browsers/Gradio sanitization cases where data URI is blocked.
            file_src = f"/file={quote(img_path)}"
            media_html = f"
            "
            desc = text if text else f"image: {Path(img_path).name}"
    elif video:
        vpath = str(video)
        thumb = _video_thumb_data_uri(vpath)
        if thumb:
            media_html = f"
            "
        else:
            media_html = "🎬
            "
        desc = text if text else f"video: {Path(vpath).name}"
    elif audio:
        apath = str(audio)
        media_html = "🎤
        "
        desc = text if text else f"audio: {Path(apath).name}"
    elif text:
        media_html = "💬
        "
        desc = text
    # Flatten newlines and cap the description for compact cards.
    desc = desc.replace("\n", " ").strip()
    if len(desc) > 120:
        desc = desc[:117] + "..."
    return (
        ""
    )
def _render_response(status: str, body_html: str = "") -> str:
    # Wrap an escaped status line plus optional pre-rendered body HTML.
    # NOTE(review): the wrapper markup literals below look truncated (f-strings
    # split across lines) — verify against the original template source.
    safe_status = html.escape(status or "")
    parts = []
    if safe_status:
        parts.append(f"{safe_status}
        ")
    if body_html:
        parts.append(body_html)
    content = "".join(parts)
    return f"{content}
    "
def _render_text_message(status: str, content: str) -> str:
    # Render tokenized text under a status line; status-only when content empty.
    content = (content or "").strip()
    if not content:
        return _render_response(status)
    safe_content = _format_tokenized_text(content)
    # NOTE(review): body wrapper markup looks truncated below — verify.
    body = f"{safe_content}
    "
    return _render_response(status, body)
def _is_mask_like_token(token: str) -> bool:
t = token.strip()
if not t:
return False
upper = t.upper()
return (
upper in {"[MASK]", "", "<|MASK|>", "", "<|MASK_TOKEN|>"}
or upper in {"", "MDM_MASK", "<|MDM_MASK|>"}
or "MASK" in upper
)
def _is_special_token(token: str) -> bool:
t = token.strip()
return bool(t) and t.startswith("<|") and t.endswith("|>")
def _format_tokenized_text(text: str) -> str:
    # Escape free text and specially render mask-like / <|...|> tokens.
    # NOTE(review): the pill/line-break markup literals below appear stripped
    # (broken replace targets) — restore the original HTML fragments.
    if not text:
        return ""
    # Handle both complete and partially-streamed mask tokens.
    mask_pat = r"(<[^>\n]*MASK[^>\n]*>?|\[MASK\]|MASK_TOKEN)"
    chunks = re.split(mask_pat, text, flags=re.IGNORECASE)
    out = []
    for chunk in chunks:
        if not chunk:
            continue
        if re.fullmatch(mask_pat, chunk, flags=re.IGNORECASE) or _is_mask_like_token(chunk):
            out.append("MASK")
            continue
        if chunk.isspace():
            out.append(chunk.replace("\n", "
            "))
            continue
        safe = html.escape(chunk)
        if _is_special_token(chunk):
            out.append(f"{safe}")
        else:
            out.append(safe)
    return "".join(out).replace("\n", "
    ")
def _render_audio_message(status: str, audio):
    # Render a (sample_rate, waveform) tuple as an inline WAV audio player.
    if not audio:
        return _render_response(status)
    sample_rate, data = audio
    if data is None:
        return _render_response(status)
    waveform = np.asarray(data, dtype=np.float32)
    if waveform.size == 0:
        return _render_response(status)
    # Mono arrays become a single-channel column vector.
    if waveform.ndim == 1:
        waveform = waveform[:, None]
    channels = waveform.shape[1]
    # Convert float [-1, 1] samples to 16-bit PCM.
    clipped = np.clip(waveform, -1.0, 1.0)
    pcm16 = (clipped * 32767.0).astype(np.int16)
    buffer = io.BytesIO()
    with wave.open(buffer, "wb") as wav_writer:
        wav_writer.setnchannels(channels)
        wav_writer.setsampwidth(2)  # 2 bytes per sample = 16-bit PCM
        wav_writer.setframerate(int(sample_rate))
        wav_writer.writeframes(pcm16.tobytes())
    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
    # NOTE(review): the <audio> tag markup appears to have been stripped from
    # this literal (it is empty), leaving `encoded` unused — restore the
    # original data-URI audio tag.
    audio_tag = (
        ""
    )
    return _render_response(status, audio_tag)
def _render_image_message(status: str, image: Image.Image):
    # Inline a PIL image as a base64 PNG under an escaped status line.
    # NOTE(review): the returned markup literals appear stripped/broken below —
    # restore the original HTML; code left byte-identical otherwise.
    if image is None:
        return _render_response(status)
    buffer = io.BytesIO()
    try:
        image.save(buffer, format="PNG")
    except Exception:
        # Unencodable image (e.g. closed handle): degrade to status-only.
        return _render_response(status)
    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
    safe_status = html.escape(status or "")
    return (
        ""
        f"
        {safe_status}
        "
        "
        
        "
        "
        "
    )
def _render_user_message(mode: str, message: str, image_in, audio_in, video_in, defer_video: bool = False) -> str:
    # Build the HTML for a user chat turn from optional text/image/audio/video.
    # NOTE(review): several HTML fragments in this function appear stripped or
    # truncated (empty/broken string literals) — restore the original markup;
    # code is otherwise left byte-identical.
    def _cache_media_copy(src_path: str) -> str:
        # Copy media into PREVIEW_DIR so the browser gets a stable servable path.
        path = str(src_path or "")
        if not path or not os.path.exists(path):
            return path
        try:
            suffix = Path(path).suffix or ""
            fd, dst = tempfile.mkstemp(prefix="omada_media_", suffix=suffix, dir=str(PREVIEW_DIR))
            os.close(fd)
            shutil.copy2(path, dst)
            return dst
        except Exception:
            return path
    def _to_browser_mp4(video_path: str) -> str:
        # Re-encode to H.264 yuv420p MP4 (+faststart) for broad browser playback;
        # returns the original path when ffmpeg is unavailable or fails.
        path = str(video_path or "")
        if not path:
            return path
        try:
            fd, out_path = tempfile.mkstemp(prefix="omada_preview_", suffix=".mp4", dir=str(PREVIEW_DIR))
            os.close(fd)
            cmd = [
                "ffmpeg",
                "-y",
                "-i",
                path,
                "-an",
                "-c:v",
                "libx264",
                "-pix_fmt",
                "yuv420p",
                "-movflags",
                "+faststart",
                out_path,
            ]
            proc = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            if proc.returncode == 0 and os.path.exists(out_path):
                return out_path
            if os.path.exists(out_path):
                os.remove(out_path)
        except Exception:
            pass
        return path
    def _video_data_uri(video_path: str, mime: str, max_bytes: int = 25 * 1024 * 1024) -> str:
        # Inline small videos as data URIs; '' when too large or unreadable.
        try:
            size = os.path.getsize(video_path)
            if size <= 0 or size > max_bytes:
                return ""
            with open(video_path, "rb") as f:
                encoded = base64.b64encode(f.read()).decode("ascii")
            return f"data:{mime};base64,{encoded}"
        except Exception:
            return ""
    def _video_poster_data_uri(video_path: str) -> str:
        # First video frame as a JPEG data URI (poster image); '' on failure.
        try:
            import cv2  # type: ignore
            cap = cv2.VideoCapture(video_path)
            ok, frame = cap.read()
            cap.release()
            if not ok or frame is None:
                return ""
            ok, buf = cv2.imencode(".jpg", frame)
            if not ok:
                return ""
            encoded = base64.b64encode(buf.tobytes()).decode("ascii")
            return f"data:image/jpeg;base64,{encoded}"
        except Exception:
            return ""
    parts = []
    text = (message or "").strip()
    if image_in is not None:
        try:
            if isinstance(image_in, Image.Image):
                buffer = io.BytesIO()
                image_in.save(buffer, format="PNG")
                encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
                parts.append(
                    ""
                )
            elif isinstance(image_in, str) and image_in:
                try:
                    with Image.open(image_in).convert("RGB") as pil_img:
                        buf = io.BytesIO()
                        pil_img.save(buf, format="PNG")
                        encoded = base64.b64encode(buf.getvalue()).decode("ascii")
                        parts.append(
                            ""
                        )
                except Exception:
                    # Unreadable as PIL: fall back to serving a cached copy by path.
                    image_path = _cache_media_copy(image_in)
                    parts.append(
                        ""
                    )
        except Exception:
            pass
    if mode == "MMU (Video → Text)" and video_in:
        if defer_video:
            # Deferred path: emit a placeholder now; the real player comes later.
            parts.append("")
            if text:
                parts.append(f"{html.escape(text)}
                ")
            return "".join(parts)
        video_path = None
        if isinstance(video_in, str):
            video_path = video_in
        elif isinstance(video_in, dict):
            video_path = video_in.get("path") or video_in.get("name")
        if video_path:
            cached_original = _cache_media_copy(video_path)
            preview_path = _to_browser_mp4(cached_original)
            poster = _video_poster_data_uri(cached_original)
            poster_attr = f" poster='{poster}'" if poster else ""
            source_path = str(preview_path or cached_original)
            fallback_path = str(cached_original)
            def _video_mime(path: str) -> str:
                # Map common container extensions to MIME types (default mp4).
                ext = os.path.splitext(path.lower())[1]
                return {
                    ".mp4": "video/mp4",
                    ".webm": "video/webm",
                    ".mov": "video/quicktime",
                    ".m4v": "video/mp4",
                    ".avi": "video/x-msvideo",
                    ".mkv": "video/x-matroska",
                }.get(ext, "video/mp4")
            parts.append(
                ""
            )
    if audio_in is not None:
        audio_path = ""
        if isinstance(audio_in, str):
            audio_path = audio_in
        elif isinstance(audio_in, dict):
            audio_path = audio_in.get("path") or audio_in.get("name") or ""
        elif isinstance(audio_in, (tuple, list)) and len(audio_in) == 2:
            # Raw (sample_rate, waveform) pair: write a temp 16-bit PCM WAV.
            try:
                sample_rate, data = audio_in
                waveform = np.asarray(data, dtype=np.float32)
                if waveform.ndim == 1:
                    waveform = waveform[:, None]
                waveform = np.clip(waveform, -1.0, 1.0)
                pcm16 = (waveform * 32767.0).astype(np.int16)
                fd, temp_audio = tempfile.mkstemp(prefix="omada_user_audio_", suffix=".wav", dir=str(PREVIEW_DIR))
                os.close(fd)
                with wave.open(temp_audio, "wb") as wav_writer:
                    wav_writer.setnchannels(pcm16.shape[1])
                    wav_writer.setsampwidth(2)
                    wav_writer.setframerate(int(sample_rate))
                    wav_writer.writeframes(pcm16.tobytes())
                audio_path = temp_audio
            except Exception:
                audio_path = ""
        if audio_path:
            ext = os.path.splitext(audio_path.lower())[1]
            mime = {
                ".wav": "audio/wav",
                ".mp3": "audio/mpeg",
                ".flac": "audio/flac",
                ".ogg": "audio/ogg",
                ".m4a": "audio/mp4",
            }.get(ext, "audio/wav")
            src = ""
            try:
                with open(audio_path, "rb") as f:
                    encoded_audio = base64.b64encode(f.read()).decode("ascii")
                src = f"data:{mime};base64,{encoded_audio}"
            except Exception:
                # Could not inline: serve a cached copy through /file= instead.
                audio_path = _cache_media_copy(audio_path)
                src = f"/file={quote(audio_path)}"
            parts.append(
                ""
            )
    if text:
        parts.append(f"{html.escape(text)}
        ")
    if not parts:
        # Nothing attached and no text: show the mode name as a placeholder.
        parts.append(f"[{html.escape(mode)}]
        ")
    return "".join(parts)
def _extract_video_path(video_in) -> str:
if isinstance(video_in, str):
return video_in
if isinstance(video_in, dict):
return str(video_in.get("path") or video_in.get("name") or "")
return ""
def _video_thumb_data_uri(video_path: str) -> str:
if not video_path or not os.path.exists(video_path):
return ""
try:
import cv2 # type: ignore
cap = cv2.VideoCapture(video_path)
ok, frame = cap.read()
cap.release()
if not ok or frame is None:
return ""
ok, buf = cv2.imencode(".jpg", frame)
if not ok:
return ""
encoded = base64.b64encode(buf.tobytes()).decode("ascii")
return f"data:image/jpeg;base64,{encoded}"
except Exception:
return ""
def _render_attachment_preview(image_in, audio_in, video_in) -> str:
    # Compact preview strip of attached image/video/audio, shown near the input.
    # NOTE(review): the preview-card markup in this function appears stripped/
    # corrupted (empty and broken string literals) — restore before shipping.
    items = []
    if image_in is not None:
        try:
            if isinstance(image_in, Image.Image):
                buf = io.BytesIO()
                image_in.save(buf, format="PNG")
                encoded = base64.b64encode(buf.getvalue()).decode("ascii")
                items.append(
                    ""
                    f"
                    
                    "
                    "
                    Image"
                    "
                    "
                )
            elif isinstance(image_in, str) and image_in:
                with Image.open(image_in).convert("RGB") as pil_img:
                    buf = io.BytesIO()
                    pil_img.save(buf, format="PNG")
                    encoded = base64.b64encode(buf.getvalue()).decode("ascii")
                    items.append(
                        ""
                        f"
                        
                        "
                        "
                        Image"
                        "
                        "
                    )
        except Exception:
            pass
    video_path = _extract_video_path(video_in)
    if video_path:
        thumb = _video_thumb_data_uri(video_path)
        if thumb:
            items.append(
                ""
                f"
                
                "
                "
                Video"
                "
                "
            )
        else:
            # No decodable frame: plain emoji tile instead of a thumbnail.
            items.append(
                ""
                "🎬 Video"
                "
                "
            )
    audio_path = ""
    if isinstance(audio_in, str):
        audio_path = audio_in
    elif isinstance(audio_in, dict):
        audio_path = str(audio_in.get("path") or audio_in.get("name") or "")
    if audio_path:
        filename = html.escape(Path(audio_path).name or "speech.wav")
        items.append(
            ""
            "🎤"
            f"(unknown)"
            "
            "
        )
    if not items:
        return ""
    return "" + "".join(items) + "
    "
def _render_task_chip(mode: str) -> str:
    # Small labeled chip (emoji + mode name) identifying the task of a turn.
    icon_map = {
        "Chat": "💬",
        "MMU (Image → Text)": "🖼️",
        "MMU (Video → Text)": "🎬",
        "Image Generation": "🎨",
        "Image Editing": "🛠️",
        "ASR": "🎙️",
        "TTS": "🔊",
    }
    icon = icon_map.get(mode, "🧩")
    safe_mode = html.escape(mode or "Task")
    # NOTE(review): the chip wrapper markup appears truncated (f-string split
    # across lines below) — restore the original HTML.
    return f"{icon} {safe_mode}
    "
# audio / video / image examples loaded from the asset snapshot
S2T_EXAMPLES = _load_media_examples("s2t", {".wav", ".mp3", ".flac", ".ogg"})
V2T_EXAMPLES = _load_media_examples("v2t", {".mp4", ".mov", ".avi", ".webm"})
# MMU images: every image under assets/mmu becomes an example paired with the
# default prompt.
MMU_DIR = ASSET_ROOT / "mmu"
MMU_EXAMPLES: List[List[str]] = []
DEFAULT_MMU_PROMPT = "Describe the given image in detail."
if MMU_DIR.exists():
    for path in sorted(
        [
            p
            for p in MMU_DIR.iterdir()
            if p.suffix.lower() in {".png", ".jpg", ".jpeg", ".webp"}
        ]
    ):
        MMU_EXAMPLES.append([
            str(path),
            DEFAULT_MMU_PROMPT,
        ])
# ---------------------------
# Global OmadaDemo instance
# ---------------------------
# Lazily-constructed singleton; APP_LOCK guards concurrent first-time builds.
APP = None  # type: ignore
APP_LOCK = threading.Lock()
def get_app() -> OmadaDemo:
    """Return the process-wide OmadaDemo, constructing it on first call.

    Double-checked locking keeps concurrent first requests from building the
    (heavy) demo twice. Also wires the downloaded style-centroid snapshot into
    the directory layout both MMaDA and EMOVA expect, via symlinks.
    """
    global APP
    if APP is not None:
        return APP
    with APP_LOCK:
        if APP is not None:
            return APP
        ckpt_dir = download_checkpoint()
        # Wire style centroids to expected locations
        style_targets = [
            MMADA_ROOT / "models" / "speech_tokenization" / "condition_style_centroid",
            PROJECT_ROOT
            / "EMOVA_speech_tokenizer"
            / "emova_speech_tokenizer"
            / "speech_tokenization"
            / "condition_style_centroid",
        ]
        style_root_resolved = STYLE_ROOT.resolve()
        for starget in style_targets:
            starget.parent.mkdir(parents=True, exist_ok=True)
            try:
                if starget.is_symlink():
                    try:
                        current = starget.resolve()
                    except Exception:
                        current = None
                    # Re-point stale symlinks at the freshly downloaded snapshot.
                    if current != style_root_resolved:
                        starget.unlink(missing_ok=True)
                        starget.symlink_to(STYLE_ROOT, target_is_directory=True)
                elif starget.exists():
                    # Existing concrete dir/file: keep as-is.
                    pass
                else:
                    starget.symlink_to(STYLE_ROOT, target_is_directory=True)
            except FileExistsError:
                # Concurrent init race (warmup vs request): safe to ignore.
                pass
        # Prefer a repo-local Space config first, then fall back to demo configs.
        space_demo_cfg = PROJECT_ROOT / "MMaDA" / "inference" / "demo" / "space_demo.yaml"
        default_cfg = PROJECT_ROOT / "MMaDA" / "inference" / "demo" / "demo.yaml"
        legacy_cfg = PROJECT_ROOT / "MMaDA" / "configs" / "mmada_demo.yaml"
        train_config = os.getenv("TRAIN_CONFIG_PATH")
        if not train_config:
            if space_demo_cfg.exists():
                train_config = str(space_demo_cfg)
            else:
                train_config = str(default_cfg if default_cfg.exists() else legacy_cfg)
        device = os.getenv("DEVICE", "cuda")
        APP = OmadaDemo(train_config=train_config, checkpoint=str(ckpt_dir), device=device)
        return APP
def warmup_model_status() -> str:
    """Report model readiness for the UI; never raises."""
    try:
        # ZeroGPU Spaces forbids CUDA init in the main process.
        if os.getenv("SPACE_ID"):
            return "Model status: Ready (lazy load on first request)."
        get_app()
    except Exception as exc:
        return f"Model status: Load failed ({exc})."
    return "Model status: Loaded. Inference is ready."
def _set_global_seed(seed: int = GLOBAL_SEED) -> None:
    """Seed python, numpy, torch (and CUDA, if present) RNGs with *seed*."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    # Deterministic cuDNN kernels; best-effort on builds without cuDNN.
    try:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    except Exception:
        pass
# ---------------------------
# ZeroGPU-wrapped handlers
# ---------------------------
# (== 그대로, 생략 없이 둔 부분 ==)
@spaces.GPU
def t2s_handler(text, max_tokens, steps, block_len, temperature, cfg_scale, gender, emotion, speed, pitch):
    """Text → speech: delegate to OmadaDemo.run_t2s, returning (audio, status)."""
    return get_app().run_t2s(
        text=text,
        max_new_tokens=int(max_tokens),
        steps=int(steps),
        block_length=int(block_len),
        temperature=float(temperature),
        cfg_scale=float(cfg_scale),
        gender_choice=gender,
        emotion_choice=emotion,
        speed_choice=speed,
        pitch_choice=pitch,
    )
@spaces.GPU
def s2t_handler(audio_path, steps, block_len, max_tokens, remasking):
    """ASR: stream (partial_text, status) updates from run_s2t_stream."""
    demo = get_app()
    stream = demo.run_s2t_stream(
        audio_path=audio_path,
        steps=int(steps),
        block_length=int(block_len),
        max_new_tokens=int(max_tokens),
        remasking=str(remasking),
        update_every=32,
    )
    for partial_text, partial_status in stream:
        yield partial_text, partial_status
@spaces.GPU
def v2t_handler(video, steps, block_len, max_tokens):
    """Video → text: stream (partial_text, status) updates from run_v2t_stream."""
    demo = get_app()
    stream = demo.run_v2t_stream(
        video_path=video,
        steps=int(steps),
        block_length=int(block_len),
        max_new_tokens=int(max_tokens),
        update_every=32,
    )
    for partial_text, partial_status in stream:
        yield partial_text, partial_status
@spaces.GPU
def chat_handler(message, max_tokens, steps, block_len, temperature):
    """Chat: stream (reply_html, status); the stream's `done` flag is dropped."""
    demo = get_app()
    stream = demo.run_chat_stream(
        message=message,
        max_new_tokens=int(max_tokens),
        steps=int(steps),
        block_length=int(block_len),
        temperature=float(temperature),
        update_every=32,
    )
    for partial_html, partial_status, _done in stream:
        yield partial_html, partial_status
@spaces.GPU
def mmu_handler(image, question, max_tokens, steps, block_len, temperature):
    """MMU (image → text): single-shot run, returning (text, status)."""
    return get_app().run_mmu(
        images=image,
        message=question,
        max_new_tokens=int(max_tokens),
        steps=int(steps),
        block_length=int(block_len),
        temperature=float(temperature),
    )
@spaces.GPU
def t2i_handler(prompt, timesteps, temperature, guidance):
    """Text → image: stream (partial_image, status) updates from run_t2i_stream."""
    demo = get_app()
    stream = demo.run_t2i_stream(
        prompt=prompt,
        timesteps=int(timesteps),
        temperature=float(temperature),
        guidance_scale=float(guidance),
        update_every=2,
    )
    for partial_image, partial_status in stream:
        yield partial_image, partial_status
@spaces.GPU
def i2i_handler(instruction, image, timesteps, temperature, guidance):
    """Image editing: stream (edited_image, status) updates from run_i2i_stream."""
    demo = get_app()
    stream = demo.run_i2i_stream(
        instruction=instruction,
        source_image=image,
        timesteps=int(timesteps),
        temperature=float(temperature),
        guidance_scale=float(guidance),
        update_every=2,
    )
    for edited_image, partial_status in stream:
        yield edited_image, partial_status
# ---------------------------
# Gradio UI (10 tabs + examples)
# ---------------------------
# Soft light theme; the detailed glass styling is layered on via CSS below.
theme = gr.themes.Soft(primary_hue="blue", neutral_hue="gray")
EXTRA_CSS = """
html, body, .gradio-container {
background: var(--omada-surface) !important;
color: var(--omada-text-primary) !important;
}
.omada-shell {
min-height: 0;
display: flex;
flex-direction: column;
padding-bottom: 6px;
}
.omada-sample-row {
gap: 10px !important;
justify-content: center !important;
margin-bottom: 6px;
}
.omada-sample-row .gradio-button {
max-width: 280px !important;
}
.omada-hero {
text-align: center;
margin: 40px 0 24px 0;
}
.omada-hero h2 {
font-size: 2.2rem;
margin: 0;
color: var(--omada-dark-text);
}
.omada-hero p {
margin: 10px 0 0 0;
color: var(--omada-dark-muted);
}
.omada-input-row {
gap: 6px !important;
align-items: center !important;
display: flex !important;
flex-direction: row !important;
justify-content: center !important;
position: relative !important;
inset: auto !important;
top: auto !important;
right: auto !important;
bottom: auto !important;
left: auto !important;
transform: none !important;
background: var(--omada-surface-alt);
padding: 6px 14px;
border-radius: 999px;
z-index: 5;
width: min(980px, calc(100vw - 24px));
margin: 4px auto 8px;
box-shadow: 0 8px 24px rgba(0,0,0,0.08);
box-sizing: border-box;
}
.omada-input-row > * {
min-width: 0 !important;
margin: 0 !important;
align-self: center !important;
background: transparent !important;
box-shadow: none !important;
border: none !important;
}
.omada-input-row .gradio-textbox textarea {
background: var(--omada-surface) !important;
color: var(--omada-text-primary) !important;
border-radius: 999px !important;
border: 1px solid var(--omada-border) !important;
padding: 6px 10px !important;
min-height: 36px !important;
}
.omada-plus-btn button,
.omada-send-btn button {
border-radius: 999px !important;
width: 36px !important;
min-width: 36px !important;
height: 36px !important;
background: var(--omada-surface) !important;
color: var(--omada-text-primary) !important;
border: 1px solid var(--omada-border) !important;
padding: 0 !important;
font-size: 1.2rem !important;
line-height: 1 !important;
}
.omada-plus-btn,
.omada-send-btn {
flex: 0 0 36px !important;
display: flex !important;
align-items: center !important;
justify-content: center !important;
}
.omada-auto {
width: 132px !important;
flex: 0 0 132px !important;
display: flex !important;
align-items: center !important;
position: relative !important;
border-radius: 999px !important;
overflow: visible !important;
}
.omada-auto button {
height: 36px !important;
min-height: 36px !important;
width: 100% !important;
font-size: 0.9rem !important;
padding: 0 12px !important;
background: rgba(255, 255, 255, 0.24) !important;
border: 1px solid rgba(171, 188, 214, 0.42) !important;
color: var(--omada-text-primary) !important;
border-radius: 999px !important;
appearance: none !important;
-webkit-appearance: none !important;
-moz-appearance: none !important;
text-align: left !important;
}
.omada-auto svg,
.omada-auto .wrap > svg,
.omada-auto .dropdown-arrow {
display: none !important;
}
.omada-plus-btn button,
.omada-send-btn button {
flex: 0 0 auto !important;
}
.omada-input-row .gradio-textbox {
width: 100% !important;
flex: 1 1 auto !important;
min-width: 0 !important;
opacity: 1 !important;
pointer-events: auto !important;
background: transparent !important;
border: none !important;
box-shadow: none !important;
}
.omada-input-row .gradio-textbox > div,
.omada-input-row .gradio-dropdown,
.omada-input-row .gradio-dropdown > div,
.omada-plus-btn,
.omada-send-btn,
.omada-auto {
background: transparent !important;
border: none !important;
box-shadow: none !important;
}
.omada-send-btn {
margin-left: -2px !important;
}
.omada-input-row .gradio-textbox textarea {
width: 100% !important;
display: block !important;
pointer-events: auto !important;
opacity: 1 !important;
cursor: text !important;
}
.omada-panel-backdrop {
position: fixed !important;
inset: 0 !important;
background: rgba(255, 255, 255, 0.22) !important;
backdrop-filter: blur(12px) saturate(120%) !important;
-webkit-backdrop-filter: blur(12px) saturate(120%) !important;
z-index: 1100 !important;
pointer-events: auto !important;
}
.omada-panel {
position: relative !important;
top: auto !important;
left: auto !important;
transform: none !important;
max-height: none !important;
overflow: visible !important;
width: min(980px, calc(100vw - 24px));
margin: 0 auto 14px auto;
box-shadow: 0 20px 60px rgba(0,0,0,0.12);
z-index: 9999;
pointer-events: auto !important;
isolation: isolate;
}
.omada-controls-safe {
position: fixed !important;
left: 50% !important;
top: 50% !important;
transform: translate(-50%, -50%) !important;
width: min(980px, calc(100vw - 36px)) !important;
max-height: min(82vh, 900px) !important;
overflow: auto !important;
margin: 0 !important;
z-index: 1200 !important;
border-radius: 34px !important;
}
.omada-panel * {
pointer-events: auto;
}
.omada-panel input,
.omada-panel select,
.omada-panel textarea,
.omada-panel button,
.omada-panel .gradio-slider,
.omada-panel .gradio-slider * {
pointer-events: auto !important;
}
.omada-panel .gradio-radio,
.omada-panel .gradio-radio label,
.omada-panel .gradio-radio input {
pointer-events: auto !important;
cursor: pointer !important;
}
.omada-panel .gradio-radio {
position: relative !important;
z-index: 300 !important;
}
.omada-panel .gradio-slider,
.omada-panel .gradio-slider .wrap,
.omada-panel .gradio-slider .wrap-inner,
.omada-panel .gradio-slider input[type="range"],
.omada-panel .gradio-slider input[type="number"],
.omada-panel .gradio-dropdown,
.omada-panel .gradio-dropdown select,
.omada-panel .gradio-textbox textarea {
pointer-events: auto !important;
position: relative !important;
z-index: 400 !important;
}
.omada-panel .gradio-slider input[type="range"] {
touch-action: pan-x !important;
}
.omada-panel .gradio-dropdown,
.omada-panel .gradio-dropdown .wrap {
z-index: 1000 !important;
}
.gradio-dropdown .options,
.gradio-dropdown .wrap .options {
z-index: 2000 !important;
}
.gradio-container .input-status,
.gradio-container .status,
.gradio-container .status-dot,
.gradio-container .status-indicator,
.gradio-container .label-wrap .status,
.gradio-container .label-wrap .status-dot {
display: none !important;
}
.omada-chatbot {
background: transparent !important;
border: none !important;
position: relative !important;
z-index: 1 !important;
}
.gradio-chatbot .message {
border-radius: 18px !important;
}
.gradio-chatbot .message.user {
margin-left: auto !important;
background: #2e3037 !important;
color: var(--omada-text-primary) !important;
pointer-events: auto !important;
}
.gradio-chatbot .message.bot {
margin-right: auto !important;
background: #22242a !important;
color: var(--omada-text-primary) !important;
pointer-events: auto !important;
}
.gradio-chatbot .message.user *,
.gradio-chatbot .message.bot * {
pointer-events: auto !important;
}
/* Generic panel card (dark-theme variables; re-skinned by the glass section). */
.omada-panel {
background: var(--omada-dark-panel);
border: 1px solid var(--omada-dark-border);
border-radius: 16px;
padding: 16px;
}
/* Pill-shaped sample/suggestion chip with frosted-glass styling. */
.omada-chip button {
border-radius: 999px !important;
background: linear-gradient(160deg, rgba(255,255,255,0.62), rgba(255,255,255,0.36)) !important;
color: #22324a !important;
border: 1px solid rgba(255,255,255,0.72) !important;
font-size: 0.68rem !important;
line-height: 1.2 !important;
padding: 6px 10px !important;
backdrop-filter: blur(14px) saturate(165%);
-webkit-backdrop-filter: blur(14px) saturate(165%);
box-shadow: 0 8px 20px rgba(36, 56, 92, 0.16) !important;
}
/* Strip Gradio's default button background inside the sample row wrappers. */
.omada-sample-row .gradio-button,
.omada-sample-row .gradio-button > div,
.omada-sample-row .gradio-button > button {
background: transparent !important;
}
.omada-chip button:hover {
transform: translateY(-1px);
background: linear-gradient(160deg, rgba(255,255,255,0.74), rgba(255,255,255,0.44)) !important;
}
/* Placeholder box shown while a video response is being generated. */
.omada-video-loading {
width: 360px;
max-width: min(42vw, 360px);
min-height: 64px;
border-radius: 12px;
border: 1px solid var(--omada-glass-border);
background: rgba(255,255,255,0.35);
display: flex;
align-items: center;
justify-content: center;
font-size: 0.9rem;
color: #304463;
backdrop-filter: blur(10px) saturate(150%);
-webkit-backdrop-filter: blur(10px) saturate(150%);
}
/* User-attached media thumbnails rendered inside chat bubbles. */
.omada-user-media {
margin-bottom: 6px;
}
.omada-user-media img,
.omada-user-media video {
max-width: 240px;
width: 240px;
max-height: 180px;
object-fit: contain;
border-radius: 10px;
border: 1px solid var(--omada-border);
display: block;
}
/* Videos get a wider frame than still images. */
.omada-user-media .omada-user-video {
width: 360px;
max-width: min(42vw, 360px);
max-height: 240px;
}
.omada-user-media audio {
width: 360px;
max-width: min(42vw, 360px);
display: block;
}
/* Muted color for in-progress/status text on bot responses. */
.omada-response-status {
color: var(--omada-dark-muted) !important;
}
/* Inline token pills used by the diffusion-token visualization. */
.omada-token-pill {
display: inline-block;
padding: 1px 8px;
margin: 1px 2px;
border-radius: 999px;
border: 1px solid var(--omada-border);
font-size: 0.82em;
line-height: 1.6;
vertical-align: baseline;
background: #f7f8fa;
}
/* Masked (not-yet-denoised) token: blue emphasis. */
.omada-token-mask {
border-color: #8da2c6;
background: #eef3ff;
color: #1f3d7a;
font-weight: 600;
}
/* Special/control token: neutral gray. */
.omada-token-special {
border-color: #c5ccd8;
background: #f3f4f7;
color: #4b5563;
}
/* Apple-like glass look: light theme layered on top of the earlier dark
   defaults — later source order wins, so these override the dark palette. */
:root {
--omada-surface: #f7faff;
--omada-surface-alt: #f3f8ff;
--omada-glass-bg: rgba(255, 255, 255, 0.62);
--omada-glass-strong: rgba(255, 255, 255, 0.72);
--omada-glass-border: rgba(221, 232, 248, 0.92);
--omada-glass-shadow: 0 14px 34px rgba(136, 162, 196, 0.16);
}
/* Page background: soft radial highlight over a pale blue gradient. */
html, body, .gradio-container {
background:
radial-gradient(1200px 520px at 10% -10%, rgba(255,255,255,0.96), rgba(255,255,255,0.78) 48%, rgba(247,251,255,0.96) 100%),
linear-gradient(135deg, #f8fbff 0%, #f3f8ff 45%, #f7fbff 100%) !important;
}
/* Apply the frosted-glass surface to every major interactive container. */
.omada-input-row,
.omada-controls-safe,
.omada-panel,
.gradio-chatbot .message,
.omada-chip button,
.omada-input-row .gradio-textbox textarea,
.omada-plus-btn button,
.omada-send-btn button,
.omada-auto select {
background: var(--omada-glass-bg) !important;
border: 1px solid var(--omada-glass-border) !important;
box-shadow: var(--omada-glass-shadow) !important;
backdrop-filter: blur(22px) saturate(175%);
-webkit-backdrop-filter: blur(22px) saturate(175%);
}
/* Settings/controls modal spacing (tightened again further below). */
.omada-controls-safe {
padding: 14px 16px !important;
border-radius: 28px !important;
margin: 10px auto 10px auto !important;
}
.omada-controls-safe > div {
padding: 10px 12px !important;
border-radius: 22px !important;
}
.omada-controls-safe .gradio-button,
.omada-controls-safe button,
.omada-controls-safe .gradio-dropdown,
.omada-controls-safe .gradio-textbox,
.omada-controls-safe .gradio-slider {
border-radius: 16px !important;
}
.omada-controls-safe .gradio-button {
border: 1px solid var(--omada-glass-border) !important;
}
/* Light glass re-theme of the chat bubbles (overrides the dark values above). */
.gradio-chatbot .message.user {
background: var(--omada-glass-strong) !important;
color: #1f2937 !important;
}
.gradio-chatbot .message.bot {
background: rgba(255, 255, 255, 0.50) !important;
color: #1f2937 !important;
}
/* Keep generated images crisp (no frosted overlay on image replies) */
.gradio-chatbot .message {
backdrop-filter: none !important;
-webkit-backdrop-filter: none !important;
}
/* Strip the bubble chrome entirely when a bot message is image-only.
   NOTE(review): relies on the CSS :has() selector — supported in all major
   evergreen browsers since ~2023; older browsers simply keep the bubble. */
.gradio-chatbot .message.bot:has(.omada-image-only) {
background: transparent !important;
border: none !important;
box-shadow: none !important;
padding: 0 !important;
margin: 0 !important;
}
.omada-image-only {
display: inline-block;
background: transparent !important;
border: 0 !important;
box-shadow: none !important;
padding: 0 !important;
margin: 0 !important;
opacity: 1 !important;
filter: none !important;
}
/* Also neutralize any styling on descendants so the image renders untinted. */
.gradio-chatbot .message.bot:has(.omada-image-only) *,
.omada-image-only * {
background: transparent !important;
box-shadow: none !important;
filter: none !important;
opacity: 1 !important;
}
/* Caption/status line shown above a generated image. */
.omada-image-status {
margin: 0 0 6px 0 !important;
font-size: 0.85rem !important;
color: #42526b !important;
font-weight: 600 !important;
}
.omada-chip button {
color: #273247 !important;
}
.omada-panel {
border-radius: 28px !important;
padding: 20px !important;
}
.omada-input-row {
border-radius: 999px !important;
}
/* Pointer-events re-enable: some overlay layers otherwise swallow clicks
   on the composer textbox and sample buttons. */
.omada-main-input,
.omada-main-input * {
pointer-events: auto !important;
}
.omada-main-input textarea,
.omada-main-input input {
pointer-events: auto !important;
position: relative !important;
z-index: 40 !important;
}
.omada-sample-row,
.omada-sample-row * {
pointer-events: auto !important;
}
.omada-sample-row,
.omada-input-row {
position: relative !important;
z-index: 25 !important;
}
.omada-sample-row .gradio-button,
.omada-input-row .gradio-button,
.omada-input-row button {
pointer-events: auto !important;
}
/* Compact controls (keep chat bubbles unchanged) */
.omada-shell,
.omada-controls-safe,
.omada-input-row,
.omada-sample-row {
font-size: 0.88rem !important;
}
/* Sample row width/gap — re-tuned again by later "final override" sections. */
.omada-sample-row {
width: min(980px, calc(100vw - 24px)) !important;
margin: 0 auto 4px auto !important;
gap: 6px !important;
}
.omada-sample-row .gradio-button {
flex: 1 1 0 !important;
max-width: none !important;
}
/* Early ultra-compact chip sizing. NOTE(review): these tiny values
   (14px height, 0.34rem text) are superseded by higher-specificity rules
   further down (16px → 34px → 40px; larger fonts); kept for cascade layering. */
.omada-chip button {
min-height: 14px !important;
height: 14px !important;
font-size: 0.34rem !important;
line-height: 1.0 !important;
padding: 0 3px !important;
border-radius: 999px !important;
}
.omada-chip button * {
font-size: 0.34rem !important;
line-height: 1.0 !important;
}
.omada-sample-row .omada-chip button,
.omada-sample-row .gradio-button button,
.omada-sample-row .omada-chip button span,
.omada-sample-row .gradio-button button span,
.omada-sample-row .omada-chip button p,
.omada-sample-row .gradio-button button p,
.omada-sample-row .omada-chip button div {
font-size: 0.34rem !important;
line-height: 1.05 !important;
}
/* Force sample chip size against Gradio theme defaults */
.omada-sample-row .omada-chip,
.omada-sample-row .omada-chip .gradio-button,
.omada-sample-row .omada-chip .gradio-button > div,
.omada-sample-row .omada-chip .gradio-button > button,
.omada-sample-row .omada-chip button {
min-height: 16px !important;
height: 16px !important;
max-height: 16px !important;
padding-top: 0 !important;
padding-bottom: 0 !important;
}
.omada-sample-row .omada-chip button,
.omada-sample-row .omada-chip button span,
.omada-sample-row .omada-chip button p,
.omada-sample-row .omada-chip button div {
font-size: 0.42rem !important;
line-height: 1 !important;
padding: 0 3px !important;
}
/* Composer row spacing (revised again by the layout-fix section below). */
.omada-input-row {
padding: 4px 10px !important;
margin: 2px auto 6px !important;
}
/* Small round "+" (attach) and send buttons. */
.omada-plus-btn button,
.omada-send-btn button {
width: 30px !important;
min-width: 30px !important;
height: 30px !important;
font-size: 1rem !important;
}
.omada-plus-btn,
.omada-send-btn {
flex: 0 0 30px !important;
}
/* Fixed-width task/mode selector pill. */
.omada-auto {
width: 104px !important;
flex: 0 0 104px !important;
border-radius: 999px !important;
}
.omada-auto button {
height: 30px !important;
min-height: 30px !important;
font-size: 0.9rem !important;
padding: 0 8px !important;
border-radius: 999px !important;
text-align: left !important;
background: rgba(255, 255, 255, 0.24) !important;
border: 1px solid rgba(171, 188, 214, 0.42) !important;
box-shadow: inset 0 0 0 0.5px rgba(255, 255, 255, 0.45) !important;
}
/* Gradio dropdown text (new/old DOM variants) */
.omada-auto,
.omada-auto *,
.omada-auto .wrap,
.omada-auto .wrap-inner,
.omada-auto .wrap-inner input,
.omada-auto input,
.omada-auto button,
.omada-auto button span {
font-size: 0.9rem !important;
line-height: 1.0 !important;
}
/* Make the textbox blend into the glass composer: no box, no border. */
.omada-input-row .gradio-textbox textarea {
min-height: 30px !important;
padding: 4px 9px !important;
font-size: 0.9rem !important;
background: transparent !important;
border: none !important;
box-shadow: none !important;
outline: none !important;
}
.omada-input-row .gradio-textbox > div,
.omada-input-row .gradio-textbox .wrap,
.omada-input-row .gradio-textbox label {
background: transparent !important;
border: none !important;
box-shadow: none !important;
}
/* Same treatment for every wrapper variant Gradio may render. */
.omada-main-input,
.omada-main-input > div,
.omada-main-input .wrap,
.omada-main-input .wrap-inner,
.omada-main-input .block,
.omada-main-input .container,
.omada-main-input .scroll-hide,
.omada-main-input .scroll-hide > div,
.omada-main-input [data-testid="textbox"] {
background: transparent !important;
border: none !important;
box-shadow: none !important;
outline: none !important;
}
/* Suppress focus rings at every nesting level of the textbox. */
.omada-main-input:focus,
.omada-main-input:focus-within,
.omada-main-input > div:focus,
.omada-main-input > div:focus-within,
.omada-main-input .wrap:focus,
.omada-main-input .wrap:focus-within,
.omada-main-input .wrap-inner:focus,
.omada-main-input .wrap-inner:focus-within,
.omada-main-input textarea:focus,
.omada-main-input textarea:focus-visible {
border: none !important;
box-shadow: none !important;
outline: none !important;
}
.omada-main-input textarea,
.omada-main-input textarea::placeholder {
background: transparent !important;
}
/* hide textbox secondary footer/counter text (e.g., "seconds") */
.omada-main-input .footer,
.omada-main-input [data-testid="textbox-footer"],
.omada-main-input .char-counter,
.omada-main-input small,
.omada-main-input .secondary-text {
display: none !important;
}
/* Tighter modal padding (overrides the earlier 14px/10px values). */
.omada-controls-safe {
padding: 10px 12px !important;
}
.omada-controls-safe > div {
padding: 8px 10px !important;
}
/* Uniform compact typography for everything inside the controls modal. */
.omada-controls-safe .gradio-button,
.omada-controls-safe button,
.omada-controls-safe .gradio-dropdown,
.omada-controls-safe .gradio-textbox,
.omada-controls-safe .gradio-slider,
.omada-controls-safe label,
.omada-controls-safe p,
.omada-controls-safe span {
font-size: 0.88rem !important;
}
/* modal transparency: outer is whiter, inner is more transparent */
.omada-controls-safe {
background: linear-gradient(165deg, rgba(255, 255, 255, 0.72), rgba(245, 250, 255, 0.60)) !important;
border: 1px solid rgba(218, 231, 248, 0.90) !important;
box-shadow: 0 18px 42px rgba(123, 150, 188, 0.16) !important;
}
.omada-controls-safe > div,
.omada-controls-safe .gr-box,
.omada-controls-safe .gr-form,
.omada-controls-safe .gr-block,
.omada-controls-safe .gradio-row,
.omada-controls-safe .gradio-column {
background: rgba(255, 255, 255, 0.44) !important;
border-color: rgba(225, 237, 252, 0.86) !important;
box-shadow: none !important;
}
.omada-controls-safe .gradio-button,
.omada-controls-safe button,
.omada-controls-safe .gradio-dropdown,
.omada-controls-safe .gradio-dropdown > div,
.omada-controls-safe .gradio-textbox,
.omada-controls-safe .gradio-textbox > div,
.omada-controls-safe .gradio-slider {
background: rgba(255, 255, 255, 0.56) !important;
border: 1px solid rgba(221, 234, 251, 0.90) !important;
box-shadow: none !important;
}
.omada-controls-safe .gradio-button:hover,
.omada-controls-safe button:hover {
background: rgba(255, 255, 255, 0.72) !important;
}
/* Primary (submit) button: solid blue, kept fully opaque even when disabled
   so the modal's confirm action never looks grayed out mid-generation. */
.omada-controls-safe .primary,
.omada-controls-safe .primary button {
background: linear-gradient(165deg, #4d8ef7, #3f7ee8) !important;
color: #ffffff !important;
border: 1px solid #3d75d8 !important;
box-shadow: 0 8px 20px rgba(69, 126, 233, 0.35) !important;
}
.omada-controls-safe .primary:hover,
.omada-controls-safe .primary button:hover {
background: linear-gradient(165deg, #5b99fb, #4b87ed) !important;
}
.omada-controls-safe .primary:disabled,
.omada-controls-safe .primary button:disabled,
.omada-controls-safe .primary[disabled],
.omada-controls-safe .primary button[disabled] {
opacity: 1 !important;
color: #ffffff !important;
background: linear-gradient(165deg, #4d8ef7, #3f7ee8) !important;
border: 1px solid #3d75d8 !important;
}
/* Blue chip showing the currently selected task/modality. */
.omada-selected-task-chip {
display: inline-flex;
align-items: center;
padding: 6px 12px;
border-radius: 999px;
color: #fff;
background: linear-gradient(165deg, #4d8ef7, #3f7ee8);
border: 1px solid #3d75d8;
font-weight: 700;
box-shadow: 0 8px 18px rgba(69, 126, 233, 0.30);
}
/* Sticky welcome headline; pointer-events:none lets clicks pass through
   to the chat underneath. Position re-tuned by later override sections. */
.omada-ready-hero {
position: sticky;
top: clamp(8px, 12vh, 90px);
margin: 0 auto clamp(56px, 10vh, 140px) auto;
width: min(760px, calc(100vw - 48px));
z-index: 15;
pointer-events: none;
font-size: clamp(1.2rem, 2.2vw, 2rem);
font-weight: 700;
color: #2f3f58;
letter-spacing: -0.01em;
text-align: center;
}
.omada-ready-hero .omada-ready-sub {
margin-top: 5px;
font-size: clamp(0.78rem, 1.2vw, 0.95rem);
font-weight: 500;
color: #5b6e8d;
line-height: 1.4;
}
/* Sample list container style aligned with main input row */
.omada-sample-row {
width: min(900px, calc(100vw - 80px)) !important;
margin: 0 auto 8px auto !important;
padding: 0 !important;
background: transparent !important;
border: none !important;
box-shadow: none !important;
gap: 10px !important;
}
/* Readable chip text (supersedes the tiny 0.42rem sizing earlier). */
.omada-sample-row .omada-chip button,
.omada-sample-row .omada-chip button span,
.omada-sample-row .omada-chip button p,
.omada-sample-row .omada-chip button div {
font-size: 0.9rem !important;
line-height: 1.15 !important;
font-weight: 700 !important;
}
.omada-sample-row .omada-chip button {
min-height: 34px !important;
height: 34px !important;
padding: 0 12px !important;
border-radius: 999px !important;
background: rgba(255, 255, 255, 0.46) !important;
border: 1px solid rgba(180, 198, 224, 0.45) !important;
box-shadow: 0 6px 14px rgba(120, 148, 186, 0.08) !important;
}
/* final override: keep selector pill with subtle visible border */
.omada-input-row .omada-auto,
.omada-input-row .omada-auto button {
border-radius: 999px !important;
}
.omada-input-row .omada-auto button {
border: 1px solid rgba(160, 178, 206, 0.72) !important;
box-shadow:
inset 0 0 0 1px rgba(255, 255, 255, 0.52),
0 1px 4px rgba(125, 146, 176, 0.14) !important;
}
/* hard override: keep subtle border visible for selector pill */
.omada-input-row .omada-auto,
.omada-input-row .omada-auto > div,
.omada-input-row .omada-auto button,
.omada-input-row .omada-auto .gradio-button,
.omada-input-row .omada-auto .gradio-button > div {
border: 1px solid rgba(160, 178, 206, 0.72) !important;
border-radius: 999px !important;
box-shadow:
inset 0 0 0 1px rgba(255, 255, 255, 0.52),
0 1px 4px rgba(125, 146, 176, 0.14) !important;
}
/* hard override: sample chip vertical size */
.omada-sample-row .gradio-button {
flex: 0 0 auto !important;
}
/* Final fixed chip geometry: 120x40 pills. */
.omada-sample-row .omada-chip,
.omada-sample-row .omada-chip .gradio-button,
.omada-sample-row .omada-chip .gradio-button > div,
.omada-sample-row .omada-chip button {
min-width: 120px !important;
width: 120px !important;
max-width: 120px !important;
min-height: 40px !important;
height: 40px !important;
max-height: 40px !important;
border-radius: 999px !important;
}
/* Sticky composer wrapper pinned near the viewport bottom. */
.omada-input-stack {
width: min(980px, calc(100vw - 24px));
margin: 0 auto 6px auto;
position: sticky;
bottom: 6px;
z-index: 40;
}
/* Composer becomes a vertical stack: textbox on top, button row below. */
.omada-input-row {
display: flex !important;
flex-direction: column !important;
justify-content: flex-end !important;
align-items: stretch !important;
gap: 6px !important;
padding: 10px 12px !important;
border-radius: 36px !important;
min-height: 110px;
}
/* final layout override */
.omada-bottom-row {
display: flex !important;
flex-wrap: nowrap !important;
align-items: flex-end !important;
justify-content: flex-start !important;
gap: 8px !important;
}
.omada-bottom-row > * {
flex: 0 0 auto !important;
margin: 0 !important;
}
/* Textbox stretches to fill; negative margin compensates for Gradio's
   built-in component padding inside the glass pill. */
.omada-bottom-row .omada-main-input {
flex: 1 1 auto !important;
margin: -10px !important;
min-width: 0 !important;
width: auto !important;
max-width: none !important;
}
.omada-bottom-row .omada-main-input,
.omada-bottom-row .omada-main-input > div,
.omada-bottom-row .omada-main-input .wrap,
.omada-bottom-row .omada-main-input .wrap-inner,
.omada-bottom-row .omada-main-input .block,
.omada-bottom-row .omada-main-input [data-testid="textbox"],
.omada-bottom-row .omada-main-input .scroll-hide,
.omada-bottom-row .omada-main-input .scroll-hide > div {
background: transparent !important;
border: none !important;
box-shadow: none !important;
width: auto !important;
min-width: 0 !important;
}
.omada-bottom-row .omada-main-input textarea {
background: transparent !important;
border: none !important;
border-radius: 14px !important;
box-shadow: none !important;
}
/* Legacy in-row send button styling.
   NOTE(review): a later "hard-fix" rule hides .omada-bottom-row .omada-send-btn
   entirely in favor of .omada-send-btn-fix; kept for cascade layering. */
.omada-bottom-row .omada-send-btn {
margin-left: auto !important;
flex: 0 0 auto !important;
align-self: flex-end !important;
}
.omada-bottom-row .omada-send-btn > div {
width: 42px !important;
min-width: 42px !important;
}
.omada-bottom-row .omada-send-btn button,
.omada-bottom-row .omada-send-btn > div > button {
width: 42px !important;
min-width: 42px !important;
height: 42px !important;
display: flex !important;
align-items: center !important;
justify-content: center !important;
border-radius: 999px !important;
border: 2px solid #4f96ff !important;
background: rgba(130, 196, 255, 0.28) !important;
color: #1153a6 !important;
box-shadow: inset 0 0 0 1px rgba(214, 239, 255, 0.88), 0 0 0 1px rgba(79, 150, 255, 0.28) !important;
}
.omada-bottom-row .omada-send-btn button:hover {
background: rgba(130, 196, 255, 0.38) !important;
}
/* scope input-row cleanup to avoid wrapper layout breakage */
.omada-input-row .omada-plus-btn,
.omada-input-row .omada-task-btn,
.omada-input-row .omada-custom-btn,
.omada-input-row .omada-task-chip-inline,
.omada-input-row .omada-main-input,
.omada-input-row .omada-send-btn {
margin: 0 !important;
}
/* Attachment preview strip shown above the composer when media is queued. */
.omada-attach-preview-wrap {
min-height: 0 !important;
width: 100% !important;
margin: 0 !important;
padding: 0 !important;
}
.omada-attach-preview-wrap .html-container {
min-height: 0 !important;
height: auto !important;
padding: 0 !important;
margin: 0 !important;
}
/* Collapse the wrapper entirely when the HTML component renders nothing. */
.omada-attach-preview-wrap .html-container:empty {
display: none !important;
}
.omada-attach-preview {
display: flex;
align-items: center;
gap: 8px;
justify-content: flex-start;
min-height: 0;
flex-wrap: wrap;
}
/* The preview HTML marks itself .omada-empty when there are no attachments;
   hide both the strip and (via :has) its wrapper so no gap remains. */
.omada-attach-preview.omada-empty {
display: none !important;
}
.omada-attach-preview-wrap:has(.omada-attach-preview.omada-empty) {
display: none !important;
}
/* One card per queued attachment: thumbnail + filename label. */
.omada-attach-item {
display: inline-flex;
flex-direction: column;
align-items: center;
justify-content: center;
gap: 4px;
padding: 6px;
border-radius: 10px;
background: rgba(255, 255, 255, 0.46);
border: 1px solid rgba(190, 208, 234, 0.65);
min-width: 66px;
}
.omada-attach-item img {
width: 54px;
height: 42px;
border-radius: 8px;
object-fit: cover;
}
.omada-attach-item span {
font-size: 0.68rem;
color: #2f4568;
font-weight: 600;
}
/* Audio attachments use a wider horizontal card. */
.omada-attach-audio {
flex-direction: row;
min-width: 140px;
padding: 10px 12px;
}
.omada-audio-icon {
font-size: 0.95rem;
}
/* Task-picker and custom-prompt buttons: borderless text pills. */
.omada-task-btn,
.omada-custom-btn {
flex: 0 0 auto !important;
}
.omada-task-btn button,
.omada-custom-btn button {
height: 30px !important;
min-height: 30px !important;
border-radius: 999px !important;
font-size: 0.72rem !important;
font-weight: 700 !important;
padding: 0 10px !important;
background: transparent !important;
border: none !important;
box-shadow: none !important;
}
/* "+" attach button: flat, no glass box. */
.omada-plus-btn button {
background: transparent !important;
border: none !important;
box-shadow: none !important;
}
/* Inline variant of the selected-task chip rendered inside the composer. */
.omada-task-chip-inline {
flex: 0 0 auto !important;
min-width: 0 !important;
}
.omada-task-chip-inline .omada-selected-task-chip {
padding: 2px 8px !important;
font-size: 0.72rem !important;
box-shadow: none;
margin-left: -2px !important;
}
/* keep sample chips/task-custom typography aligned */
.omada-sample-row .omada-chip button,
.omada-sample-row .omada-chip button span,
.omada-sample-row .omada-chip button p,
.omada-sample-row .omada-chip button div,
.omada-task-btn button,
.omada-custom-btn button,
.omada-task-chip-inline .omada-selected-task-chip {
font-size: 0.72rem !important;
line-height: 1.05 !important;
}
/* =========================
   FIX: composer layout (textbox above buttons)
   ========================= */
/* Reserve right-side space so the absolutely-positioned send button
   never overlaps the textarea text. */
.omada-input-row {
width: 100% !important;
position: relative !important;
justify-content: flex-start !important;
align-items: stretch !important;
gap: 10px !important;
padding-right: 76px !important; /* room for absolute send button */
}
.omada-main-input,
.omada-main-input > div,
.omada-main-input .wrap,
.omada-main-input .wrap-inner,
.omada-main-input [data-testid="textbox"] {
width: 100% !important;
max-width: 100% !important;
}
.omada-main-input textarea {
width: 100% !important;
min-height: 34px !important;
background: transparent !important;
border: none !important;
box-shadow: none !important;
}
.omada-bottom-row {
width: 100% !important;
max-width: 100% !important;
display: flex !important;
flex-wrap: nowrap !important;
justify-content: flex-start !important;
align-items: center !important;
gap: 8px !important;
margin-top: -5px !important;
}
.omada-bottom-row > * {
min-width: 0 !important;
flex: 0 0 auto !important;
}
/* Vertically-centered absolute send button.
   NOTE(review): superseded below — .omada-bottom-row .omada-send-btn is later
   hidden and replaced by .omada-send-btn-fix anchored bottom-right. */
.omada-send-btn {
position: absolute !important;
right: 16px !important;
top: 50% !important;
transform: translateY(-50%) !important;
margin: 0 !important;
}
.omada-send-btn > div {
width: 46px !important;
min-width: 46px !important;
}
.omada-send-btn button,
.omada-send-btn > div > button {
width: 46px !important;
min-width: 46px !important;
height: 46px !important;
border-radius: 999px !important;
border: 1.8px solid rgba(98, 170, 255, 0.95) !important;
background: linear-gradient(160deg, rgba(185, 224, 255, 0.40), rgba(118, 179, 255, 0.26)) !important;
box-shadow:
inset 0 0 0 1px rgba(229, 245, 255, 0.95),
0 10px 22px rgba(79, 146, 255, 0.24) !important;
color: #0d4fa7 !important;
font-size: 1.75rem !important;
font-weight: 800 !important;
line-height: 1 !important;
display: flex !important;
align-items: center !important;
justify-content: center !important;
}
.omada-send-btn button:hover {
background: linear-gradient(160deg, rgba(196, 230, 255, 0.52), rgba(129, 186, 255, 0.34)) !important;
}
/* =========================
   Task / Custom / Chat: same font and borderless
   ========================= */
.omada-task-btn button,
.omada-custom-btn button {
font-size: 0.37rem !important;
line-height: 1.0 !important;
font-weight: 700 !important;
height: 30px !important;
min-height: 30px !important;
padding: 0 10px !important;
border-radius: 999px !important;
border: none !important;
box-shadow: none !important;
background: transparent !important;
color: #22324a !important;
}
/* keep selected task chip blue, but match typography */
.omada-task-chip-inline .omada-selected-task-chip {
font-size: 0.78rem !important;
line-height: 1.0 !important;
padding: 5px 11px !important;
background: linear-gradient(165deg, #4d8ef7, #3f7ee8) !important;
color: #ffffff !important;
border: 1px solid #3d75d8 !important;
box-shadow: 0 6px 14px rgba(69, 126, 233, 0.28) !important;
}
.omada-task-btn button *,
.omada-custom-btn button * {
font-size: 0.37rem !important;
line-height: 1.0 !important;
}
/* hard-fix: keep chat/custom glued together on left */
.omada-bottom-row .omada-task-chip-inline,
.omada-bottom-row .omada-task-chip-inline > div,
.omada-bottom-row .omada-task-chip-inline > div > div,
.omada-bottom-row .omada-custom-btn,
.omada-bottom-row .omada-custom-btn > div {
display: inline-flex !important;
width: auto !important;
max-width: max-content !important;
flex: 0 0 auto !important;
margin-left: 0 !important;
}
/* hard-fix: if legacy send exists in row, hide it */
.omada-bottom-row .omada-send-btn {
display: none !important;
}
/* hard-fix: dedicated send button on right-bottom */
.omada-input-row .omada-send-btn-fix {
display: inline-flex !important;
position: absolute !important;
right: 14px !important;
bottom: 14px !important;
top: auto !important;
left: auto !important;
transform: none !important;
z-index: 60 !important;
width: 44px !important;
min-width: 44px !important;
max-width: 44px !important;
flex: 0 0 44px !important;
margin: 0 !important;
border-radius: 999px !important;
}
/* Halo ring drawn behind the send button via ::before (restyled flat later). */
.omada-input-row .omada-send-btn-fix::before {
content: "" !important;
position: absolute !important;
inset: -3px !important;
border-radius: 999px !important;
border: 1.6px solid rgba(168, 216, 255, 0.92) !important;
background: radial-gradient(circle at 30% 20%, rgba(226, 246, 255, 0.40), rgba(170, 213, 255, 0.18)) !important;
box-shadow: 0 8px 20px rgba(78, 143, 236, 0.22) !important;
pointer-events: none !important;
z-index: -1 !important;
}
.omada-input-row .omada-send-btn-fix > div {
width: 44px !important;
min-width: 44px !important;
max-width: 44px !important;
flex: 0 0 44px !important;
}
.omada-input-row .omada-send-btn-fix button,
.omada-input-row .omada-send-btn-fix > div > button {
width: 44px !important;
min-width: 44px !important;
max-width: 44px !important;
height: 44px !important;
border-radius: 999px !important;
border: 2px solid rgba(98, 170, 255, 0.98) !important;
background: linear-gradient(160deg, rgba(185, 224, 255, 0.46), rgba(118, 179, 255, 0.30)) !important;
box-shadow: inset 0 0 0 1px rgba(229, 245, 255, 0.95), 0 10px 22px rgba(79, 146, 255, 0.24) !important;
color: #0d4fa7 !important;
display: flex !important;
align-items: center !important;
justify-content: center !important;
padding: 0 !important;
}
.omada-input-row .omada-send-btn-fix button span,
.omada-input-row .omada-send-btn-fix button p {
font-size: 1.6rem !important;
line-height: 1 !important;
font-weight: 800 !important;
}
/* Remove the gray action icons Gradio injects inside the textbox;
   the textarea itself is explicitly kept visible below. */
.omada-main-input [class*="icon"],
.omada-main-input button,
.omada-main-input [role="button"] {
display: none !important;
}
.omada-main-input textarea {
display: block !important;
}
/* final send border lock */
.omada-input-row .omada-send-btn-fix button,
.omada-input-row .omada-send-btn-fix > div > button {
border: 1.8px solid rgba(116, 182, 248, 1) !important;
outline: 1px solid rgba(198, 229, 255, 0.95) !important;
border-radius: 999px !important;
}
.omada-input-row .omada-send-btn-fix::before {
border: 1.2px solid rgba(153, 207, 255, 0.9) !important;
}
/* compact vertical height */
.omada-input-row {
min-height: 62px !important;
padding-top: 0 !important;
padding-bottom: 1px !important;
gap: 0 !important;
}
.omada-main-input {
margin-top: 0 !important;
transform: translateY(-10px) !important;
}
.omada-main-input textarea {
min-height: 26px !important;
padding-top: 0 !important;
padding-bottom: 1px !important;
}
/* outer container: slightly less rounded */
.omada-input-row {
border-radius: 28px !important;
}
/* bigger plus button */
.omada-bottom-row .omada-plus-btn button {
width: 46px !important;
min-width: 46px !important;
height: 46px !important;
font-size: 1.9rem !important;
font-weight: 800 !important;
}
/* final typography lock */
.omada-task-btn button,
.omada-custom-btn button,
.omada-task-btn button *,
.omada-custom-btn button * {
font-size: 0.46rem !important;
line-height: 1 !important;
border: none !important;
box-shadow: none !important;
}
.omada-task-chip-inline .omada-selected-task-chip {
font-size: 0.9rem !important;
line-height: 1 !important;
padding: 8px 14px !important;
}
/* ultimate final lock */
/* Strip every wrapper layer Gradio may add around the task/custom buttons. */
.omada-bottom-row .omada-task-btn,
.omada-bottom-row .omada-task-btn > div,
.omada-bottom-row .omada-task-btn .gradio-button,
.omada-bottom-row .omada-task-btn .gradio-button > div,
.omada-bottom-row .omada-custom-btn,
.omada-bottom-row .omada-custom-btn > div,
.omada-bottom-row .omada-custom-btn .gradio-button,
.omada-bottom-row .omada-custom-btn .gradio-button > div {
border: none !important;
box-shadow: none !important;
background: transparent !important;
}
.omada-bottom-row .omada-task-btn button,
.omada-bottom-row .omada-task-btn > div > button,
.omada-bottom-row .omada-custom-btn button,
.omada-bottom-row .omada-custom-btn > div > button,
.omada-bottom-row .omada-task-btn button *,
.omada-bottom-row .omada-custom-btn button * {
font-size: 0.42rem !important;
line-height: 1 !important;
border: none !important;
box-shadow: none !important;
background: transparent !important;
}
.omada-bottom-row .omada-task-chip-inline .omada-selected-task-chip {
font-size: 0.7rem !important;
line-height: 1 !important;
padding: 9px 15px !important;
}
.omada-bottom-row .omada-plus-btn button,
.omada-bottom-row .omada-plus-btn > div > button {
width: 74px !important;
min-width: 74px !important;
height: 74px !important;
font-size: 3rem !important;
font-weight: 900 !important;
}
/* Flat restyle of the send button: drops the gradient/outline from the
   earlier definition in favor of a plain light-blue disc. */
.omada-input-row .omada-send-btn-fix button,
.omada-input-row .omada-send-btn-fix > div > button {
border: none !important;
outline: none !important;
border-radius: 999px !important;
background: rgba(167, 214, 255, 0.88) !important;
box-shadow: 0 4px 9px rgba(95, 156, 232, 0.15) !important;
}
/* Matching flat restyle of the ::before halo. */
.omada-input-row .omada-send-btn-fix::before {
content: "" !important;
position: absolute !important;
inset: -4px !important;
border-radius: 999px !important;
border: none !important;
background: rgba(206, 234, 255, 0.35) !important;
box-shadow: 0 4px 10px rgba(104, 165, 236, 0.14) !important;
pointer-events: none !important;
z-index: -1 !important;
}
/* bigger placeholder + sample task chip text */
.omada-main-input textarea,
.omada-main-input textarea::placeholder {
font-size: 1.05rem !important;
}
.omada-sample-row .omada-chip button,
.omada-sample-row .omada-chip button span,
.omada-sample-row .omada-chip button p,
.omada-sample-row .omada-chip button div {
font-size: 1.22rem !important;
line-height: 1.14 !important;
}
.omada-sample-row .omada-chip .gradio-button > button,
.omada-sample-row .omada-chip .gradio-button > button span,
.omada-sample-row .omada-chip .gradio-button > button p,
.omada-sample-row .omada-chip .gradio-button > button div {
font-size: 1.22rem !important;
line-height: 1.14 !important;
}
/* absolute final lock: plus + sample text size */
.omada-input-row .omada-plus-btn,
.omada-input-row .omada-plus-btn > div,
.omada-input-row .omada-plus-btn .gradio-button,
.omada-input-row .omada-plus-btn .gradio-button > div,
.omada-input-row .omada-plus-btn button {
width: 82px !important;
min-width: 82px !important;
max-width: 82px !important;
height: 82px !important;
min-height: 82px !important;
max-height: 82px !important;
}
.omada-input-row .omada-plus-btn button,
.omada-input-row .omada-plus-btn button span,
.omada-input-row .omada-plus-btn button p,
.omada-input-row .omada-plus-btn button div {
font-size: 3.5rem !important;
line-height: 1 !important;
font-weight: 900 !important;
}
.omada-sample-row .gradio-button button,
.omada-sample-row .gradio-button button span,
.omada-sample-row .gradio-button button p,
.omada-sample-row .gradio-button button div,
.omada-sample-row .omada-chip button,
.omada-sample-row .omada-chip button span,
.omada-sample-row .omada-chip button p,
.omada-sample-row .omada-chip button div {
font-size: 1.34rem !important;
line-height: 1.16 !important;
font-weight: 700 !important;
}
/* ultra final force: make plus/sample visibly bigger */
.omada-input-row .omada-plus-btn button {
transform: scale(1.18) !important;
transform-origin: center center !important;
}
.omada-sample-row .gradio-button > button,
.omada-sample-row .gradio-button > button * {
font-size: 1.42rem !important;
line-height: 1.18 !important;
font-weight: 700 !important;
}
/* terminal hard override: controls size + hero position */
/* When the plus button sits inside the bottom row, pin it to 40x40
   (beats the 82px lock above via the longer descendant selector). */
.omada-input-row .omada-bottom-row .omada-plus-btn,
.omada-input-row .omada-bottom-row .omada-plus-btn > div,
.omada-input-row .omada-bottom-row .omada-plus-btn .gradio-button,
.omada-input-row .omada-bottom-row .omada-plus-btn .gradio-button > div,
.omada-input-row .omada-bottom-row .omada-plus-btn button {
width: 40px !important;
min-width: 40px !important;
max-width: 40px !important;
height: 40px !important;
min-height: 40px !important;
max-height: 40px !important;
}
.omada-input-row .omada-bottom-row .omada-plus-btn button,
.omada-input-row .omada-bottom-row .omada-plus-btn button span,
.omada-input-row .omada-bottom-row .omada-plus-btn button p,
.omada-input-row .omada-bottom-row .omada-plus-btn button div {
font-size: 4.2rem !important;
line-height: 1 !important;
font-weight: 900 !important;
}
.omada-input-row .omada-bottom-row .omada-task-btn button,
.omada-input-row .omada-bottom-row .omada-custom-btn button,
.omada-input-row .omada-bottom-row .omada-task-btn button *,
.omada-input-row .omada-bottom-row .omada-custom-btn button * {
font-size: 0.6rem !important;
line-height: 1.1 !important;
height: 42px !important;
min-height: 42px !important;
padding: 0 14px !important;
}
/* Pull the hero headline upward (re-tuned again further below). */
.omada-ready-hero {
top: 0 !important;
margin-top: -36px !important;
}
/* move bottom controls lower */
.omada-bottom-row {
margin-top: 14px !important;
}
/* absolute terminal lock v2 */
/* Attribute-substring selectors catch Gradio's hashed/suffixed class names
   (e.g. "omada-plus-btn svelte-xyz") that exact class selectors can miss. */
[class*="omada-plus-btn"] button {
width: 96px !important;
min-width: 96px !important;
height: 96px !important;
min-height: 96px !important;
font-size: 4.2rem !important;
line-height: 1 !important;
font-weight: 900 !important;
}
[class*="omada-task-btn"] button,
[class*="omada-custom-btn"] button,
[class*="omada-task-btn"] button *,
[class*="omada-custom-btn"] button * {
font-size: 0.8rem !important;
line-height: 1.1 !important;
height: 42px !important;
min-height: 42px !important;
}
.omada-ready-hero {
top: -8px !important;
margin-top: -56px !important;
}
/* non-negotiable final override */
/* Prefixing with .gradio-container raises specificity above every earlier
   rule for the same targets — this layer is the effective final word. */
.gradio-container .omada-ready-hero {
top: -20px !important;
margin-top: -88px !important;
}
.gradio-container .omada-input-row {
min-height: 0 !important;
padding-top: 0 !important;
padding-bottom: 0 !important;
gap: 0 !important;
}
.gradio-container .omada-main-input {
margin-top: 0 !important;
margin-bottom: -2px !important;
transform: translateY(-4px) !important;
}
.gradio-container .omada-main-input textarea,
.gradio-container .omada-main-input textarea::placeholder {
min-height: 34px !important;
line-height: 1.25 !important;
padding-top: 2px !important;
padding-bottom: 2px !important;
font-size: 1.08rem !important;
}
.gradio-container .omada-bottom-row {
margin-top: -2px !important;
gap: 4px !important;
align-items: center !important;
}
.gradio-container .omada-bottom-row .omada-plus-btn button,
.gradio-container .omada-bottom-row .omada-plus-btn > div > button {
width: 84px !important;
min-width: 84px !important;
height: 84px !important;
min-height: 84px !important;
font-size: 3.7rem !important;
}
.gradio-container .omada-bottom-row .omada-task-btn button,
.gradio-container .omada-bottom-row .omada-custom-btn button,
.gradio-container .omada-bottom-row .omada-task-btn button *,
.gradio-container .omada-bottom-row .omada-custom-btn button * {
font-size: 0.6rem !important;
height: 46px !important;
min-height: 46px !important;
line-height: 1.08 !important;
padding: 0 15px !important;
}
/* compact composer height + tighter spacing */
.omada-input-row {
min-height: 136px !important;
padding-top: 6px !important;
padding-bottom: 8px !important;
gap: 2px !important;
}
.omada-main-input {
transform: none !important;
margin-top: 0 !important;
margin-bottom: 0 !important;
}
.omada-main-input textarea {
min-height: 56px !important;
padding-top: 6px !important;
padding-bottom: 4px !important;
line-height: 1.2 !important;
font-size: 1.16rem !important;
}
.omada-bottom-row {
margin-top: 18px !important;
}
/* final lock: prevent placeholder/text clipping */
.gradio-container .omada-main-input,
.gradio-container .omada-main-input > div,
.gradio-container .omada-main-input .wrap,
.gradio-container .omada-main-input .wrap-inner {
overflow: visible !important;
}
.gradio-container .omada-main-input textarea,
.gradio-container .omada-main-input textarea::placeholder {
min-height: 64px !important;
height: auto !important;
padding: 8px 8px 4px 8px !important;
font-size: 1.08rem !important;
line-height: 1.25 !important;
box-sizing: border-box !important;
overflow: visible !important;
}
.gradio-container .omada-main-input textarea::placeholder {
min-height: unset !important;
height: auto !important;
padding: 0 !important;
font-size: 1.08rem !important;
line-height: 1.35 !important;
}
/* final alignment lock: controls and send on one horizontal line */
.gradio-container .omada-input-row {
padding-bottom: 0px !important;
}
.gradio-container .omada-main-input {
margin-bottom: 0 !important;
}
.gradio-container .omada-main-input textarea {
padding-bottom: 0 !important;
max-height: 72px !important;
overflow-y: auto !important;
}
.gradio-container .omada-bottom-row {
position: static !important;
left: auto !important;
right: auto !important;
bottom: auto !important;
margin-top: 0 !important;
transform: none !important;
align-items: flex-end !important;
justify-content: flex-start !important;
gap: 14px !important;
z-index: auto !important;
}
.gradio-container .omada-bottom-row > * {
align-self: flex-end !important;
}
.gradio-container .omada-send-btn-fix,
.gradio-container .omada-send-btn {
position: static !important;
right: auto !important;
bottom: auto !important;
margin-left: auto !important;
margin-top: 0 !important;
align-self: flex-end !important;
transform: none !important;
z-index: auto !important;
}
/* final lock: normalize plus button size */
.gradio-container .omada-bottom-row {
min-height: 0 !important;
height: auto !important;
align-items: center !important;
}
.gradio-container .omada-bottom-row > * {
min-height: 0 !important;
height: auto !important;
align-self: center !important;
}
.gradio-container .omada-bottom-row .omada-send-btn,
.gradio-container .omada-bottom-row .omada-send-btn-fix {
margin-left: auto !important;
}
.gradio-container .omada-bottom-row [class*="omada-plus-btn"],
.gradio-container .omada-bottom-row [class*="omada-plus-btn"] > div,
.gradio-container .omada-bottom-row [class*="omada-plus-btn"] .gradio-button,
.gradio-container .omada-bottom-row [class*="omada-plus-btn"] .gradio-button > div,
.gradio-container .omada-bottom-row [class*="omada-plus-btn"] button {
flex: 0 0 48px !important;
width: 48px !important;
min-width: 48px !important;
max-width: 48px !important;
height: 48px !important;
min-height: 48px !important;
max-height: 48px !important;
padding: 0 !important;
}
.gradio-container .omada-bottom-row [class*="omada-plus-btn"] button,
.gradio-container .omada-bottom-row [class*="omada-plus-btn"] button span,
.gradio-container .omada-bottom-row [class*="omada-plus-btn"] button p,
.gradio-container .omada-bottom-row [class*="omada-plus-btn"] button div {
font-size: 2rem !important;
line-height: 1 !important;
font-weight: 800 !important;
}
/* remove Gradio html wrapper padding inside bottom row */
.gradio-container .omada-bottom-row .html-container {
padding: 0 !important;
margin: 0 !important;
width: auto !important;
min-width: 0 !important;
}
.gradio-container .omada-bottom-row .html-container > div,
.gradio-container .omada-bottom-row .html-container .prose {
padding: 0 !important;
margin: 0 !important;
width: auto !important;
min-width: 0 !important;
}
/* final tune: smaller send circle + lighter/smaller task/custom text */
.gradio-container .omada-bottom-row .omada-send-btn-fix,
.gradio-container .omada-bottom-row .omada-send-btn-fix > div,
.gradio-container .omada-bottom-row .omada-send-btn,
.gradio-container .omada-bottom-row .omada-send-btn > div,
.gradio-container .omada-bottom-row .omada-send-btn button,
.gradio-container .omada-bottom-row .omada-send-btn-fix button,
.gradio-container .omada-bottom-row .omada-send-btn > div > button,
.gradio-container .omada-bottom-row .omada-send-btn-fix > div > button {
width: 35px !important;
min-width: 35px !important;
max-width: 35px !important;
height: 35px !important;
min-height: 35px !important;
max-height: 35px !important;
}
.gradio-container .omada-bottom-row .omada-send-btn button,
.gradio-container .omada-bottom-row .omada-send-btn-fix button,
.gradio-container .omada-bottom-row .omada-send-btn > div > button,
.gradio-container .omada-bottom-row .omada-send-btn-fix > div > button {
font-size: 1.6rem !important;
}
.gradio-container .omada-bottom-row .omada-task-btn button,
.gradio-container .omada-bottom-row .omada-custom-btn button,
.gradio-container .omada-bottom-row .omada-task-btn > div > button,
.gradio-container .omada-bottom-row .omada-custom-btn > div > button,
.gradio-container .omada-bottom-row .omada-task-btn button *,
.gradio-container .omada-bottom-row .omada-custom-btn button * {
font-size: 0.75rem !important;
font-weight: 400 !important;
line-height: 1.05 !important;
}
/* Gradio lg token override (computed 16px -> force smaller) */
.gradio-container .omada-bottom-row button.lg.omada-task-btn,
.gradio-container .omada-bottom-row button.lg.omada-custom-btn,
.gradio-container .omada-bottom-row .omada-task-btn button.lg,
.gradio-container .omada-bottom-row .omada-custom-btn button.lg {
--button-large-text-size: 0.7rem !important;
font-size: 0.75rem !important;
font-weight: 400 !important;
line-height: 1.05 !important;
}
.gradio-container .omada-bottom-row button.lg.omada-task-btn *,
.gradio-container .omada-bottom-row button.lg.omada-custom-btn *,
.gradio-container .omada-bottom-row .omada-task-btn button.lg *,
.gradio-container .omada-bottom-row .omada-custom-btn button.lg * {
font-size: 0.75rem !important;
font-weight: 400 !important;
line-height: 1.05 !important;
}
/* sample preview cards */
.gradio-container .omada-sample-preview-row {
width: min(980px, calc(100vw - 24px)) !important;
margin: 0 auto 6px auto !important;
gap: 10px !important;
order: 2 !important;
flex: 0 0 auto !important;
}
.gradio-container .omada-sample-preview-col {
flex: 1 1 0 !important;
}
.gradio-container .omada-sample-preview-card .html-container,
.gradio-container .omada-sample-preview-card .html-container > div {
padding: 0 !important;
margin: 0 !important;
}
.gradio-container .omada-sample-preview-inner {
display: flex;
align-items: center;
gap: 10px;
width: 100%;
min-height: 84px;
border-radius: 14px;
border: 1px solid rgba(186, 204, 232, 0.82);
background: rgba(255, 255, 255, 0.72);
box-shadow: 0 5px 14px rgba(108, 134, 177, 0.12);
padding: 10px 12px;
box-sizing: border-box;
}
.gradio-container .omada-sample-preview-media {
width: 76px;
min-width: 76px;
height: 56px;
border-radius: 10px;
overflow: hidden;
display: flex;
align-items: center;
justify-content: center;
background: rgba(227, 236, 249, 0.8);
}
.gradio-container .omada-sample-preview-thumb {
width: 100%;
height: 100%;
object-fit: cover;
}
.gradio-container .omada-sample-preview-icon {
font-size: 1.4rem;
}
.gradio-container .omada-sample-preview-meta {
min-width: 0;
}
.gradio-container .omada-sample-preview-title {
font-size: 0.83rem;
font-weight: 700;
color: #2a3a52;
}
.gradio-container .omada-sample-preview-desc {
margin-top: 4px;
font-size: 0.78rem;
line-height: 1.2;
color: #4a5f80;
word-break: break-word;
}
/* adaptive first-view layout: keep composer visible without page scroll */
html,
body {
height: 100% !important;
overflow: hidden !important;
}
.gradio-container {
height: 100vh !important;
overflow: hidden !important;
}
.gradio-container .omada-shell {
height: calc(100vh - 72px) !important;
max-height: calc(100vh - 72px) !important;
display: flex !important;
flex-direction: column !important;
min-height: 0 !important;
position: relative !important;
}
.gradio-container .omada-ready-hero {
position: fixed !important;
left: 50% !important;
top: 42% !important;
transform: translate(-50%, -50%) !important;
z-index: 999 !important;
}
.gradio-container .omada-ready-hero {
flex: 0 0 auto !important;
margin-top: 0 !important;
margin-bottom: 0 !important;
align-self: center !important;
width: min(820px, calc(100vw - 40px)) !important;
text-align: center !important;
pointer-events: none !important;
}
.gradio-container .omada-chatbot {
order: 1 !important;
}
.gradio-container .omada-sample-row {
order: 2 !important;
}
.gradio-container .omada-input-stack {
order: 3 !important;
}
.gradio-container .omada-sample-row {
flex: 0 0 auto !important;
margin: 0 auto 6px auto !important;
}
.gradio-container .omada-chatbot {
flex: 1 1 auto !important;
min-height: 0 !important;
height: auto !important;
max-height: none !important;
overflow: auto !important;
}
.gradio-container .omada-input-stack {
flex: 0 0 auto !important;
margin: 0 auto 4px auto !important;
}
.gradio-container .omada-ready-hero-wrap {
position: relative !important;
z-index: 60 !important;
}
.gradio-container .omada-ready-banner {
display: block !important;
}
/* ensure hidden Gradio blocks never intercept clicks */
.gradio-container .hide,
.gradio-container .wrap.hide {
display: none !important;
visibility: hidden !important;
pointer-events: none !important;
}
"""
# Keyword arguments for gr.Blocks. css/theme/js are constructor arguments only
# on pre-6 Gradio, so they are attached solely on older versions.
if GRADIO_V6_PLUS:
    _blocks_kwargs = {
        "title": "AIDAS Lab @ SNU - Omni-modal Diffusion",
    }
else:
    _blocks_kwargs = {
        "title": "AIDAS Lab @ SNU - Omni-modal Diffusion",
        "css": CUSTOM_CSS + EXTRA_CSS,
        "theme": theme,
        "js": FORCE_LIGHT_MODE_JS,
    }
with gr.Blocks(**_blocks_kwargs) as demo:
    # Hidden status line; warmup_model_status updates it once the page loads.
    model_status = gr.Markdown("Model status: Loading model...", visible=False)
    demo.load(warmup_model_status, outputs=[model_status])
    # Canonical task names; this exact list/order is reused by the task picker
    # buttons and by _task_button_updates / _update_advanced below.
    MODE_OPTIONS = [
        "Chat",
        "MMU (Image → Text)",
        "MMU (Video → Text)",
        "Image Generation",
        "Image Editing",
        "ASR",
        "TTS",
    ]
with gr.Column(elem_classes=["omada-shell"]):
    _chatbot_kwargs = {
        "label": None,
        # Assistant turns embed raw HTML (image/audio cards), so sanitizing
        # must stay off.
        "sanitize_html": False,
        "elem_classes": ["omada-chatbot"],
    }
    if not GRADIO_V6_PLUS:
        # bubble_full_width is only accepted by pre-6 Gradio.
        _chatbot_kwargs["bubble_full_width"] = False
    chatbox = gr.Chatbot(**_chatbot_kwargs)
intro_hero = gr.HTML(
""
"
"
"Ready to get started?"
"
"
"
"
"Use `+` to attach image/video/speech, `Task` to choose a mode, and `Generation Settings` to adjust generation options."
"
"
"
",
visible=True,
elem_classes=["omada-ready-hero-wrap"],
)
sample_task_items = [
("💬 Chat", "Chat"),
("🖼️ Image QA", "MMU (Image → Text)"),
("🎬 Video Captioning", "MMU (Video → Text)"),
("🎨 Image Generation", "Image Generation"),
("🛠️ Image Editing", "Image Editing"),
("🎙️ ASR", "ASR"),
("🔊 TTS", "TTS"),
]
sample_payloads = gr.State({
"Chat": [
{"mode": "Chat", "text": _get_example_value(CHAT_EXAMPLES, 0, 0, "Hello! Please introduce yourself."), "image": None, "audio": None, "video": None},
{"mode": "Chat", "text": _get_example_value(CHAT_EXAMPLES, 1, 0, _get_example_value(CHAT_EXAMPLES, 0, 0, "Hello! Please introduce yourself.")), "image": None, "audio": None, "video": None},
],
"MMU (Image → Text)": [
{"mode": "MMU (Image → Text)", "text": _get_example_value(MMU_EXAMPLES, 0, 1, DEFAULT_MMU_PROMPT), "image": _get_example_value(MMU_EXAMPLES, 0, 0, None), "audio": None, "video": None},
{"mode": "MMU (Image → Text)", "text": _get_example_value(MMU_EXAMPLES, 1, 1, _get_example_value(MMU_EXAMPLES, 0, 1, DEFAULT_MMU_PROMPT)), "image": _get_example_value(MMU_EXAMPLES, 1, 0, _get_example_value(MMU_EXAMPLES, 0, 0, None)), "audio": None, "video": None},
],
"MMU (Video → Text)": [
{"mode": "MMU (Video → Text)", "text": "", "image": None, "audio": None, "video": _get_example_value(V2T_EXAMPLES, -2, 0, _get_example_value(V2T_EXAMPLES, 0, 0, None))},
{"mode": "MMU (Video → Text)", "text": "", "image": None, "audio": None, "video": _get_example_value(V2T_EXAMPLES, -1, 0, _get_example_value(V2T_EXAMPLES, 1, 0, _get_example_value(V2T_EXAMPLES, 0, 0, None)))},
],
"Image Generation": [
{"mode": "Image Generation", "text": _get_example_value(T2I_EXAMPLES, 0, 0, "A cinematic mountain landscape at sunrise."), "image": None, "audio": None, "video": None},
{"mode": "Image Generation", "text": _get_example_value(T2I_EXAMPLES, 1, 0, _get_example_value(T2I_EXAMPLES, 0, 0, "A cinematic mountain landscape at sunrise.")), "image": None, "audio": None, "video": None},
],
"Image Editing": [
{"mode": "Image Editing", "text": _get_example_value(I2I_EXAMPLES, 0, 1, "Add warm sunset lighting."), "image": _get_example_value(I2I_EXAMPLES, 0, 0, None), "audio": None, "video": None},
{"mode": "Image Editing", "text": _get_example_value(I2I_EXAMPLES, 1, 1, _get_example_value(I2I_EXAMPLES, 0, 1, "Add warm sunset lighting.")), "image": _get_example_value(I2I_EXAMPLES, 1, 0, _get_example_value(I2I_EXAMPLES, 0, 0, None)), "audio": None, "video": None},
],
"ASR": [
{"mode": "ASR", "text": "", "image": None, "audio": _get_example_value(S2T_EXAMPLES, 0, 0, None), "video": None},
{"mode": "ASR", "text": "", "image": None, "audio": _get_example_value(S2T_EXAMPLES, 1, 0, _get_example_value(S2T_EXAMPLES, 0, 0, None)), "video": None},
],
"TTS": [
{"mode": "TTS", "text": _get_example_value(T2S_EXAMPLES, 0, 0, "Hello from Dynin-Omni."), "image": None, "audio": None, "video": None},
{"mode": "TTS", "text": _get_example_value(T2S_EXAMPLES, 1, 0, _get_example_value(T2S_EXAMPLES, 0, 0, "Hello from Dynin-Omni.")), "image": None, "audio": None, "video": None},
],
})
    selected_sample_mode = gr.State("Chat")
    # Seven task chips split across two rows (3 + 4); appended in
    # sample_task_items order so button index == item index.
    task_sample_buttons = []
    with gr.Row(elem_classes=["omada-sample-row"], visible=True) as task_sample_row_1:
        for i in range(3):
            task_sample_buttons.append(gr.Button(sample_task_items[i][0], size="sm", elem_classes=["omada-chip"], visible=True))
    with gr.Row(elem_classes=["omada-sample-row"], visible=True) as task_sample_row_2:
        for i in range(3, 7):
            task_sample_buttons.append(gr.Button(sample_task_items[i][0], size="sm", elem_classes=["omada-chip"], visible=True))
    # Hidden until a chip is clicked; then shows two sample preview cards
    # (filled by _open_sample_choices) with a pick button each.
    with gr.Row(elem_classes=["omada-sample-preview-row"], visible=False) as sample_choice_row:
        with gr.Column(elem_classes=["omada-sample-preview-col"]):
            sample_preview_1 = gr.HTML("", elem_classes=["omada-sample-preview-card"])
            sample_choice_1 = gr.Button("Sample 1", size="sm", elem_classes=["omada-chip"], visible=True)
        with gr.Column(elem_classes=["omada-sample-preview-col"]):
            sample_preview_2 = gr.HTML("", elem_classes=["omada-sample-preview-card"])
            sample_choice_2 = gr.Button("Sample 2", size="sm", elem_classes=["omada-chip"], visible=True)
    # Composer: attachment preview strip, main textbox, then the bottom row of
    # controls (+ / Task / chip / Generation Settings / send).
    with gr.Column(elem_classes=["omada-input-stack"]):
        with gr.Column(elem_classes=["omada-input-row"]):
            attachment_preview = gr.HTML(
                _render_attachment_preview(None, None, None),
                elem_classes=["omada-attach-preview-wrap"],
            )
            chat_input = gr.Textbox(
                show_label=False,
                placeholder="How can I help you today?",
                lines=1,
                interactive=True,
                scale=1,
                min_width=0,
                elem_classes=["omada-main-input"],
            )
            with gr.Row(elem_classes=["omada-bottom-row"]):
                plus_btn = gr.Button("+", elem_classes=["omada-plus-btn"], scale=0, min_width=30)
                task_btn = gr.Button("🛠 Task", elem_classes=["omada-task-btn"], scale=0, min_width=0)
                # Inline chip echoing the currently selected task.
                selected_task_badge = gr.HTML(
                    _render_task_chip("Chat"),
                    elem_classes=["omada-task-chip-inline"],
                )
                custom_btn = gr.Button("🧠 Generation Settings", elem_classes=["omada-custom-btn"], scale=0, min_width=0)
                send_button = gr.Button("↑", elem_classes=["omada-send-btn", "omada-send-btn-fix"], scale=0, min_width=30)
    # Panel bookkeeping: which panel kind is open ("attach"/"task"/"custom"),
    # whether the slide-over is visible, and its click-eating backdrop.
    auto_mode_state = gr.State("Custom")
    controls_visible = gr.State(False)
    panel_mode_state = gr.State("task")
    backdrop = gr.HTML("", visible=False, elem_classes=["omada-panel-backdrop"])
    # Slide-over panel hosting three mutually exclusive sections: attachments,
    # task picker, and per-task generation settings.
    controls_panel = gr.Column(visible=False, elem_classes=["omada-controls-safe"])
    with controls_panel:
        panel_title = gr.Markdown("**Task**")
        mode_selector = gr.State("Chat")
        with gr.Column(visible=False) as attach_section:
            media_image = gr.Image(type="pil", label="Image", sources=["upload"], visible=True)
            media_audio = gr.Audio(type="filepath", label="Speech", sources=["microphone", "upload"], visible=True)
            media_video = gr.Video(label="Video", sources=["upload", "webcam"], visible=True)
        with gr.Column(visible=False) as task_section:
            with gr.Row():
                # One button per mode; the active one is rendered "primary".
                task_buttons = [
                    gr.Button(
                        option,
                        size="sm",
                        variant="primary" if option == "Chat" else "secondary",
                    )
                    for option in MODE_OPTIONS
                ]
        with gr.Column(visible=False) as custom_section:
            gr.Markdown("Task-specific generation settings")
            # One advanced column per task; _update_advanced toggles visibility.
            adv_chat = gr.Column(visible=False)
            with adv_chat:
                chat_max_tokens = gr.Slider(2, 512, value=512, step=2, label="Chat max tokens", interactive=True)
                chat_steps = gr.Slider(2, 512, value=512, step=2, label="Chat steps", interactive=True)
                chat_block = gr.Slider(2, 512, value=16, step=2, label="Chat block length", interactive=True)
                chat_temperature_slider = gr.Slider(0.0, 2.0, value=0.0, step=0.05, label="Chat temperature", interactive=True)
            adv_t2s = gr.Column(visible=False)
            with adv_t2s:
                t2s_max_tokens = gr.Slider(2, 512, value=512, step=2, label="Speech token length", interactive=True)
                t2s_steps = gr.Slider(2, 512, value=256, step=2, label="T2S refinement steps", interactive=True)
                t2s_block = gr.Slider(2, 512, value=256, step=2, label="T2S block length", interactive=True)
                t2s_temperature = gr.Slider(0.0, 2.0, value=0.0, step=0.05, label="T2S temperature", interactive=True)
                t2s_cfg = gr.Slider(0.0, 6.0, value=3.5, step=0.1, label="T2S CFG scale", interactive=True)
                t2s_gender = gr.Dropdown(["random", "female", "male"], value="random", label="T2S gender", interactive=True)
                t2s_emotion = gr.Dropdown(["random", "angry", "happy", "neutral", "sad"], value="random", label="T2S emotion", interactive=True)
                t2s_speed = gr.Dropdown(["random", "normal", "fast", "slow"], value="random", label="T2S speed", interactive=True)
                t2s_pitch = gr.Dropdown(["random", "normal", "high", "low"], value="random", label="T2S pitch", interactive=True)
            adv_s2t = gr.Column(visible=False)
            with adv_s2t:
                s2t_steps = gr.Slider(2, 512, value=128, step=2, label="S2T steps", interactive=True)
                s2t_block = gr.Slider(2, 512, value=16, step=2, label="S2T block length", interactive=True)
                s2t_max_tokens = gr.Slider(2, 512, value=128, step=2, label="S2T max tokens", interactive=True)
                s2t_remasking = gr.Dropdown(["low_confidence", "random"], value="low_confidence", label="S2T remasking", interactive=True)
            adv_v2t = gr.Column(visible=False)
            with adv_v2t:
                v2t_steps = gr.Slider(2, 512, value=256, step=2, label="V2T steps", interactive=True)
                v2t_block = gr.Slider(2, 512, value=16, step=2, label="V2T block length", interactive=True)
                v2t_max_tokens = gr.Slider(2, 512, value=256, step=2, label="V2T max tokens", interactive=True)
            adv_t2i = gr.Column(visible=False)
            with adv_t2i:
                t2i_timesteps = gr.Slider(4, 128, value=16, step=2, label="T2I timesteps", interactive=True)
                t2i_temperature = gr.Slider(0.0, 2.0, value=0.0, step=0.05, label="T2I temperature", interactive=True)
                t2i_guidance = gr.Slider(0.0, 8.0, value=2.5, step=0.1, label="T2I CFG scale", interactive=True)
            adv_i2i = gr.Column(visible=False)
            with adv_i2i:
                i2i_timesteps = gr.Slider(4, 128, value=32, step=2, label="I2I timesteps", interactive=True)
                i2i_temperature = gr.Slider(0.0, 2.0, value=0.0, step=0.05, label="I2I temperature", interactive=True)
                i2i_guidance = gr.Slider(0.0, 8.0, value=2.5, step=0.1, label="I2I CFG scale", interactive=True)
            adv_mmu = gr.Column(visible=False)
            with adv_mmu:
                mmu_max_tokens = gr.Slider(2, 512, value=128, step=2, label="MMU max tokens", interactive=True)
                mmu_steps = gr.Slider(2, 512, value=128, step=2, label="MMU steps", interactive=True)
                mmu_block = gr.Slider(2, 512, value=16, step=2, label="MMU block length", interactive=True)
                mmu_temperature = gr.Slider(0.0, 2.0, value=0.0, step=0.05, label="MMU temperature", interactive=True)
        save_btn = gr.Button("Save", variant="primary")
def _task_button_updates(selected_mode: str):
    """One gr.update per task button, highlighting the selected mode as primary."""
    updates = []
    for option in MODE_OPTIONS:
        variant = "primary" if option == selected_mode else "secondary"
        updates.append(gr.update(variant=variant))
    return tuple(updates)
def _update_advanced(mode, auto_mode):
    """Visibility updates for the 7 advanced columns; only *mode*'s column shows.

    Output order matches the adv_* components:
    chat, t2s, s2t, v2t, t2i, i2i, mmu. *auto_mode* is accepted for
    signature compatibility and intentionally unused here.
    """
    column_modes = (
        "Chat",
        "TTS",
        "ASR",
        "MMU (Video → Text)",
        "Image Generation",
        "Image Editing",
        "MMU (Image → Text)",
    )
    return tuple(gr.update(visible=(mode == m)) for m in column_modes)
def _panel_title(kind: str) -> str:
return {
"attach": "**Attach**",
"task": "**Task**",
"custom": "**Custom Config**",
}.get(kind, "**Task**")
def _open_controls(panel_kind, mode):
    """Update tuple that opens the controls panel showing section *panel_kind*.

    Order matches the shared outputs list: panel state, title, backdrop,
    panel, visible flag, three section toggles, 7 advanced columns, then the
    task buttons.
    """
    show_custom = panel_kind == "custom"
    if show_custom:
        adv_updates = _update_advanced(mode, "Custom")
    else:
        adv_updates = tuple(gr.update(visible=False) for _ in range(7))
    head = (
        panel_kind,
        gr.update(value=_panel_title(panel_kind)),
        gr.update(visible=True),  # backdrop
        gr.update(visible=True),  # controls panel
        True,                     # controls_visible state
        gr.update(visible=panel_kind == "attach"),
        gr.update(visible=panel_kind == "task"),
        gr.update(visible=show_custom),
    )
    return head + tuple(adv_updates) + tuple(_task_button_updates(mode))
# +, Task and Generation Settings all open the same slide-over panel with the
# same output list; only the panel kind differs. Wire them in one loop instead
# of three verbatim copies.
_panel_outputs = [
    panel_mode_state,
    panel_title,
    backdrop,
    controls_panel,
    controls_visible,
    attach_section,
    task_section,
    custom_section,
    adv_chat,
    adv_t2s,
    adv_s2t,
    adv_v2t,
    adv_t2i,
    adv_i2i,
    adv_mmu,
    *task_buttons,
]
for _open_btn, _panel_kind in ((plus_btn, "attach"), (task_btn, "task"), (custom_btn, "custom")):
    _open_btn.click(
        # Bind the kind as a default argument; a plain closure over the loop
        # variable would late-bind and every button would open "custom".
        lambda mode, kind=_panel_kind: _open_controls(kind, mode),
        inputs=[mode_selector],
        outputs=_panel_outputs,
    )
def _update_mode(mode):
    """Return a chat_input update whose placeholder matches the selected task.

    Unknown modes fall back to the default Chat placeholder.
    """
    placeholders = {
        "Chat": "How can I help you today?",
        "TTS": "Type the speech you want to synthesize...",
        "ASR": "Upload audio, then add notes here...",
        "MMU (Video → Text)": "Upload video, then add notes here...",
        "Image Generation": "Describe the image you want to generate...",
        "Image Editing": "Describe how you want to edit the image...",
        "MMU (Image → Text)": "Ask about the uploaded image...",
    }
    return gr.update(placeholder=placeholders.get(mode, "How can I help you today?"))
# NOTE: a stray `_update_mode("Chat")` call used to sit here; its return value
# was discarded and gr.update has no side effect, so the dead call was removed.
def _pick_mode(choice, panel_mode):
    """Apply a task selection: state, inline chip, placeholder, panels, buttons.

    Advanced columns are only re-toggled when the custom panel is open;
    otherwise all seven are hidden.
    """
    if panel_mode == "custom":
        adv = _update_advanced(choice, "Custom")
    else:
        adv = tuple(gr.update(visible=False) for _ in range(7))
    head = (
        choice,
        _render_task_chip(choice),
        _update_mode(choice),
    )
    return head + tuple(adv) + tuple(_task_button_updates(choice))
for idx, task_choice_btn in enumerate(task_buttons):
    task_choice_btn.click(
        # choice=MODE_OPTIONS[idx] is deliberately bound as a default: a plain
        # closure over idx would late-bind and every button would select the
        # last mode in MODE_OPTIONS.
        lambda panel_mode, choice=MODE_OPTIONS[idx]: _pick_mode(choice, panel_mode),
        inputs=[panel_mode_state],
        outputs=[mode_selector, selected_task_badge, chat_input, adv_chat, adv_t2s, adv_s2t, adv_v2t, adv_t2i, adv_i2i, adv_mmu, *task_buttons],
    )
def _refresh_attachment_preview(image_in, audio_in, video_in):
    """Re-render the composer's attachment preview from the media inputs."""
    return _render_attachment_preview(image_in, audio_in, video_in)
# A change to any of the three media inputs triggers the same refresh with the
# same inputs/outputs, so wire all three in one loop instead of three copies.
for _media_component in (media_image, media_audio, media_video):
    _media_component.change(
        _refresh_attachment_preview,
        inputs=[media_image, media_audio, media_video],
        outputs=[attachment_preview],
    )
# "Save" only closes the panel; slider/dropdown values are live components and
# are read directly at submit time, so nothing needs persisting here.
save_btn.click(
    lambda: (gr.update(visible=False), gr.update(visible=False), False),
    outputs=[backdrop, controls_panel, controls_visible],
)
def _format_user_message(msg: str) -> str:
return msg.strip() if msg else " "
def _normalize_chat_history(history):
if not history:
return []
if isinstance(history, list) and history and isinstance(history[0], dict):
pairs = []
pending_user = None
for msg in history:
role = msg.get("role")
content = msg.get("content", "")
if role == "user":
if pending_user is not None:
pairs.append((pending_user, ""))
pending_user = content
elif role == "assistant":
if pending_user is None:
pairs.append((" ", content))
else:
pairs.append((pending_user, content))
pending_user = None
if pending_user is not None:
pairs.append((pending_user, ""))
return pairs
return list(history)
def _serialize_chat_history(pairs):
    """Convert pair-form history to whatever the installed Chatbot expects.

    Pre-6 Gradio renders tuple pairs directly; Gradio 6+ wants role dicts.
    None slots are replaced with a space (user) / empty string (assistant).
    """
    if not GRADIO_V6_PLUS:
        return pairs
    messages = []
    for user_msg, assistant_msg in pairs:
        messages.append({"role": "user", "content": " " if user_msg is None else user_msg})
        messages.append({"role": "assistant", "content": "" if assistant_msg is None else assistant_msg})
    return messages
def _is_identity_query(message: str) -> bool:
q = re.sub(r"[^a-z0-9\s]", " ", (message or "").lower())
q = re.sub(r"\s+", " ", q).strip()
if not q:
return False
triggers = [
"who are you",
"what are you",
"introduce yourself",
"what is your name",
"your name",
"are you dynin omni",
"what model are you",
]
return any(t in q for t in triggers)
@spaces.GPU
def _chat_handler(
    history,
    message,
    mode,
    auto_mode,
    image_in,
    audio_in,
    video_in,
    chat_max_tokens,
    chat_steps,
    chat_block,
    chat_temperature,
    t2s_max_tokens,
    t2s_steps,
    t2s_block,
    t2s_temperature,
    t2s_cfg,
    t2s_gender,
    t2s_emotion,
    t2s_speed,
    t2s_pitch,
    s2t_steps,
    s2t_block,
    s2t_max_tokens,
    s2t_remasking,
    v2t_steps,
    v2t_block,
    v2t_max_tokens,
    t2i_timesteps,
    t2i_temperature,
    t2i_guidance,
    i2i_timesteps,
    i2i_temperature,
    i2i_guidance,
    mmu_max_tokens,
    mmu_steps,
    mmu_block,
    mmu_temperature,
):
    """Route one submission to the handler for the selected task.

    Generator: each yield is (serialized history, "") — the empty string
    clears the textbox — so the Chatbot refreshes progressively while the
    task streams. Decorated with @spaces.GPU so ZeroGPU allocates a GPU
    per request. Parameter order must match submit_inputs exactly.
    """
    _set_global_seed()
    history = _normalize_chat_history(history)
    message = (message or "").strip()
    # For video QA, delay rendering the video preview in the user bubble
    # until the placeholder turn has been posted.
    defer_video = mode == "MMU (Video → Text)" and bool(video_in)
    display_user = _render_user_message(mode, message, image_in, audio_in, video_in, defer_video=defer_video)
    # Post the user turn with a "Model loading..." placeholder reply.
    history.append((display_user, _render_text_message("Model loading...", "")))
    yield _serialize_chat_history(history), ""
    # Canned identity answer: skip inference entirely for "who are you"-style
    # questions in Chat mode.
    if mode == "Chat" and _is_identity_query(message):
        fixed = (
            "I am Dynin-Omni, an omnimodal unified diffusion language model developed by AIDAS Lab.\n"
            "I can understand and generate text, images, speech, and video within a single architecture."
        )
        history[-1] = (display_user, _render_text_message("Assistant reply generated.", fixed))
        yield _serialize_chat_history(history), ""
        return
    if defer_video:
        # Re-render the user bubble with the actual video preview now.
        display_user = _render_user_message(mode, message, image_in, audio_in, video_in, defer_video=False)
        history[-1] = (display_user, history[-1][1])
        yield _serialize_chat_history(history), ""
    # Lazily fetch the (global) model wrapper; may block on first call.
    app = get_app()
    history[-1] = (display_user, _render_text_message("Generating...", ""))
    yield _serialize_chat_history(history), ""
    # Use UI-provided generation settings.
    app.force_eval_settings = str(auto_mode).strip().lower() == "auto"
    if mode == "Chat":
        # Streamed text generation; each chunk replaces the last reply.
        for reply_html, status, done in app.run_chat_stream(
            message,
            chat_max_tokens,
            chat_steps,
            chat_block,
            chat_temperature,
            update_every=64,
        ):
            response = _render_response(status, reply_html)
            history[-1] = (display_user, response)
            yield _serialize_chat_history(history), ""
        return
    if mode == "TTS":
        if not message:
            history[-1] = (display_user, _render_text_message("Please type some text.", ""))
            yield _serialize_chat_history(history), ""
            return
        # Non-streaming: one synthesis call, then a single audio reply.
        audio, status = app.run_t2s(
            message,
            t2s_max_tokens,
            t2s_steps,
            t2s_block,
            t2s_temperature,
            t2s_cfg,
            t2s_gender,
            t2s_emotion,
            t2s_speed,
            t2s_pitch,
        )
        history[-1] = (display_user, _render_audio_message(status, audio))
        yield _serialize_chat_history(history), ""
        return
    if mode == "ASR":
        if not audio_in:
            history[-1] = (display_user, _render_text_message("Please upload audio.", ""))
            yield _serialize_chat_history(history), ""
            return
        # Streamed transcription.
        for text, status in app.run_s2t_stream(
            audio_in,
            s2t_steps,
            s2t_block,
            s2t_max_tokens,
            s2t_remasking,
            update_every=32,
        ):
            history[-1] = (display_user, _render_text_message(status, text))
            yield _serialize_chat_history(history), ""
        return
    if mode == "MMU (Video → Text)":
        if not video_in:
            history[-1] = (display_user, _render_text_message("Please upload a video.", ""))
            yield _serialize_chat_history(history), ""
            return
        # Streamed video captioning.
        for text, status in app.run_v2t_stream(
            video_in,
            v2t_steps,
            v2t_block,
            v2t_max_tokens,
            update_every=32,
        ):
            history[-1] = (display_user, _render_text_message(status, text))
            yield _serialize_chat_history(history), ""
        return
    if mode == "Image Generation":
        if not message:
            history[-1] = (display_user, _render_text_message("Please provide a prompt.", ""))
            yield _serialize_chat_history(history), ""
            return
        # Streamed diffusion: intermediate images every couple of steps.
        for image, status in app.run_t2i_stream(
            message,
            t2i_timesteps,
            t2i_temperature,
            t2i_guidance,
            update_every=2,
        ):
            history[-1] = (display_user, _render_image_message(status, image))
            yield _serialize_chat_history(history), ""
        return
    if mode == "Image Editing":
        if not image_in:
            history[-1] = (display_user, _render_text_message("Please upload an image.", ""))
            yield _serialize_chat_history(history), ""
            return
        if not message:
            history[-1] = (display_user, _render_text_message("Please provide an edit instruction.", ""))
            yield _serialize_chat_history(history), ""
            return
        for image, status in app.run_i2i_stream(
            message,
            image_in,
            i2i_timesteps,
            i2i_temperature,
            i2i_guidance,
            update_every=2,
        ):
            history[-1] = (display_user, _render_image_message(status, image))
            yield _serialize_chat_history(history), ""
        return
    if mode == "MMU (Image → Text)":
        if not image_in:
            history[-1] = (display_user, _render_text_message("Please upload an image.", ""))
            yield _serialize_chat_history(history), ""
            return
        # Keep MMU QA consistent with chat mask-pill UX.
        try:
            # Clamp the mask-pill count to [16, 256].
            mmu_mask_count = max(16, min(int(mmu_max_tokens or 128), 256))
        except Exception:
            mmu_mask_count = 128
        # NOTE(review): joining empty strings yields only spaces; this looks
        # like an HTML mask-pill tag was lost from the literal during
        # extraction — confirm against the original file.
        mmu_mask_surface = " ".join([""] * mmu_mask_count)
        history[-1] = (display_user, _render_text_message("Generating...", mmu_mask_surface))
        yield _serialize_chat_history(history), ""
        # Non-streaming MMU answer.
        reply, status = app.run_mmu(
            images=[image_in],
            message=message,
            max_new_tokens=mmu_max_tokens,
            steps=mmu_steps,
            block_length=mmu_block,
            temperature=mmu_temperature,
        )
        history[-1] = (display_user, _render_text_message(status, reply))
        yield _serialize_chat_history(history), ""
        return
    # Fallback for any mode string not handled above.
    history[-1] = (display_user, _render_text_message("Unsupported mode.", ""))
    yield _serialize_chat_history(history), ""
# Re-enter the Blocks context to attach the submit/send event chains.
with demo:
    def _hide_intro():
        # Hide the hero banner on the user's first submission.
        return gr.update(visible=False)
    # Single flat input list shared by both submit paths; the order must match
    # _chat_handler's positional parameters exactly.
    submit_inputs = [
        chatbox,
        chat_input,
        mode_selector,
        auto_mode_state,
        media_image,
        media_audio,
        media_video,
        chat_max_tokens,
        chat_steps,
        chat_block,
        chat_temperature_slider,
        t2s_max_tokens,
        t2s_steps,
        t2s_block,
        t2s_temperature,
        t2s_cfg,
        t2s_gender,
        t2s_emotion,
        t2s_speed,
        t2s_pitch,
        s2t_steps,
        s2t_block,
        s2t_max_tokens,
        s2t_remasking,
        v2t_steps,
        v2t_block,
        v2t_max_tokens,
        t2i_timesteps,
        t2i_temperature,
        t2i_guidance,
        i2i_timesteps,
        i2i_temperature,
        i2i_guidance,
        mmu_max_tokens,
        mmu_steps,
        mmu_block,
        mmu_temperature,
    ]
    submit_outputs = [chatbox, chat_input]
    # Enter key and send button do the same thing: hide the hero immediately
    # (queue=False), then run the GPU handler.
    chat_input.submit(_hide_intro, outputs=[intro_hero], queue=False).then(
        _chat_handler, inputs=submit_inputs, outputs=submit_outputs
    )
    send_button.click(_hide_intro, outputs=[intro_hero], queue=False).then(
        _chat_handler, inputs=submit_inputs, outputs=submit_outputs
    )
def _open_sample_choices(sample_map, mode):
items = (sample_map or {}).get(mode, [])
has_1 = len(items) >= 1
has_2 = len(items) >= 2
item1 = items[0] if has_1 else {}
item2 = items[1] if has_2 else {}
return (
mode,
_render_task_chip(mode),
_update_mode(mode),
mode,
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=True),
_render_sample_preview_card(item1, 0),
_render_sample_preview_card(item2, 1),
gr.update(value="Sample 1", visible=has_1, interactive=has_1),
gr.update(value="Sample 2", visible=has_2, interactive=has_2),
*_task_button_updates(mode),
)
def _use_sample(sample_map, mode, sample_idx):
items = (sample_map or {}).get(mode, [])
if not items:
current_mode = "Chat"
return (
"",
None,
None,
None,
current_mode,
_render_task_chip(current_mode),
_render_attachment_preview(None, None, None),
_update_mode(current_mode),
gr.update(visible=True),
gr.update(visible=True),
gr.update(visible=False),
"",
"",
gr.update(value="Sample 1", visible=True, interactive=True),
gr.update(value="Sample 2", visible=True, interactive=True),
*_task_button_updates(current_mode),
)
idx = max(0, min(int(sample_idx), len(items) - 1))
item = items[idx] or {}
sample_mode = item.get("mode", "Chat")
sample_text = item.get("text", "")
if not sample_text:
sample_video = item.get("video")
sample_audio = item.get("audio")
if sample_video:
sample_text = f"[Video] {Path(str(sample_video)).name}"
elif sample_audio:
sample_text = f"[Audio] {Path(str(sample_audio)).name}"
image_item = item.get("image")
audio_item = item.get("audio")
video_item = item.get("video")
return (
sample_text,
image_item,
audio_item,
video_item,
sample_mode,
_render_task_chip(sample_mode),
_render_attachment_preview(image_item, audio_item, video_item),
_update_mode(sample_mode),
gr.update(visible=True),
gr.update(visible=True),
gr.update(visible=False),
"",
"",
gr.update(value="Sample 1", visible=True, interactive=True),
gr.update(value="Sample 2", visible=True, interactive=True),
*_task_button_updates(sample_mode),
)
for i, btn in enumerate(task_sample_buttons):
mode_value = sample_task_items[i][1]
btn.click(
lambda payloads, m=mode_value: _open_sample_choices(payloads, m),
inputs=[sample_payloads],
outputs=[
mode_selector,
selected_task_badge,
chat_input,
selected_sample_mode,
task_sample_row_1,
task_sample_row_2,
sample_choice_row,
sample_preview_1,
sample_preview_2,
sample_choice_1,
sample_choice_2,
*task_buttons,
],
)
sample_choice_1.click(
lambda payloads, m: _use_sample(payloads, m, 0),
inputs=[sample_payloads, selected_sample_mode],
outputs=[
chat_input,
media_image,
media_audio,
media_video,
mode_selector,
selected_task_badge,
attachment_preview,
chat_input,
task_sample_row_1,
task_sample_row_2,
sample_choice_row,
sample_preview_1,
sample_preview_2,
sample_choice_1,
sample_choice_2,
*task_buttons,
],
)
sample_choice_2.click(
lambda payloads, m: _use_sample(payloads, m, 1),
inputs=[sample_payloads, selected_sample_mode],
outputs=[
chat_input,
media_image,
media_audio,
media_video,
mode_selector,
selected_task_badge,
attachment_preview,
chat_input,
task_sample_row_1,
task_sample_row_2,
sample_choice_row,
sample_preview_1,
sample_preview_2,
sample_choice_1,
sample_choice_2,
*task_buttons,
],
)
# Initial: task chips visible, sample choices hidden.
demo.load(
lambda: (
gr.update(visible=True),
gr.update(visible=True),
gr.update(visible=False),
"",
"",
gr.update(visible=True),
gr.update(visible=True),
),
outputs=[
task_sample_row_1,
task_sample_row_2,
sample_choice_row,
sample_preview_1,
sample_preview_2,
sample_choice_1,
sample_choice_2,
],
queue=False,
)
if __name__ == "__main__":
    # Directories Gradio is allowed to serve static files from.
    serve_roots = [
        str(PREVIEW_DIR),
        str(PROJECT_ROOT),
        str(ASSET_ROOT),
        "/tmp",
    ]
    launch_options = {"allowed_paths": serve_roots}
    if GRADIO_V6_PLUS:
        # On Gradio >= 6 the styling hooks are supplied to launch()
        # (presumably not accepted by Blocks() there — confirm against
        # the pinned Gradio version).
        launch_options["css"] = CUSTOM_CSS + EXTRA_CSS
        launch_options["theme"] = theme
        launch_options["js"] = FORCE_LIGHT_MODE_JS
    demo.launch(**launch_options)