"""
app_single.py — MiniCPM-V 4.6 · An Adventure in Thousand Token Wood
=====================================================================
A storybook playground: MiniCPM-V reads an uploaded image like a page
from an adventure, then a woodland cat performs its mood in a forest
clearing — complete with a tiny generative tune.

Pipeline:
  1. Upload image → MiniCPM-V streams a description
  2. Model returns a JSON dance spec (mood + 6 numeric animation params)
  3. The cat performs in the clearing using those exact params — every
     move is model-determined, not hardcoded.
  4. A free, generative melody (Web Audio API, no audio files) plays
     along — tempo and register also derived from the model's params.

Dance params returned by model:
  mood        : one of the mood words listed in MOOD_LABELS
  speed       : animation cycle seconds (0.3 fast … 3.0 slow)
  jump        : vertical bounce px (0 … 60)
  sway        : body rotation degrees (0 … 20)
  tail_speed  : tail cycle seconds (0.2 … 3.0)
  tail_range  : tail swing degrees (5 … 200)
  ear_tilt    : ear rotation degrees (0 … 25)

Two backends — switchable in the UI:
  • API (default) — calls the hosted MiniCPM-V 4.6 API. Needs internet.
  • Local (offline) — downloads openbmb/MiniCPM-V-4 (4.1B, Apache-2.0) once,
    caches it to ./model_cache/, then runs fully offline.
    Requires: pip install torch transformers accelerate

Run locally:
  pip install -r requirements.txt
  python app_single.py
  → open http://localhost:7860

Optional: set your own API key so you're not on the shared public quota
  Windows (PowerShell): $env:MINICPM_API_KEY="sk-..."
  macOS / Linux: export MINICPM_API_KEY="sk-..."
"""
|
|
| import base64, io, os, json, re |
| import gradio as gr |
| from openai import OpenAI, APIStatusError, APIConnectionError |
| from PIL import Image |
|
|
| |
# Base URL handed to the OpenAI client in _client().
API_BASE_URL = "https://api.modelbest.cn/v1"
# Last-resort key used by _resolve_key() when neither the MINICPM_API_KEY
# environment variable nor a UI-supplied key is present.
# NOTE(review): this is a credential committed in source — consider moving it
# to configuration and rotating it.
PUBLIC_API_KEY = "sk-pQ8L2zF3XmR5kY9wV4jB7hN1tC6vM0xG3aD5sH2bJ9lK4cZ8"


# Maps the label passed as ``model_label`` to the model id sent to the API
# (see stream_description).
MODELS = {
    "β‘ Instruct (fast, direct)": "MiniCPM-V-4.6-Instruct",
    "π§ Thinking (reasons first)": "MiniCPM-V-4.6-Thinking",
}


# Default prompt / sampling values (presumably initial UI values — their use
# is outside the visible part of the file).
DEFAULT_PROMPT = "Describe this image in detail."
DEFAULT_MAX_TOKENS = 512
DEFAULT_TEMPERATURE = 0.7
# JPEG quality used by pil_to_data_url() when encoding the uploaded image.
IMAGE_QUALITY = 90
|
|
# Mood vocabulary: interpolated into DANCE_SYSTEM_PROMPT and used to validate
# the "mood" field of the model's reply.
# NOTE(review): "depressed" has no entry in MOOD_PALETTE or DEFAULT_DANCE;
# cat_html() falls back to the neutral palette for it — confirm this is intended.
MOOD_LABELS = ["happy","sad","calm","energetic","mysterious","depressed",
               "romantic","tense","nostalgic","angry","neutral"]
|
|
# Example prompts; each entry is a one-element list holding the prompt text
# (presumably the row format expected by the UI's examples widget — TODO confirm).
PROMPT_EXAMPLES = [
    ["Describe this image in detail."],
    ["List every object you can see."],
    ["What is the mood or atmosphere of this image?"],
    ["What text, if any, appears in this image?"],
    ["Explain this image to someone who cannot see it."],
]
|
|
| |
| |
| |
# Per-mood styling and music data read by cat_html():
#   body / detail / eye / nose : colours substituted into the cat's CSS
#   particle                   : character(s) spawned as floating particles
#   label / caption            : text shown on the stage
#   scale / root               : semitone offsets and root MIDI note of the tune
# "bg" and "pcol" are not read by any code visible in this part of the file.
# The "energetic" particle literal was split across two lines by a stray line
# break inside a mis-decoded star character; it is restored here as a single
# literal so the dict parses.
MOOD_PALETTE = {
    "happy":      {"bg":"#1a1605","body":"#FFD166","detail":"#E8A23A","eye":"#2D1B00","nose":"#FF8A3D","pcol":"#FFE08A","particle":"β¦","label":"Happy","caption":"Bouncing with joy", "scale":[0,2,4,7,9,12], "root":72},
    "sad":        {"bg":"#0c1116","body":"#8AA0B2","detail":"#5D7A8E","eye":"#1A2530","nose":"#B7C7D2","pcol":"#A9C8E0","particle":"Β·","label":"Sad","caption":"Slow, heavy steps", "scale":[0,3,5,7,10,12], "root":60},
    "calm":       {"bg":"#0a1614","body":"#6FBFB3","detail":"#4A9C8F","eye":"#0A2018","nose":"#A8E0D6","pcol":"#BFEDE4","particle":"β","label":"Calm","caption":"Drifting at ease", "scale":[0,2,5,7,9,12], "root":64},
    "energetic":  {"bg":"#1a0e05","body":"#FF8A5B","detail":"#E8623A","eye":"#1a0500","nose":"#FFD1BC","pcol":"#FFCB6B","particle":"★","label":"Energetic","caption":"Can't sit still", "scale":[0,2,4,5,7,9,11,12],"root":71},
    "mysterious": {"bg":"#120c1a","body":"#A98BD6","detail":"#6D4FA8","eye":"#F0B8FF","nose":"#D9C2EE","pcol":"#C7B3F0","particle":"β§","label":"Mysterious","caption":"Slipping through shadow", "scale":[0,1,4,5,7,8,11,12],"root":62},
    "romantic":   {"bg":"#1a0c12","body":"#F2A0BD","detail":"#D9648D","eye":"#1a0010","nose":"#FBE0EA","pcol":"#F7B8CE","particle":"β₯","label":"Romantic","caption":"A slow, dreamy waltz", "scale":[0,2,4,7,9,12], "root":67},
    "tense":      {"bg":"#100808","body":"#F0726E","detail":"#C03C38","eye":"#FFB3AE","nose":"#F7C7C4","pcol":"#F2A6A2","particle":"|","label":"Tense","caption":"Coiled and alert", "scale":[0,1,3,6,7,10,12], "root":61},
    "nostalgic":  {"bg":"#160f06","body":"#F2C083","detail":"#D98A3D","eye":"#160f06","nose":"#FBE3C7","pcol":"#F7DDB5","particle":"β¦","label":"Nostalgic","caption":"Rocking to old memories", "scale":[0,2,3,7,9,12], "root":65},
    "angry":      {"bg":"#160505","body":"#F0635E","detail":"#A8201C","eye":"#FF6961","nose":"#F7B0AC","pcol":"#F58F8A","particle":"βΈ","label":"Angry","caption":"Stomping, full of fire", "scale":[0,1,3,5,6,8,10,12],"root":59},
    "neutral":    {"bg":"#0e0f13","body":"#A6ADB8","detail":"#727A86","eye":"#0d0d18","nose":"#D8DDE3","pcol":"#C7CDD6","particle":"Β·","label":"Neutral","caption":"Steady and unhurried", "scale":[0,2,4,7,9,12], "root":64},
}
|
|
| |
# Fallback dance parameters per mood, used whenever the model's JSON spec
# cannot be obtained or parsed. Field meanings match the dance spec:
# speed / tail_speed are cycle lengths in seconds, jump is in pixels,
# sway / tail_range / ear_tilt are in degrees.
DEFAULT_DANCE = {
    "happy": dict(
        speed=0.7, jump=50, sway=6, tail_speed=0.4, tail_range=200, ear_tilt=8,
    ),
    "sad": dict(
        speed=2.4, jump=2, sway=8, tail_speed=2.5, tail_range=30, ear_tilt=15,
    ),
    "calm": dict(
        speed=2.8, jump=10, sway=2, tail_speed=3.2, tail_range=35, ear_tilt=3,
    ),
    "energetic": dict(
        speed=0.3, jump=30, sway=15, tail_speed=0.28, tail_range=180, ear_tilt=15,
    ),
    "mysterious": dict(
        speed=2.0, jump=15, sway=5, tail_speed=1.8, tail_range=100, ear_tilt=5,
    ),
    "romantic": dict(
        speed=1.6, jump=12, sway=5, tail_speed=1.6, tail_range=65, ear_tilt=3,
    ),
    "tense": dict(
        speed=0.4, jump=3, sway=3, tail_speed=0.4, tail_range=10, ear_tilt=12,
    ),
    "nostalgic": dict(
        speed=2.2, jump=6, sway=6, tail_speed=2.0, tail_range=65, ear_tilt=5,
    ),
    "angry": dict(
        speed=0.38, jump=18, sway=5, tail_speed=0.32, tail_range=160, ear_tilt=20,
    ),
    "neutral": dict(
        speed=2.0, jump=8, sway=1, tail_speed=2.2, tail_range=30, ear_tilt=2,
    ),
}
|
|
| |
def pil_to_data_url(image):
    """Encode *image* as a base64 JPEG ``data:`` URL.

    The image is converted to RGB, written to an in-memory buffer as JPEG at
    ``IMAGE_QUALITY``, and returned as ``data:image/jpeg;base64,<payload>``.
    """
    buffer = io.BytesIO()
    rgb_image = image.convert("RGB")
    rgb_image.save(buffer, format="JPEG", quality=IMAGE_QUALITY)
    payload = base64.b64encode(buffer.getvalue()).decode()
    return "data:image/jpeg;base64," + payload
|
|
def _resolve_key(ui_key):
    """Pick the API key to use.

    Precedence: the MINICPM_API_KEY environment variable, then the key typed
    into the UI, then the shared PUBLIC_API_KEY. Surrounding whitespace is
    stripped and blank values are skipped.
    """
    env_key = os.environ.get("MINICPM_API_KEY", "").strip()
    if env_key:
        return env_key
    typed_key = (ui_key or "").strip()
    if typed_key:
        return typed_key
    return PUBLIC_API_KEY
|
|
def _client(ui_key):
    """Build an OpenAI client for API_BASE_URL using the resolved API key."""
    resolved_key = _resolve_key(ui_key)
    return OpenAI(api_key=resolved_key, base_url=API_BASE_URL)
|
|
| |
def stream_description(image, prompt, model_label, max_tokens, temperature, api_key):
    """Stream a description of *image* from the hosted API.

    This is a generator. After every non-empty content delta it yields the
    text accumulated so far, so each yielded value replaces the previous one.
    Problems are reported by yielding a single message rather than raising.

    Args:
        image: Image object accepted by pil_to_data_url(), or None.
        prompt: Text sent alongside the image.
        model_label: Key into MODELS selecting the model id.
        max_tokens: Forwarded to the API as ``max_tokens``.
        temperature: Forwarded to the API as ``temperature``.
        api_key: UI-supplied key, resolved through _client().

    Yields:
        The growing description, or one warning/error message.
    """
    if image is None:
        yield "β οΈ Please upload an image first."
        return
    try:
        stream = _client(api_key).chat.completions.create(
            model=MODELS[model_label],
            messages=[{"role":"user","content":[
                {"type":"image_url","image_url":{"url": pil_to_data_url(image)}},
                {"type":"text","text": prompt},
            ]}],
            max_tokens=max_tokens, temperature=temperature, stream=True,
        )
        result = ""
        for chunk in stream:
            # A streamed chunk may carry no choices (for example a trailing
            # usage-only chunk). Indexing it would raise IndexError and the
            # generic handler below would replace the description with an
            # error message, so such chunks are skipped.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content or ""
            if delta:
                result += delta
                yield result
    except APIStatusError as e:
        yield f"β API error {e.status_code}: {e.message}"
    except APIConnectionError:
        yield "β Cannot reach api.modelbest.cn"
    except Exception as e:
        # Best-effort boundary: anything else is surfaced to the UI as text.
        yield f"β {e}"
|
|
| |
# System prompt for the second, text-only model call that turns a scene
# description into a JSON dance spec. MOOD_LABELS is interpolated as the
# Python list repr; doubled braces render as literal braces in the prompt.
# The numeric ranges listed here mirror the clamps applied when the reply
# is parsed in get_dance_spec / get_dance_spec_local.
DANCE_SYSTEM_PROMPT = f"""You are a cat dance choreographer AI.
Given a scene description, return ONLY a valid JSON object β no prose, no markdown, no code fences.

JSON schema (all fields required):
{{
"mood": one of {MOOD_LABELS},
"speed": float 0.3β3.0 (animation cycle seconds; lower = faster),
"jump": int 0β60 (vertical bounce in pixels),
"sway": int 0β20 (body rotation degrees),
"tail_speed": float 0.2β3.0 (tail cycle seconds),
"tail_range": int 5β200 (tail swing degrees),
"ear_tilt": int 0β25 (ear tilt degrees)
}}

Choose values that physically match the scene mood. An energetic scene should have
low speed (fast), high jump, high sway. A calm scene should have high speed (slow),
low jump, low sway. Be creative β the cat's whole body expresses the image's emotion."""
|
|
# Ordered (mood, keywords) table; the first mood with a hit wins.
_MOOD_KEYWORDS = (
    ("happy", ("happy", "joy", "celebrate", "laugh", "smile", "bright", "sunny")),
    ("sad", ("sad", "lonely", "rain", "sorrow", "grief", "cry", "gloom")),
    ("energetic", ("energetic", "vibrant", "excited", "dynamic", "rush", "active")),
    ("calm", ("calm", "peaceful", "quiet", "gentle", "serene", "still")),
    ("mysterious", ("mysterious", "dark", "eerie", "shadow", "mystic", "fog")),
    ("romantic", ("romantic", "love", "tender", "intimate", "warm", "soft")),
    ("tense", ("tense", "anxious", "fear", "alarm", "nervous", "danger")),
    ("nostalgic", ("nostalgic", "memory", "vintage", "old", "past", "retro")),
    ("angry", ("angry", "furious", "rage", "fierce", "storm")),
)

# One compiled pattern per mood. ``\b`` anchors every keyword at the start of
# a word, so inflections still match ("raining", "joyful") while hits in the
# middle of unrelated words do not ("train", "holding", "unhappy").
_MOOD_PATTERNS = tuple(
    (mood, re.compile(r"\b(?:" + "|".join(re.escape(word) for word in words) + ")"))
    for mood, words in _MOOD_KEYWORDS
)


def _keyword_mood(description: str) -> str:
    """Guess a mood from keywords; used as the fallback when JSON parsing fails.

    Args:
        description: Free-text scene description (matched case-insensitively).
            ``None`` or an empty string yields "neutral".

    Returns:
        The first mood, in table order, with a keyword that starts a word in
        the description, or "neutral" when nothing matches.
    """
    text = (description or "").lower()
    for mood, pattern in _MOOD_PATTERNS:
        if pattern.search(text):
            return mood
    return "neutral"
|
|
def get_dance_spec(description: str, api_key: str) -> tuple[str, dict]:
    """Ask the hosted model for a dance spec matching *description*.

    The model is prompted with DANCE_SYSTEM_PROMPT and expected to answer with
    a JSON object. Its values are validated and clamped to the ranges stated
    in the prompt.

    Args:
        description: Scene description; empty text or a warning/error message
            produced by the description step short-circuits to neutral.
        api_key: UI-supplied key, resolved through _client().

    Returns:
        ``(mood, dance)`` where ``dance`` has the keys speed, jump, sway,
        tail_speed, tail_range and ear_tilt. If the request or parsing fails,
        the mood comes from _keyword_mood() and the dance from DEFAULT_DANCE.
        Default dances are returned as copies so callers cannot mutate the
        shared table.
    """
    if not description or description.startswith(("β οΈ","β")):
        return "neutral", dict(DEFAULT_DANCE["neutral"])
    try:
        resp = _client(api_key).chat.completions.create(
            model="MiniCPM-V-4.6-Instruct",
            messages=[
                {"role":"system","content": DANCE_SYSTEM_PROMPT},
                {"role":"user", "content": f"Scene description:\n{description[:800]}"},
            ],
            max_tokens=120, temperature=0.3,
        )
        # The message content can be None; treat that as an empty reply.
        raw = (resp.choices[0].message.content or "").strip()

        # Drop markdown code fences, then keep only the outermost {...} so a
        # reply wrapped in a sentence of prose still parses.
        raw = re.sub(r"```[a-z]*", "", raw).strip().strip("`").strip()
        start, end = raw.find("{"), raw.rfind("}")
        if start != -1 and end > start:
            raw = raw[start:end + 1]
        spec = json.loads(raw)
        if not isinstance(spec, dict):
            raise ValueError("dance spec is not a JSON object")

        # Accept "Happy" / " happy " as well as the exact lowercase label.
        mood = str(spec.get("mood", "neutral")).strip().lower()
        if mood not in MOOD_LABELS:
            mood = "neutral"

        def clamped(key, low, high, default, cast):
            """Read one numeric field, falling back to *default* if unusable."""
            try:
                value = float(spec.get(key, default))
            except (TypeError, ValueError):
                # One bad field should not discard the rest of the spec.
                value = float(default)
            return cast(max(low, min(high, value)))

        dance = {
            "speed":      clamped("speed", 0.3, 3.0, 1.5, float),
            "jump":       clamped("jump", 0, 60, 10, int),
            "sway":       clamped("sway", 0, 20, 5, int),
            "tail_speed": clamped("tail_speed", 0.2, 3.0, 1.5, float),
            "tail_range": clamped("tail_range", 5, 200, 40, int),
            "ear_tilt":   clamped("ear_tilt", 0, 25, 5, int),
        }
        return mood, dance

    except Exception:
        # Deliberate best-effort: any API or parsing failure falls back to
        # the keyword heuristic and that mood's default dance.
        mood = _keyword_mood(description)
        return mood, dict(DEFAULT_DANCE[mood])
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| from pathlib import Path |
|
|
# Model id passed to from_pretrained() by the local backend.
LOCAL_MODEL_ID = "openbmb/MiniCPM-V-4"
# Cache directory next to this file; passed as cache_dir when loading.
LOCAL_CACHE_DIR = Path(__file__).parent / "model_cache"
# Marker file written by _load_local_model() after a load that was allowed to
# download; once present, later loads use local_files_only=True.
LOCAL_SENTINEL = LOCAL_CACHE_DIR / ".download_complete"


# Module-level cache filled by _load_local_model() on first successful load.
_local_model = None
_local_tokenizer = None
|
|
def local_is_cached() -> bool:
    """Report whether the download-complete sentinel file is present."""
    sentinel_present = LOCAL_SENTINEL.exists()
    return sentinel_present
|
|
def local_cache_size_gb() -> float:
    """Return the total size of all files under LOCAL_CACHE_DIR in GB (1e9 bytes).

    Returns 0.0 when the cache directory does not exist.
    """
    if not LOCAL_CACHE_DIR.exists():
        return 0.0
    total_bytes = 0
    for entry in LOCAL_CACHE_DIR.rglob("*"):
        if entry.is_file():
            total_bytes += entry.stat().st_size
    return total_bytes / 1e9
|
|
def local_status_md() -> str:
    """Return a Markdown status line describing the local model cache.

    Reports the cached size when the sentinel file exists, otherwise explains
    that the model will be downloaded on first use.
    """
    if local_is_cached():
        # The first literal was split across two lines by a stray line break
        # inside a mis-decoded check-mark character (an unterminated f-string);
        # it is restored here as a single literal.
        return (f"✅ **Model cached** β `{LOCAL_MODEL_ID}` "
                f"({local_cache_size_gb():.1f} GB) ready to run offline.")
    return (f"β¬οΈ **Not downloaded yet** β `{LOCAL_MODEL_ID}` (~8 GB) will be "
            f"fetched on first use and cached in `model_cache/`. "
            f"Requires internet for this one-time download.")
|
|
def _load_local_model():
    """
    Lazily import torch/transformers and load MiniCPM-V-4 from local cache,
    downloading once if needed. Returns (model, tokenizer).

    The loaded pair is kept in the module globals ``_local_model`` and
    ``_local_tokenizer`` and returned directly on later calls.

    Raises:
        RuntimeError: if torch or transformers cannot be imported.
        Errors raised by ``from_pretrained`` are not caught here.
    """
    global _local_model, _local_tokenizer
    # Already loaded in this process: reuse it.
    if _local_model is not None:
        return _local_model, _local_tokenizer

    # Imported here rather than at module top so the API backend works
    # without these packages installed.
    try:
        import torch
        import transformers
        from transformers import AutoModel, AutoTokenizer
    except ImportError as e:
        raise RuntimeError(
            "Local backend requires extra packages.\n"
            "Install with:\n"
            " pip install torch transformers accelerate\n"
            f"(original error: {e})"
        )

    # Compatibility shim for transformers >= 5.0: make lookups of
    # ``all_tied_weights_keys`` that reach __getattr__ return an empty dict.
    # NOTE(review): presumably the model's remote code predates this
    # attribute — confirm against the model repository.
    _tv = tuple(int(x) for x in transformers.__version__.split(".")[:2])
    if _tv >= (5, 0):
        from transformers import modeling_utils as _mu
        _orig_getattr = getattr(_mu.PreTrainedModel, "__getattr__", None)
        def _safe_getattr(self, name):
            if name == "all_tied_weights_keys":
                return {}
            # Every other name is delegated to the previous __getattr__, if any.
            if _orig_getattr is not None:
                return _orig_getattr(self, name)
            raise AttributeError(name)
        _mu.PreTrainedModel.__getattr__ = _safe_getattr

    LOCAL_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    # With the sentinel present, loading is restricted to files already on disk.
    local_only = local_is_cached()

    # Keyword arguments shared by the tokenizer and model loads.
    common = dict(
        trust_remote_code=True,
        cache_dir=str(LOCAL_CACHE_DIR),
        local_files_only=local_only,
    )

    _local_tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_ID, **common)

    # float16 on CUDA, float32 on CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if device == "cuda" else torch.float32

    _local_model = AutoModel.from_pretrained(
        LOCAL_MODEL_ID,
        torch_dtype=dtype,
        attn_implementation="sdpa",
        device_map="auto" if device == "cuda" else None,
        low_cpu_mem_usage=True,
        **common,
    )
    # With no device_map (CPU case) the model is moved explicitly.
    if device == "cpu":
        _local_model = _local_model.to(device)
    _local_model.eval()

    # Write the sentinel only after a load that was allowed to download, so a
    # failed first attempt is retried with downloading enabled.
    if not local_only:
        LOCAL_SENTINEL.write_text(f"{LOCAL_MODEL_ID} downloaded.\nDelete to re-download.\n")

    return _local_model, _local_tokenizer
|
|
def stream_description_local(image, prompt, max_tokens, temperature):
    """Local (offline) equivalent of stream_description — non-streaming, single yield.

    Args:
        image: Image object with a ``convert`` method, or None.
        prompt: Text sent alongside the image.
        max_tokens: Forwarded to ``model.chat`` as ``max_new_tokens``.
        temperature: Sampling is enabled only when this is > 0; the value
            passed on is floored at 0.01.

    Yields:
        Exactly one string: the model's answer, or a warning/error message.
    """
    if image is None:
        yield "β οΈ Please upload an image first."
        return
    try:
        model, tokenizer = _load_local_model()
        # Convert once and reuse the result; the original converted the image
        # twice, making two full copies.
        rgb_image = image.convert("RGB")
        msgs = [{"role": "user", "content": [rgb_image, prompt]}]
        result = model.chat(
            image=rgb_image,
            msgs=msgs,
            tokenizer=tokenizer,
            sampling=(temperature > 0),
            temperature=max(temperature, 0.01),
            max_new_tokens=max_tokens,
        )
        yield result
    except RuntimeError as e:
        # Includes the missing-dependency error raised by _load_local_model().
        yield f"β {e}"
    except Exception as e:
        yield f"β Local inference error: {e}"
|
|
def get_dance_spec_local(description: str) -> tuple[str, dict]:
    """Local equivalent of get_dance_spec — one extra text-only local call.

    The local model gets DANCE_SYSTEM_PROMPT followed by the description in a
    single user message and is expected to answer with a JSON object. Its
    values are validated and clamped to the ranges stated in the prompt.

    Args:
        description: Scene description; empty text or a warning/error message
            produced by the description step short-circuits to neutral.

    Returns:
        ``(mood, dance)`` where ``dance`` has the keys speed, jump, sway,
        tail_speed, tail_range and ear_tilt. If inference or parsing fails,
        the mood comes from _keyword_mood() and the dance from DEFAULT_DANCE.
        Default dances are returned as copies so callers cannot mutate the
        shared table.
    """
    if not description or description.startswith(("β οΈ","β")):
        return "neutral", dict(DEFAULT_DANCE["neutral"])
    try:
        model, tokenizer = _load_local_model()
        msgs = [{"role": "user", "content": [
            DANCE_SYSTEM_PROMPT + f"\n\nScene description:\n{description[:800]}"
        ]}]
        raw = model.chat(
            image=None, msgs=msgs, tokenizer=tokenizer,
            sampling=False, max_new_tokens=150,
        )
        # Drop markdown code fences, then keep only the outermost {...} so a
        # reply wrapped in a sentence of prose still parses.
        raw = re.sub(r"```[a-z]*", "", raw or "").strip().strip("`").strip()
        start, end = raw.find("{"), raw.rfind("}")
        if start != -1 and end > start:
            raw = raw[start:end + 1]
        spec = json.loads(raw)
        if not isinstance(spec, dict):
            raise ValueError("dance spec is not a JSON object")

        # Accept "Happy" / " happy " as well as the exact lowercase label.
        mood = str(spec.get("mood", "neutral")).strip().lower()
        if mood not in MOOD_LABELS:
            mood = "neutral"

        def clamped(key, low, high, default, cast):
            """Read one numeric field, falling back to *default* if unusable."""
            try:
                value = float(spec.get(key, default))
            except (TypeError, ValueError):
                # One bad field should not discard the rest of the spec.
                value = float(default)
            return cast(max(low, min(high, value)))

        dance = {
            "speed":      clamped("speed", 0.3, 3.0, 1.5, float),
            "jump":       clamped("jump", 0, 60, 10, int),
            "sway":       clamped("sway", 0, 20, 5, int),
            "tail_speed": clamped("tail_speed", 0.2, 3.0, 1.5, float),
            "tail_range": clamped("tail_range", 5, 200, 40, int),
            "ear_tilt":   clamped("ear_tilt", 0, 25, 5, int),
        }
        return mood, dance
    except Exception:
        # Deliberate best-effort: any inference or parsing failure falls back
        # to the keyword heuristic (evaluated once) and that mood's defaults.
        mood = _keyword_mood(description)
        return mood, dict(DEFAULT_DANCE[mood])
|
|
|
|
| |
def generate_animation(text: str) -> str:
    """Render the cat for a mood guessed from keywords in *text*.

    The text is lowercased and checked against an ordered keyword table by
    plain substring search; the first mood with a hit wins, otherwise the
    mood is "neutral". The cat is rendered with that mood's DEFAULT_DANCE.
    """
    lowered = text.lower()
    keyword_table = (
        ("happy", ("happy", "celebrate", "party", "joy", "cheerful")),
        ("sad", ("sad", "lonely", "rain", "grief", "sorrow")),
        ("energetic", ("energy", "dance", "excited", "lively")),
        ("calm", ("calm", "peace", "serene", "gentle", "quiet")),
        ("mysterious", ("mysterious", "eerie", "dark", "shadow")),
        ("romantic", ("romantic", "love", "tender", "warm")),
        ("tense", ("tense", "nervous", "anxiety", "fear")),
        ("nostalgic", ("nostalgic", "memory", "vintage", "old")),
        ("angry", ("angry", "furious", "rage", "fierce")),
    )
    mood = next(
        (name for name, words in keyword_table
         if any(word in lowered for word in words)),
        "neutral",
    )
    return cat_html(mood, DEFAULT_DANCE[mood])
|
|
| |
# CSS font-family stacks interpolated into the stage markup:
# STAGE_FONT for the stage and caption, LABEL_FONT for secondary text in the
# placeholder/loading cards, MONO_FONT for the cue line and parameter chips.
STAGE_FONT = "'Space Grotesk', 'Inter', system-ui, sans-serif"
LABEL_FONT = "'Inter', system-ui, sans-serif"
MONO_FONT = "'JetBrains Mono', 'SFMono-Regular', Consolas, monospace"
|
|
def _stage_open(spotlight_color: str, breathe_speed: float = 4.0) -> str:
    """Opening <div> + shared <style> for the emotion card, HF light style.

    Args:
        spotlight_color: CSS colour assigned to the ``--spot`` custom property
            on the stage element; the stylesheet uses it for the gradients
            and the cue dot.
        breathe_speed: Duration in seconds of the ``spot_breathe`` animation.

    Returns:
        HTML that opens ``<div class="stage">`` and embeds the stylesheet.
        The div is left open; _stage_close() supplies the closing tag.
    """
    # Doubled braces below are literal CSS braces inside the f-string.
    return f"""<div class="stage" style="--spot:{spotlight_color};">
<style>
@import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@500;700&family=Inter:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap');

.stage {{
position:relative; height:440px; border-radius:12px;
overflow:hidden; isolation:isolate;
background:
radial-gradient(ellipse 70% 50% at 50% 22%, color-mix(in srgb, var(--spot) 14%, transparent), transparent 70%),
#F8F9FA;
border:1px solid #E5E7EB;
display:flex; flex-direction:column; align-items:center; justify-content:center;
font-family:{STAGE_FONT};
}}
@keyframes spot_breathe {{
0%,100% {{ opacity:.7; }}
50% {{ opacity:1; }}
}}
.stage::before {{
content:''; position:absolute; inset:0; pointer-events:none;
background: radial-gradient(ellipse 45% 36% at 50% 18%, color-mix(in srgb, var(--spot) 18%, transparent), transparent 72%);
animation: spot_breathe {breathe_speed}s ease-in-out infinite;
}}
/* faint dot-grid texture, HF-card style */
.stage::after {{
content:''; position:absolute; inset:0; pointer-events:none; opacity:.5;
background-image: radial-gradient(circle, #E5E7EB 1px, transparent 1px);
background-size: 22px 22px;
}}

.stage-cue {{
position:absolute; top:16px; left:0; right:0;
display:flex; align-items:center; justify-content:center; gap:8px;
font-family:{MONO_FONT};
font-size:.68rem; letter-spacing:.16em; text-transform:uppercase;
color:#6B7280; font-weight:500; z-index:3;
}}
.stage-cue .dot {{
width:8px; height:8px; border-radius:50%;
background:var(--spot); box-shadow:0 0 0 3px color-mix(in srgb, var(--spot) 25%, transparent);
}}
.stage-cue .mood-name {{
color:#111827; font-weight:700; letter-spacing:.1em;
font-family:{MONO_FONT};
background:#FFFFFF; border:1px solid #E5E7EB;
border-radius:999px; padding:2px 10px;
}}

.stage-caption {{
position:absolute; bottom:62px; left:0; right:0; text-align:center; z-index:3;
color:#4B5563; font-size:.92rem; letter-spacing:.01em; font-style:italic;
font-family:{STAGE_FONT}; font-weight:500;
}}

.cue-sheet {{
position:absolute; bottom:14px; left:0; right:0; z-index:3;
display:flex; justify-content:center; gap:8px; flex-wrap:wrap;
padding:0 20px;
}}
.cue-chip {{
font-family:{MONO_FONT}; font-size:.64rem; letter-spacing:.03em;
color:#374151; background:#FFFFFF; border:1px solid #E5E7EB;
border-radius:999px; padding:3px 10px; white-space:nowrap;
box-shadow: 0 1px 2px rgba(0,0,0,.03);
}}
.cue-chip b {{ color:#92660C; font-weight:600; }}

/* ββ music toggle button ββ */
.music-toggle {{
position:absolute; top:14px; right:14px; z-index:4;
width:36px; height:36px; border-radius:50%;
background:#FFFFFF; border:1px solid #E5E7EB;
display:flex; align-items:center; justify-content:center;
cursor:pointer; font-size:1rem; color:#374151;
box-shadow: 0 1px 2px rgba(0,0,0,.04);
transition: transform .15s ease, background .15s ease, box-shadow .15s ease;
}}
.music-toggle:hover {{
transform: scale(1.06);
box-shadow: 0 2px 8px rgba(0,0,0,.08);
}}
.music-toggle.playing {{
background: #FFD21E;
border-color: #FFD21E;
color:#111827;
}}
.music-toggle .icon-play {{ display:inline; }}
.music-toggle .icon-pause {{ display:none; }}
.music-toggle.playing .icon-play {{ display:none; }}
.music-toggle.playing .icon-pause {{ display:inline; }}
</style>
"""
|
|
def _stage_close() -> str:
    """Return the closing tag for the <div> opened by _stage_open()."""
    closing_tag = "</div>"
    return closing_tag
|
|
| |
def cat_html(mood: str, dance: dict) -> str:
    """Render the animated cat stage, including its generative-tune script.

    Args:
        mood: Mood name; unknown moods use the "neutral" palette. The name is
            also embedded in the music button id and the inline script.
        dance: Dict with the keys speed, jump, sway, tail_speed, tail_range
            and ear_tilt (seconds, pixels and degrees as in the dance spec).

    Returns:
        A complete HTML fragment: stage wrapper, CSS keyframes built from the
        dance values, the cat markup, parameter chips and an inline script
        that spawns particles and plays the tune via the Web Audio API.

    Raises:
        KeyError: if *dance* lacks one of the required keys.
    """
    p = MOOD_PALETTE.get(mood, MOOD_PALETTE["neutral"])
    B = p["body"]
    D = p["detail"]
    E = p["eye"]
    N = p["nose"]
    sp = dance["speed"]
    jp = dance["jump"]
    sw = dance["sway"]
    tsp = dance["tail_speed"]
    tr = dance["tail_range"]
    et = dance["ear_tilt"]

    # The tail swings between -half and +half of the range. Negate after the
    # floor division: ``-tr // 2`` floors toward minus infinity and made odd
    # ranges lopsided (65 -> -33 / +32).
    half_swing = tr // 2
    t0 = -half_swing
    t1 = half_swing
    # Spotlight breathing period follows the dance speed, kept within 2-6 s.
    breathe = max(2.0, min(6.0, sp * 2))
    stage_id = f"stage_{mood}"

    # Music parameters: scale degrees and root note come from the palette.
    scale = p["scale"]
    root = p["root"]
    # Note length maps speed 0.3..3.0 s linearly onto 140..520 ms.
    note_ms = int(140 + (sp - 0.3) / (3.0 - 0.3) * (520 - 140))
    # Higher jumps raise the register: one octave per 25 px, at most two.
    octave_shift = 12 * min(2, jp // 25)
    note_root = root + octave_shift

    # Chips echoing the exact parameters that drive the animation.
    cue_chips = (
        f'<span class="cue-chip">speed <b>{sp}s</b></span>'
        f'<span class="cue-chip">jump <b>{jp}px</b></span>'
        f'<span class="cue-chip">sway <b>{sw}Β°</b></span>'
        f'<span class="cue-chip">tail <b>{tsp}s / {tr}Β°</b></span>'
        f'<span class="cue-chip">ears <b>{et}Β°</b></span>'
    )

    # Doubled braces below are literal CSS/JS braces inside the f-string.
    return _stage_open(B, breathe) + f"""
<style>
@keyframes K_body {{
0%,100% {{ transform: translateY(0px) rotate(-{sw}deg); }}
50% {{ transform: translateY(-{jp}px) rotate({sw}deg); }}
}}
@keyframes K_tail {{
0%,100% {{ transform: rotate({t0}deg); }}
50% {{ transform: rotate({t1}deg); }}
}}
@keyframes K_ear {{
0%,100% {{ transform: rotate(-{et}deg); }}
50% {{ transform: rotate({et}deg); }}
}}
@keyframes K_blink {{
0%,88%,100% {{ transform: scaleY(1); }}
93% {{ transform: scaleY(0.08); }}
}}
@keyframes K_shadow {{
0%,100% {{ transform: translateX(-50%) scaleX(1); opacity:.45; }}
50% {{ transform: translateX(-50%) scaleX({max(0.4, 1 - jp/80):.2f}); opacity:.15; }}
}}
@keyframes K_part {{
0% {{ opacity:0; transform:translate(0,0) scale(.5); }}
20% {{ opacity:.9; }}
80% {{ opacity:.4; }}
100% {{ opacity:0; transform:translate(var(--px),var(--py)) scale(1.5); }}
}}

.cat-wrap {{ position:relative; width:160px; height:200px; z-index:2; }}

.cat-shadow {{
position:absolute; bottom:-4px; left:50%;
width:72px; height:11px; border-radius:50%;
background:rgba(0,0,0,.55);
animation: K_shadow {sp}s ease-in-out infinite;
}}

.cat-unit {{
position:absolute; bottom:0; left:50%;
transform-origin: center bottom;
animation: K_body {sp}s ease-in-out infinite;
}}

.c-body {{
position:absolute; bottom:0; left:-36px;
width:72px; height:62px;
border-radius:52% 52% 46% 46%;
background:{B};
box-shadow:inset -6px -5px 0 {D};
}}
.c-belly {{
position:absolute; bottom:5px; left:50%; transform:translateX(-50%);
width:40px; height:30px; border-radius:50%;
background:{D}28;
}}

.c-tail {{
position:absolute; bottom:4px; left:22px;
width:16px; height:52px;
border-radius:38% 62% 55% 45% / 28% 28% 72% 72%;
background:{B};
box-shadow:inset 3px 0 0 {D};
transform-origin:bottom center;
animation:K_tail {tsp}s ease-in-out infinite;
}}
.c-tail::after {{
content:'';
position:absolute; top:-9px; left:-5px;
width:26px; height:18px; border-radius:50%;
background:{B};
box-shadow:inset 2px -2px 0 {D};
}}

.c-paw-l,.c-paw-r {{
position:absolute; bottom:0;
width:22px; height:13px;
border-radius:50% 50% 42% 42%;
background:{B};
box-shadow:inset -2px -2px 0 {D};
}}
.c-paw-l {{ left:-34px; }}
.c-paw-r {{ left:12px; }}

.c-head {{
position:absolute; bottom:56px; left:-32px;
width:64px; height:58px; border-radius:50%;
background:{B};
box-shadow:inset -4px -3px 0 {D};
overflow:visible;
}}

.c-ear-l,.c-ear-r {{
position:absolute;
width:0; height:0;
border-left:11px solid transparent;
border-right:11px solid transparent;
border-bottom:21px solid {B};
animation:K_ear {sp}s ease-in-out infinite;
}}
.c-ear-l {{ top:-16px; left:2px; transform-origin:bottom left; }}
.c-ear-r {{ top:-16px; left:40px; transform-origin:bottom right; }}
.c-ear-l::after,.c-ear-r::after {{
content:'';position:absolute;top:5px;left:-6px;
width:0;height:0;
border-left:6px solid transparent;
border-right:6px solid transparent;
border-bottom:13px solid {D};
}}

.c-eye-l,.c-eye-r {{
position:absolute;
width:12px; height:12px; border-radius:50%;
background:{E};
animation:K_blink 3.5s ease-in-out infinite;
}}
.c-eye-l {{ top:18px; left:8px; }}
.c-eye-r {{ top:18px; left:44px; animation-delay:.2s; }}
.c-eye-l::after,.c-eye-r::after {{
content:'';position:absolute;top:2px;left:2px;
width:5px;height:5px;border-radius:50%;
background:rgba(255,255,255,.32);
}}

.c-nose {{
position:absolute; top:32px; left:27px;
width:10px; height:7px;
border-radius:50% 50% 40% 40%;
background:{N};
transform:translateX(-50%);
}}

.c-mouth-l,.c-mouth-r {{
position:absolute;
width:8px; height:5px;
border:0 solid {N};
border-bottom-width:1.5px;
border-radius:0 0 50% 50%;
top:38px;
}}
.c-mouth-l {{ left:21px; border-left-width:1.5px; transform:rotate(10deg); }}
.c-mouth-r {{ left:30px; border-right-width:1.5px; transform:rotate(-10deg); }}

.c-wl1,.c-wl2,.c-wr1,.c-wr2 {{
position:absolute; height:1.5px;
background:rgba(255,255,255,.5); border-radius:1px;
width:28px;
}}
.c-wl1 {{ top:29px; right:37px; transform:rotate(-10deg); transform-origin:right; }}
.c-wl2 {{ top:35px; right:37px; transform:rotate( 10deg); transform-origin:right; }}
.c-wr1 {{ top:29px; left:37px; transform:rotate( 10deg); transform-origin:left; }}
.c-wr2 {{ top:35px; left:37px; transform:rotate(-10deg); transform-origin:left; }}

.c-particle {{
position:absolute; pointer-events:none;
color:{D}; font-size:.9rem;
opacity:0;
animation:K_part var(--pd) var(--pde) ease-out infinite;
}}
</style>

<div class="stage-cue">
<span class="dot"></span>
<span class="mood-name">{p['label']}</span>
<span> Β· live emotion</span>
</div>

<button class="music-toggle" id="music_{stage_id}" title="Play the generated tune" aria-label="Toggle music">
<span class="icon-play">βͺ</span><span class="icon-pause">βΈ</span>
</button>

<div class="cat-wrap" id="cw">
<div class="cat-shadow"></div>
<div class="cat-unit">
<div class="c-tail"></div>
<div class="c-body"><div class="c-belly"></div></div>
<div class="c-paw-l"></div>
<div class="c-paw-r"></div>
<div class="c-head">
<div class="c-ear-l"></div>
<div class="c-ear-r"></div>
<div class="c-eye-l"></div>
<div class="c-eye-r"></div>
<div class="c-nose"></div>
<div class="c-mouth-l"></div>
<div class="c-mouth-r"></div>
<div class="c-wl1"></div>
<div class="c-wl2"></div>
<div class="c-wr1"></div>
<div class="c-wr2"></div>
</div>
</div>
</div>

<div class="stage-caption">{p['caption']}</div>
<div class="cue-sheet">{cue_chips}</div>

<script>
(function(){{
const wrap = document.getElementById('cw');
const chars = '{p['particle']}'.split('');
for(let i=0;i<22;i++){{
const el = document.createElement('div');
el.className = 'c-particle';
el.textContent = chars[i % chars.length];
const a = Math.random()*Math.PI*2, d = 50+Math.random()*75;
el.style.setProperty('--px', (Math.cos(a)*d)+'px');
el.style.setProperty('--py', (Math.sin(a)*d-20)+'px');
el.style.setProperty('--pd', (.9+Math.random()*2).toFixed(2)+'s');
el.style.setProperty('--pde',(Math.random()*2.5).toFixed(2)+'s');
el.style.left = (55+Math.random()*50)+'px';
el.style.top = (40+Math.random()*80)+'px';
el.style.fontSize = (.55+Math.random()*.65).toFixed(2)+'rem';
wrap.appendChild(el);
}}

// ββ Generative tune β Web Audio, no files ββ
const scale = {scale};
const noteRoot= {note_root};
const noteMs = {note_ms};
const mood = "{mood}";

let ctx = null, timer = null, step = 0, master = null;

function midiToFreq(n) {{ return 440 * Math.pow(2, (n - 69) / 12); }}

function pattern(stepIdx) {{
// simple per-mood arpeggio shapes over the scale degrees
const len = scale.length;
let degree;
if (mood === 'energetic' || mood === 'angry') {{
degree = scale[stepIdx % len]; // straight run, bright
}} else if (mood === 'sad' || mood === 'nostalgic') {{
degree = scale[[0,2,1,3][stepIdx % 4] % len]; // gentle up-down
}} else if (mood === 'mysterious' || mood === 'tense') {{
degree = scale[[0,3,1,5][stepIdx % 4] % len]; // wider, uneasy leaps
}} else {{
degree = scale[[0,1,2,1][stepIdx % 4] % len]; // calm/happy/romantic/calm lilt
}}
return noteRoot + degree;
}}

function playNote() {{
if (!ctx) return;
const midi = pattern(step);
const freq = midiToFreq(midi);
const t0 = ctx.currentTime;

const osc = ctx.createOscillator();
const gain = ctx.createGain();
osc.type = (mood === 'angry' || mood === 'energetic') ? 'sawtooth'
: (mood === 'mysterious' || mood === 'tense') ? 'triangle'
: 'sine';
osc.frequency.setValueAtTime(freq, t0);

const dur = noteMs / 1000 * 0.9;
gain.gain.setValueAtTime(0.0001, t0);
gain.gain.exponentialRampToValueAtTime(0.18, t0 + 0.02);
gain.gain.exponentialRampToValueAtTime(0.0001, t0 + dur);

osc.connect(gain).connect(master);
osc.start(t0);
osc.stop(t0 + dur + 0.02);

step = (step + 1) % 16;
}}

const btn = document.getElementById('music_{stage_id}');
btn.addEventListener('click', function(){{
if (!ctx) {{
ctx = new (window.AudioContext || window.webkitAudioContext)();
master = ctx.createGain();
master.gain.value = 0.5;
master.connect(ctx.destination);
}}
if (timer) {{
clearInterval(timer); timer = null;
ctx.suspend();
btn.classList.remove('playing');
}} else {{
ctx.resume();
playNote();
timer = setInterval(playNote, {note_ms});
btn.classList.add('playing');
}}
}});
}})();
</script>""" + _stage_close()
|
|
def placeholder_html():
    """Return the idle stage shown before any image has been analysed."""
    opening = _stage_open("#FFD21E", 6.0)
    message = f"""
<div style="text-align:center; z-index:2; color:#6B7280; font-family:{STAGE_FONT};">
<div style="font-size:2.4rem; margin-bottom:14px; opacity:.6;">π±</div>
<div style="font-size:1.05rem; font-weight:700; letter-spacing:.01em; color:#111827; margin-bottom:8px;">
No emotion yet
</div>
<div style="font-size:.82rem; color:#6B7280; max-width:280px; margin:0 auto; line-height:1.7; font-family:{LABEL_FONT};">
Upload an image β the model reads its mood and the cat performs it,
tune and all.
</div>
</div>"""
    return "".join((opening, message, _stage_close()))
|
|
def loading_html(local: bool = False) -> str:
    """Return the spinner stage shown while a request is in flight.

    Args:
        local: When true, use the wording for on-device inference instead of
            the default wording.
    """
    if local:
        title = "Running locallyβ¦"
        caption = "on-device inference β first run may take a while"
    else:
        title = "Analyzing imageβ¦"
        caption = "choreographing the emotion"
    spinner = f"""
<div style="text-align:center; z-index:2; color:#6B7280; font-family:{STAGE_FONT};">
<div class="loading-spinner" style="
width:32px; height:32px; margin:0 auto 16px;
border:3px solid #E5E7EB; border-top-color:#FFD21E;
border-radius:50%; animation: spin 0.9s linear infinite;"></div>
<div style="font-size:.92rem; letter-spacing:.01em; color:#111827; font-weight:700;">
{title}
</div>
<div style="font-size:.78rem; color:#6B7280; margin-top:4px; font-family:{LABEL_FONT};">
{caption}
</div>
</div>
<style>@keyframes spin {{ to {{ transform: rotate(360deg); }} }}</style>"""
    return "".join((_stage_open("#FFD21E", 2.0), spinner, _stage_close()))
|
|
| |
def run_image_pipeline(image, prompt, model_label, max_tokens, temperature, api_key, backend):
    """Stream a description of ``image``, then show the cat performing its mood.

    This is a generator used as a Gradio event handler. Every item it yields
    is a ``(description_text, stage_html)`` pair matching the
    ``[desc_out, cat_out]`` outputs it is wired to.

    Args:
        image: The uploaded image, forwarded unchanged to the description
            streamer of the selected backend.
        prompt: Prompt text forwarded to the description streamer.
        model_label: Model choice; only forwarded on the API backend.
        max_tokens: Generation limit forwarded to the description streamer.
        temperature: Sampling temperature forwarded to the streamer.
        api_key: Key forwarded to the API streamer and to ``get_dance_spec``;
            unused on the local backend.
        backend: The value of the Backend radio. ``"Local (offline)"`` selects
            the on-device functions; any other value selects the API path.

    Yields:
        First an empty description with the loading stage, then each partial
        description with the loading stage, and finally the complete
        description with the cat stage built from the dance spec.
    """
    use_local = backend == "Local (offline)"
    spinner = loading_html(local=use_local)

    # Show the spinner right away on BOTH backends. Previously only the local
    # path did this, so the API path left stale stage content on screen until
    # the first streamed chunk arrived.
    yield "", spinner

    if use_local:
        chunks = stream_description_local(image, prompt, max_tokens, temperature)
    else:
        chunks = stream_description(image, prompt, model_label, max_tokens, temperature, api_key)

    # Each item replaces the previous text in the output box; the last one
    # received is what gets handed to the dance-spec step.
    final_desc = ""
    for final_desc in chunks:
        yield final_desc, spinner

    if use_local:
        mood, dance = get_dance_spec_local(final_desc)
    else:
        mood, dance = get_dance_spec(final_desc, api_key)
    yield final_desc, cat_html(mood, dance)
|
|
| |
| |
| |
|
|
# Page-level stylesheet handed to ``gr.Blocks(css=CSS)``.
#
# Layout of the sheet:
#   * ``:root`` declares the colour variables every later rule reads.
#   * ``#studio-header``, ``#submit-img`` / ``#submit-txt``, ``#desc-output``
#     and ``#run-locally`` target elements created in the UI code, either via
#     ``elem_id=...`` or via an ``id`` in the header HTML.
#   * The ``@import`` on the first line pulls the three web fonts from Google
#     Fonts; each ``font-family`` declaration also lists a generic fallback.
#
# NOTE(review): the ``.gr-*`` and ``.tab-nav`` class selectors rely on class
# names in Gradio's generated markup -- confirm they still match the installed
# Gradio version.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@500;600;700&family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');

:root {
--bg: #FFFFFF;
--surface: #F8F9FA;
--raised: #E5E7EB;
--text: #111827;
--text-dim: #4B5563;
--text-faint:#6B7280;
--accent: #FFD21E;
--accent-ink:#111827;
}

.gradio-container {
background: var(--bg) !important;
font-family: 'Inter', system-ui, sans-serif !important;
}

/* ββ Header ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
#studio-header {
text-align:center; padding: 18px 20px 22px;
border:1px solid var(--raised); border-radius:12px;
background: var(--surface);
margin-bottom:8px;
}
#studio-header h1 {
font-family:'Space Grotesk', sans-serif !important;
font-weight:700 !important; letter-spacing:.01em;
font-size:1.9rem !important; color:var(--text) !important;
margin-bottom:6px !important;
}
#studio-header p {
color:var(--text-dim) !important; font-size:.92rem !important;
margin:0 !important;
}
#studio-header .eyebrow {
display:inline-flex; align-items:center; gap:8px;
font-family:'JetBrains Mono', monospace; font-size:.7rem;
letter-spacing:.18em; text-transform:uppercase;
color:var(--text-faint); margin-bottom:10px;
}
#studio-header .eyebrow .badge {
display:inline-flex; align-items:center; gap:5px;
background: var(--accent); color: var(--accent-ink);
border-radius:999px; padding:2px 10px;
font-weight:700; letter-spacing:.1em;
}
#studio-header .eyebrow .badge .dot {
width:6px; height:6px; border-radius:50%;
background: var(--accent-ink); opacity:.7;
}

/* ββ Panels ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
.gr-form, .gr-box, .gr-panel, .gr-block.gr-box {
background: var(--bg) !important;
border: 1px solid var(--raised) !important;
border-radius: 10px !important;
}

/* Section labels */
.gradio-container label span {
font-family:'Inter', sans-serif !important;
font-size:.78rem !important; font-weight:600 !important;
letter-spacing:.02em !important; color:var(--text-dim) !important;
}

/* ββ Buttons βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
#submit-img, #submit-txt {
background: var(--accent) !important;
color: var(--accent-ink) !important;
border: 1px solid #E8BD00 !important;
font-weight:700 !important;
letter-spacing:.02em !important;
font-family:'Space Grotesk', sans-serif !important;
box-shadow: 0 1px 2px rgba(0,0,0,.04) !important;
transition: transform .12s ease, box-shadow .12s ease !important;
}
#submit-img:hover, #submit-txt:hover {
transform: translateY(-1px);
box-shadow: 0 4px 12px rgba(255,210,30,.35) !important;
}
#submit-img:active, #submit-txt:active { transform: translateY(0); }

/* ββ Description output βββββββββββββββββββββββββββββββββββββββββββββββββ */
#desc-output textarea {
font-family:'Inter', sans-serif !important;
font-size:.88rem !important; line-height:1.6 !important;
color:var(--text) !important;
background:var(--surface) !important;
}

/* ββ Run-locally panel βββββββββββββββββββββββββββββββββββββββββββββββββββ */
#run-locally {
border:1px solid var(--raised) !important;
background: var(--surface) !important;
}
#run-locally code {
font-family:'JetBrains Mono', monospace !important;
font-size:.78rem !important;
background:var(--bg) !important;
border:1px solid var(--raised) !important;
border-radius:6px !important;
color:#92660C !important;
}
#run-locally pre {
background:var(--bg) !important;
border:1px solid var(--raised) !important;
border-radius:8px !important;
padding:10px 14px !important;
}

/* ββ Tabs ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
.tab-nav button {
font-family:'Space Grotesk', sans-serif !important;
font-weight:600 !important; letter-spacing:.01em !important;
color: var(--text-dim) !important;
}
.tab-nav button.selected {
color: var(--text) !important;
border-bottom-color: var(--accent) !important;
}

/* ββ Misc ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
footer { display:none !important; }
.gr-accordion { border-color: var(--raised) !important; }
"""
|
|
# Markdown rendered inside the "Run locally" accordion at the bottom of the
# page. It covers the pip install for the API-backed app, how to supply a
# personal API key through MINICPM_API_KEY, and the extra packages and
# one-time model download needed by the Local (offline) backend.
#
# Two wording fixes relative to the earlier text:
#   * the intro no longer claims "no install beyond Python" directly above a
#     `pip install` step;
#   * the API key field is described as living on the Image tab -- it is
#     created before this accordion, so the old "field below" was wrong.
LOCAL_RUN_MD = """
**Run this studio on your own machine** β all you need is Python and a few pip packages.

```bash
pip install gradio openai pillow
python app_single.py
```

Then open **http://localhost:7860**

By default the app uses a shared public API key (rate-limited). To use your
own [modelbest.cn](https://modelbest.cn) key without typing it every time,
set an environment variable before launching:

```bash
# macOS / Linux
export MINICPM_API_KEY="sk-your-key-here"

# Windows (PowerShell)
$env:MINICPM_API_KEY="sk-your-key-here"
```

The app checks `MINICPM_API_KEY` first, then the key entered under
**API key** on the Image tab, then falls back to the shared public key.

---

### π Fully offline mode

Select **Local (offline)** as the Backend on the Image tab to run everything
on-device β no internet needed after the first download.

```bash
pip install torch transformers accelerate
python app_single.py
```

The first time you use the Local backend, it downloads `openbmb/MiniCPM-V-4`
(4.1B params, Apache-2.0, ~8 GB) into `model_cache/` next to this file. Every
run after that loads from disk only β no network calls.

To force a fresh download, delete the `model_cache/` folder.

A GPU is recommended but not required; the app automatically uses CUDA if
available and falls back to CPU otherwise.
"""
|
|
# Top-level UI definition. Components are created in the same order as before;
# only the surrounding Python (named intermediates, looped event wiring) differs.
with gr.Blocks(
    title="An Adventure in Thousand Token Wood Β· MiniCPM-V 4.6",
    theme=gr.themes.Soft(),
    css=CSS,
) as demo:

    # Page header (styled by the #studio-header rules in CSS).
    gr.HTML(
        """<div id="studio-header">
<div class="eyebrow">
<span class="badge"><span class="dot"></span>MiniCPM-V 4.6</span>
<span>An Adventure in Thousand Token Wood</span>
</div>
<h1>Emberglade - An emotion identifier that makes you HAPPY !!!</h1>
<p>Upload an image. The model reads its mood β then a cat performs it, live, with its own tune.</p>
</div>"""
    )

    with gr.Tabs():

        # ---- Tab 1: image in, streamed description + cat stage out --------
        with gr.TabItem("π· Image β emotion"):
            with gr.Row():
                # Left column: inputs and settings.
                with gr.Column(scale=1):
                    image_input = gr.Image(type="pil", label="Upload image", height=240)
                    prompt_input = gr.Textbox(value=DEFAULT_PROMPT, label="Prompt", lines=2)

                    backend_sel = gr.Radio(
                        choices=["API (online)", "Local (offline)"],
                        value="API (online)",
                        label="Backend",
                    )

                    _model_names = list(MODELS.keys())
                    model_sel = gr.Radio(
                        choices=_model_names,
                        value=_model_names[0],
                        label="Model",
                        info="Used only for the API backend",
                    )

                    with gr.Accordion("Generation settings", open=False):
                        max_tok = gr.Slider(
                            64, 2048, value=DEFAULT_MAX_TOKENS, step=64, label="Max tokens"
                        )
                        temp = gr.Slider(
                            0.0, 1.5, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature"
                        )

                    with gr.Accordion("API key", open=False):
                        api_key = gr.Textbox(
                            label="Your key (optional)",
                            type="password",
                            placeholder="sk-β¦ leave blank to use the shared key",
                        )
                        gr.Markdown("Get your own at [modelbest.cn](https://modelbest.cn) β see **Run locally** below for setup.")

                    with gr.Accordion("Local model (offline)", open=False, elem_id="local-model"):
                        local_status = gr.Markdown(local_status_md())
                        gr.Markdown(
                            f"Model: `{LOCAL_MODEL_ID}` Β· 4.1B params Β· Apache-2.0\n\n"
                            "Selecting **Local (offline)** above will download this model "
                            "the first time it's used (~8 GB, one-time, needs internet), "
                            "then cache it in `model_cache/` for fully offline use afterward.\n\n"
                            "Requires: `pip install torch transformers accelerate`"
                        )
                        refresh_local_btn = gr.Button("Refresh status", size="sm")

                    img_btn = gr.Button("Start emotion", variant="primary", elem_id="submit-img")
                    gr.Examples(examples=PROMPT_EXAMPLES, inputs=[prompt_input], label="Prompt ideas")

                # Right column: the stage and the streamed description.
                with gr.Column(scale=1):
                    cat_out = gr.HTML(value=placeholder_html(), label="Stage")
                    desc_out = gr.Textbox(
                        label="Description (model output, streaming)",
                        lines=7,
                        placeholder="The model's description will stream in hereβ¦",
                        elem_id="desc-output",
                    )

            # Argument order must match run_image_pipeline's signature.
            pipeline_inputs = [
                image_input, prompt_input, model_sel, max_tok, temp, api_key, backend_sel,
            ]
            _pipeline_outputs = [desc_out, cat_out]

            # The button click and submitting the prompt box run the same pipeline.
            for _bind in (img_btn.click, prompt_input.submit):
                _bind(fn=run_image_pipeline, inputs=pipeline_inputs, outputs=_pipeline_outputs)
            refresh_local_btn.click(fn=local_status_md, outputs=[local_status])

        # ---- Tab 2: mood words in, cat stage out ---------------------------
        with gr.TabItem("βοΈ Text β emotion"):
            gr.Markdown("Type mood words for an instant emotion β no API key needed.")
            with gr.Row():
                with gr.Column(scale=1):
                    txt_input = gr.Textbox(
                        label="Describe a mood",
                        placeholder='"happy party" Β· "sad rain" Β· "energetic dance"',
                        lines=3,
                    )
                    txt_btn = gr.Button("Start emotion", variant="primary", elem_id="submit-txt")
                    # gr.Examples expects one row (a list) per example.
                    _mood_examples = [
                        [phrase]
                        for phrase in (
                            "happy celebrate joy",
                            "sad lonely rain",
                            "energetic dance excited",
                            "calm peaceful",
                            "mysterious dark shadow",
                            "romantic love",
                            "tense nervous fear",
                            "nostalgic memory",
                            "angry rage",
                        )
                    ]
                    gr.Examples(
                        examples=_mood_examples,
                        inputs=[txt_input],
                        label="Quick examples",
                    )
                with gr.Column(scale=1):
                    txt_cat = gr.HTML(value=placeholder_html(), label="Stage")

            for _bind in (txt_btn.click, txt_input.submit):
                _bind(fn=generate_animation, inputs=[txt_input], outputs=[txt_cat])

    # Setup instructions, collapsed by default, below the tabs.
    with gr.Accordion("β Run locally", open=False, elem_id="run-locally"):
        gr.Markdown(LOCAL_RUN_MD)
|
|
# Script entry point: start the Gradio server only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    # server_name="0.0.0.0" listens on every network interface rather than
    # loopback only, server_port=7860 fixes the port, and show_error=True asks
    # Gradio to surface handler errors in the browser.
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
|
|