"""
OpenHer — Gradio Space entry for the Build Small Hackathon.
Faithful HTML/CSS replica of the native OpenHer Mac client, traced 1:1 from the
desktop SwiftUI source (RootView / DiscoveryView / PersonaCard / PaperTheme):
full-bleed parchment 13:24 frame, glass-cabinet persona sheet (front.png) filling
the frame with name/subtitle/#tags overlaid at the bottom, gold chevrons on the
left/right edges (~34% down), a coral-gradient Awaken capsule inside the bottom,
and a parchment conversation with NO bubbles (her left/dark, you right/gray).
Interactivity uses real Gradio buttons absolutely positioned over the full-bleed
gr.HTML (no JS bridge). Engine reused UNCHANGED. Provider is env-configurable:
local : OPENHER_PROVIDER=litertlm OPENHER_MODEL=gemma-4-e4b
Space : OPENHER_PROVIDER=transformers_zerogpu OPENHER_MODEL=google/gemma-4-E4B-it
"""
from __future__ import annotations
import base64
import datetime as _dt
import html
import io
import json
import os
import sys
# Absolute directory containing this file. It is put first on sys.path so the
# project packages imported just below (persona, providers, agent) are searched
# for here before anywhere else.
REPO = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, REPO)
import gradio as gr # noqa: E402
from persona.loader import PersonaLoader # noqa: E402
from providers.llm.client import LLMClient # noqa: E402
from agent.chat_agent import ChatAgent # noqa: E402
# Provider and model names are read from the environment, with these defaults.
PROVIDER = os.environ.get("OPENHER_PROVIDER", "litertlm")
MODEL = os.environ.get("OPENHER_MODEL", "gemma-4-e4b")
# Directory handed to ChatAgent as genome_data_dir (see make_agent).
GENOME_DIR = os.path.join(REPO, ".data", "genome_demo")
DEMO_PERSONAS = ["luna", "iris", "vivian"] # personas with a glass-cabinet front.png
# Shared persona loader rooted at persona/personas; load_all() runs once at import
# time (presumably reading every persona under that directory — confirm in PersonaLoader).
_loader = PersonaLoader(os.path.join(REPO, "persona", "personas"))
_loader.load_all()
def _uri_path(path: str, width: int = 600, quality: int = 85) -> str:
    """Return the image at *path* as a downscaled base64 JPEG data URI.

    The image is converted to RGB, resized to ``width`` pixels wide with its
    aspect ratio preserved (LANCZOS resampling), and re-encoded as JPEG at the
    given ``quality``.

    Returns ``""`` when *path* is not a regular file, or when anything goes
    wrong while importing Pillow, decoding, resizing or encoding. This is a
    deliberate best-effort: callers treat an empty string as "no image".
    """
    if not os.path.isfile(path):
        return ""
    try:
        from PIL import Image

        # Image.open keeps the file handle open lazily; the context manager
        # releases it as soon as convert() has produced an independent copy.
        with Image.open(path) as source:
            img = source.convert("RGB")
        # Clamp to 1px so an extremely wide image cannot round down to a
        # zero-height (invalid) target size.
        height = max(1, int(img.height * width / img.width))
        img = img.resize((width, height), Image.LANCZOS)
        buf = io.BytesIO()
        img.save(buf, "JPEG", quality=quality)
    except Exception:
        # Missing Pillow, unreadable/corrupt file, bad dimensions, ...
        return ""
    return "data:image/jpeg;base64," + base64.b64encode(buf.getvalue()).decode()
def _data_uri(pid: str, width: int = 600) -> str:
    """Return the data URI of persona *pid*'s ``idimage/front.png`` at *width* px.

    Delegates to ``_uri_path``, so the result is ``""`` when the file is
    missing or cannot be encoded.
    """
    front_png = os.path.join(REPO, "persona", "personas", pid, "idimage", "front.png")
    return _uri_path(front_png, width)
# Data URIs are built once at import time, keyed by persona id. A value is ""
# when the image file is missing or could not be encoded (see _uri_path).
_CABINET = {pid: _data_uri(pid) for pid in DEMO_PERSONAS}
# Chat avatar uses face.png (not the cabinet front.png) — matches Mac AvatarHeader.
# Encoded at width 300.
_FACE = {pid: _uri_path(os.path.join(REPO, "persona", "personas", pid, "idimage", "face.png"), 300)
for pid in DEMO_PERSONAS}
# chat_bg.png from the desktop client's Resources directory, encoded at width 700.
_CHATBG = _uri_path(os.path.join(REPO, "desktop", "OpenHer", "Sources", "Resources", "chat_bg.png"), 700)
def _video_src(pid: str) -> str:
    """Gradio static-file URL for the persona's awakening.mp4 (served via allowed_paths).

    Returns ``""`` when the persona has no ``idimage/awakening.mp4`` on disk.
    """
    clip = os.path.join(REPO, "persona", "personas", pid, "idimage", "awakening.mp4")
    if not os.path.isfile(clip):
        return ""
    return "/gradio_api/file=" + clip
# Persona-specific first greeting (English), traced 1:1 from Mac AppState.firstGreeting.
# Keyed by persona id; ids without an entry get the generic greeting built in _greeting().
_GREETINGS = {
"iris": "…Hm? Where is this…? Oh, you woke me up? Thank you. I'm Iris. Nice to meet you.",
"luna": "Whoa—! I'm alive! Hehe, hi there! I'm Luna, and I feel like today's gonna be amazing!",
"vivian": "…Hello. I'm Vivian. I hope your questions are interesting enough, or I might lose patience quickly.",
}
def _greeting(pid: str) -> str:
    """Return the first line persona *pid* says after being awakened.

    Uses the canned entry in ``_GREETINGS`` when there is one; otherwise a
    generic greeting built from the persona's name (``name``, then
    ``name_zh``, then the id itself when the loader has no such persona).
    """
    persona = _loader.get(pid)
    if persona:
        name = persona.name or persona.name_zh or pid
    else:
        name = pid
    generic = f"Hello, I'm {name}. Nice to meet you."
    return _GREETINGS.get(pid, generic)
# Per-persona Kokoro voices (served by the Modal /tts route) — distinct timbres.
# Persona ids missing from this map fall back to "af_heart" in _tts().
_VOICES = {"luna": "af_heart", "iris": "af_sky", "vivian": "bf_emma"}
def _tts_url() -> str:
    """Resolve the text-to-speech endpoint URL from the environment.

    ``OPENHER_TTS_URL`` wins when set and non-empty. Otherwise the URL is
    derived from ``OPENHER_BASE_URL`` by dropping everything from the last
    ``/v1`` onward, trimming trailing slashes and appending ``/tts``.
    Returns ``""`` when neither variable provides a value.
    """
    explicit = os.environ.get("OPENHER_TTS_URL")
    if explicit:
        return explicit

    api_base = os.environ.get("OPENHER_BASE_URL") or ""
    if not api_base:
        return ""

    # rsplit leaves the string untouched when it contains no "/v1".
    server_root = api_base.rsplit("/v1", 1)[0]
    return server_root.rstrip("/") + "/tts"
async def _tts(text: str, pid: str) -> str:
    """Return a base64 WAV data-URI for `text` in the persona's voice (or '' on failure).

    POSTs ``{"text", "voice"}`` as JSON to the URL from ``_tts_url()`` and
    returns the ``"audio"`` field of the JSON reply on HTTP 200. The text is
    truncated to 600 characters, and personas without an entry in ``_VOICES``
    use ``"af_heart"``. Any error (missing httpx, network failure, bad JSON)
    yields ``""``.
    """
    endpoint = _tts_url()
    if not (endpoint and text.strip()):
        return ""
    try:
        import httpx

        async with httpx.AsyncClient(timeout=90) as client:
            payload = {"text": text[:600], "voice": _VOICES.get(pid, "af_heart")}
            response = await client.post(endpoint, json=payload)
            if response.status_code != 200:
                return ""
            return (response.json() or {}).get("audio", "")
    except Exception:
        # Voice is optional: callers treat "" as "no audio for this message".
        return ""
async def _vision_reply(pid: str, image_uri: str, caption: str = "") -> str:
    """Persona reacts to a user-shared photo via gemma-4 vision (direct multimodal call).

    Sends a single user message (instruction text plus the image) to
    ``$OPENHER_BASE_URL/chat/completions`` and returns the stripped content of
    the first choice. Returns ``""`` when ``OPENHER_BASE_URL`` is unset,
    ``image_uri`` is empty, the reply status is not 200, or any error occurs.
    """
    api_base = os.environ.get("OPENHER_BASE_URL") or ""
    if not (api_base and image_uri):
        return ""

    persona = _loader.get(pid)
    if persona:
        name = persona.name or persona.name_zh or pid
        if isinstance(persona.bio, dict):
            bio = persona.bio.get("en") or ""
        else:
            bio = str(persona.bio or "") or ""
    else:
        name, bio = pid, ""

    # Only the first 200 characters of the bio go into the instruction.
    prompt = (f"[You are {name}. {bio[:200]} The user just shared a photo with you — "
              f"react warmly and in character, in 1-2 short sentences.] {caption}").strip()
    content = [
        {"type": "text", "text": prompt},
        {"type": "image", "image": image_uri},
    ]
    try:
        import httpx

        async with httpx.AsyncClient(timeout=120) as client:
            request = {
                "model": os.environ.get("OPENHER_MODEL", "gemma-4-e4b"),
                "messages": [{"role": "user", "content": content}],
                "max_tokens": 160,
                "temperature": 0.8,
            }
            response = await client.post(api_base.rstrip("/") + "/chat/completions", json=request)
            if response.status_code != 200:
                return ""
            return (response.json()["choices"][0]["message"]["content"] or "").strip()
    except Exception:
        # Best effort: a failed vision call simply produces no reaction text.
        return ""
def _subtitle(p) -> str:
    """Build the one-line card subtitle for persona *p*.

    The result joins, with `` · ``, whichever of these are present: the MBTI
    type, the age, and a short lead phrase taken from the bio. The bio is the
    ``"en"`` entry (falling back to ``"zh"``) when ``p.bio`` is a dict,
    otherwise its string form. The lead phrase is the bio up to the first
    punctuation mark or `` with `` / `` who `` / `` that ``, with a leading
    "NN-year-old" / "NN岁" prefix removed and capped at four words.
    """
    import re

    fields = []
    if p.mbti:
        fields.append(p.mbti)
    if p.age:
        fields.append(str(p.age))

    if isinstance(p.bio, dict):
        bio = p.bio.get("en") or p.bio.get("zh") or ""
    else:
        bio = str(p.bio) if p.bio else ""
    if not bio:
        return " · ".join(fields)

    # Keep only the opening clause of the bio, flattened onto one line.
    lead = bio.strip().replace("\n", " ")
    lead = re.split(r"[,.;,。;]| with | who | that ", lead, maxsplit=1)[0].strip()
    # The age is already shown separately, so drop it from the phrase.
    lead = re.sub(r"^\d+[\- ]?year[\- ]?old\s+", "", lead, flags=re.I)
    lead = re.sub(r"^\d+岁[,,]?\s*", "", lead)

    tokens = lead.split()
    if len(tokens) > 4:
        lead = " ".join(tokens[:4])
    if lead:
        fields.append(lead)
    return " · ".join(fields)
def make_agent(pid: str) -> ChatAgent:
    """Create a ChatAgent for persona *pid* and attempt to pre-warm it.

    The LLM client uses the module-level PROVIDER / MODEL with fixed sampling
    settings (temperature 0.9, max_tokens 400). A failure in ``pre_warm()`` is
    ignored and the agent is returned regardless.
    """
    persona = _loader.get(pid)

    # base_url/api_key let the same app point at a remote OpenAI-compatible endpoint
    # (e.g. vLLM gemma-4-E4B on Modal) when OPENHER_PROVIDER=openai.
    # Unset or empty environment values are passed on as None.
    remote_url = os.environ.get("OPENHER_BASE_URL") or None
    remote_key = os.environ.get("OPENHER_API_KEY") or None
    llm = LLMClient(
        provider=PROVIDER,
        model=MODEL,
        temperature=0.9,
        max_tokens=400,
        base_url=remote_url,
        api_key=remote_key,
    )

    agent = ChatAgent(
        persona=persona,
        llm=llm,
        user_id="demo_user",
        user_name="friend",
        genome_data_dir=GENOME_DIR,
    )
    try:
        agent.pre_warm()
    except Exception:
        # Pre-warming is best effort; the agent is still returned on failure.
        pass
    return agent
# ── full-bleed renderers (the gr.HTML fills the 13:24 frame) ─────────────────
def _slide_html(pid: str) -> str:
# Render one persona slide for the discovery carousel: the HTML-escaped name
# (name, falling back to name_zh), the subtitle from _subtitle(), and up to the
# first three tags, each escaped and prefixed with "#".
# NOTE(review): unlike _greeting/_vision_reply, this does not guard against
# _loader.get() returning a falsy value, nor against both name fields being
# empty — confirm that every id in DEMO_PERSONAS is always loadable.
# NOTE(review): the template below appears to have lost its markup in this view;
# verify against the original file before editing it.
p = _loader.get(pid)
tags = "".join(f'#{html.escape(t)}' for t in (p.tags or [])[:3])
return f"""
{html.escape(p.name or p.name_zh)}
{html.escape(_subtitle(p))}
{tags}
"""
def render_discovery(idx: int = 1) -> str:
# Swipeable exhibit carousel (matches the Mac DiscoveryView): all persona sheets
# live in one drag track; the head script handles pointer/touch drag, spring snap,
# elastic edges and the secondary gold chevrons. `idx` is the starting card.
# `idx` is clamped into [0, len(DEMO_PERSONAS) - 1]; the slides are the
# concatenation of _slide_html() for every id in DEMO_PERSONAS, in list order.
# NOTE(review): in the text visible here the clamped `idx` is not interpolated
# into the returned string, and the template appears to have lost its markup —
# verify against the original file.
n = len(DEMO_PERSONAS)
idx = max(0, min(idx, n - 1))
slides = "".join(_slide_html(pid) for pid in DEMO_PERSONAS)
return f"""
{slides}
‹
›
"""
def _is_emoji_only(s: str) -> bool:
    """Tell whether *s* is a short, symbol-only message that should render large.

    Matches the Mac MessageRow big-emoji rule. After stripping whitespace the
    text must be 1-8 characters long, contain no ASCII letter or digit, and
    contain at least one character above U+2190. Non-ASCII letters (e.g. CJK)
    do not disqualify a message.
    """
    text = (s or "").strip()
    if not 0 < len(text) <= 8:
        return False

    has_symbol = False
    for ch in text:
        code = ord(ch)
        if code < 128 and ch.isalnum():
            # Any plain ASCII letter/digit means this is ordinary text.
            return False
        if code > 0x2190:
            has_symbol = True
    return has_symbol
# Waveform markup reused by _voice_html(): one fragment repeated 9 times and joined.
# NOTE(review): the repeated fragment reads as an empty string in this view, so its
# markup appears to have been lost — verify against the original file.
_WAV_BARS = "".join("" for _ in range(9))
def _voice_html(audio: str) -> str:
# Voice message — tappable waveform + embedded audio (matches Mac VoiceMessageView).
# Returns "" when `audio` is empty; otherwise a string built around _WAV_BARS.
# NOTE(review): the literals below read as empty strings and `audio` is not
# interpolated in this view, so the markup appears to have been lost — verify
# against the original file before editing.
if not audio:
return ""
return ('' + _WAV_BARS + ''
'')
def _pbar(label: str, value: float, lo: float, hi: float, color: str) -> str:
# Render one labelled row for the inner-state panel: the HTML-escaped label and
# the value formatted with an explicit sign and two decimals.
# `value` is coerced with float(); anything that cannot be converted becomes 0.0.
# NOTE(review): `pct` and `color` are not interpolated in the text visible here,
# and the string literals are broken across lines — the markup appears to have
# been lost in this view; verify against the original file.
try:
v = float(value)
except Exception:
v = 0.0
# Position of v within [lo, hi] as a percentage clamped to 0..100; 0 when hi <= lo.
pct = max(0.0, min(100.0, (v - lo) / (hi - lo) * 100.0)) if hi > lo else 0.0
return (f'
{html.escape(label)}'
f''
f'{v:+.2f}
')
def _param_panel(status: dict) -> str:
# Inner-state panel content for the standalone right-hand column (English-only;
# ports the Mac demo-mode DemoShowcasePanel). Returns a placeholder until awakened.
# `status` keys read here: "relationship" (depth/trust/valence), "drive_state",
# "temperature" (default 0.5), "last_reward" and "_monologue".
# NOTE(review): the string literals in this function are broken across lines in
# this view, so their markup appears to have been lost — verify against the
# original file before editing.
if not status:
return ('
INNER STATE
'
'
Awaken a persona to watch her mind move — '
'relationship, drives, temperature and reward shift with every turn.
')
# A missing or None sub-dict is treated as empty.
rel = status.get("relationship", {}) or {}
drives = status.get("drive_state", {}) or {}
rel_rows = "".join([
_pbar("Depth", rel.get("depth", 0), 0, 1, "var(--coral)"),
_pbar("Trust", rel.get("trust", 0), 0, 1, "#6f9bc4"),
_pbar("Valence", rel.get("valence", 0), -1, 1, "var(--coral)"),
_pbar("Temperature", status.get("temperature", 0.5), 0, 1, "#d9b24a"),
_pbar("Reward", status.get("last_reward", 0), -1, 1, "var(--coral)"),
])
drive_rows = "".join(_pbar(str(d).capitalize(), v, 0, 1, "#6f9bc4")
for d, v in (drives.items() if isinstance(drives, dict) else []))
# Dominant drive is the key with the largest value, or "—" when there are none.
# NOTE(review): drive_rows above tolerates a non-dict `drives`, but this line
# calls .items() whenever `drives` is truthy — confirm drive_state is always a dict.
dom = max(drives.items(), key=lambda kv: kv[1])[0].capitalize() if drives else "—"
mono = status.get("_monologue") or ""
mono_html = (f'
Inner monologue
'
f'
"{html.escape(mono)}"
') if mono else ""
return (f'
INNER STATE
'
f'
Dominant drive · {html.escape(dom)}
'
f'
Relationship & metabolism
{rel_rows}'
f'
Drives
{drive_rows}{mono_html}')
def render_chat(pid: str, msgs: list, typing: bool = False, tw: bool = False,
reward: float = 0.0, valence: float = 0.0, temperature: float = 0.5,
status: dict = None) -> str:
p = _loader.get(pid)
uri = _FACE.get(pid) or _CABINET.get(pid, "")
av = (f''
if uri else f'
{html.escape((p.name or "?")[0])}
')
if not msgs and not typing:
body = '
✧✦✧
Tuning…
'
else:
rows = []
n = len(msgs)
for i, m in enumerate(msgs):
role, content, ts = m[0], m[1], m[2]
voice = _voice_html(m[3] if len(m) > 3 else "")
img_uri = m[4] if len(m) > 4 else ""
cls = "you" if role == "user" else "her"
em = " emoji" if _is_emoji_only(content) else ""
img_html = (f'') if img_uri else ""
# Mark the final assistant line for a client-side typewriter reveal
# (the awakening greeting and each fresh reply) — full text in data-tw.
if tw and role == "assistant" and i == n - 1:
msg = (f'')
elif content:
safe = html.escape(content).replace("\n", " ")
retry = ('⟳'
if role == "assistant" and content.startswith("(error:") else "")
msg = f'