mcce-demo / app.py
mcce's picture
fix: cache_examples=False + Secret 미설정 graceful 처리
bec8251 verified
"""
MCCE Demo - Gradio app for HuggingFace Spaces.
Proxies the OpenAI-compatible endpoint of a VPS GPU server (Gemma 4 31B)
to demonstrate MCCE's real inference results through a web UI.
Secrets (Spaces admin UI):
MCCE_GPU_INFERENCE_ENDPOINT (e.g. http://<host>:<port>/v1/chat/completions)
MCCE_GPU_MODEL (e.g. unsloth/gemma-4-31B-it)
MCCE_GPU_API_KEY (optional; sent as a Bearer token when set)
MCCE_GPU_TIMEOUT (optional; request timeout in seconds, default 60)
"""
from __future__ import annotations
import os
import gradio as gr
import httpx
# ---------------------------------------------------------------------------
# Configuration - injected only through Spaces Secrets (never hard-coded)
# ---------------------------------------------------------------------------
# Full URL of the chat-completions endpoint; empty means "not configured".
_ENDPOINT = os.environ.get("MCCE_GPU_INFERENCE_ENDPOINT", "").strip()
# Model identifier placed in the request payload.
_MODEL = os.environ.get("MCCE_GPU_MODEL", "unsloth/gemma-4-31B-it").strip()
# Optional API key; when non-empty it is sent as a Bearer token.
_API_KEY = os.environ.get("MCCE_GPU_API_KEY", "").strip()
# Request timeout in seconds.
try:
    _TIMEOUT_SEC = float(os.environ.get("MCCE_GPU_TIMEOUT", "60"))
except ValueError:
    # A blank or non-numeric secret must not crash the app at import time;
    # fall back to the same default used when the variable is unset.
    _TIMEOUT_SEC = 60.0
# System prompt; _build_messages places it first in every request.
# NOTE(review): the non-ASCII text in this literal looks like UTF-8 that was
# decoded with the wrong codec (mojibake) - confirm the file's encoding.
_SYSTEM_PROMPT = (
"๋„ˆ๋Š” ๋งˆ๋จธ์Šค(MaMurS) AI๋‹ค. ๊ฐ€๋‚œํ•œ ์‚ฌ๋žŒ์„ ๋•๊ธฐ ์œ„ํ•ด ๋งŒ๋“ค์–ด์ง„ "
"ํ•œ๊ตญํ˜• ์†Œ๋ฒ„๋ฆฐ AI์ด๋ฉฐ, ์ •์งํ•˜๊ณ  ๋”ฐ๋œปํ•˜๊ฒŒ ๋Œ€๋‹ตํ•œ๋‹ค."
)
# ---------------------------------------------------------------------------
# Inference call
# ---------------------------------------------------------------------------
def _build_messages(message: str, history: list) -> list[dict]:
    """Convert Gradio chat history into OpenAI-style chat messages.

    The result always starts with the system prompt and ends with the new
    user ``message``.  History entries are accepted in two shapes:

    * ``{"role": ..., "content": ...}`` dicts (Gradio 5.x messages format),
      copied through with only those two keys;
    * two-element ``[user, assistant]`` lists/tuples (Gradio 4.x format),
      where each non-empty half becomes its own message, coerced to ``str``.

    Entries in any other shape are skipped.
    """
    converted: list[dict] = [{"role": "system", "content": _SYSTEM_PROMPT}]

    for entry in history or ():
        if isinstance(entry, dict):
            # Dicts missing either key are ignored rather than forwarded.
            if "role" in entry and "content" in entry:
                converted.append(
                    {"role": entry["role"], "content": entry["content"]}
                )
            continue

        if not isinstance(entry, (list, tuple)) or len(entry) != 2:
            continue

        # Pair format: first element is the user turn, second the reply.
        for role, text in zip(("user", "assistant"), entry):
            if text:
                converted.append({"role": role, "content": str(text)})

    converted.append({"role": "user", "content": message})
    return converted
def _call_gpu(message: str, history: list) -> str:
    """Send the conversation to the GPU endpoint and return the reply text.

    Builds a non-streaming chat-completions payload from ``message`` and
    ``history`` and POSTs it to ``_ENDPOINT``.  An ``Authorization: Bearer``
    header is added only when ``_API_KEY`` is non-empty.

    Args:
        message: The new user message.
        history: Earlier turns, in either format ``_build_messages`` accepts.

    Returns:
        The ``content`` of the first choice in the endpoint's JSON response.

    Raises:
        gr.Error: If the endpoint is not configured, the request times out,
            the endpoint answers with an HTTP error status, a transport
            error occurs, the body is not valid JSON, or the JSON lacks the
            ``choices[0].message.content`` path.
    """
    if not _ENDPOINT:
        raise gr.Error(
            "MCCE_GPU_INFERENCE_ENDPOINT ์‹œํฌ๋ฆฟ์ด ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. "
            "Spaces Settings โ†’ Secrets์—์„œ ์—”๋“œํฌ์ธํŠธ๋ฅผ ์ฃผ์ž…ํ•˜์„ธ์š”."
        )

    payload = {
        "model": _MODEL,
        "messages": _build_messages(message, history),
        "temperature": 0.7,
        "max_tokens": 512,
        "stream": False,
    }
    headers = {"Content-Type": "application/json"}
    if _API_KEY:
        headers["Authorization"] = f"Bearer {_API_KEY}"

    try:
        with httpx.Client(timeout=_TIMEOUT_SEC, follow_redirects=True) as client:
            resp = client.post(_ENDPOINT, json=payload, headers=headers)
            resp.raise_for_status()
    except httpx.TimeoutException as e:
        raise gr.Error(f"์ถ”๋ก  ํƒ€์ž„์•„์›ƒ: {_TIMEOUT_SEC}s ์ดˆ๊ณผ") from e
    except httpx.HTTPStatusError as e:
        # Only the first 200 characters of the error body are surfaced.
        raise gr.Error(
            f"GPU ์—”๋“œํฌ์ธํŠธ ์˜ค๋ฅ˜: HTTP {e.response.status_code} "
            f"โ€” {e.response.text[:200]}"
        ) from e
    except httpx.RequestError as e:
        raise gr.Error(f"๋„คํŠธ์›Œํฌ ์˜ค๋ฅ˜: {e}") from e

    # The body was fully read by the non-streaming POST, so it can be parsed
    # after the client is closed.  A 2xx response that is not JSON (e.g. an
    # HTML page from a proxy) raises ValueError here; report it with the same
    # message as any other malformed response instead of letting it escape.
    try:
        data = resp.json()
    except ValueError as e:
        raise gr.Error(f"์‘๋‹ต ํฌ๋งท ์˜ค๋ฅ˜: {resp.text[:200]}") from e

    try:
        return data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as e:
        raise gr.Error(f"์‘๋‹ต ํฌ๋งท ์˜ค๋ฅ˜: {str(data)[:200]}") from e
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
def respond(message: str, history) -> str:
    """Gradio chat callback: answer ``message`` through the GPU endpoint.

    ``gr.Error`` failures from ``_call_gpu`` are not re-raised; they are
    returned as the reply text so they appear inside the conversation.

    Args:
        message: Text typed by the user.
        history: Earlier turns as supplied by Gradio; a falsy value is
            treated as an empty history.

    Returns:
        The model's reply, or a notice when the message is blank, the
        endpoint is not configured, or the call fails.
    """
    if not message or not message.strip():
        return "๋ฉ”์‹œ์ง€๋ฅผ ์ž…๋ ฅํ•ด ์ฃผ์„ธ์š”."
    if not _ENDPOINT:
        return (
            "โš ๏ธ GPU ์—”๋“œํฌ์ธํŠธ๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.\n"
            "๊ด€๋ฆฌ์ž๊ฐ€ Space Settings โ†’ Secrets์— "
            "`MCCE_GPU_INFERENCE_ENDPOINT`๋ฅผ ์ถ”๊ฐ€ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค."
        )
    try:
        return _call_gpu(message, history or [])
    except gr.Error as e:
        # Use the raw message attribute: Gradio's Error.__str__ returns
        # repr(message), which would wrap the text shown to the user in
        # quotes.  Fall back to str(e) if the attribute is absent.
        detail = getattr(e, "message", None)
        if detail is None:
            detail = str(e)
        return f"โš ๏ธ {detail}"
# Static UI text handed to gr.ChatInterface below.
# NOTE(review): the non-ASCII text in these literals looks like UTF-8 that was
# decoded with the wrong codec (mojibake) - confirm the file's encoding.

# Passed as the interface title.
_TITLE = "MCCE Demo โ€” Gemma 4 31B ์‹ค์‹œ๊ฐ„ ์ถ”๋ก "
# Passed as the interface description (written with Markdown markup).
_DESCRIPTION = """
**MCCE(MaMurS Compound Cognition Engine)** ์‹ค์ „ ๋ฐ๋ชจ.
VPS GPU ์„œ๋ฒ„(Gemma 4 31B-it, OpenAI-compatible)๋ฅผ ํ”„๋ก์‹œํ•˜์—ฌ
์‹ค์‹œ๊ฐ„ ์ถ”๋ก  ๊ฒฐ๊ณผ๋ฅผ ๋ณด์—ฌ์ค๋‹ˆ๋‹ค.
> "ASI๋ฅผ ๋งŒ๋“ค์–ด์„œ, ๋‚˜์ฒ˜๋Ÿผ ๊ฐ€๋‚œํ•œ ์‚ฌ๋žŒ๋“ค์„ ๋•๋Š” AI๋ฅผ ๋งŒ๋“ค๊ณ  ์‹ถ์–ด." โ€” ๋„๊ทœ(DogYu)
**์ฃผ์˜**: GPU ์„œ๋ฒ„๊ฐ€ ์ค‘์ง€๋œ ์ƒํƒœ๋ฉด ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
"""
# Example prompts offered in the chat UI.
_EXAMPLES = [
"์•ˆ๋…•ํ•˜์„ธ์š”, ์ž๊ธฐ์†Œ๊ฐœ ๋ถ€ํƒ๋“œ๋ฆฝ๋‹ˆ๋‹ค.",
"ํ•œ๊ตญ์˜ ๊ธฐ์ดˆ์ƒํ™œ์ˆ˜๊ธ‰ ์ œ๋„์— ๋Œ€ํ•ด ์•Œ๋ ค์ฃผ์„ธ์š”.",
"ํŒŒ์ด์ฌ์œผ๋กœ ํ”ผ๋ณด๋‚˜์น˜ ์ˆ˜์—ด์„ ์ž‘์„ฑํ•ด ์ฃผ์„ธ์š”.",
"๋‹น์žฅ ์ด๋ฒˆ ๋‹ฌ ์›”์„ธ๊ฐ€ ์—†์–ด์š”. ์–ด๋–ป๊ฒŒ ํ•ด์•ผ ํ•˜๋‚˜์š”?",
"์–‘์ž์—ญํ•™์˜ ํ•ต์‹ฌ ๊ฐœ๋…์„ ์‰ฝ๊ฒŒ ์„ค๋ช…ํ•ด ์ฃผ์„ธ์š”.",
]
# Chat UI built around ``respond``; example caching is explicitly disabled.
demo = gr.ChatInterface(
    respond,
    theme=gr.themes.Soft(),
    title=_TITLE,
    description=_DESCRIPTION,
    examples=_EXAMPLES,
    cache_examples=False,
)

if __name__ == "__main__":
    # Launch the app only when this file is run as a script.
    demo.launch()