Spaces:

mcce
/

mcce-demo

Sleeping

App Files Files Community

mcce-demo / app.py

mcce

fix: cache_examples=False + Secret 미설정 graceful 처리

bec8251 verified about 1 month ago

raw

history blame contribute delete

5.42 kB

	"""
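	MCCE Demo — Gradio app for HuggingFace Spaces.

	Proxies an OpenAI-compatible endpoint served from a VPS GPU (Gemma 4 31B)
	to demonstrate MCCE's actual inference results in a web UI.

	Environment variables (set as Secrets in the Spaces admin UI):
	MCCE_GPU_INFERENCE_ENDPOINT (needed for inference; e.g. http://<host>:<port>/v1/chat/completions)
	MCCE_GPU_MODEL (optional; default unsloth/gemma-4-31B-it)
	MCCE_GPU_API_KEY (optional; sent as an "Authorization: Bearer" header when set)
	MCCE_GPU_TIMEOUT (optional; request timeout in seconds, default 60)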
	"""

	from __future__ import annotations

	import os

	import gradio as gr
	import httpx

	# ---------------------------------------------------------------------------
	# 설정 — Spaces Secrets에서만 주입 (하드코딩 금지)
	# ---------------------------------------------------------------------------

	# Fallbacks used when the corresponding environment variable is unset or unusable.
	_DEFAULT_MODEL = "unsloth/gemma-4-31B-it"
	_DEFAULT_TIMEOUT_SEC = 60.0


	def _parse_timeout(raw: "str | None", default: float = _DEFAULT_TIMEOUT_SEC) -> float:
	    """Return *raw* as a positive, finite number of seconds, else *default*.

	    A missing, blank, non-numeric, zero, negative, NaN or infinite value
	    falls back to *default*, so a mistyped setting cannot crash the app
	    at import time.
	    """
	    try:
	        seconds = float(raw)
	    except (TypeError, ValueError):
	        return default
	    # NaN fails both comparisons; infinity fails the upper bound.
	    if 0 < seconds < float("inf"):
	        return seconds
	    return default


	# Inference endpoint URL; an empty string means "not configured".
	_ENDPOINT = os.environ.get("MCCE_GPU_INFERENCE_ENDPOINT", "").strip()
	# A blank MCCE_GPU_MODEL falls back to the default rather than an empty model name.
	_MODEL = os.environ.get("MCCE_GPU_MODEL", "").strip() or _DEFAULT_MODEL
	# Optional bearer token; an empty string means no Authorization header is sent.
	_API_KEY = os.environ.get("MCCE_GPU_API_KEY", "").strip()
	# Request timeout in seconds.
	_TIMEOUT_SEC = _parse_timeout(os.environ.get("MCCE_GPU_TIMEOUT"))

	# System message placed first in every request (runtime text, kept verbatim).
	_SYSTEM_PROMPT = (
	    "너는 마머스(MaMurS) AI다. 가난한 사람을 돕기 위해 만들어진 "
	    "한국형 소버린 AI이며, 정직하고 따뜻하게 대답한다."
	)


	# ---------------------------------------------------------------------------
	# 추론 호출
	# ---------------------------------------------------------------------------


	def _build_messages(message: str, history: list) -> list[dict]:
	    """Convert Gradio chat history plus the new message into OpenAI chat messages.

	    Two history shapes are accepted, and may be mixed:

	    * dict entries that carry both ``"role"`` and ``"content"`` keys; only
	      those two keys are copied, with their values unchanged;
	    * two-element lists or tuples ``(user, assistant)``; each truthy element
	      becomes one message whose content is ``str(element)``.

	    Entries of any other shape are skipped. The result always starts with
	    the system prompt and ends with *message* as a user turn.

	    Args:
	        message: The new user message.
	        history: Prior turns; may be empty or ``None``.

	    Returns:
	        The list of ``{"role", "content"}`` dicts to send to the endpoint.
	    """
	    converted: list[dict] = [{"role": "system", "content": _SYSTEM_PROMPT}]

	    for entry in history or ():
	        if isinstance(entry, dict):
	            # Dicts missing either key are dropped, not guessed at.
	            if "role" in entry and "content" in entry:
	                converted.append(
	                    {"role": entry["role"], "content": entry["content"]}
	                )
	            continue

	        if not isinstance(entry, (list, tuple)) or len(entry) != 2:
	            continue

	        # Pair form: first element is the user turn, second the assistant turn.
	        for role, text in zip(("user", "assistant"), entry):
	            if text:
	                converted.append({"role": role, "content": str(text)})

	    converted.append({"role": "user", "content": message})
	    return converted


	def _call_gpu(message: str, history: list) -> str:
	    """Send one chat-completion request to the GPU endpoint and return the reply.

	    Args:
	        message: The new user message.
	        history: Prior chat turns, forwarded to ``_build_messages``.

	    Returns:
	        The ``content`` string of the first choice's message.

	    Raises:
	        gr.Error: If the endpoint is not configured, the request times out,
	            the server answers with an error status, a network error occurs,
	            the body is not JSON, or the JSON does not contain a string at
	            ``choices[0].message.content``.
	    """
	    if not _ENDPOINT:
	        raise gr.Error(
	            "MCCE_GPU_INFERENCE_ENDPOINT 시크릿이 설정되지 않았습니다. "
	            "Spaces Settings → Secrets에서 엔드포인트를 주입하세요."
	        )

	    payload = {
	        "model": _MODEL,
	        "messages": _build_messages(message, history),
	        "temperature": 0.7,
	        "max_tokens": 512,
	        "stream": False,
	    }
	    headers = {"Content-Type": "application/json"}
	    if _API_KEY:
	        headers["Authorization"] = f"Bearer {_API_KEY}"

	    try:
	        with httpx.Client(timeout=_TIMEOUT_SEC, follow_redirects=True) as client:
	            resp = client.post(_ENDPOINT, json=payload, headers=headers)
	            resp.raise_for_status()
	    except httpx.TimeoutException as e:
	        raise gr.Error(f"추론 타임아웃: {_TIMEOUT_SEC}s 초과") from e
	    except httpx.HTTPStatusError as e:
	        raise gr.Error(
	            f"GPU 엔드포인트 오류: HTTP {e.response.status_code} "
	            f"— {e.response.text[:200]}"
	        ) from e
	    except httpx.RequestError as e:
	        raise gr.Error(f"네트워크 오류: {e}") from e

	    # A 2xx response can still carry a non-JSON body (e.g. an HTML page from
	    # a proxy); report it as a format error instead of letting the decode
	    # failure escape as a non-gr.Error exception.
	    try:
	        data = resp.json()
	    except ValueError as e:
	        raise gr.Error(f"응답 포맷 오류: {resp.text[:200]}") from e

	    try:
	        content = data["choices"][0]["message"]["content"]
	    except (KeyError, IndexError, TypeError) as e:
	        raise gr.Error(f"응답 포맷 오류: {str(data)[:200]}") from e

	    # A null or non-text content would break the ``-> str`` contract.
	    if not isinstance(content, str):
	        raise gr.Error(f"응답 포맷 오류: {str(data)[:200]}")
	    return content


	# ---------------------------------------------------------------------------
	# Gradio UI
	# ---------------------------------------------------------------------------


	def respond(message: str, history) -> str:
	    """Chat handler: validate the input, query the GPU endpoint, return text.

	    Problems are returned as ordinary chat text rather than raised: an empty
	    message yields a prompt to type something, a missing endpoint yields a
	    configuration notice, and a ``gr.Error`` from ``_call_gpu`` is returned
	    as a warning line.

	    Args:
	        message: The text the user submitted.
	        history: Prior chat turns; ``None`` or empty is treated as no history.

	    Returns:
	        The model's reply, or a user-facing notice describing the problem.
	    """
	    if not message or not message.strip():
	        return "메시지를 입력해 주세요."
	    if not _ENDPOINT:
	        return (
	            "⚠️ GPU 엔드포인트가 설정되지 않았습니다.\n"
	            "관리자가 Space Settings → Secrets에 "
	            "`MCCE_GPU_INFERENCE_ENDPOINT`를 추가해야 합니다."
	        )
	    try:
	        return _call_gpu(message, history or [])
	    except gr.Error as e:
	        # Prefer the raw ``message`` attribute over ``str(e)``.
	        # NOTE(review): gradio's Error appears to define __str__ as
	        # repr(message), which would wrap the text in quotes — confirm
	        # against the pinned gradio version.
	        detail = getattr(e, "message", None) or str(e)
	        return f"⚠️ {detail}"


	# Prompts offered as clickable examples in the chat UI.
	_EXAMPLES = [
	    "안녕하세요, 자기소개 부탁드립니다.",
	    "한국의 기초생활수급 제도에 대해 알려주세요.",
	    "파이썬으로 피보나치 수열을 작성해 주세요.",
	    "당장 이번 달 월세가 없어요. 어떻게 해야 하나요?",
	    "양자역학의 핵심 개념을 쉽게 설명해 주세요.",
	]

	# Description text passed to the chat interface (runtime text, kept verbatim).
	_DESCRIPTION = """
	MCCE(MaMurS Compound Cognition Engine) 실전 데모.

	VPS GPU 서버(Gemma 4 31B-it, OpenAI-compatible)를 프록시하여
	실시간 추론 결과를 보여줍니다.

	> "ASI를 만들어서, 나처럼 가난한 사람들을 돕는 AI를 만들고 싶어." — 도규(DogYu)

	주의: GPU 서버가 중지된 상태면 오류가 발생할 수 있습니다.
	"""

	# Title passed to the chat interface.
	_TITLE = "MCCE Demo — Gemma 4 31B 실시간 추론"


	# The chat application object; `respond` handles every submitted message.
	# NOTE(review): cache_examples=False presumably keeps the example prompts
	# from being executed ahead of time, when the GPU endpoint may be
	# unreachable — confirm against the gradio documentation.
	demo = gr.ChatInterface(
	    respond,
	    examples=_EXAMPLES,
	    cache_examples=False,
	    title=_TITLE,
	    description=_DESCRIPTION,
	    theme=gr.themes.Soft(),
	)


	# Start the server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()