Spaces:

scvcoder
/

kpaa

Running on Zero

File size: 3,792 Bytes

"""HF Spaces (Gradio SDK + ZeroGPU) 진입점.

HF Spaces 빌더가 자동으로 `python app.py` 를 실행한다. 로컬에서도 같은
파일로 미리보기 가능:

    pip install -e ".[dev,llm,hf]"
    KPAA_LLM_BACKEND=llama_cpp python app.py    # 로컬 GGUF 로 UI 만 미리보기
    # → http://127.0.0.1:7860

HF Spaces 환경에서는 자동으로 `SPACE_ID` 가 잡혀 ZeroGPU 백엔드가 활성화된다.
LAW_OC 는 Space Settings > Secrets 에 등록.
"""
from __future__ import annotations

import os
import sys
from pathlib import Path

# `pip install -e .` does not work on HF Spaces (the app files are not yet
# mounted when requirements.txt is processed). Instead, prepend src/ to
# sys.path. This is harmless in a local editable-install environment too.
sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))


# ─── monkey-patch: Gradio /api_info schema bug ────────────────────────────
# gradio_client.utils in Gradio 5.x assumes that a JSON Schema
# `additionalProperties: True` (a bool, which is a legal form) is always a dict,
# so `if "const" in schema:` raises TypeError. Wrap both get_type and
# _json_schema_to_python_type so that bool input is handled safely.
import gradio_client.utils as _gc_utils  # noqa: E402

# Keep references to the original implementations so the wrappers below can
# delegate to them for ordinary dict schemas.
_orig_get_type = _gc_utils.get_type
_orig_jstpt = _gc_utils._json_schema_to_python_type


def _safe_get_type(schema):
    if not isinstance(schema, dict):
        return ""
    return _orig_get_type(schema)


def _safe_jstpt(schema, defs):
    if not isinstance(schema, dict):
        return "Any"
    return _orig_jstpt(schema, defs)


# Rebind the gradio_client.utils attributes to the non-dict-tolerant wrappers.
_gc_utils.get_type = _safe_get_type
_gc_utils._json_schema_to_python_type = _safe_jstpt
# ──────────────────────────────────────────────────────────────────────────


# ─── HF Spaces ZeroGPU startup canary ─────────────────────────────────────
# ZeroGPU on HF Spaces only schedules a GPU when at least one module-level
# `@spaces.GPU` function is detected at startup. The real GPU work happens in
# `_run_generate` inside ZeroGPUBackend.stream_chat, but that function is only
# decorated at call time, so the startup scan does not see it.
# This canary is never called; it exists solely to satisfy the detector.
try:
    import spaces  # type: ignore[import-not-found]

    @spaces.GPU(duration=1)
    def _zerogpu_startup_canary() -> None:
        """Sentinel that lets the HF Spaces ZeroGPU detector pass."""
        return None
except ImportError:
    pass  # local dev — the spaces package is not installed
# ──────────────────────────────────────────────────────────────────────────


from kpaa.ui.gradio import build_app  # noqa: E402


def main() -> None:
    """Build the Gradio app and serve it with the request queue enabled.

    The listening port is taken from the ``PORT`` environment variable and
    falls back to 7860 when the variable is unset.
    """
    demo = build_app()
    # 7860 is the port HF Spaces expects to be exposed; the local preview
    # uses the same one.
    listen_port = int(os.getenv("PORT", "7860"))
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": listen_port,
        "show_error": True,
        # No Node SSR subprocess: run as a single, pure-uvicorn process.
        "ssr_mode": False,
        # Skip exposing /api_info (belt-and-suspenders together with the
        # gradio_client monkey-patch earlier in this file).
        "show_api": False,
    }
    # The queue must be enabled for async-generator (streaming) handlers.
    queued_demo = demo.queue(max_size=20)
    queued_demo.launch(**launch_options)


# Start the server only when this file is executed as a script
# (e.g. `python app.py`), not when it is imported.
if __name__ == "__main__":
    main()