Spaces:
Paused
Paused
#!/usr/bin/env python3
"""GovOn E2E GPU Test Runner.

Validates the full agent pipeline against a govon-runtime server deployed on
HuggingFace Spaces GPU.

Usage:
    # Run everything
    GOVON_RUNTIME_URL=https://<space>.hf.space python -m scripts.e2e_gpu_test.runner
    # Run a single phase only
    GOVON_RUNTIME_URL=... python -m scripts.e2e_gpu_test.runner --phase 1
    # Real-time monitoring mode
    # (NOTE: the argument parser in this module does not define --monitor.)
    GOVON_RUNTIME_URL=... python -m scripts.e2e_gpu_test.runner --monitor

The six phases:
    Phase 1: Infrastructure (hard gate)
    Phase 2: Agent Pipeline Core
    Phase 3: data.go.kr API Tools (soft gate)
    Phase 4: Adapter Dynamics
    Phase 5: Robustness
    Phase 6: Advanced (flow integrity, SLA, fallback, etc.)
"""
| from __future__ import annotations | |
| import argparse | |
| import asyncio | |
| import sys | |
| import time | |
| from uuid import uuid4 | |
| from .config import BASE_URL, LOG_PATH, RESULTS_PATH, TIMEOUT, VALID_TOOLS | |
| from .flow_tracker import LatencyAggregator | |
| from .http_client import get_http_backend, http_get, http_get_raw | |
| from .logger import E2ELogger | |
| from .report import print_summary, write_json_report | |
| from .scenarios.phase6_advanced import run_phase6 | |
# Import of the Phase 1-5 scenarios from the legacy verify_e2e_tool_calling.py.
# (The legacy script is not referenced directly; this runner only executes
# Phase 6 itself.)
# Phases 1-5 can either keep using scripts/verify_e2e_tool_calling.py
# or be migrated into this package incrementally.
# Identifier of this process's test run: a fresh random UUID in hex form.
# It is logged in the start-up banner and handed to the JSON report writer.
_run_id: str = uuid4().hex

# Shared, mutable set passed to Phase 6, the summary printer and the report
# writer (presumably the names of tools observed during the run -- confirm
# against those callees).
_observed_tools: set[str] = set()

# Module-level result list; nothing in the visible part of this module reads
# or writes it.
_results: list[dict] = []
async def _wait_cold_start(
    logger: E2ELogger,
    *,
    max_attempts: int = 10,
    interval: float = 30.0,
) -> float:
    """Poll ``/health`` until the server reports ready, or give up.

    The server counts as ready when ``/health`` answers HTTP 200 with a JSON
    object whose ``status`` is ``"ok"`` or ``"healthy"``.

    Args:
        logger: Run logger; progress goes to ``info``, giving up goes to ``warn``.
        max_attempts: Maximum number of health probes (default 10).
        interval: Seconds slept between two probes (default 30).

    Returns:
        Total seconds spent sleeping between probes. Time spent inside the
        HTTP requests themselves is not counted.

    Giving up is not an error: a warning is logged and the caller proceeds.
    """
    total_wait = 0.0
    last_issue = ""  # why the most recent probe did not count as "ready"

    for attempt in range(1, max_attempts + 1):
        try:
            code, body = await http_get("/health", timeout=10)
        except Exception as exc:
            # Best effort: connection errors are expected while the server is
            # still booting, so keep polling -- but remember the reason.
            last_issue = repr(exc)
        else:
            status = body.get("status") if isinstance(body, dict) else None
            if code == 200 and status in ("ok", "healthy"):
                logger.info(f"์๋ฒ ์ค๋น ์๋ฃ (๋๊ธฐ {total_wait:.0f}s)")
                return total_wait
            last_issue = f"HTTP {code}, status={status}"

        # No sleep after the final probe: there is nothing left to wait for.
        if attempt < max_attempts:
            logger.info(
                f"์๋ฒ ๋๊ธฐ ์ค... ({attempt}/{max_attempts}, {interval:g}s ํ ์ฌ์๋)"
            )
            await asyncio.sleep(interval)
            total_wait += interval

    logger.warn("์๋ฒ ์ค๋น ํ์ธ ์คํจ -- ๊ณ์ ์งํ")
    if last_issue:
        logger.warn(f"last /health probe: {last_issue}")
    return total_wait
def _has_text(value: object) -> bool:
    """Return True when *value* is a string with non-whitespace content."""
    return isinstance(value, str) and bool(value.strip())


async def _phase1_health(logger: E2ELogger) -> tuple[dict, bool]:
    """S1: ``GET /health`` must answer HTTP 200 with status ok/healthy."""
    logger.set_context(phase=1, scenario_id=1)

    def record(status: str, elapsed: float, **extra) -> dict:
        return logger.scenario_result(1, "Health & Profile", 1, status, elapsed, **extra)

    t0 = time.monotonic()
    try:
        code, body = await http_get("/health", timeout=10)
        elapsed = time.monotonic() - t0
        # A non-JSON body is reported as status=None instead of raising.
        health = body if isinstance(body, dict) else {}
        status = health.get("status")
        if code == 200 and status in ("ok", "healthy"):
            result = record(
                "passed",
                elapsed,
                assertions=[f"HTTP 200, status={status}"],
                detail={"model": health.get("model"), "profile": health.get("profile")},
            )
            return result, True
        return record("failed", elapsed, error=f"HTTP {code}, status={status}"), False
    except Exception as exc:
        return record("failed", time.monotonic() - t0, error=str(exc)), False


async def _phase1_base_generation(logger: E2ELogger) -> tuple[dict, bool]:
    """S2: the base model must generate text via ``/v1/completions`` or ``/v1/generate``."""
    logger.set_context(phase=1, scenario_id=2)
    from .http_client import http_post

    def record(status: str, elapsed: float, **extra) -> dict:
        return logger.scenario_result(2, "Base Model Generation", 1, status, elapsed, **extra)

    prompt = "๋ํ๋ฏผ๊ตญ์ ์๋๋"
    t0 = time.monotonic()
    try:
        # Imported inside the try so a missing BASE_MODEL is reported as a
        # scenario failure rather than aborting the run.
        from .config import BASE_MODEL

        code, resp = await http_post(
            "/v1/completions",
            {
                "model": BASE_MODEL,
                "prompt": prompt,
                "max_tokens": 32,
                "temperature": 0.0,
            },
            timeout=60,
        )
        elapsed = time.monotonic() - t0
        # Tolerate malformed payloads (missing/None fields) so that they fall
        # through to the fallback endpoint instead of raising.
        choices = (resp.get("choices") or []) if isinstance(resp, dict) else []
        first = choices[0] if choices else None
        text = first.get("text") if isinstance(first, dict) else None
        if code == 200 and _has_text(text):
            return record("passed", elapsed, assertions=["HTTP 200", "non-empty text"]), True

        # Fallback: /v1/generate
        code2, resp2 = await http_post(
            "/v1/generate",
            {"prompt": prompt, "max_tokens": 32, "temperature": 0.0},
            timeout=60,
        )
        # Measured from t0, so it includes the first (unsuccessful) request.
        elapsed2 = time.monotonic() - t0
        text2 = resp2.get("text") if isinstance(resp2, dict) else None
        if code2 == 200 and _has_text(text2):
            result = record(
                "passed",
                elapsed2,
                assertions=["HTTP 200 (fallback /v1/generate)"],
            )
            return result, True
        result = record(
            "failed",
            elapsed2,
            error=f"/v1/completions={code}, /v1/generate={code2}",
        )
        return result, False
    except Exception as exc:
        return record("failed", time.monotonic() - t0, error=str(exc)), False


async def _phase1_adapter_registry(logger: E2ELogger) -> tuple[dict, bool]:
    """S3: list models via ``/v1/models``; a non-200 answer only yields a warning."""
    logger.set_context(phase=1, scenario_id=3)

    def record(status: str, elapsed: float, **extra) -> dict:
        return logger.scenario_result(3, "Adapter Registry", 1, status, elapsed, **extra)

    t0 = time.monotonic()
    try:
        code, resp = await http_get("/v1/models", timeout=10)
        elapsed = time.monotonic() - t0
        if code != 200:
            # Deliberately "passed": the endpoint may simply not be exposed.
            result = record(
                "passed",
                elapsed,
                warnings=[
                    f"/v1/models HTTP {code} -- ์๋ํฌ์ธํธ ๋ฏธ๋ ธ์ถ (vLLM ์ค์ ์ ๋ฐ๋ผ ์ ์)"
                ],
            )
            return result, True
        model_ids = [m.get("id", "") for m in (resp.get("data") or [])]
        result = record(
            "passed",
            elapsed,
            assertions=[f"{len(model_ids)} models found"],
            detail={"model_ids": model_ids},
        )
        return result, True
    except Exception as exc:
        return record("failed", time.monotonic() - t0, error=str(exc)), False


async def run_phase1_infra(logger: E2ELogger) -> list[dict]:
    """Phase 1: Infrastructure (hard gate) -- basic server health checks.

    Runs three scenarios in order: S1 Health & Profile, S2 Base Model
    Generation, S3 Adapter Registry. S1 and S2 are gates: when either fails,
    the remaining scenarios are skipped.

    Args:
        logger: Run logger used for context, progress lines and for building
            each scenario result.

    Returns:
        One result per scenario that actually ran, in execution order. Each
        entry is whatever ``logger.scenario_result`` returned.
    """
    logger.info("\n[Phase 1] Infrastructure (hard gate)")
    logger.info("-" * 40)

    # (scenario coroutine, whether a failure stops the phase)
    scenarios = (
        (_phase1_health, True),
        (_phase1_base_generation, True),
        (_phase1_adapter_registry, False),
    )
    results = []
    for scenario, is_gate in scenarios:
        result, passed = await scenario(logger)
        results.append(result)
        if is_gate and not passed:
            break
    return results
async def main(argv: list[str] | None = None) -> int:
    """Parse the command line, run the selected phases and write the report.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv``.

    Returns:
        Process exit code: 0 when no collected result has status ``"failed"``,
        otherwise 1. Also 1 when Phase 1 fails during a full run, or when one
        of the not-yet-migrated phases 2-5 is requested.
    """
    parser = argparse.ArgumentParser(description="GovOn E2E GPU Test Runner")
    # Restrict to the phases that exist; previously e.g. "--phase 7" ran
    # nothing and still exited with 0.
    parser.add_argument(
        "--phase",
        type=int,
        choices=range(1, 7),
        metavar="N",
        help="ํน์ Phase๋ง ์คํ (1-6)",
    )
    parser.add_argument("--verbose", action="store_true", default=True, help="์์ธ ์ถ๋ ฅ")
    # --verbose defaults to True and so could never be turned off; --quiet
    # writes False to the same destination.
    parser.add_argument(
        "--quiet", dest="verbose", action="store_false", help="disable verbose output"
    )
    args = parser.parse_args(argv)
    target_phase = args.phase

    logger = E2ELogger(LOG_PATH, verbose=args.verbose)
    try:
        logger.info("=" * 60)
        logger.info("GovOn E2E GPU Test Suite")
        logger.info("=" * 60)
        logger.info(f" ๋์ ์๋ฒ: {BASE_URL}")
        logger.info(f" HTTP ๋ฐฑ์๋: {get_http_backend()}")
        logger.info(f" ํ์์์: {TIMEOUT}s / ์๋๋ฆฌ์ค")
        logger.info(f" run_id: {_run_id}")
        logger.info(f" ๋ก๊ทธ ํ์ผ: {LOG_PATH}")
        logger.info(f" ๊ฒฐ๊ณผ ํ์ผ: {RESULTS_PATH}")
        logger.info("-" * 60)

        # Wait for the server to come up (cold start).
        logger.info("[Cold Start] ์๋ฒ ์ค๋น ํ์ธ ์ค...")
        cold_start_wait = await _wait_cold_start(logger)

        all_results: list[dict] = []
        aggregator = LatencyAggregator()

        # Phase 1: Infrastructure
        if target_phase is None or target_phase == 1:
            phase1_results = await run_phase1_infra(logger)
            all_results.extend(phase1_results)
            phase1_failed = any(r.get("status") == "failed" for r in phase1_results)
            # Hard gate only for a full run; "--phase 1" falls through to the
            # normal summary below.
            if phase1_failed and target_phase is None:
                logger.error("ABORT: Infrastructure not ready -- Phase 1 failed")
                write_json_report(
                    all_results, RESULTS_PATH, _run_id, cold_start_wait, _observed_tools
                )
                return 1

        # Phases 2-5: still served by the legacy script (migration pending).
        if target_phase in (2, 3, 4, 5):
            logger.error(
                f"\n[Phase {target_phase}] ๋ฏธ๊ตฌํ: "
                "๊ธฐ์กด verify_e2e_tool_calling.py๋ฅผ ์ฌ์ฉํ์ธ์\n"
                " GOVON_RUNTIME_URL=... python scripts/verify_e2e_tool_calling.py"
            )
            write_json_report(
                all_results, RESULTS_PATH, _run_id, cold_start_wait, _observed_tools
            )
            return 1

        # Phase 6: Advanced
        if target_phase is None or target_phase == 6:
            phase6_results = await run_phase6(logger, _observed_tools, aggregator)
            all_results.extend(phase6_results)

        # Summary
        print_summary(all_results, logger, _observed_tools)
        write_json_report(
            all_results, RESULTS_PATH, _run_id, cold_start_wait, _observed_tools, aggregator
        )
        logger.info(f"\n๊ฒฐ๊ณผ ์ ์ฅ: {RESULTS_PATH}")
        logger.info(f"๋ก๊ทธ ์ ์ฅ: {LOG_PATH}")

        failed = sum(1 for r in all_results if r.get("status") == "failed")
        return 0 if failed == 0 else 1
    finally:
        # Close the logger on every exit path, including an exception raised
        # by one of the phases.
        logger.close()
# Script entry point: run the async main() to completion and use its return
# value as the process exit status.
if __name__ == "__main__":
    sys.exit(asyncio.run(main()))