umyunsang's picture
Upload folder using huggingface_hub
d2585c1 verified
#!/usr/bin/env python3
"""GovOn E2E GPU Test Runner.
Verifies the full agent pipeline against the govon-runtime server
deployed on HuggingFace Spaces GPU.

Usage:
    # Run everything
    GOVON_RUNTIME_URL=https://<space>.hf.space python -m scripts.e2e_gpu_test.runner
    # Run a specific phase only
    GOVON_RUNTIME_URL=... python -m scripts.e2e_gpu_test.runner --phase 1
    # Real-time monitoring mode
    GOVON_RUNTIME_URL=... python -m scripts.e2e_gpu_test.runner --monitor

6-phase layout:
Phase 1: Infrastructure (hard gate)
Phase 2: Agent Pipeline Core
Phase 3: data.go.kr API Tools (soft gate)
Phase 4: Adapter Dynamics
Phase 5: Robustness
Phase 6: Advanced (flow integrity, SLA, fallback, etc.)
"""
from __future__ import annotations
import argparse
import asyncio
import sys
import time
from uuid import uuid4
from .config import BASE_URL, LOG_PATH, RESULTS_PATH, TIMEOUT, VALID_TOOLS
from .flow_tracker import LatencyAggregator
from .http_client import get_http_backend, http_get, http_get_raw
from .logger import E2ELogger
from .report import print_summary, write_json_report
from .scenarios.phase6_advanced import run_phase6
# The Phase 1-5 scenarios come from the existing verify_e2e_tool_calling.py
# (that script is not referenced directly; this runner only executes Phase 6 itself).
# For Phase 1-5, either use the existing scripts/verify_e2e_tool_calling.py
# or migrate the scenarios here incrementally.
# Shared mutable set handed to run_phase6, print_summary and write_json_report;
# presumably the tool names observed during the run -- confirm in those modules.
_observed_tools: set[str] = set()
# Module-level result buffer; not referenced anywhere else in the visible part of this file.
_results: list[dict] = []
# Identifier for this run (random UUID4 rendered as 32 hex characters); logged at
# startup and passed to write_json_report.
_run_id: str = uuid4().hex
async def _wait_cold_start(
    logger: E2ELogger,
    max_attempts: int = 10,
    interval: float = 30.0,
) -> float:
    """Poll ``/health`` until the server reports ready, to absorb a cold start.

    Args:
        logger: Logger used for progress, diagnostics and the final warning.
        max_attempts: Maximum number of health probes (default 10).
        interval: Seconds to sleep between probes (default 30).

    Returns:
        The number of seconds spent sleeping between probes. This is returned
        both when the server became ready and when every probe failed; in the
        latter case a warning is logged and the caller proceeds anyway.
    """
    total_wait = 0.0
    for attempt in range(1, max_attempts + 1):
        # Keep the try body down to the network call so that only probe
        # failures are treated as "not ready yet".
        try:
            code, body = await http_get("/health", timeout=10)
        except Exception as exc:
            # Best-effort probe: any failure means "not ready", but record why
            # instead of discarding the error silently.
            failure = f"{type(exc).__name__}: {exc}"
        else:
            status = body.get("status") if isinstance(body, dict) else None
            if code == 200 and status in ("ok", "healthy"):
                logger.info(f"์„œ๋ฒ„ ์ค€๋น„ ์™„๋ฃŒ (๋Œ€๊ธฐ {total_wait:.0f}s)")
                return total_wait
            failure = f"HTTP {code}, status={status!r}"

        logger.info(f"health check not ready: {failure}")
        # No sleep after the final probe: nothing is left to wait for.
        if attempt < max_attempts:
            logger.info(
                f"์„œ๋ฒ„ ๋Œ€๊ธฐ ์ค‘... ({attempt}/{max_attempts}, {interval:.0f}s ํ›„ ์žฌ์‹œ๋„)"
            )
            await asyncio.sleep(interval)
            total_wait += interval

    logger.warn("์„œ๋ฒ„ ์ค€๋น„ ํ™•์ธ ์‹คํŒจ -- ๊ณ„์† ์ง„ํ–‰")
    return total_wait
def _as_dict(body: object) -> dict:
    """Return ``body`` when it is a dict, otherwise an empty dict."""
    return body if isinstance(body, dict) else {}


def _str_field(body: object, key: str) -> str:
    """Return ``body[key]`` when ``body`` is a dict holding a string there, else ``""``."""
    value = body.get(key) if isinstance(body, dict) else None
    return value if isinstance(value, str) else ""


async def _phase1_health(logger: E2ELogger) -> tuple[dict, bool]:
    """S1 (Health & Profile): expect HTTP 200 from ``/health`` with status ok/healthy.

    Returns:
        ``(scenario_result, passed)``.
    """
    logger.set_context(phase=1, scenario_id=1)
    started = time.monotonic()
    try:
        code, body = await http_get("/health", timeout=10)
        elapsed = time.monotonic() - started
        # The body type on error responses is not visible from this module;
        # treat anything that is not a dict as "no fields" so the failure
        # message still carries the HTTP code.
        health = _as_dict(body)
        status = health.get("status")
        if code == 200 and status in ("ok", "healthy"):
            result = logger.scenario_result(
                1,
                "Health & Profile",
                1,
                "passed",
                elapsed,
                assertions=[f"HTTP 200, status={status}"],
                detail={"model": health.get("model"), "profile": health.get("profile")},
            )
            return result, True
        result = logger.scenario_result(
            1,
            "Health & Profile",
            1,
            "failed",
            elapsed,
            error=f"HTTP {code}, status={status}",
        )
        return result, False
    except Exception as exc:
        result = logger.scenario_result(
            1,
            "Health & Profile",
            1,
            "failed",
            time.monotonic() - started,
            error=str(exc),
        )
        return result, False


async def _phase1_base_generation(logger: E2ELogger) -> tuple[dict, bool]:
    """S2 (Base Model Generation): expect non-empty text from the base model.

    Tries ``/v1/completions`` first and falls back to ``/v1/generate`` when the
    first call does not yield HTTP 200 with non-empty text.

    Returns:
        ``(scenario_result, passed)``.
    """
    logger.set_context(phase=1, scenario_id=2)
    from .http_client import http_post

    prompt = "๋Œ€ํ•œ๋ฏผ๊ตญ์˜ ์ˆ˜๋„๋Š”"
    started = time.monotonic()
    try:
        # Imported inside the try so that a missing BASE_MODEL is recorded as
        # a failed scenario instead of aborting the whole run.
        from .config import BASE_MODEL

        code, resp = await http_post(
            "/v1/completions",
            {
                "model": BASE_MODEL,
                "prompt": prompt,
                "max_tokens": 32,
                "temperature": 0.0,
            },
            timeout=60,
        )
        elapsed = time.monotonic() - started
        # Extract the text defensively: a non-dict body or a null "text" must
        # lead to the fallback below, not to an exception that skips it.
        choices = _as_dict(resp).get("choices")
        first_choice = choices[0] if isinstance(choices, list) and choices else None
        if code == 200 and _str_field(first_choice, "text").strip():
            result = logger.scenario_result(
                2,
                "Base Model Generation",
                1,
                "passed",
                elapsed,
                assertions=["HTTP 200", "non-empty text"],
            )
            return result, True

        # Fallback: /v1/generate
        code2, resp2 = await http_post(
            "/v1/generate",
            {"prompt": prompt, "max_tokens": 32, "temperature": 0.0},
            timeout=60,
        )
        # Elapsed time deliberately covers both attempts.
        elapsed = time.monotonic() - started
        if code2 == 200 and _str_field(resp2, "text").strip():
            result = logger.scenario_result(
                2,
                "Base Model Generation",
                1,
                "passed",
                elapsed,
                assertions=["HTTP 200 (fallback /v1/generate)"],
            )
            return result, True
        result = logger.scenario_result(
            2,
            "Base Model Generation",
            1,
            "failed",
            elapsed,
            error=f"/v1/completions={code}, /v1/generate={code2}",
        )
        return result, False
    except Exception as exc:
        result = logger.scenario_result(
            2,
            "Base Model Generation",
            1,
            "failed",
            time.monotonic() - started,
            error=str(exc),
        )
        return result, False


async def _phase1_adapter_registry(logger: E2ELogger) -> tuple[dict, bool]:
    """S3 (Adapter Registry): list the models exposed by ``/v1/models``.

    A non-200 answer is recorded as passed with a warning; only an exception
    is recorded as a failure.

    Returns:
        ``(scenario_result, passed)``.
    """
    logger.set_context(phase=1, scenario_id=3)
    started = time.monotonic()
    try:
        code, resp = await http_get("/v1/models", timeout=10)
        elapsed = time.monotonic() - started
        if code != 200:
            result = logger.scenario_result(
                3,
                "Adapter Registry",
                1,
                "passed",
                elapsed,
                warnings=[
                    f"/v1/models HTTP {code} -- ์—”๋“œํฌ์ธํŠธ ๋ฏธ๋…ธ์ถœ (vLLM ์„ค์ •์— ๋”ฐ๋ผ ์ •์ƒ)"
                ],
            )
            return result, True
        model_ids = [m.get("id", "") for m in resp.get("data", [])]
        result = logger.scenario_result(
            3,
            "Adapter Registry",
            1,
            "passed",
            elapsed,
            assertions=[f"{len(model_ids)} models found"],
            detail={"model_ids": model_ids},
        )
        return result, True
    except Exception as exc:
        result = logger.scenario_result(
            3,
            "Adapter Registry",
            1,
            "failed",
            time.monotonic() - started,
            error=str(exc),
        )
        return result, False


async def run_phase1_infra(logger: E2ELogger) -> list[dict]:
    """Phase 1: Infrastructure (hard gate) -- basic server state checks.

    Runs three scenarios in order (health, base-model generation, adapter
    registry) and stops at the first one that fails, so later scenarios are
    not attempted against a server that is not usable.

    Args:
        logger: Logger that records context and produces the scenario results.

    Returns:
        The scenario results collected so far, in execution order.
    """
    logger.info("\n[Phase 1] Infrastructure (hard gate)")
    logger.info("-" * 40)
    results: list[dict] = []
    scenarios = (_phase1_health, _phase1_base_generation, _phase1_adapter_registry)
    for scenario in scenarios:
        result, passed = await scenario(logger)
        results.append(result)
        if not passed:
            # Hard gate: a failed scenario ends the phase immediately.
            break
    return results
async def main() -> int:
    """Parse CLI arguments, run the selected phases and write the reports.

    Flow: wait for the server's cold start, run Phase 1 (which aborts the run
    on failure when all phases were requested), reject Phases 2-5 (not
    implemented in this runner), run Phase 6, then print the summary and write
    the JSON report.

    Returns:
        Process exit status: 0 when no scenario failed, 1 otherwise (including
        the Phase 1 abort and a request for an unimplemented phase).
    """
    parser = argparse.ArgumentParser(description="GovOn E2E GPU Test Runner")
    # Restrict to the documented range; previously an out-of-range value ran
    # no phase at all and still exited with status 0.
    parser.add_argument(
        "--phase",
        type=int,
        choices=range(1, 7),
        help="ํŠน์ • Phase๋งŒ ์‹คํ–‰ (1-6)",
    )
    parser.add_argument("--verbose", action="store_true", default=True, help="์ƒ์„ธ ์ถœ๋ ฅ")
    # --verbose defaults to True, so on its own it can never be switched off;
    # --quiet writes False to the same destination.
    parser.add_argument(
        "--quiet",
        dest="verbose",
        action="store_false",
        help="disable verbose output",
    )
    args = parser.parse_args()
    target_phase = args.phase

    logger = E2ELogger(LOG_PATH, verbose=args.verbose)
    all_results: list[dict] = []
    # try/finally so the logger is closed on every exit path, including an
    # exception raised by one of the phases.
    try:
        logger.info("=" * 60)
        logger.info("GovOn E2E GPU Test Suite")
        logger.info("=" * 60)
        logger.info(f" ๋Œ€์ƒ ์„œ๋ฒ„: {BASE_URL}")
        logger.info(f" HTTP ๋ฐฑ์—”๋“œ: {get_http_backend()}")
        logger.info(f" ํƒ€์ž„์•„์›ƒ: {TIMEOUT}s / ์‹œ๋‚˜๋ฆฌ์˜ค")
        logger.info(f" run_id: {_run_id}")
        logger.info(f" ๋กœ๊ทธ ํŒŒ์ผ: {LOG_PATH}")
        logger.info(f" ๊ฒฐ๊ณผ ํŒŒ์ผ: {RESULTS_PATH}")
        logger.info("-" * 60)

        # Wait for the cold start
        logger.info("[Cold Start] ์„œ๋ฒ„ ์ค€๋น„ ํ™•์ธ ์ค‘...")
        cold_start_wait = await _wait_cold_start(logger)
        aggregator = LatencyAggregator()

        # Phase 1: Infrastructure
        if target_phase is None or target_phase == 1:
            phase1_results = await run_phase1_infra(logger)
            all_results.extend(phase1_results)
            phase1_failed = any(r.get("status") == "failed" for r in phase1_results)
            # Abort only for a full run; with an explicit --phase 1 the normal
            # summary/report path below handles the failure.
            if phase1_failed and target_phase is None:
                logger.error("ABORT: Infrastructure not ready -- Phase 1 failed")
                write_json_report(
                    all_results, RESULTS_PATH, _run_id, cold_start_wait, _observed_tools
                )
                return 1

        # Phase 2-5: served by the existing script (incremental migration planned)
        if target_phase in (2, 3, 4, 5):
            logger.error(
                f"\n[Phase {target_phase}] ๋ฏธ๊ตฌํ˜„: "
                "๊ธฐ์กด verify_e2e_tool_calling.py๋ฅผ ์‚ฌ์šฉํ•˜์„ธ์š”\n"
                " GOVON_RUNTIME_URL=... python scripts/verify_e2e_tool_calling.py"
            )
            write_json_report(
                all_results, RESULTS_PATH, _run_id, cold_start_wait, _observed_tools
            )
            return 1

        # Phase 6: Advanced
        if target_phase is None or target_phase == 6:
            phase6_results = await run_phase6(logger, _observed_tools, aggregator)
            all_results.extend(phase6_results)

        # Summary
        print_summary(all_results, logger, _observed_tools)
        write_json_report(
            all_results, RESULTS_PATH, _run_id, cold_start_wait, _observed_tools, aggregator
        )
        logger.info(f"\n๊ฒฐ๊ณผ ์ €์žฅ: {RESULTS_PATH}")
        logger.info(f"๋กœ๊ทธ ์ €์žฅ: {LOG_PATH}")
    finally:
        logger.close()

    any_failed = any(r.get("status") == "failed" for r in all_results)
    return 1 if any_failed else 0
if __name__ == "__main__":
    # Drive the async entry point to completion and hand its return value to
    # the shell as the process exit status.
    sys.exit(asyncio.run(main()))