hee_!J
feat(experiments): workflow vs agentic 정량 비교 (LLM/tool/cost/latency/인용 깊이)
5a68bbf | """Workflow vs. Agentic comparison experiment | |
| Run both patterns on the same alarms (A1, A2, A3) and compare them quantitatively: | |
| - **Workflow**: one LLM call per tier for Tier 2/3/4, with one up-front RAG lookup (the earlier code reproduced inline as-is) | |
| - **Agentic**: tool-using agent (current main code, agents/*.py) | |
| Measured: | |
| - Call counts: LLM calls, tool calls (per tier, per alarm) | |
| - Diversity: number of unique tools used, number of unique documents cited | |
| - Time: per-tier latency, total | |
| - Cost: estimated tokens and USD (based on gpt-5-mini unit prices) | |
| - Quality: number of citations used (shallow grounding vs. deep grounding) | |
| Three charts: call counts / latency / citation depth (matplotlib) | |
| Run: python -m experiments.agentic_vs_workflow.benchmark | |
| Output: results.md + charts/*.png | |
| """ | |
| import json | |
| import time | |
| from pathlib import Path | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| from agents.cause import run_cause as agentic_cause | |
| from agents.detection import run_detection | |
| from agents.impact import run_impact as agentic_impact | |
| from agents.llm import SUBAGENT_MODEL, client | |
| from agents.rag.store import load_document, search | |
| from agents.response import run_response as agentic_response | |
| from core.schema import Tier1, Tier2, Tier3, Tier4 | |
| from data.demo import DEFAULT_ALARMS | |
| from data.wip import get_affected_wip | |
# Font fallback list used for all chart text (presumably so the non-ASCII
# chart labels render; the last entry is matplotlib's bundled default).
plt.rcParams["font.family"] = ["Apple SD Gothic Neo", "AppleGothic", "DejaVu Sans"]
# Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams["axes.unicode_minus"] = False
# results.md is written next to this file; chart PNGs go into ./charts.
OUT_DIR = Path(__file__).parent
CHART_DIR = OUT_DIR / "charts"
# Alarm ids (looked up in DEFAULT_ALARMS) that the benchmark runs.
ALARMS = ["A1", "A2", "A3"]
# Number of documents requested from search() by the Tier 2 and Tier 3 workflow steps.
TOP_K = 3
# Estimated gpt-5-mini unit prices (USD per 1M tokens, assumed as of 2026)
PRICE_INPUT = 0.25
PRICE_OUTPUT = 2.0
# ==================== Workflow version (reproduces the earlier single-call approach) ====================
# JSON schema passed by workflow_run_cause as the strict structured-output
# format: an object with a "causes" array whose items carry a name, an integer
# pct, an evidence string and a list of citation strings.
_T2_SCHEMA = {
    "type": "object",
    "properties": {
        "causes": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "pct": {"type": "integer"},
                    "evidence": {"type": "string"},
                    "citations": {"type": "array", "items": {"type": "string"}},
                },
                "required": ["name", "pct", "evidence", "citations"],
                "additionalProperties": False,
            },
        }
    },
    "required": ["causes"],
    "additionalProperties": False,
}
# JSON schema passed by workflow_run_impact as the strict structured-output
# format: a numeric yield_loss plus a downstream_dependencies array. The
# "kind" enum only allows "impacted" / "minor"; the "current" entry is added
# by workflow_run_impact itself, not by the model.
_T3_SCHEMA = {
    "type": "object",
    "properties": {
        "yield_loss": {"type": "number"},
        "downstream_dependencies": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "stage": {"type": "string"},
                    "delta": {"type": "string"},
                    "tag": {"type": "string"},
                    "kind": {"type": "string", "enum": ["impacted", "minor"]},
                },
                "required": ["stage", "delta", "tag", "kind"],
                "additionalProperties": False,
            },
        },
    },
    "required": ["yield_loss", "downstream_dependencies"],
    "additionalProperties": False,
}
# JSON schema passed by workflow_run_response as the strict structured-output
# format: two arrays, "immediate" and "longterm", whose items share the same
# shape (a text string and a meta value that is a string or null).
_T4_SCHEMA = {
    "type": "object",
    "properties": {
        "immediate": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "text": {"type": "string"},
                    "meta": {"type": ["string", "null"]},
                },
                "required": ["text", "meta"],
                "additionalProperties": False,
            },
        },
        "longterm": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "text": {"type": "string"},
                    "meta": {"type": ["string", "null"]},
                },
                "required": ["text", "meta"],
                "additionalProperties": False,
            },
        },
    },
    "required": ["immediate", "longterm"],
    "additionalProperties": False,
}
def _llm_call(messages, schema, name):
    """Issue one chat-completion request constrained to a strict JSON schema.

    Args:
        messages: Chat messages forwarded unchanged as ``messages``.
        schema: JSON schema dict embedded in ``response_format``.
        name: Name given to the schema inside ``response_format``.

    Returns:
        Whatever ``client().chat.completions.create`` returns (the callers in
        this file read ``.usage`` and ``.choices[0].message.content`` from it).
    """
    schema_spec = {"name": name, "schema": schema, "strict": True}
    response_format = {"type": "json_schema", "json_schema": schema_spec}
    completions = client().chat.completions
    return completions.create(
        model=SUBAGENT_MODEL,
        messages=messages,
        response_format=response_format,
    )
def workflow_run_cause(alarm: dict, tier1: Tier1, trace: dict) -> Tier2:
    """Workflow-style Tier 2: one document search followed by one LLM call.

    Args:
        alarm: Alarm dict; reads ``title``, ``lot_id`` and optional ``feature``.
        tier1: Tier 1 result; reads ``score`` and the ``name`` of each feature.
        trace: Mutated in place with ``llm_calls`` (1), ``tool_calls`` (0),
            ``unique_tools`` (0), ``input_tokens`` and ``output_tokens``.

    Returns:
        The model's JSON reply parsed into a dict (shaped by ``_T2_SCHEMA``).
    """
    sensor_names = ", ".join(feature["name"] for feature in tier1["features"])
    feature_hint = alarm.get("feature") or ""
    query = f"{alarm['title']} {feature_hint} {sensor_names} ์์ธ ๋ถ์"

    # Retrieve once up front and inline every retrieved document into the prompt.
    doc_ids = search(query, top_k=TOP_K)
    sections = [f"[{doc_id}]\n{load_document(doc_id)}" for doc_id in doc_ids]
    knowledge = "\n\n".join(sections)

    user = f"""## ์ด์ ์๋
- ๊ณต์ : {alarm['title']}, lot: {alarm['lot_id']}
## Tier 1
- ์ ์: {tier1['score']}, ์ผ์: {sensor_names}
## ์ฌ๋ด ์ง์ ๋ฌธ์
{knowledge}
์ ์ ๋ณด๋ก ์์ธ 2~3๊ฐ๋ฅผ ์ฐ์ถ."""
    messages = [
        {"role": "system", "content": "๋ฐ๋์ฒด ๊ณต์ ์์ธ ๋ถ์ ์ ๋ฌธ๊ฐ. JSON ์คํค๋ง์ ๋ง์ถฐ ์๋ต."},
        {"role": "user", "content": user},
    ]
    response = _llm_call(messages, _T2_SCHEMA, "tier2")

    # The workflow path is by construction a single LLM call with no tools.
    trace.update(llm_calls=1, tool_calls=0, unique_tools=0)
    usage = response.usage
    trace["input_tokens"] = usage.prompt_tokens
    trace["output_tokens"] = usage.completion_tokens
    return json.loads(response.choices[0].message.content)
def workflow_run_impact(alarm: dict, tier1: Tier1, tier2: Tier2, trace: dict) -> Tier3:
    """Workflow-style Tier 3: one document search followed by one LLM call.

    Args:
        alarm: Alarm dict; reads ``title`` and ``id``.
        tier1: Tier 1 result; reads ``score``.
        tier2: Tier 2 result; reads ``name`` and ``pct`` of each cause.
        trace: Mutated in place with ``llm_calls`` (1), ``tool_calls`` (0),
            ``unique_tools`` (0), ``input_tokens`` and ``output_tokens``.

    Returns:
        Dict with ``yield_loss`` (rounded to one decimal), ``dependencies``
        (a locally built "current" entry followed by the model's downstream
        dependencies) and ``impact_lots`` from ``get_affected_wip``.
    """
    tier2_causes = tier2["causes"]
    cause_names = " ".join(cause["name"] for cause in tier2_causes)
    cause_lines = "\n".join(f"- {cause['name']} ({cause['pct']}%)" for cause in tier2_causes)

    query = f"{alarm['title']} ํ๋ฅ ํ๊ณต์ ์ํฅ ์์จ {cause_names}"
    doc_ids = search(query, top_k=TOP_K)
    sections = [f"[{doc_id}]\n{load_document(doc_id)}" for doc_id in doc_ids]
    knowledge = "\n\n".join(sections)

    user = f"""## ์๋: {alarm['title']}
## ์์ธ
{cause_lines}
## ์ฌ๋ด ์ง์
{knowledge}
yield_loss์ downstream_dependencies ์ฐ์ถ."""
    messages = [
        {"role": "system", "content": "๋ฐ๋์ฒด ์ํฅ ํ๊ฐ ์ ๋ฌธ๊ฐ. JSON ์คํค๋ง์ ๋ง์ถฐ ์๋ต."},
        {"role": "user", "content": user},
    ]
    response = _llm_call(messages, _T3_SCHEMA, "tier3_part")

    # The workflow path is by construction a single LLM call with no tools.
    trace.update(llm_calls=1, tool_calls=0, unique_tools=0)
    usage = response.usage
    trace["input_tokens"] = usage.prompt_tokens
    trace["output_tokens"] = usage.completion_tokens

    llm_out = json.loads(response.choices[0].message.content)
    # The entry for the alarm's own stage is built here, not by the model:
    # its stage is the first whitespace-separated word of the alarm title.
    current = {
        "stage": alarm["title"].split()[0],
        "delta": f"+{tier1['score']}",
        "tag": "ํ์ฌ",
        "kind": "current",
    }
    yield_loss = round(float(llm_out["yield_loss"]), 1)
    dependencies = [current] + llm_out["downstream_dependencies"]
    return {
        "yield_loss": yield_loss,
        "dependencies": dependencies,
        "impact_lots": get_affected_wip(alarm["id"]),
    }
def workflow_run_response(alarm: dict, tier1: Tier1, tier2: Tier2, tier3: Tier3, trace: dict) -> Tier4:
    """Workflow-style Tier 4: one document search followed by one LLM call.

    Args:
        alarm: Alarm dict; reads ``title``.
        tier1: Tier 1 result (accepted for signature parity; not read here).
        tier2: Tier 2 result; reads ``name`` and ``pct`` of each cause.
        tier3: Tier 3 result; reads ``yield_loss``.
        trace: Mutated in place with ``llm_calls`` (1), ``tool_calls`` (0),
            ``unique_tools`` (0), ``input_tokens`` and ``output_tokens``.

    Returns:
        Dict with the model's ``immediate`` and ``longterm`` lists plus
        ``refs``, one ``{"id", "desc"}`` entry per retrieved document id.
    """
    tier2_causes = tier2["causes"]
    causes = " ".join(cause["name"] for cause in tier2_causes)
    cause_lines = "\n".join(f"- {cause['name']} ({cause['pct']}%)" for cause in tier2_causes)

    query = f"{alarm['title']} ๋์ PM ์กฐ์น ๋ณด๋ฅ ๋ชจ๋ํฐ๋ง {causes}"
    # Note: this tier asks for 4 documents rather than TOP_K.
    doc_ids = search(query, top_k=4)
    sections = [f"[{doc_id}]\n{load_document(doc_id)}" for doc_id in doc_ids]
    knowledge = "\n\n".join(sections)

    user = f"""## ์๋: {alarm['title']}
## ์์ธ
{cause_lines}
## ์ํฅ
- yield_loss: {tier3['yield_loss']}%p
## ์ฌ๋ด ์ง์
{knowledge}
immediate์ longterm ์กฐ์น ๊ถ๊ณ ."""
    messages = [
        {"role": "system", "content": "๋ฐ๋์ฒด ๋์ ๊ถ๊ณ ์ ๋ฌธ๊ฐ. JSON ์คํค๋ง์ ๋ง์ถฐ ์๋ต."},
        {"role": "user", "content": user},
    ]
    response = _llm_call(messages, _T4_SCHEMA, "tier4_part")

    # The workflow path is by construction a single LLM call with no tools.
    trace.update(llm_calls=1, tool_calls=0, unique_tools=0)
    usage = response.usage
    trace["input_tokens"] = usage.prompt_tokens
    trace["output_tokens"] = usage.completion_tokens

    llm_out = json.loads(response.choices[0].message.content)
    # Every retrieved document counts as a reference; the id doubles as its description.
    refs = [{"id": doc_id, "desc": doc_id} for doc_id in doc_ids]
    return {
        "immediate": llm_out["immediate"],
        "longterm": llm_out["longterm"],
        "refs": refs,
    }
# ==================== Agentic version wrapper (adds token totals to the trace) ====================
def _run_agentic_with_token_capture(fn, *args, trace: dict):
    """Run an agentic tier function while summing the token usage of its LLM calls.

    The agentic functions do not expose ``resp.usage`` themselves, so the
    ``create`` method of the client's chat-completions object is temporarily
    replaced with a wrapper that accumulates usage, and restored afterwards
    even if ``fn`` raises.

    NOTE(review): this only captures anything if ``client()`` hands back the
    same shared object that ``fn`` uses internally -- confirm in agents.llm.

    Args:
        fn: Agentic tier function, invoked as ``fn(*args, trace=trace)``.
        *args: Positional arguments forwarded to ``fn``.
        trace: Dict that ``fn`` fills in; afterwards this wrapper adds
            ``input_tokens``, ``output_tokens``, ``unique_tools`` and
            ``tool_calls_count`` (the last two derived from the
            ``tool_calls`` list, which is expected to hold dicts with a
            ``name`` key).

    Returns:
        Whatever ``fn`` returns.
    """
    captured = {"input": 0, "output": 0}
    # Resolve the completions object once so the patch and the restore are
    # guaranteed to target the same object.
    completions = client().chat.completions
    real_create = completions.create

    def patched(*call_args, **call_kwargs):
        # Forward positional arguments too; a keyword-only wrapper would raise
        # TypeError for any caller that passes one positionally.
        resp = real_create(*call_args, **call_kwargs)
        # usage can be absent (it is optional on the response object), so
        # guard instead of crashing the benchmark mid-run.
        usage = getattr(resp, "usage", None)
        if usage is not None:
            captured["input"] += usage.prompt_tokens or 0
            captured["output"] += usage.completion_tokens or 0
        return resp

    completions.create = patched
    try:
        result = fn(*args, trace=trace)
    finally:
        completions.create = real_create

    tool_calls = trace.get("tool_calls", [])
    trace["input_tokens"] = captured["input"]
    trace["output_tokens"] = captured["output"]
    trace["unique_tools"] = len({tc["name"] for tc in tool_calls})
    trace["tool_calls_count"] = len(tool_calls)
    return result
# ==================== Sample collection ====================
def _alarm_by_id(aid: str) -> dict:
    """Return the entry of ``DEFAULT_ALARMS`` whose ``"id"`` equals *aid*.

    Args:
        aid: Alarm id to look up (e.g. one of ``ALARMS``).

    Returns:
        The first matching alarm dict.

    Raises:
        ValueError: If no alarm has that id. An explicit error is raised
            instead of letting a bare ``next()`` leak ``StopIteration``,
            which gives no hint about which id was missing.
    """
    for alarm in DEFAULT_ALARMS:
        if alarm["id"] == aid:
            return alarm
    raise ValueError(f"unknown alarm id: {aid!r}")
def collect_samples():
    """Run the workflow and agentic pipelines for every alarm in ``ALARMS``.

    For each alarm, Tier 1 detection runs once; then Tier 2 -> 3 -> 4 are
    executed first with the workflow functions and then with the agentic
    functions, timing each tier separately.

    Returns:
        A list with one dict per alarm: ``{"alarm": id, "workflow": {...},
        "agentic": {...}}``. Each pattern dict holds ``traces`` (per-tier
        trace dicts), ``tier_latency_ms``, ``unique_citations`` and the
        sorted ``citations`` list.
    """

    def timed(call, *call_args, **call_kwargs):
        """Call *call* and return ``(result, elapsed_ms)``."""
        # perf_counter is monotonic and high-resolution; time.time() can jump
        # when the system clock is adjusted, which would corrupt a latency sample.
        started = time.perf_counter()
        result = call(*call_args, **call_kwargs)
        return result, (time.perf_counter() - started) * 1000

    def cited_ids(tier2, tier4):
        """Collect the unique ids cited by Tier 2 causes and Tier 4 refs."""
        ids = set()
        for cause in tier2["causes"]:
            ids.update(cause.get("citations", []))
        ids.update(ref["id"] for ref in tier4["refs"])
        return ids

    def tool_call_total(traces):
        """Sum tool calls over tiers; a trace stores either a list or a count."""
        total = 0
        for tier_trace in traces.values():
            calls = tier_trace.get("tool_calls")
            if isinstance(calls, list):
                total += tier_trace.get("tool_calls_count", len(calls))
            else:
                total += calls or 0
        return total

    rows = []
    for aid in ALARMS:
        alarm = _alarm_by_id(aid)
        tier1 = run_detection(alarm)
        print(f"\n=== [{aid}] {alarm['title']} (T1 score={tier1['score']}) ===")

        # --- Workflow ---
        print(" [Workflow] T2 -> T3 -> T4")
        wf_traces = {"tier2": {}, "tier3": {}, "tier4": {}}
        wf_tier_lat = {}
        wf_t2, wf_tier_lat["tier2"] = timed(workflow_run_cause, alarm, tier1, wf_traces["tier2"])
        wf_t3, wf_tier_lat["tier3"] = timed(workflow_run_impact, alarm, tier1, wf_t2, wf_traces["tier3"])
        wf_t4, wf_tier_lat["tier4"] = timed(workflow_run_response, alarm, tier1, wf_t2, wf_t3, wf_traces["tier4"])
        wf_citations = cited_ids(wf_t2, wf_t4)

        # --- Agentic ---
        print(" [Agentic] T2 -> T3 -> T4")
        ag_traces = {"tier2": {}, "tier3": {}, "tier4": {}}
        ag_tier_lat = {}
        ag_t2, ag_tier_lat["tier2"] = timed(
            _run_agentic_with_token_capture, agentic_cause, alarm, tier1, trace=ag_traces["tier2"]
        )
        ag_t3, ag_tier_lat["tier3"] = timed(
            _run_agentic_with_token_capture, agentic_impact, alarm, tier1, ag_t2, trace=ag_traces["tier3"]
        )
        ag_t4, ag_tier_lat["tier4"] = timed(
            _run_agentic_with_token_capture, agentic_response, alarm, tier1, ag_t2, ag_t3, trace=ag_traces["tier4"]
        )
        ag_citations = cited_ids(ag_t2, ag_t4)

        rows.append({
            "alarm": aid,
            "workflow": {
                "traces": wf_traces, "tier_latency_ms": wf_tier_lat,
                "unique_citations": len(wf_citations), "citations": sorted(wf_citations),
            },
            "agentic": {
                "traces": ag_traces, "tier_latency_ms": ag_tier_lat,
                "unique_citations": len(ag_citations), "citations": sorted(ag_citations),
            },
        })

        # Progress output for the alarm that was just measured.
        latest = rows[-1]
        for pat, key in [("Workflow", "workflow"), ("Agentic", "agentic")]:
            side = latest[key]
            llm = sum(t.get("llm_calls", 0) for t in side["traces"].values())
            tool = tool_call_total(side["traces"])
            total_lat = sum(side["tier_latency_ms"].values())
            print(f" {pat}: LLM={llm}, tools={tool}, citations={side['unique_citations']}, total_lat={total_lat:.0f}ms")
    return rows
# ==================== Aggregation + charts + results ====================
def aggregate(rows):
    """Average the per-alarm measurements for each pattern.

    Args:
        rows: Per-alarm dicts as produced by ``collect_samples``; for each of
            ``"workflow"`` and ``"agentic"`` this reads ``traces`` (tier2-4),
            ``tier_latency_ms`` and ``unique_citations``.

    Returns:
        ``{"workflow": stats, "agentic": stats}`` where each ``stats`` dict
        holds the ``np.mean`` over alarms of the per-alarm totals:
        ``llm_calls``, ``tool_calls``, ``latency_ms``, ``unique_citations``,
        ``input_tokens`` and ``output_tokens``.
    """
    tier_names = ("tier2", "tier3", "tier4")

    def tier_total(record, field):
        # Sum one numeric trace field over the three tiers (missing -> 0).
        return sum(record["traces"][tier].get(field, 0) for tier in tier_names)

    def tool_call_count(tier_trace):
        # "tool_calls" is a list of calls on some traces and a plain count on others.
        calls = tier_trace.get("tool_calls")
        if isinstance(calls, list):
            return len(calls)
        return calls or 0

    def summarize(key):
        records = [row[key] for row in rows]
        llm_totals = [tier_total(rec, "llm_calls") for rec in records]
        tool_totals = [
            sum(tool_call_count(rec["traces"][tier]) for tier in tier_names)
            for rec in records
        ]
        latency_totals = [sum(rec["tier_latency_ms"].values()) for rec in records]
        citation_counts = [rec["unique_citations"] for rec in records]
        input_totals = [tier_total(rec, "input_tokens") for rec in records]
        output_totals = [tier_total(rec, "output_tokens") for rec in records]
        return {
            "llm_calls": np.mean(llm_totals),
            "tool_calls": np.mean(tool_totals),
            "latency_ms": np.mean(latency_totals),
            "unique_citations": np.mean(citation_counts),
            "input_tokens": np.mean(input_totals),
            "output_tokens": np.mean(output_totals),
        }

    return {pattern: summarize(pattern) for pattern in ("workflow", "agentic")}
def make_charts(agg, rows):
    """Render the three comparison charts as PNG files under ``CHART_DIR``.

    Args:
        agg: Output of ``aggregate`` (per-pattern averages).
        rows: Per-alarm rows from ``collect_samples``; only the per-tier
            latencies are read from them.

    Writes ``calls_citations.png``, ``latency_per_tier.png`` and ``cost.png``.
    """
    CHART_DIR.mkdir(exist_ok=True)
    workflow, agentic = agg["workflow"], agg["agentic"]
    bar_width = 0.35
    wf_color, ag_color = "#94a3b8", "#3b82f6"

    # 1. Call counts, tool calls and unique citations
    fig, ax = plt.subplots(figsize=(9, 5))
    metrics = ["LLM ํธ์ถ", "Tool ํธ์ถ", "์ ๋ํฌ ์ธ์ฉ"]
    count_keys = ("llm_calls", "tool_calls", "unique_citations")
    wf_vals = [workflow[k] for k in count_keys]
    ag_vals = [agentic[k] for k in count_keys]
    positions = np.arange(len(metrics))
    wf_bars = ax.bar(positions - bar_width / 2, wf_vals, bar_width, label="Workflow", color=wf_color)
    ag_bars = ax.bar(positions + bar_width / 2, ag_vals, bar_width, label="Agentic", color=ag_color)
    for bar in (*wf_bars, *ag_bars):
        height = bar.get_height()
        # Value label centred just above each bar.
        ax.text(bar.get_x() + bar.get_width() / 2, height + 0.1, f"{height:.1f}", ha="center", fontsize=9)
    ax.set_xticks(positions)
    ax.set_xticklabels(metrics)
    ax.set_ylabel("ํ๊ท (3 ์๋)")
    ax.set_title("Workflow vs Agentic - ํธ์ถ ํ์ยท์ธ์ฉ ๊น์ด")
    ax.legend()
    ax.grid(axis="y", alpha=0.3)
    fig.tight_layout()
    fig.savefig(CHART_DIR / "calls_citations.png", dpi=150)
    plt.close(fig)

    # 2. Latency broken down per tier
    fig, ax = plt.subplots(figsize=(10, 5))
    tiers = ["Tier 2 Cause", "Tier 3 Impact", "Tier 4 Response"]
    tier_keys = [f"tier{i}" for i in (2, 3, 4)]
    wf_lat = [np.mean([row["workflow"]["tier_latency_ms"][key] for row in rows]) for key in tier_keys]
    ag_lat = [np.mean([row["agentic"]["tier_latency_ms"][key] for row in rows]) for key in tier_keys]
    positions = np.arange(len(tiers))
    ax.bar(positions - bar_width / 2, wf_lat, bar_width, label="Workflow", color=wf_color)
    ax.bar(positions + bar_width / 2, ag_lat, bar_width, label="Agentic", color=ag_color)
    for idx, (wf_ms, ag_ms) in enumerate(zip(wf_lat, ag_lat)):
        ax.text(idx - bar_width / 2, wf_ms + 100, f"{wf_ms:.0f}", ha="center", fontsize=9)
        ax.text(idx + bar_width / 2, ag_ms + 100, f"{ag_ms:.0f}", ha="center", fontsize=9)
    ax.set_xticks(positions)
    ax.set_xticklabels(tiers)
    ax.set_ylabel("ํ๊ท Latency (ms)")
    ax.set_title("Tier๋ณ Latency ๋น๊ต")
    ax.legend()
    ax.grid(axis="y", alpha=0.3)
    fig.tight_layout()
    fig.savefig(CHART_DIR / "latency_per_tier.png", dpi=150)
    plt.close(fig)

    # 3. Cost comparison (prices are per 1M tokens, hence the division)
    fig, ax = plt.subplots(figsize=(8.5, 5))
    wf_cost = (workflow["input_tokens"] * PRICE_INPUT + workflow["output_tokens"] * PRICE_OUTPUT) / 1_000_000
    ag_cost = (agentic["input_tokens"] * PRICE_INPUT + agentic["output_tokens"] * PRICE_OUTPUT) / 1_000_000
    labels = ["Workflow", "Agentic"]
    costs = [wf_cost, ag_cost]
    cost_bars = ax.bar(labels, costs, color=[wf_color, ag_color])
    label_offset = max(costs) * 0.02
    for bar, cost in zip(cost_bars, costs):
        ax.text(bar.get_x() + bar.get_width() / 2, cost + label_offset, f"${cost*1000:.2f}/1000ํ", ha="center", fontsize=10)
    ax.set_ylabel("์๋๋น ํ๊ท USD")
    ax.set_title(f"๋น์ฉ ๋น๊ต (gpt-5-mini ๋จ๊ฐ ๊ธฐ์ค, in=${PRICE_INPUT}/M, out=${PRICE_OUTPUT}/M)")
    ax.grid(axis="y", alpha=0.3)
    fig.tight_layout()
    fig.savefig(CHART_DIR / "cost.png", dpi=150)
    plt.close(fig)
def write_results(rows, agg):
    """Write the markdown report ``results.md`` into ``OUT_DIR``.

    The report contains a summary table of the per-pattern averages, links to
    the chart PNGs that ``make_charts`` saves under ``charts/``, a per-alarm
    per-tier breakdown, and the closing insight / conclusion sections.

    Args:
        rows: Per-alarm rows from ``collect_samples``.
        agg: Output of ``aggregate`` (per-pattern averages).
    """
    wf, ag = agg["workflow"], agg["agentic"]
    # Prices are per 1M tokens, hence the division.
    wf_cost = (wf["input_tokens"] * PRICE_INPUT + wf["output_tokens"] * PRICE_OUTPUT) / 1_000_000
    ag_cost = (ag["input_tokens"] * PRICE_INPUT + ag["output_tokens"] * PRICE_OUTPUT) / 1_000_000
    lines = [
        "# Workflow vs Agentic - ์ ๋ ๋น๊ต",
        "",
        "๋์ผํ 4-Tier pipeline์ ๋ ๊ฐ์ง ํจํด์ผ๋ก ์คํํด ์ ๋ ๋น๊ตํฉ๋๋ค.",
        "- **Workflow**: Tier 2/3/4 ๊ฐ ๋จ๊ณ๊ฐ ์ฌ์ RAG 1ํ + LLM 1ํ (๊ตฌ๋ฒ์ )",
        "- **Agentic**: Tier 2/3/4 ๊ฐ ๋จ๊ณ๊ฐ LLM tool calling ๋ฃจํ (ํ์ฌ ์ฑํ)",
        "",
        f"์๋: {', '.join(ALARMS)} (์ด {len(ALARMS)}๊ฑด, SECOM + PHM CMP)",
        "",
        "## ๊ฒฐ๊ณผ ์์ฝ (3 ์๋ ํ๊ท )",
        "",
        "| ์งํ | Workflow | Agentic | ๋ฐฐ์ |",
        "|---|---|---|---|",
        f"| LLM ํธ์ถ / ์๋ | {wf['llm_calls']:.1f} | {ag['llm_calls']:.1f} | x{ag['llm_calls']/wf['llm_calls']:.1f} |",
        f"| Tool ํธ์ถ / ์๋ | {wf['tool_calls']:.1f} | {ag['tool_calls']:.1f} | - |",
        f"| ์ ๋ํฌ ์ธ์ฉ / ์๋ | {wf['unique_citations']:.1f} | {ag['unique_citations']:.1f} | x{ag['unique_citations']/max(wf['unique_citations'],1):.1f} |",
        f"| ์ ๋ ฅ ํ ํฐ / ์๋ | {wf['input_tokens']:.0f} | {ag['input_tokens']:.0f} | x{ag['input_tokens']/wf['input_tokens']:.1f} |",
        f"| ์ถ๋ ฅ ํ ํฐ / ์๋ | {wf['output_tokens']:.0f} | {ag['output_tokens']:.0f} | x{ag['output_tokens']/wf['output_tokens']:.1f} |",
        f"| Latency / ์๋ (Tier 2~4) | {wf['latency_ms']:.0f} ms | {ag['latency_ms']:.0f} ms | x{ag['latency_ms']/wf['latency_ms']:.1f} |",
        f"| ๋น์ฉ / ์๋ (USD) | ${wf_cost:.5f} | ${ag_cost:.5f} | x{ag_cost/wf_cost:.1f} |",
        "",
        "## ์๊ฐํ",
        "",
        "### ํธ์ถ ํ์ยท์ธ์ฉ ๊น์ด",
        "",
        # Link each chart saved by make_charts; paths are relative to results.md.
        "![calls_citations](charts/calls_citations.png)",
        "",
        "### Tier๋ณ Latency",
        "",
        "![latency_per_tier](charts/latency_per_tier.png)",
        "",
        "### ๋น์ฉ",
        "",
        "![cost](charts/cost.png)",
        "",
        "## ์๋๋ณ ์์ธ",
        "",
    ]
    for r in rows:
        lines.append(f"### {r['alarm']}")
        lines.append("")
        lines.append("| ํจํด | Tier | LLM | Tools | Latency(ms) |")
        lines.append("|---|---|---|---|---|")
        for pat in ("workflow", "agentic"):
            for tier in ("tier2", "tier3", "tier4"):
                tr = r[pat]["traces"][tier]
                # "tool_calls" is a list of calls on some traces and a plain count on others.
                tc = tr.get("tool_calls")
                tc_count = len(tc) if isinstance(tc, list) else (tc or 0)
                lines.append(
                    f"| {pat} | {tier} | {tr.get('llm_calls', 0)} | {tc_count} | "
                    f"{r[pat]['tier_latency_ms'][tier]:.0f} |"
                )
        lines.append("")
        lines.append(f"- Workflow ์ธ์ฉ: {r['workflow']['citations']}")
        lines.append(f"- Agentic ์ธ์ฉ: {r['agentic']['citations']}")
        lines.append("")
    lines += [
        "## ํต์ฌ ์ธ์ฌ์ดํธ",
        "",
        f"1. **์ธ์ฉ ๊น์ด {ag['unique_citations']/max(wf['unique_citations'],1):.1f}๋ฐฐ** - agentic์ ๋๊ตฌ๋ฅผ ์์จ ํธ์ถํด ๋ค์ํ ์์ค(INC/FMEA/SOP/incident DB)๋ฅผ ๊ฒฐํฉ",
        f"2. **ํธ์ถ ๋น์ฉ {ag_cost/wf_cost:.1f}๋ฐฐ** - LLM ํธ์ถ์ด ํ๊ท {wf['llm_calls']:.0f}ํ โ {ag['llm_calls']:.0f}ํ, ์ ๋ ฅ ํ ํฐ๋ {ag['input_tokens']/wf['input_tokens']:.1f}๋ฐฐ",
        f"3. **Latency {ag['latency_ms']/wf['latency_ms']:.1f}๋ฐฐ** - tool calling ๋ฃจํ + synthesis ์ถ๊ฐ ํธ์ถ์ ์์ฐ์ค๋ฌ์ด ๋น์ฉ",
        "4. **agentic๋ง์ ์ ์ฑ ์ ํธ**: tool ํธ์ถ ํจํด ์์ฒด๊ฐ reasoning trace - ์ด๋ค ์ ๋ณด๋ฅผ ์ ์ฐพ์๋์ง ๊ฐ์ฌยท์ฌํ ๊ฐ๋ฅ",
        "",
        "## ์ฑํ ๊ฒฐ๋ก ",
        "",
        "**ํ์ฌ ์ฑํ: Agentic**",
        "- ์ธ์ฉ ๊น์ดยท๊ทผ๊ฑฐ ๋ค์์ฑ์ด ๊ฒฐ์ ์ - ๋ฐ๋์ฒด fab ๋๋ฉ์ธ์์ multi-source ๊ทผ๊ฑฐ๊ฐ ์์ ์ฑยท์ ๋ขฐ์ฑ ๊ฒฐ์ ",
        f"- ๋น์ฉ {ag_cost/wf_cost:.1f}๋ฐฐ ์ฆ๊ฐ๋ ์๋๋น ${(ag_cost-wf_cost)*1000:.2f}/1000ํ ์์ค์ผ๋ก ์ฌ์ ์ ์ํฅ ๋ฌด์ ๊ฐ๋ฅ",
        "- Tool ํธ์ถ ๋ก๊ทธ๊ฐ ์์ฒด์ ์ธ audit trail์ด ๋์ด production observability์ ์ ๋ฆฌ",
        "",
        "Latency๊ฐ criticalํ ์๋๋ฆฌ์ค์์ Workflow๋ก ํ๊ฒฝ๋ณ์ ํ ๊ธ ์ถ๊ฐ ๊ฒํ ๊ฐ๋ฅ (ํ์ฌ ๋ฏธ๊ตฌํ).",
        "",
    ]
    (OUT_DIR / "results.md").write_text("\n".join(lines), encoding="utf-8")
    print(f"--- ์ ์ฅ: {OUT_DIR / 'results.md'} ---")
def main():
    """Run the benchmark end to end: collect, aggregate, chart, and report."""
    samples = collect_samples()

    print("\n=== ์ง๊ณ ===")
    summary = aggregate(samples)
    for pattern in summary:
        print(f" {pattern}: {summary[pattern]}")

    make_charts(summary, samples)
    write_results(samples, summary)


if __name__ == "__main__":
    main()