hee_!J
feat(experiments): workflow vs agentic ์ •๋Ÿ‰ ๋น„๊ต (LLM/tool/cost/latency/์ธ์šฉ ๊นŠ์ด)
5a68bbf
Raw
History Blame Contribute Delete
20.9 kB
"""Workflow vs Agentic ๋น„๊ต ์‹คํ—˜
๊ฐ™์€ ์•Œ๋žŒ(A1ยทA2ยทA3)์— ๋Œ€ํ•ด ๋‘ ํŒจํ„ด์„ ์‹คํ–‰ํ•˜๊ณ  ์ •๋Ÿ‰ ๋น„๊ต:
- **Workflow**: Tier 2/3/4 ๊ฐ 1ํšŒ LLM ํ˜ธ์ถœ, ์‚ฌ์ „ RAG 1ํšŒ (์ด์ „ ์ฝ”๋“œ ๊ทธ๋Œ€๋กœ ์ธ๋ผ์ธ ์žฌํ˜„)
- **Agentic**: tool-using agent (ํ˜„์žฌ main ์ฝ”๋“œ, agents/*.py)
์ธก์ •:
- ํ˜ธ์ถœ ํšŸ์ˆ˜: LLM calls, tool calls (per tier, per alarm)
- ๋‹ค์–‘์„ฑ: ์‚ฌ์šฉํ•œ ๋„๊ตฌ ์œ ๋‹ˆํฌ ์ˆ˜, ์ธ์šฉ ๋ฌธ์„œ ์œ ๋‹ˆํฌ ์ˆ˜
- ์‹œ๊ฐ„: per-tier latency, total
- ๋น„์šฉ: ์ถ”์ • ํ† ํฐยทUSD (gpt-5-mini ๋‹จ๊ฐ€ ๊ธฐ์ค€)
- ํ’ˆ์งˆ: ์ธ์šฉ๋œ citation ์ˆ˜ (์–•์€ grounding vs ๊นŠ์€ grounding)
์ฐจํŠธ 3์ข…: ํ˜ธ์ถœ ํšŸ์ˆ˜ / latency / ์ธ์šฉ ๊นŠ์ด (matplotlib)
์‹คํ–‰: python -m experiments.agentic_vs_workflow.benchmark
๊ฒฐ๊ณผ: results.md + charts/*.png
"""
import json
import time
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from agents.cause import run_cause as agentic_cause
from agents.detection import run_detection
from agents.impact import run_impact as agentic_impact
from agents.llm import SUBAGENT_MODEL, client
from agents.rag.store import load_document, search
from agents.response import run_response as agentic_response
from core.schema import Tier1, Tier2, Tier3, Tier4
from data.demo import DEFAULT_ALARMS
from data.wip import get_affected_wip
# Font families used for chart text (chart labels in this file contain non-ASCII characters).
plt.rcParams["font.family"] = ["Apple SD Gothic Neo", "AppleGothic", "DejaVu Sans"]
# Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams["axes.unicode_minus"] = False
# The report and the charts are written relative to the directory containing this module.
OUT_DIR = Path(__file__).parent
CHART_DIR = OUT_DIR / "charts"
# Ids of the alarms the benchmark iterates over.
ALARMS = ["A1", "A2", "A3"]
# top_k passed to search() by the workflow cause and impact steps.
TOP_K = 3
# Estimated gpt-5-mini unit prices (USD per 1M tokens; assumed 2026 pricing)
PRICE_INPUT = 0.25
PRICE_OUTPUT = 2.0
# ==================== Workflow version (reproduces the previous single-call approach) ====================


def _strict_object(properties):
    """Return an object schema in which every property is required and extras are rejected."""
    return {
        "type": "object",
        "properties": properties,
        "required": list(properties),
        "additionalProperties": False,
    }


def _array_of(items):
    """Return an array schema whose elements follow *items*."""
    return {"type": "array", "items": items}


def _action_item():
    """Return a fresh schema for one recommended action: a text plus a nullable meta string."""
    return _strict_object(
        {
            "text": {"type": "string"},
            "meta": {"type": ["string", "null"]},
        }
    )


# Tier 2 (cause analysis) output: a list of causes with share, evidence and citations.
_T2_SCHEMA = _strict_object(
    {
        "causes": _array_of(
            _strict_object(
                {
                    "name": {"type": "string"},
                    "pct": {"type": "integer"},
                    "evidence": {"type": "string"},
                    "citations": _array_of({"type": "string"}),
                }
            )
        ),
    }
)

# Tier 3 (impact) output: a yield-loss number plus downstream stage entries.
_T3_SCHEMA = _strict_object(
    {
        "yield_loss": {"type": "number"},
        "downstream_dependencies": _array_of(
            _strict_object(
                {
                    "stage": {"type": "string"},
                    "delta": {"type": "string"},
                    "tag": {"type": "string"},
                    "kind": {"type": "string", "enum": ["impacted", "minor"]},
                }
            )
        ),
    }
)

# Tier 4 (response) output: immediate and long-term action lists.
_T4_SCHEMA = _strict_object(
    {
        "immediate": _array_of(_action_item()),
        "longterm": _array_of(_action_item()),
    }
)
def _llm_call(messages, schema, name):
    """Issue one chat-completion request whose reply must follow a strict JSON schema.

    Args:
        messages: Chat messages forwarded unchanged to the API.
        schema: JSON schema placed in the ``json_schema`` response format.
        name: Name attached to that schema.

    Returns:
        The response object returned by ``chat.completions.create``.
    """
    schema_spec = {"name": name, "schema": schema, "strict": True}
    completions = client().chat.completions
    return completions.create(
        model=SUBAGENT_MODEL,
        messages=messages,
        response_format={"type": "json_schema", "json_schema": schema_spec},
    )
def workflow_run_cause(alarm: dict, tier1: Tier1, trace: dict) -> Tier2:
    """Workflow-style Tier 2 (cause analysis): one retrieval followed by one LLM call.

    Args:
        alarm: Alarm record; reads ``title``, ``lot_id`` and the optional ``feature``.
        tier1: Detection output; reads ``score`` and the ``name`` of each entry in ``features``.
        trace: Mutated in place with the call counts and token usage of this step.

    Returns:
        The model's reply (requested with ``_T2_SCHEMA``), parsed from JSON.
    """
    # NOTE(review): the non-ASCII literals below look mis-encoded in this copy of
    # the file; they are reproduced exactly as found.
    sensor_names = [feature["name"] for feature in tier1["features"]]
    sensors = ", ".join(sensor_names)
    query = f"{alarm['title']} {alarm.get('feature') or ''} {sensors} ์›์ธ ๋ถ„์„"

    # Single up-front retrieval: every hit is inlined into the prompt under its id.
    sections = []
    for doc_id in search(query, top_k=TOP_K):
        sections.append(f"[{doc_id}]\n{load_document(doc_id)}")
    knowledge = "\n\n".join(sections)

    user = f"""## ์ด์ƒ ์•Œ๋žŒ
- ๊ณต์ •: {alarm['title']}, lot: {alarm['lot_id']}
## Tier 1
- ์ ์ˆ˜: {tier1['score']}, ์„ผ์„œ: {sensors}
## ์‚ฌ๋‚ด ์ง€์‹ ๋ฌธ์„œ
{knowledge}
์œ„ ์ •๋ณด๋กœ ์›์ธ 2~3๊ฐœ๋ฅผ ์‚ฐ์ถœ."""
    messages = [
        {"role": "system", "content": "๋ฐ˜๋„์ฒด ๊ณต์ • ์›์ธ ๋ถ„์„ ์ „๋ฌธ๊ฐ€. JSON ์Šคํ‚ค๋งˆ์— ๋งž์ถฐ ์‘๋‹ต."},
        {"role": "user", "content": user},
    ]
    resp = _llm_call(messages, _T2_SCHEMA, "tier2")

    # The workflow pattern is by construction one LLM call and no tool calls.
    trace.update(llm_calls=1, tool_calls=0, unique_tools=0)
    usage = resp.usage
    trace["input_tokens"] = usage.prompt_tokens
    trace["output_tokens"] = usage.completion_tokens

    reply = resp.choices[0].message.content
    return json.loads(reply)
def workflow_run_impact(alarm: dict, tier1: Tier1, tier2: Tier2, trace: dict) -> Tier3:
    """Workflow-style Tier 3 (impact): one retrieval followed by one LLM call.

    Args:
        alarm: Alarm record; reads ``title`` and ``id``.
        tier1: Detection output; its ``score`` becomes the delta of the current stage.
        tier2: Cause output; reads ``name`` and ``pct`` of each entry in ``causes``.
        trace: Mutated in place with the call counts and token usage of this step.

    Returns:
        A dict with ``yield_loss`` (rounded to one decimal), ``dependencies``
        (the current stage followed by the model's downstream entries) and
        ``impact_lots`` from ``get_affected_wip``.
    """
    # NOTE(review): the non-ASCII literals below look mis-encoded in this copy of
    # the file; they are reproduced exactly as found.
    cause_names = " ".join(cause["name"] for cause in tier2["causes"])
    query = f"{alarm['title']} ํ•˜๋ฅ˜ ํ›„๊ณต์ • ์˜ํ–ฅ ์ˆ˜์œจ {cause_names}"

    # Single up-front retrieval: every hit is inlined into the prompt under its id.
    sections = []
    for doc_id in search(query, top_k=TOP_K):
        sections.append(f"[{doc_id}]\n{load_document(doc_id)}")
    knowledge = "\n\n".join(sections)

    cause_lines = "\n".join(f"- {cause['name']} ({cause['pct']}%)" for cause in tier2["causes"])
    user = f"""## ์•Œ๋žŒ: {alarm['title']}
## ์›์ธ
{cause_lines}
## ์‚ฌ๋‚ด ์ง€์‹
{knowledge}
yield_loss์™€ downstream_dependencies ์‚ฐ์ถœ."""
    messages = [
        {"role": "system", "content": "๋ฐ˜๋„์ฒด ์˜ํ–ฅ ํ‰๊ฐ€ ์ „๋ฌธ๊ฐ€. JSON ์Šคํ‚ค๋งˆ์— ๋งž์ถฐ ์‘๋‹ต."},
        {"role": "user", "content": user},
    ]
    resp = _llm_call(messages, _T3_SCHEMA, "tier3_part")

    # The workflow pattern is by construction one LLM call and no tool calls.
    trace.update(llm_calls=1, tool_calls=0, unique_tools=0)
    usage = resp.usage
    trace["input_tokens"] = usage.prompt_tokens
    trace["output_tokens"] = usage.completion_tokens

    llm_out = json.loads(resp.choices[0].message.content)
    # The stage that raised the alarm is prepended locally rather than asked of the model.
    current_stage = {
        "stage": alarm["title"].split()[0],
        "delta": f"+{tier1['score']}",
        "tag": "ํ˜„์žฌ",
        "kind": "current",
    }
    return {
        "yield_loss": round(float(llm_out["yield_loss"]), 1),
        "dependencies": [current_stage] + llm_out["downstream_dependencies"],
        "impact_lots": get_affected_wip(alarm["id"]),
    }
def workflow_run_response(alarm: dict, tier1: Tier1, tier2: Tier2, tier3: Tier3, trace: dict) -> Tier4:
    """Workflow-style Tier 4 (response): one retrieval followed by one LLM call.

    Args:
        alarm: Alarm record; reads ``title``.
        tier1: Detection output; accepted for signature parity, not read here.
        tier2: Cause output; reads ``name`` and ``pct`` of each entry in ``causes``.
        tier3: Impact output; reads ``yield_loss``.
        trace: Mutated in place with the call counts and token usage of this step.

    Returns:
        A dict with the model's ``immediate`` and ``longterm`` actions plus
        ``refs``, one ``{"id", "desc"}`` entry per retrieved document id.
    """
    # NOTE(review): the non-ASCII literals below look mis-encoded in this copy of
    # the file; they are reproduced exactly as found.
    causes = " ".join(cause["name"] for cause in tier2["causes"])
    query = f"{alarm['title']} ๋Œ€์‘ PM ์กฐ์น˜ ๋ณด๋ฅ˜ ๋ชจ๋‹ˆํ„ฐ๋ง {causes}"

    # This step retrieves four documents (the other workflow steps use TOP_K).
    doc_ids = search(query, top_k=4)
    sections = []
    for doc_id in doc_ids:
        sections.append(f"[{doc_id}]\n{load_document(doc_id)}")
    knowledge = "\n\n".join(sections)

    cause_lines = "\n".join(f"- {cause['name']} ({cause['pct']}%)" for cause in tier2["causes"])
    user = f"""## ์•Œ๋žŒ: {alarm['title']}
## ์›์ธ
{cause_lines}
## ์˜ํ–ฅ
- yield_loss: {tier3['yield_loss']}%p
## ์‚ฌ๋‚ด ์ง€์‹
{knowledge}
immediate์™€ longterm ์กฐ์น˜ ๊ถŒ๊ณ ."""
    messages = [
        {"role": "system", "content": "๋ฐ˜๋„์ฒด ๋Œ€์‘ ๊ถŒ๊ณ  ์ „๋ฌธ๊ฐ€. JSON ์Šคํ‚ค๋งˆ์— ๋งž์ถฐ ์‘๋‹ต."},
        {"role": "user", "content": user},
    ]
    resp = _llm_call(messages, _T4_SCHEMA, "tier4_part")

    # The workflow pattern is by construction one LLM call and no tool calls.
    trace.update(llm_calls=1, tool_calls=0, unique_tools=0)
    usage = resp.usage
    trace["input_tokens"] = usage.prompt_tokens
    trace["output_tokens"] = usage.completion_tokens

    llm_out = json.loads(resp.choices[0].message.content)
    # Every retrieved document is reported as a reference; the id doubles as its description.
    refs = []
    for doc_id in doc_ids:
        refs.append({"id": doc_id, "desc": doc_id})
    return {
        "immediate": llm_out["immediate"],
        "longterm": llm_out["longterm"],
        "refs": refs,
    }
# ==================== Agentic version wrapper (adds token totals to the trace) ====================
def _run_agentic_with_token_capture(fn, *args, trace: dict):
    """Run an agentic tier function while summing the token usage of its LLM calls.

    The agentic functions do not expose ``resp.usage`` themselves, so the
    ``create`` method on the chat-completions object is wrapped for the
    duration of the call and restored afterwards, even when *fn* raises.

    Args:
        fn: Agentic tier function; called as ``fn(*args, trace=trace)``.
        *args: Positional arguments forwarded to *fn*.
        trace: Dict handed to *fn* and then extended in place with
            ``input_tokens``, ``output_tokens``, ``unique_tools`` and
            ``tool_calls_count``.

    Returns:
        Whatever *fn* returns.
    """
    totals = {"input": 0, "output": 0}
    # Resolve the completions object once so that the patch and the restore are
    # guaranteed to target the same object.
    completions = client().chat.completions
    real_create = completions.create

    def patched(*call_args, **call_kwargs):
        # Forward positional arguments too, so callers are not limited to keywords.
        resp = real_create(*call_args, **call_kwargs)
        # ``usage`` is optional on a completion; skip the sums when it is absent.
        usage = getattr(resp, "usage", None)
        if usage is not None:
            totals["input"] += usage.prompt_tokens or 0
            totals["output"] += usage.completion_tokens or 0
        return resp

    completions.create = patched
    try:
        result = fn(*args, trace=trace)
    finally:
        completions.create = real_create

    # A missing or None tool-call log counts as "no tools used".
    tool_calls = trace.get("tool_calls") or []
    trace["input_tokens"] = totals["input"]
    trace["output_tokens"] = totals["output"]
    trace["unique_tools"] = len({call["name"] for call in tool_calls})
    trace["tool_calls_count"] = len(tool_calls)
    return result
# ==================== Sample collection ====================
def _alarm_by_id(aid: str) -> dict:
    """Return the first entry of ``DEFAULT_ALARMS`` whose ``"id"`` equals *aid*.

    Raises:
        StopIteration: If no entry matches.
    """
    for candidate in DEFAULT_ALARMS:
        if candidate["id"] == aid:
            return candidate
    raise StopIteration
def collect_samples():
    """Run the workflow and agentic pipelines on every alarm in ``ALARMS``.

    For each alarm, Tier 1 detection runs once and its result feeds both
    patterns. Each of Tier 2/3/4 is then timed individually, first for the
    workflow functions and then for the agentic ones.

    Returns:
        A list with one dict per alarm: ``{"alarm", "workflow", "agentic"}``,
        where each pattern entry holds ``traces`` (per tier),
        ``tier_latency_ms`` (per tier), ``unique_citations`` and the sorted
        ``citations`` list.
    """

    def _timed_ms(fn, *args, **kwargs):
        # perf_counter is monotonic and high-resolution, unlike wall-clock time.time().
        start = time.perf_counter()
        result = fn(*args, **kwargs)
        return result, (time.perf_counter() - start) * 1000

    def _tool_call_count(tier_trace):
        # Agentic traces log tool calls as a list, workflow traces as a plain count.
        calls = tier_trace.get("tool_calls")
        if isinstance(calls, list):
            return tier_trace.get("tool_calls_count", len(calls))
        return calls or 0

    def _cited_ids(tier2, tier4):
        # Citations named by the cause step plus the reference ids of the response step.
        cited = set()
        for cause in tier2["causes"]:
            cited.update(cause.get("citations", []))
        for ref in tier4["refs"]:
            cited.add(ref["id"])
        return cited

    rows = []
    for aid in ALARMS:
        alarm = _alarm_by_id(aid)
        tier1 = run_detection(alarm)
        print(f"\n=== [{aid}] {alarm['title']} (T1 score={tier1['score']}) ===")

        # --- Workflow ---
        print(" [Workflow] T2 -> T3 -> T4")
        wf_traces = {"tier2": {}, "tier3": {}, "tier4": {}}
        wf_tier_lat = {}
        wf_t2, wf_tier_lat["tier2"] = _timed_ms(workflow_run_cause, alarm, tier1, wf_traces["tier2"])
        wf_t3, wf_tier_lat["tier3"] = _timed_ms(workflow_run_impact, alarm, tier1, wf_t2, wf_traces["tier3"])
        wf_t4, wf_tier_lat["tier4"] = _timed_ms(workflow_run_response, alarm, tier1, wf_t2, wf_t3, wf_traces["tier4"])
        wf_citations = _cited_ids(wf_t2, wf_t4)

        # --- Agentic ---
        print(" [Agentic] T2 -> T3 -> T4")
        ag_traces = {"tier2": {}, "tier3": {}, "tier4": {}}
        ag_tier_lat = {}
        ag_t2, ag_tier_lat["tier2"] = _timed_ms(
            _run_agentic_with_token_capture, agentic_cause, alarm, tier1, trace=ag_traces["tier2"]
        )
        ag_t3, ag_tier_lat["tier3"] = _timed_ms(
            _run_agentic_with_token_capture, agentic_impact, alarm, tier1, ag_t2, trace=ag_traces["tier3"]
        )
        ag_t4, ag_tier_lat["tier4"] = _timed_ms(
            _run_agentic_with_token_capture, agentic_response, alarm, tier1, ag_t2, ag_t3, trace=ag_traces["tier4"]
        )
        ag_citations = _cited_ids(ag_t2, ag_t4)

        row = {
            "alarm": aid,
            "workflow": {
                "traces": wf_traces,
                "tier_latency_ms": wf_tier_lat,
                "unique_citations": len(wf_citations),
                "citations": sorted(wf_citations),
            },
            "agentic": {
                "traces": ag_traces,
                "tier_latency_ms": ag_tier_lat,
                "unique_citations": len(ag_citations),
                "citations": sorted(ag_citations),
            },
        }
        rows.append(row)

        # Progress output
        for pat, key in [("Workflow", "workflow"), ("Agentic", "agentic")]:
            side = row[key]
            tier_traces = side["traces"].values()
            llm = sum(t.get("llm_calls", 0) for t in tier_traces)
            tool = sum(_tool_call_count(t) for t in tier_traces)
            total_lat = sum(side["tier_latency_ms"].values())
            print(f" {pat}: LLM={llm}, tools={tool}, citations={side['unique_citations']}, total_lat={total_lat:.0f}ms")
    return rows
# ==================== Aggregation + charts + results ====================
def aggregate(rows):
    """Average the per-alarm metrics of each pattern across all rows.

    Args:
        rows: Per-alarm samples as produced by ``collect_samples``; each row
            has a ``"workflow"`` and an ``"agentic"`` entry holding ``traces``
            (keyed by tier), ``tier_latency_ms`` and ``unique_citations``.

    Returns:
        ``{"workflow": stats, "agentic": stats}`` where ``stats`` maps
        ``llm_calls``, ``tool_calls``, ``latency_ms``, ``unique_citations``,
        ``input_tokens`` and ``output_tokens`` to their mean over the rows.
    """
    tier_keys = ("tier2", "tier3", "tier4")

    def trace_total(sample, field):
        # Sum one numeric trace field over the three tiers (missing -> 0).
        return sum(sample["traces"][tier].get(field, 0) for tier in tier_keys)

    def tool_total(sample):
        # "tool_calls" is either a list of call records or a plain count.
        count = 0
        for tier in tier_keys:
            recorded = sample["traces"][tier].get("tool_calls")
            count += len(recorded) if isinstance(recorded, list) else (recorded or 0)
        return count

    def summarize(pattern):
        samples = [row[pattern] for row in rows]
        return {
            "llm_calls": np.mean([trace_total(s, "llm_calls") for s in samples]),
            "tool_calls": np.mean([tool_total(s) for s in samples]),
            "latency_ms": np.mean([sum(s["tier_latency_ms"].values()) for s in samples]),
            "unique_citations": np.mean([s["unique_citations"] for s in samples]),
            "input_tokens": np.mean([trace_total(s, "input_tokens") for s in samples]),
            "output_tokens": np.mean([trace_total(s, "output_tokens") for s in samples]),
        }

    return {pattern: summarize(pattern) for pattern in ("workflow", "agentic")}
def _chart_calls_citations(wf, ag):
    """Save grouped bars of mean LLM calls, tool calls and unique citations per pattern."""
    metric_keys = ("llm_calls", "tool_calls", "unique_citations")
    metrics = ["LLM ํ˜ธ์ถœ", "Tool ํ˜ธ์ถœ", "์œ ๋‹ˆํฌ ์ธ์šฉ"]
    fig, ax = plt.subplots(figsize=(9, 5))
    x = np.arange(len(metrics))
    w = 0.35
    wf_bars = ax.bar(x - w/2, [wf[k] for k in metric_keys], w, label="Workflow", color="#94a3b8")
    ag_bars = ax.bar(x + w/2, [ag[k] for k in metric_keys], w, label="Agentic", color="#3b82f6")
    # Print each bar's value just above it.
    for bar in (*wf_bars, *ag_bars):
        ax.text(
            bar.get_x() + bar.get_width()/2,
            bar.get_height() + 0.1,
            f"{bar.get_height():.1f}",
            ha="center",
            fontsize=9,
        )
    ax.set_xticks(x)
    ax.set_xticklabels(metrics)
    ax.set_ylabel("ํ‰๊ท  (3 ์•Œ๋žŒ)")
    ax.set_title("Workflow vs Agentic - ํ˜ธ์ถœ ํšŸ์ˆ˜ยท์ธ์šฉ ๊นŠ์ด")
    ax.legend()
    ax.grid(axis="y", alpha=0.3)
    fig.tight_layout()
    fig.savefig(CHART_DIR / "calls_citations.png", dpi=150)
    plt.close(fig)


def _chart_latency_per_tier(rows):
    """Save grouped bars of the mean latency of each tier per pattern."""
    tiers = ["Tier 2 Cause", "Tier 3 Impact", "Tier 4 Response"]
    tier_keys = [f"tier{i}" for i in (2, 3, 4)]

    def mean_latency(pattern):
        return [np.mean([r[pattern]["tier_latency_ms"][key] for r in rows]) for key in tier_keys]

    fig, ax = plt.subplots(figsize=(10, 5))
    wf_lat = mean_latency("workflow")
    ag_lat = mean_latency("agentic")
    x = np.arange(len(tiers))
    w = 0.35
    ax.bar(x - w/2, wf_lat, w, label="Workflow", color="#94a3b8")
    ax.bar(x + w/2, ag_lat, w, label="Agentic", color="#3b82f6")
    # Value labels sit a fixed 100 ms above each bar.
    for i, (wv, av) in enumerate(zip(wf_lat, ag_lat)):
        ax.text(i - w/2, wv + 100, f"{wv:.0f}", ha="center", fontsize=9)
        ax.text(i + w/2, av + 100, f"{av:.0f}", ha="center", fontsize=9)
    ax.set_xticks(x)
    ax.set_xticklabels(tiers)
    ax.set_ylabel("ํ‰๊ท  Latency (ms)")
    ax.set_title("Tier๋ณ„ Latency ๋น„๊ต")
    ax.legend()
    ax.grid(axis="y", alpha=0.3)
    fig.tight_layout()
    fig.savefig(CHART_DIR / "latency_per_tier.png", dpi=150)
    plt.close(fig)


def _chart_cost(wf, ag):
    """Save a two-bar chart of the estimated USD cost per alarm for each pattern."""

    def usd(stats):
        # Prices are per one million tokens.
        return (stats["input_tokens"] * PRICE_INPUT + stats["output_tokens"] * PRICE_OUTPUT) / 1_000_000

    fig, ax = plt.subplots(figsize=(8.5, 5))
    labels = ["Workflow", "Agentic"]
    costs = [usd(wf), usd(ag)]
    bars = ax.bar(labels, costs, color=["#94a3b8", "#3b82f6"])
    # Labels show the cost scaled to 1000 runs, placed 2% of the tallest bar above each bar.
    for bar, cost in zip(bars, costs):
        ax.text(
            bar.get_x() + bar.get_width()/2,
            cost + max(costs) * 0.02,
            f"${cost*1000:.2f}/1000ํšŒ",
            ha="center",
            fontsize=10,
        )
    ax.set_ylabel("์•Œ๋žŒ๋‹น ํ‰๊ท  USD")
    ax.set_title(f"๋น„์šฉ ๋น„๊ต (gpt-5-mini ๋‹จ๊ฐ€ ๊ธฐ์ค€, in=${PRICE_INPUT}/M, out=${PRICE_OUTPUT}/M)")
    ax.grid(axis="y", alpha=0.3)
    fig.tight_layout()
    fig.savefig(CHART_DIR / "cost.png", dpi=150)
    plt.close(fig)


def make_charts(agg, rows):
    """Render the three comparison charts as PNG files under ``CHART_DIR``.

    Args:
        agg: Output of ``aggregate``; its ``"workflow"`` and ``"agentic"`` stats are plotted.
        rows: Per-alarm samples, used for the per-tier latency means.
    """
    # NOTE(review): the non-ASCII chart labels look mis-encoded in this copy of
    # the file; they are reproduced exactly as found.
    CHART_DIR.mkdir(exist_ok=True)
    wf, ag = agg["workflow"], agg["agentic"]
    # 1. Call / tool / citation comparison
    _chart_calls_citations(wf, ag)
    # 2. Latency breakdown (per tier)
    _chart_latency_per_tier(rows)
    # 3. Cost comparison
    _chart_cost(wf, ag)
def write_results(rows, agg):
    """Write the Markdown report ``results.md`` into ``OUT_DIR``.

    The report has a summary table of the aggregated metrics, links to the
    three charts, a per-alarm table broken down by tier, and closing remarks.

    Args:
        rows: Per-alarm samples as produced by ``collect_samples``.
        agg: Output of ``aggregate`` with ``"workflow"`` and ``"agentic"`` stats.
    """
    # NOTE(review): the non-ASCII report text looks mis-encoded in this copy of
    # the file; it is reproduced exactly as found.
    wf, ag = agg["workflow"], agg["agentic"]

    def usd(stats):
        # Prices are per one million tokens.
        return (stats["input_tokens"] * PRICE_INPUT + stats["output_tokens"] * PRICE_OUTPUT) / 1_000_000

    wf_cost = usd(wf)
    ag_cost = usd(ag)

    # Agentic / workflow multipliers shown in the tables and remarks below.
    llm_ratio = ag["llm_calls"] / wf["llm_calls"]
    citation_ratio = ag["unique_citations"] / max(wf["unique_citations"], 1)
    input_ratio = ag["input_tokens"] / wf["input_tokens"]
    output_ratio = ag["output_tokens"] / wf["output_tokens"]
    latency_ratio = ag["latency_ms"] / wf["latency_ms"]
    cost_ratio = ag_cost / wf_cost

    header = [
        "# Workflow vs Agentic - ์ •๋Ÿ‰ ๋น„๊ต",
        "",
        "๋™์ผํ•œ 4-Tier pipeline์„ ๋‘ ๊ฐ€์ง€ ํŒจํ„ด์œผ๋กœ ์‹คํ–‰ํ•ด ์ •๋Ÿ‰ ๋น„๊ตํ•ฉ๋‹ˆ๋‹ค.",
        "- **Workflow**: Tier 2/3/4 ๊ฐ ๋‹จ๊ณ„๊ฐ€ ์‚ฌ์ „ RAG 1ํšŒ + LLM 1ํšŒ (๊ตฌ๋ฒ„์ „)",
        "- **Agentic**: Tier 2/3/4 ๊ฐ ๋‹จ๊ณ„๊ฐ€ LLM tool calling ๋ฃจํ”„ (ํ˜„์žฌ ์ฑ„ํƒ)",
        "",
        f"์•Œ๋žŒ: {', '.join(ALARMS)} (์ด {len(ALARMS)}๊ฑด, SECOM + PHM CMP)",
        "",
        "## ๊ฒฐ๊ณผ ์š”์•ฝ (3 ์•Œ๋žŒ ํ‰๊ท )",
        "",
        "| ์ง€ํ‘œ | Workflow | Agentic | ๋ฐฐ์ˆ˜ |",
        "|---|---|---|---|",
        f"| LLM ํ˜ธ์ถœ / ์•Œ๋žŒ | {wf['llm_calls']:.1f} | {ag['llm_calls']:.1f} | x{llm_ratio:.1f} |",
        f"| Tool ํ˜ธ์ถœ / ์•Œ๋žŒ | {wf['tool_calls']:.1f} | {ag['tool_calls']:.1f} | - |",
        f"| ์œ ๋‹ˆํฌ ์ธ์šฉ / ์•Œ๋žŒ | {wf['unique_citations']:.1f} | {ag['unique_citations']:.1f} | x{citation_ratio:.1f} |",
        f"| ์ž…๋ ฅ ํ† ํฐ / ์•Œ๋žŒ | {wf['input_tokens']:.0f} | {ag['input_tokens']:.0f} | x{input_ratio:.1f} |",
        f"| ์ถœ๋ ฅ ํ† ํฐ / ์•Œ๋žŒ | {wf['output_tokens']:.0f} | {ag['output_tokens']:.0f} | x{output_ratio:.1f} |",
        f"| Latency / ์•Œ๋žŒ (Tier 2~4) | {wf['latency_ms']:.0f} ms | {ag['latency_ms']:.0f} ms | x{latency_ratio:.1f} |",
        f"| ๋น„์šฉ / ์•Œ๋žŒ (USD) | ${wf_cost:.5f} | ${ag_cost:.5f} | x{cost_ratio:.1f} |",
        "",
        "## ์‹œ๊ฐํ™”",
        "",
        "### ํ˜ธ์ถœ ํšŸ์ˆ˜ยท์ธ์šฉ ๊นŠ์ด",
        "![Calls](charts/calls_citations.png)",
        "",
        "### Tier๋ณ„ Latency",
        "![Latency](charts/latency_per_tier.png)",
        "",
        "### ๋น„์šฉ",
        "![Cost](charts/cost.png)",
        "",
        "## ์•Œ๋žŒ๋ณ„ ์ƒ์„ธ",
        "",
    ]

    # One table per alarm: a row for every (pattern, tier) pair, then the citation lists.
    details = []
    for row in rows:
        details.append(f"### {row['alarm']}")
        details.append("")
        details.append("| ํŒจํ„ด | Tier | LLM | Tools | Latency(ms) |")
        details.append("|---|---|---|---|---|")
        for pat in ("workflow", "agentic"):
            for tier in ("tier2", "tier3", "tier4"):
                tier_trace = row[pat]["traces"][tier]
                # "tool_calls" is a list for agentic traces and a count for workflow traces.
                logged = tier_trace.get("tool_calls")
                if isinstance(logged, list):
                    tool_count = len(logged)
                else:
                    tool_count = logged or 0
                details.append(
                    f"| {pat} | {tier} | {tier_trace.get('llm_calls', 0)} | {tool_count} | "
                    f"{row[pat]['tier_latency_ms'][tier]:.0f} |"
                )
        details.append("")
        details.append(f"- Workflow ์ธ์šฉ: {row['workflow']['citations']}")
        details.append(f"- Agentic ์ธ์šฉ: {row['agentic']['citations']}")
        details.append("")

    footer = [
        "## ํ•ต์‹ฌ ์ธ์‚ฌ์ดํŠธ",
        "",
        f"1. **์ธ์šฉ ๊นŠ์ด {citation_ratio:.1f}๋ฐฐ** - agentic์€ ๋„๊ตฌ๋ฅผ ์ž์œจ ํ˜ธ์ถœํ•ด ๋‹ค์–‘ํ•œ ์†Œ์Šค(INC/FMEA/SOP/incident DB)๋ฅผ ๊ฒฐํ•ฉ",
        f"2. **ํ˜ธ์ถœ ๋น„์šฉ {cost_ratio:.1f}๋ฐฐ** - LLM ํ˜ธ์ถœ์ด ํ‰๊ท  {wf['llm_calls']:.0f}ํšŒ โ†’ {ag['llm_calls']:.0f}ํšŒ, ์ž…๋ ฅ ํ† ํฐ๋„ {input_ratio:.1f}๋ฐฐ",
        f"3. **Latency {latency_ratio:.1f}๋ฐฐ** - tool calling ๋ฃจํ”„ + synthesis ์ถ”๊ฐ€ ํ˜ธ์ถœ์˜ ์ž์—ฐ์Šค๋Ÿฌ์šด ๋น„์šฉ",
        "4. **agentic๋งŒ์˜ ์ •์„ฑ ์‹ ํ˜ธ**: tool ํ˜ธ์ถœ ํŒจํ„ด ์ž์ฒด๊ฐ€ reasoning trace - ์–ด๋–ค ์ •๋ณด๋ฅผ ์™œ ์ฐพ์•˜๋Š”์ง€ ๊ฐ์‚ฌยท์žฌํ˜„ ๊ฐ€๋Šฅ",
        "",
        "## ์ฑ„ํƒ ๊ฒฐ๋ก ",
        "",
        "**ํ˜„์žฌ ์ฑ„ํƒ: Agentic**",
        "- ์ธ์šฉ ๊นŠ์ดยท๊ทผ๊ฑฐ ๋‹ค์–‘์„ฑ์ด ๊ฒฐ์ •์  - ๋ฐ˜๋„์ฒด fab ๋„๋ฉ”์ธ์—์„  multi-source ๊ทผ๊ฑฐ๊ฐ€ ์•ˆ์ „์„ฑยท์‹ ๋ขฐ์„ฑ ๊ฒฐ์ •",
        f"- ๋น„์šฉ {cost_ratio:.1f}๋ฐฐ ์ฆ๊ฐ€๋Š” ์•Œ๋žŒ๋‹น ${(ag_cost-wf_cost)*1000:.2f}/1000ํšŒ ์ˆ˜์ค€์œผ๋กœ ์‚ฌ์—…์  ์˜ํ–ฅ ๋ฌด์‹œ ๊ฐ€๋Šฅ",
        "- Tool ํ˜ธ์ถœ ๋กœ๊ทธ๊ฐ€ ์ž์ฒด์ ์ธ audit trail์ด ๋˜์–ด production observability์— ์œ ๋ฆฌ",
        "",
        "Latency๊ฐ€ criticalํ•œ ์‹œ๋‚˜๋ฆฌ์˜ค์—์„  Workflow๋กœ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ํ† ๊ธ€ ์ถ”๊ฐ€ ๊ฒ€ํ†  ๊ฐ€๋Šฅ (ํ˜„์žฌ ๋ฏธ๊ตฌํ˜„).",
        "",
    ]

    report_path = OUT_DIR / "results.md"
    report_path.write_text("\n".join(header + details + footer), encoding="utf-8")
    print(f"--- ์ €์žฅ: {OUT_DIR / 'results.md'} ---")
def main():
    """Run the benchmark end to end: collect samples, aggregate, draw charts, write the report."""
    samples = collect_samples()
    print("\n=== ์ง‘๊ณ„ ===")
    summary = aggregate(samples)
    # Echo the aggregated stats of each pattern to the console.
    for pattern in summary:
        print(f" {pattern}: {summary[pattern]}")
    make_charts(summary, samples)
    write_results(samples, summary)


if __name__ == "__main__":
    main()