hue-portal-backend-v2 / backend /scripts /export_intent_backlog.py
davidtran999's picture
Upload backend/scripts/export_intent_backlog.py with huggingface_hub
0be9fea verified
raw
history blame
2.56 kB
#!/usr/bin/env python3
"""
Parse backend/logs/intent/low_confidence.csv and export a Markdown backlog.
"""
from __future__ import annotations
import argparse
import csv
from collections import defaultdict
from datetime import datetime
from pathlib import Path
def _iso_date(value: str) -> str:
    """Return *value* unchanged if it parses as a ``YYYY-MM-DD`` date.

    Intended as an argparse ``type=`` callback.

    Raises:
        argparse.ArgumentTypeError: if *value* is not a valid calendar date
            in ``YYYY-MM-DD`` form (argparse turns this into a usage error).
    """
    try:
        datetime.strptime(value, "%Y-%m-%d")
    except ValueError as exc:
        raise argparse.ArgumentTypeError(
            f"invalid date {value!r}; expected YYYY-MM-DD"
        ) from exc
    return value


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the export script.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        A namespace with a single attribute ``output_date``: a ``YYYY-MM-DD``
        string that defaults to the current date in UTC.

    Raises:
        SystemExit: raised by argparse on ``--help`` or on invalid arguments,
            including an ``--output-date`` that is not a valid date.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list being changed.
    from datetime import timezone

    parser = argparse.ArgumentParser(description="Export low-confidence intent queries.")
    parser.add_argument(
        "--output-date",
        # The value becomes a directory name in the report path, so reject
        # anything that is not a plain date (e.g. "../..").
        type=_iso_date,
        # datetime.utcnow() is deprecated; an aware "now" in UTC formats to
        # the same date string.
        default=datetime.now(timezone.utc).strftime("%Y-%m-%d"),
        help="Ngày (YYYY-MM-DD) dùng cho thư mục báo cáo.",
    )
    return parser.parse_args(argv)
def load_backlog(log_path: Path) -> list[dict[str, str]]:
    """Read the low-confidence CSV log into a list of row dictionaries.

    Args:
        log_path: Path of the CSV file; its first line is used as the header.

    Returns:
        One dict per data row, keyed by the header names, in file order.
        An empty list if the file does not exist.
    """
    try:
        # newline="" lets the csv module do its own line-ending handling, so
        # quoted fields containing line breaks are read back unchanged.
        # "utf-8-sig" strips an optional BOM, which would otherwise end up
        # inside the first column name; BOM-less UTF-8 decodes the same.
        fp = log_path.open("r", encoding="utf-8-sig", newline="")
    except FileNotFoundError:
        # A missing log simply means nothing has been recorded yet.
        return []
    with fp:
        return list(csv.DictReader(fp))
def _md_cell(value: object) -> str:
    """Return *value* as text that is safe inside one Markdown table cell.

    ``None`` becomes an empty string, line breaks become single spaces and
    pipe characters are backslash-escaped.
    """
    if value is None:
        return ""
    # A raw line break would end the table row and a raw "|" would start a
    # new column, so neutralise both.
    return " ".join(str(value).splitlines()).replace("|", "\\|")


def render_markdown(
    rows: list[dict[str, str]],
    *,
    max_samples: int | None = 50,
) -> str:
    """Render the backlog rows as a Markdown report grouped by intent.

    Args:
        rows: Row dictionaries; the keys ``intent``, ``timestamp``,
            ``confidence``, ``route`` and ``query`` are read. Missing keys and
            ``None`` values (e.g. from short CSV rows) are tolerated.
        max_samples: Maximum number of table rows emitted per intent; ``None``
            means no limit. The section heading always shows the full count.

    Returns:
        The Markdown document, terminated by exactly one newline. Intents are
        listed in sorted order; rows with a missing or empty intent are
        grouped under ``unknown``.

    Raises:
        ValueError: if *max_samples* is negative.
    """
    if max_samples is not None and max_samples < 0:
        raise ValueError("max_samples must be non-negative or None")

    lines = [
        "<!-- Auto-generated by export_intent_backlog.py -->",
        "# Backlog truy vấn low-confidence",
        "",
        f"Tổng số mẫu: {len(rows)}",
        "",
    ]

    by_intent: dict[str, list[dict[str, str]]] = defaultdict(list)
    for row in rows:
        # "or" also covers a key that is present but None/empty; a None key
        # would make sorted() below fail when mixed with string intents.
        by_intent[row.get("intent") or "unknown"].append(row)

    for intent in sorted(by_intent):
        samples = by_intent[intent]
        lines.append(f"## Intent: {intent} ({len(samples)} mẫu)")
        lines.append("")
        lines.append("| Thời gian | Confidence | Route | Query |")
        lines.append("| --- | --- | --- | --- |")
        for sample in samples[:max_samples]:
            lines.append(
                f"| {_md_cell(sample.get('timestamp'))} | {_md_cell(sample.get('confidence'))} "
                f"| {_md_cell(sample.get('route'))} | {_md_cell(sample.get('query')).strip()} |"
            )
        lines.append("")

    if not by_intent:
        lines.append("_Chưa có dữ liệu._")

    # Drop the trailing blank line(s) and end with a single newline.
    return "\n".join(lines).strip() + "\n"
def main() -> None:
    """Export the low-confidence intent log to a dated Markdown report.

    Reads ``backend/logs/intent/low_confidence.csv`` under the repository
    root, renders it as Markdown and writes the result to
    ``tài nguyên/báo cáo/<output-date>/backend/intent_backlog.md``, creating
    the directory if needed. Prints a one-line summary when done.
    """
    # The repository root is the third ancestor directory of this file.
    project_root = Path(__file__).resolve().parents[2]
    options = parse_args()

    csv_log = project_root.joinpath("backend", "logs", "intent", "low_confidence.csv")
    entries = load_backlog(csv_log)

    report_dir = project_root.joinpath(
        "tài nguyên", "báo cáo", options.output_date, "backend"
    )
    report_dir.mkdir(parents=True, exist_ok=True)
    report_file = report_dir / "intent_backlog.md"

    report_file.write_text(render_markdown(entries), encoding="utf-8")
    print(f"✅ Wrote {len(entries)} entries to {report_file}")
# Script entry point: run the export only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()