davidtran999 committed on
Commit
0be9fea
·
verified ·
1 Parent(s): 9030829

Upload backend/scripts/export_intent_backlog.py with huggingface_hub

Browse files
backend/scripts/export_intent_backlog.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Parse backend/logs/intent/low_confidence.csv and export a Markdown backlog.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import argparse
8
+ import csv
9
+ from collections import defaultdict
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+
13
+
14
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the backlog exporter.

    Returns:
        A namespace whose ``output_date`` attribute holds the value given
        to ``--output-date``. When the option is omitted it defaults to
        the current UTC date formatted as ``YYYY-MM-DD``.
    """
    # Imported locally because only ``datetime`` is imported at file level.
    from datetime import timezone

    parser = argparse.ArgumentParser(description="Export low-confidence intent queries.")
    parser.add_argument(
        "--output-date",
        # ``datetime.utcnow()`` is deprecated since Python 3.12; a
        # timezone-aware "now" in UTC formats to the same calendar date.
        default=datetime.now(timezone.utc).strftime("%Y-%m-%d"),
        help="Ngày (YYYY-MM-DD) dùng cho thư mục báo cáo.",
    )
    return parser.parse_args()
22
+
23
+
24
def load_backlog(log_path: Path) -> list[dict[str, str]]:
    """Read the low-confidence CSV log into a list of row dictionaries.

    Args:
        log_path: Location of the CSV file. Its first line is used as the
            header that names the columns.

    Returns:
        One dictionary per data row, keyed by the header names, in file
        order. An empty list is returned when ``log_path`` does not exist.
    """
    if not log_path.exists():
        return []
    # newline="" is what the csv module requires of its input file: without
    # it, universal-newline translation rewrites "\r" / "\r\n" that appear
    # inside quoted fields before the parser ever sees them.
    with log_path.open("r", encoding="utf-8", newline="") as fp:
        return list(csv.DictReader(fp))
33
+
34
+
35
def render_markdown(
    rows: list[dict[str, str]], *, max_samples_per_intent: int = 50
) -> str:
    """Render the backlog rows as a Markdown report grouped by intent.

    The report starts with a generated-file marker, a title and the total
    row count, followed by one section per intent (sorted by intent name).
    Each section contains a table with the ``timestamp``, ``confidence``,
    ``route`` and ``query`` values of its rows.

    Args:
        rows: Row dictionaries; the keys ``intent``, ``timestamp``,
            ``confidence``, ``route`` and ``query`` are read. Missing keys
            or ``None`` values are rendered as empty cells, and a missing
            or ``None`` intent is grouped under ``unknown``.
        max_samples_per_intent: Maximum number of table rows emitted per
            intent section. The section heading still reports the full
            number of samples for that intent.

    Returns:
        The Markdown document, terminated by exactly one newline.
    """
    lines = [
        "<!-- Auto-generated by export_intent_backlog.py -->",
        "# Backlog truy vấn low-confidence",
        "",
        f"Tổng số mẫu: {len(rows)}",
        "",
    ]

    by_intent: dict[str, list[dict[str, str]]] = defaultdict(list)
    for row in rows:
        intent = row.get("intent")
        # csv.DictReader fills absent columns of short rows with None; a
        # None key would also make the sorted() call below fail.
        by_intent["unknown" if intent is None else intent].append(row)

    for intent in sorted(by_intent):
        samples = by_intent[intent]
        lines.append(f"## Intent: {intent} ({len(samples)} mẫu)")
        lines.append("")
        lines.append("| Thời gian | Confidence | Route | Query |")
        lines.append("| --- | --- | --- | --- |")
        for sample in samples[:max_samples_per_intent]:
            lines.append(
                f"| {_table_cell(sample.get('timestamp'))} "
                f"| {_table_cell(sample.get('confidence'))} "
                f"| {_table_cell(sample.get('route'))} "
                f"| {_table_cell(sample.get('query'), strip=True)} |"
            )
        lines.append("")

    if not by_intent:
        lines.append("_Chưa có dữ liệu._")
    return "\n".join(lines).strip() + "\n"


def _table_cell(value: object, *, strip: bool = False) -> str:
    """Return ``value`` as text that is safe inside one Markdown table cell."""
    text = "" if value is None else str(value)
    # A table row must stay on one physical line, so line breaks become spaces.
    text = " ".join(text.splitlines())
    if strip:
        text = text.strip()
    # An unescaped pipe would be read as a column separator.
    return text.replace("|", "\\|")
61
+
62
+
63
def main() -> None:
    """Export the low-confidence intent log as a Markdown backlog file.

    Reads ``backend/logs/intent/low_confidence.csv`` under the repository
    root, renders it as Markdown and writes the result to
    ``tài nguyên/báo cáo/<output-date>/backend/intent_backlog.md`` (the
    directory is created if needed), then prints a one-line summary.
    """
    # The repository root is taken to be the third ancestor of this file;
    # presumably the script lives at <root>/backend/scripts/ (verify if moved).
    project_root = Path(__file__).resolve().parents[2]
    options = parse_args()

    entries = load_backlog(
        project_root.joinpath("backend", "logs", "intent", "low_confidence.csv")
    )

    report_dir = project_root.joinpath(
        "tài nguyên", "báo cáo", options.output_date, "backend"
    )
    report_dir.mkdir(parents=True, exist_ok=True)

    report_file = report_dir / "intent_backlog.md"
    report_file.write_text(render_markdown(entries), encoding="utf-8")
    print(f"✅ Wrote {len(entries)} entries to {report_file}")


# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
79
+