"""Summarise the scoring results CSV into a JSON metrics file."""

import csv
import json
from pathlib import Path
from statistics import mean
from typing import Any, Dict, List

RESULTS_DIR = Path(r"E:\Practise\data\results")
RESULTS_CSV = RESULTS_DIR / "scores.csv"
SUMMARY_JSON = RESULTS_DIR / "metrics_summary.json"

# Per-row score columns that are aggregated (mean/min/max) in the summary.
CRITERIA = [
    "fonts",
    "colors",
    "composition",
    "ai_generated_score",
    "quality",
]

# Label values that are counted in the summary, in output order.
_LABELS = ("good", "medium", "bad", "uncertain")


def read_csv(path: Path) -> List[Dict[str, Any]]:
    """Load every row of the CSV at *path* as a dict keyed by column name.

    Returns an empty list when the file does not exist. The file is decoded
    as ``utf-8-sig``, so a leading byte-order mark is dropped.
    """
    if not path.exists():
        return []
    with open(path, "r", encoding="utf-8-sig", newline="") as f:
        return list(csv.DictReader(f))


def to_int(value: Any, default: int = 0) -> int:
    """Return ``int(value)``, or *default* when the conversion fails.

    Note that decimal strings such as ``"3.7"`` are not valid ``int()``
    input and therefore yield *default*.
    """
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / unsupported type; ValueError: unparseable text
        # or NaN; OverflowError: infinite float.
        return default


def to_float(value: Any, default: float = 0.0) -> float:
    """Return ``float(value)``, or *default* when the conversion fails."""
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        return default


def to_bool(value: Any) -> bool:
    """Return True only when *value*, as text, is "true" (any case, padded or not)."""
    return str(value).strip().lower() == "true"


def build_metrics(rows: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Aggregate result rows into a summary dictionary.

    Each row is read through the keys ``"valid"``, ``"content_present"``,
    ``"confidence"``, ``"label"`` and every name in ``CRITERIA``.

    Args:
        rows: Row dictionaries, e.g. as produced by ``read_csv``.

    Returns:
        A dict with the row counts, the invalid-row ratio, the mean
        confidence over valid rows, per-label counts over valid rows, and
        under ``"criteria"`` the mean/min/max of each criterion over valid
        rows whose content is present. ``"criteria"`` is empty when there
        are no such rows. Ratios and means are rounded to 4 decimals.
        Unparseable confidence/criterion values count as 0.
    """
    total = len(rows)
    valid_rows = [r for r in rows if to_bool(r.get("valid"))]
    content_rows = [r for r in valid_rows if to_bool(r.get("content_present"))]

    # Each filter above splits its input in two, so the complementary
    # counts follow by subtraction.
    n_valid = len(valid_rows)
    n_invalid = total - n_valid
    n_content = len(content_rows)

    if valid_rows:
        confidence_mean = round(
            mean(to_float(r.get("confidence")) for r in valid_rows), 4
        )
    else:
        confidence_mean = 0.0

    out: Dict[str, Any] = {
        "total_rows": total,
        "valid_rows": n_valid,
        "invalid_rows": n_invalid,
        "invalid_ratio": round(n_invalid / total, 4) if total else 0.0,
        "content_present_rows": n_content,
        "blank_rows": n_valid - n_content,
        "confidence_mean": confidence_mean,
        "labels": {
            name: sum(1 for r in valid_rows if r.get("label") == name)
            for name in _LABELS
        },
        "criteria": {},
    }

    if content_rows:
        for criterion in CRITERIA:
            values = [to_int(r.get(criterion)) for r in content_rows]
            out["criteria"][criterion] = {
                "mean": round(mean(values), 4),
                "min": min(values),
                "max": max(values),
            }

    return out


def main() -> None:
    """Read ``RESULTS_CSV``, build the metrics and write them to ``SUMMARY_JSON``."""
    rows = read_csv(RESULTS_CSV)
    metrics = build_metrics(rows)
    # read_csv tolerates a missing results file, so the results directory
    # may not exist yet; create it so the write below cannot fail on that.
    SUMMARY_JSON.parent.mkdir(parents=True, exist_ok=True)
    with open(SUMMARY_JSON, "w", encoding="utf-8") as f:
        json.dump(metrics, f, ensure_ascii=False, indent=2)


if __name__ == "__main__":
    main()