File size: 2,735 Bytes
ec4da21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import csv
import json
import math
from pathlib import Path
from statistics import mean
from typing import Dict, List, Any

# Directory containing the scores CSV that is read and the JSON summary that is written.
RESULTS_DIR = Path(r"E:\Practise\data\results")
RESULTS_CSV = RESULTS_DIR.joinpath("scores.csv")
SUMMARY_JSON = RESULTS_DIR.joinpath("metrics_summary.json")

# CSV columns for which build_metrics reports mean / min / max.
CRITERIA = ["fonts", "colors", "composition", "ai_generated_score", "quality"]


def read_csv(path: Path) -> List[Dict[str, Any]]:
    """Load every data row of the CSV at *path* as a dict keyed by header name.

    Returns an empty list when *path* does not exist. The file is decoded
    as ``utf-8-sig``, so a leading byte-order mark is dropped.
    """
    if path.exists():
        # newline="" hands line-ending handling over to the csv module.
        with path.open("r", encoding="utf-8-sig", newline="") as handle:
            return [record for record in csv.DictReader(handle)]
    return []


def to_int(value: Any, default: int = 0) -> int:
    """Convert *value* to ``int``, returning *default* when it cannot be converted.

    Anything ``int()`` accepts is converted as before (so a float such as
    ``3.7`` still truncates to ``3``). In addition, text holding an integral
    float such as ``"8.0"`` or ``"1e3"`` is accepted, so a whole-number score
    written in float notation is no longer replaced by *default*. Text with a
    fractional part (``"8.5"``), non-numeric text, ``None``, NaN and
    infinities all yield *default*.
    """
    try:
        return int(value)
    except (TypeError, OverflowError):
        # Wrong type (e.g. None) or an infinite float: nothing more to try.
        return default
    except ValueError:
        # int() rejects float notation in strings; fall through and retry.
        pass

    try:
        as_float = float(value)
    except (TypeError, ValueError, OverflowError):
        return default
    # is_integer() is False for NaN and infinities, so those also get default.
    return int(as_float) if as_float.is_integer() else default


def to_float(value: Any, default: float = 0.0) -> float:
    """Convert *value* to a finite ``float``, returning *default* otherwise.

    *default* is returned when ``float()`` rejects the value (wrong type,
    non-numeric text, an integer too large for a float) and also when the
    parsed result is NaN or infinite. A non-finite value would otherwise
    propagate through any mean computed from it and be serialised by
    ``json.dump`` as a ``NaN``/``Infinity`` token, which is not valid JSON.
    """
    try:
        parsed = float(value)
    except (TypeError, ValueError, OverflowError):
        return default
    return parsed if math.isfinite(parsed) else default


def to_bool(value: Any) -> bool:
    """Return True only when the text form of *value* is "true".

    The comparison ignores letter case and surrounding whitespace; every
    other input, including ``None``, yields False.
    """
    normalized = str(value).strip().lower()
    return normalized == "true"


def build_metrics(rows: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Aggregate score rows into a summary dictionary.

    Rows are split on their ``valid`` flag; valid rows are split again on
    ``content_present``. The result holds the row counts, the share of
    invalid rows, the mean ``confidence`` of valid rows, a count of valid
    rows per label, and mean/min/max of each CRITERIA column over the valid
    rows that have content. ``criteria`` is left empty when there are no
    such rows. Ratios and means are rounded to four decimal places.
    """
    valid_rows: List[Dict[str, Any]] = []
    invalid_rows: List[Dict[str, Any]] = []
    for row in rows:
        bucket = valid_rows if to_bool(row.get("valid")) else invalid_rows
        bucket.append(row)

    content_rows: List[Dict[str, Any]] = []
    blank_rows: List[Dict[str, Any]] = []
    for row in valid_rows:
        bucket = content_rows if to_bool(row.get("content_present")) else blank_rows
        bucket.append(row)

    row_count = len(rows)
    if row_count:
        invalid_ratio = round(len(invalid_rows) / row_count, 4)
    else:
        invalid_ratio = 0.0

    if valid_rows:
        confidences = [to_float(row.get("confidence")) for row in valid_rows]
        confidence_mean = round(mean(confidences), 4)
    else:
        confidence_mean = 0.0

    # Only exact matches of these four label strings are counted.
    label_counts = {
        name: sum(1 for row in valid_rows if row.get("label") == name)
        for name in ("good", "medium", "bad", "uncertain")
    }

    criteria_stats: Dict[str, Any] = {}
    if content_rows:
        for criterion in CRITERIA:
            scores = [to_int(row.get(criterion)) for row in content_rows]
            criteria_stats[criterion] = {
                "mean": round(mean(scores), 4),
                "min": min(scores),
                "max": max(scores),
            }

    return {
        "total_rows": row_count,
        "valid_rows": len(valid_rows),
        "invalid_rows": len(invalid_rows),
        "invalid_ratio": invalid_ratio,
        "content_present_rows": len(content_rows),
        "blank_rows": len(blank_rows),
        "confidence_mean": confidence_mean,
        "labels": label_counts,
        "criteria": criteria_stats,
    }


def main() -> None:
    """Read RESULTS_CSV, compute the metrics and write them to SUMMARY_JSON.

    The summary is written as UTF-8 JSON with two-space indentation and
    non-ASCII characters left unescaped.
    """
    # Compute first, so the output file is only opened once the summary exists.
    summary = build_metrics(read_csv(RESULTS_CSV))

    with SUMMARY_JSON.open("w", encoding="utf-8") as handle:
        json.dump(summary, handle, ensure_ascii=False, indent=2)


if __name__ == "__main__":
    main()