Spaces:
Sleeping
Sleeping
| """ | |
| tests/regression_test.py | |
| Regression test suite - her PR'da koc, yeni modelin eskine gore degisimi olc. | |
| Calistirma: | |
| # Mevcut prod modeli ile baseline kaydet | |
| python tests/regression_test.py --weights prod_best.pt --save_baseline | |
| # Yeni model versiyonunu test et | |
| python tests/regression_test.py --weights new_best.pt --compare_baseline | |
| # CI'da kullanim | |
| python tests/regression_test.py --weights $NEW_MODEL --compare_baseline \ | |
| --fail_threshold 0.05 | |
| """ | |
| import argparse | |
| import json | |
| import sys | |
| from pathlib import Path | |
| from typing import Dict, List | |
| import yaml | |
def load_test_cases(yaml_path):
    """Load the regression test-case definitions from a YAML file.

    Args:
        yaml_path: Path to the test-case YAML file (anything ``open`` accepts).

    Returns:
        Whatever ``yaml.safe_load`` produces for the document. ``main`` in this
        file expects a mapping with a ``"cases"`` list.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(yaml_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)
def run_inference(pipeline, image_path):
    """Run the pipeline on a single image and return its summary metrics.

    Args:
        pipeline: Object exposing ``analyze(path)``, called with the image
            path as a string and expected to return a dict.
        image_path: Location of the image; converted with ``str()`` before
            being handed to the pipeline.

    Returns:
        A dict with the keys ``has_damage``, ``damage_count``,
        ``damage_types`` (sorted, de-duplicated), ``cost_min``, ``cost_max``
        and ``most_severe``. Missing values fall back to ``False``, ``0``,
        ``[]``, ``0``, ``0`` and ``None`` respectively.
    """
    result = pipeline.analyze(str(image_path))

    # A key that is present but set to None is treated like a missing key.
    summary = result.get("summary")
    if summary is None:
        summary = {}
    damages = result.get("damages")
    if damages is None:
        damages = []

    # Each bound falls back to 0 on its own, so an absent, empty or
    # one-element range no longer raises.
    cost_range = summary.get("total_cost_range_tl")
    if cost_range is None:
        cost_range = ()
    cost_min = cost_range[0] if len(cost_range) >= 1 else 0
    cost_max = cost_range[1] if len(cost_range) >= 2 else 0

    return {
        "has_damage": summary.get("has_damage", False),
        "damage_count": result.get("damage_count", 0),
        "damage_types": sorted({d["type"] for d in damages}),
        "cost_min": cost_min,
        "cost_max": cost_max,
        "most_severe": summary.get("most_severe"),
    }
def compare_to_expected(actual, expected, tolerance):
    """Compare inference metrics against a case's expectations.

    Args:
        actual: Metrics dict as produced by ``run_inference`` (reads
            ``has_damage``, ``damage_count``, ``damage_types``, ``cost_min``
            and ``cost_max``).
        expected: Expectation mapping for the case, or ``None``. Recognised
            keys: ``has_damage``, ``damage_count`` (with optional
            ``count_tolerance``, default 1), ``must_contain_types`` and
            ``cost_range_check`` (with optional ``min_below`` /
            ``max_above``). Keys that are absent are not checked.
        tolerance: Accepted for interface compatibility; this function does
            not consult it. The count tolerance comes from
            ``expected["count_tolerance"]``.

    Returns:
        A list of human-readable error strings; empty when every check passes.
    """
    # A case may carry an empty expectation (None); treat it as "nothing to
    # check" instead of failing on the membership tests below.
    expected = expected or {}
    errors = []

    # 1. Is the damage / no-damage flag correct?
    if "has_damage" in expected and actual["has_damage"] != expected["has_damage"]:
        errors.append(f"has_damage: beklenen={expected['has_damage']}, gercek={actual['has_damage']}")

    # 2. Is the damage count within tolerance?
    if "damage_count" in expected:
        count_tolerance = expected.get("count_tolerance", 1)
        diff = abs(actual["damage_count"] - expected["damage_count"])
        if diff > count_tolerance:
            errors.append(f"damage_count: beklenen={expected['damage_count']}±{count_tolerance}, gercek={actual['damage_count']}")

    # 3. Are all required damage types present?
    for t in expected.get("must_contain_types") or ():
        if t not in actual["damage_types"]:
            errors.append(f"Beklenen tip eksik: {t}")

    # 4. Does the cost range bracket the given bounds?
    cr = expected.get("cost_range_check") or {}
    if "min_below" in cr and actual["cost_min"] > cr["min_below"]:
        errors.append(f"cost_min cok yuksek: {actual['cost_min']:.0f} > {cr['min_below']}")
    if "max_above" in cr and actual["cost_max"] < cr["max_above"]:
        errors.append(f"cost_max cok dusuk: {actual['cost_max']:.0f} < {cr['max_above']}")

    return errors
def main():
    """Command-line entry point for the regression suite.

    Parses the CLI options, builds a ``DamagePipeline``, runs every case from
    the cases file whose image exists, prints a per-case and overall report,
    and optionally writes or compares against a JSON baseline.

    Exits with status 1 when:
      * ``--compare_baseline`` is set, the baseline file exists, and the pass
        rate dropped by more than ``--fail_threshold``; or
      * at least one case failed, ``--save_baseline`` is not set, and the
        pass rate is below ``1 - fail_threshold``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--weights", type=str, required=True)
    parser.add_argument("--parts_weights", type=str, default=None)
    parser.add_argument("--severity_weights", type=str, default=None)
    parser.add_argument("--cases", type=str, default="tests/test_cases.yaml")
    parser.add_argument("--cost_table", type=str, default="cost_table.yaml")
    parser.add_argument("--save_baseline", action="store_true",
                        help="Bu kosumun sonucunu baseline olarak kaydet")
    parser.add_argument("--baseline_file", type=str, default="tests/baseline.json")
    parser.add_argument("--compare_baseline", action="store_true",
                        help="Baseline ile karsilastir, regresyonu raporla")
    parser.add_argument("--fail_threshold", type=float, default=0.10,
                        help="Bu oranin uzerinde test fail ederse exit 1")
    args = parser.parse_args()

    # Load the pipeline. The parent of this file's directory is put on
    # sys.path first so the project-local `pipeline` module can be imported.
    sys.path.insert(0, str(Path(__file__).parent.parent))
    from pipeline import DamagePipeline

    pipeline = DamagePipeline(
        damage_weights=args.weights,
        parts_weights=args.parts_weights,
        severity_weights=args.severity_weights,
        cost_table=args.cost_table,
    )

    cases = load_test_cases(args.cases)
    print(f"\n=== Regression Test: {len(cases['cases'])} case ===\n")

    results = []
    failed = 0
    for case in cases["cases"]:
        case_id = case["id"]
        image_path = Path(case["image"])
        if not image_path.exists():
            # Skipped cases are reported but count neither as pass nor fail.
            print(f"[ATLA] {case_id}: goruntu bulunamadi: {image_path}")
            continue
        try:
            actual = run_inference(pipeline, image_path)
            errors = compare_to_expected(actual, case.get("expected", {}),
                                         tolerance=case.get("tolerance", {}))
            status = "PASS" if not errors else "FAIL"
            if errors:
                failed += 1
            print(f"[{status}] {case_id}: {case.get('description', '')}")
            for message in errors:
                print(f" {message}")
            results.append({
                "case_id": case_id,
                "status": status,
                "actual": actual,
                "expected": case.get("expected"),
                "errors": errors,
            })
        except Exception as e:
            # One crashing case must not abort the run; record it as a
            # failure and continue with the remaining cases.
            print(f"[ERR ] {case_id}: {e}")
            failed += 1
            results.append({
                "case_id": case_id,
                "status": "ERROR",
                "error": str(e),
            })

    # Summary
    total = len(results)
    pass_rate = (total - failed) / max(total, 1)  # max() guards against 0 cases
    print("\n=== Sonuc ===")
    print(f"Toplam: {total}")
    print(f"Gecen: {total - failed}")
    print(f"Kalan: {failed}")
    print(f"Oran: %{pass_rate * 100:.1f}")

    # Save baseline
    if args.save_baseline:
        # parents=True so a nested baseline directory can be created too.
        Path(args.baseline_file).parent.mkdir(parents=True, exist_ok=True)
        with open(args.baseline_file, "w", encoding="utf-8") as f:
            json.dump({
                "weights": args.weights,
                "pass_rate": pass_rate,
                "results": results,
            }, f, indent=2)
        print(f"Baseline kaydedildi: {args.baseline_file}")

    # Compare against baseline
    if args.compare_baseline:
        if Path(args.baseline_file).exists():
            with open(args.baseline_file, "r", encoding="utf-8") as f:
                baseline = json.load(f)
            baseline_rate = baseline.get("pass_rate", 0)
            delta = pass_rate - baseline_rate
            print(f"\nBaseline pass_rate: %{baseline_rate * 100:.1f}")
            print(f"Delta: %{delta * 100:+.1f}")
            if delta < -args.fail_threshold:
                print(f"\nREGRESYON: %{abs(delta)*100:.1f} dustuk (esik %{args.fail_threshold*100:.1f})")
                sys.exit(1)
        else:
            # Make the skipped comparison visible instead of passing silently.
            print(f"[UYARI] Baseline bulunamadi, karsilastirma atlandi: {args.baseline_file}")

    if failed > 0 and not args.save_baseline:
        # Fail the CI run when the absolute pass rate is too low.
        if pass_rate < (1 - args.fail_threshold):
            sys.exit(1)


if __name__ == "__main__":
    main()