# Source: hasari-api / tools/regression_test.py
# Commit e327f0d (erdoganpeker): v0.3.0 — multimodal vehicle damage MVP
"""
tests/regression_test.py
Regression test suite: run it on every PR to measure how the new model
changes relative to the old one.

Usage:
    # Save a baseline with the current production model
    python tests/regression_test.py --weights prod_best.pt --save_baseline
    # Test a new model version
    python tests/regression_test.py --weights new_best.pt --compare_baseline
    # Usage in CI
    python tests/regression_test.py --weights $NEW_MODEL --compare_baseline \
        --fail_threshold 0.05
"""
import argparse
import json
import sys
from pathlib import Path
from typing import Dict, List
import yaml
def load_test_cases(yaml_path):
    """Load the regression test-case definitions from a YAML file.

    Args:
        yaml_path: Path of the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load`` for the document. ``main``
        indexes it with ``["cases"]``, so a mapping with a ``cases`` list is
        expected.
    """
    # Decode explicitly as UTF-8 so that non-ASCII text in the case file is
    # read the same way regardless of the platform's default locale encoding.
    with open(yaml_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)
def run_inference(pipeline, image_path):
    """Run the pipeline on a single image and return summary metrics.

    Args:
        pipeline: Object exposing ``analyze(path)``; it is called with
            ``str(image_path)`` and must return a dict-like result.
        image_path: Path of the image to analyze.

    Returns:
        A dict with the keys ``has_damage``, ``damage_count``,
        ``damage_types`` (sorted, de-duplicated), ``cost_min``, ``cost_max``
        and ``most_severe``. Missing values fall back to ``False``, ``0``,
        ``[]``, ``0``, ``0`` and ``None`` respectively.

    Raises:
        KeyError: If an entry of ``damages`` has no ``"type"`` key.
    """
    result = pipeline.analyze(str(image_path))
    summary = result.get("summary") or {}

    damages = result.get("damages")
    if damages is None:
        damages = []

    # Read the cost range once and tolerate a missing, empty or one-element
    # range instead of raising IndexError/TypeError when indexing it.
    cost_range = summary.get("total_cost_range_tl")
    cost_range = [] if cost_range is None else list(cost_range)
    cost_min = cost_range[0] if len(cost_range) > 0 else 0
    cost_max = cost_range[1] if len(cost_range) > 1 else 0

    return {
        "has_damage": summary.get("has_damage", False),
        "damage_count": result.get("damage_count", 0),
        "damage_types": sorted({d["type"] for d in damages}),
        "cost_min": cost_min,
        "cost_max": cost_max,
        "most_severe": summary.get("most_severe"),
    }
def compare_to_expected(actual, expected, tolerance):
    """Compare measured metrics against a case's expectations.

    Args:
        actual: Metrics dict as produced by ``run_inference``.
        expected: Expectation dict of the test case. Only the keys that are
            present are checked: ``has_damage``, ``damage_count`` (with
            optional ``count_tolerance``, default 1), ``must_contain_types``
            and ``cost_range_check`` (``min_below`` / ``max_above``).
        tolerance: Accepted for interface compatibility; not used by any
            check in this function.

    Returns:
        A list of human-readable mismatch messages; empty when every
        requested check passes.
    """
    problems = []

    # 1. Is the damage / no-damage verdict correct?
    if "has_damage" in expected:
        wanted_flag = expected["has_damage"]
        if actual["has_damage"] != wanted_flag:
            problems.append(
                f"has_damage: beklenen={wanted_flag}, gercek={actual['has_damage']}"
            )

    # 2. Is the damage count within the allowed slack?
    if "damage_count" in expected:
        wanted_count = expected["damage_count"]
        slack = expected.get("count_tolerance", 1)
        if abs(actual["damage_count"] - wanted_count) > slack:
            problems.append(
                f"damage_count: beklenen={wanted_count}±{slack}, gercek={actual['damage_count']}"
            )

    # 3. Are all required damage classes present?
    if "must_contain_types" in expected:
        problems.extend(
            f"Beklenen tip eksik: {required}"
            for required in expected["must_contain_types"]
            if required not in actual["damage_types"]
        )

    # 4. Does the estimated cost range satisfy the configured bounds?
    if "cost_range_check" in expected:
        bounds = expected["cost_range_check"]
        if "min_below" in bounds:
            upper_limit = bounds["min_below"]
            if actual["cost_min"] > upper_limit:
                problems.append(
                    f"cost_min cok yuksek: {actual['cost_min']:.0f} > {upper_limit}"
                )
        if "max_above" in bounds:
            lower_limit = bounds["max_above"]
            if actual["cost_max"] < lower_limit:
                problems.append(
                    f"cost_max cok dusuk: {actual['cost_max']:.0f} < {lower_limit}"
                )

    return problems
def main():
    """Run the regression suite from the command line.

    Loads the test cases, runs every case whose image exists through
    ``DamagePipeline``, prints one PASS/FAIL/ERR line per case followed by a
    summary, and optionally saves the run as a baseline and/or compares its
    pass rate with a previously saved baseline.

    Exits with status 1 when the pass rate dropped by more than
    ``--fail_threshold`` relative to the baseline, or when (outside of
    ``--save_baseline`` runs) at least one case failed and the pass rate is
    below ``1 - fail_threshold``. Otherwise returns normally.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--weights", type=str, required=True)
    parser.add_argument("--parts_weights", type=str, default=None)
    parser.add_argument("--severity_weights", type=str, default=None)
    parser.add_argument("--cases", type=str, default="tests/test_cases.yaml")
    parser.add_argument("--cost_table", type=str, default="cost_table.yaml")
    parser.add_argument("--save_baseline", action="store_true",
                        help="Bu kosumun sonucunu baseline olarak kaydet")
    parser.add_argument("--baseline_file", type=str, default="tests/baseline.json")
    parser.add_argument("--compare_baseline", action="store_true",
                        help="Baseline ile karsilastir, regresyonu raporla")
    parser.add_argument("--fail_threshold", type=float, default=0.10,
                        help="Bu oranin uzerinde test fail ederse exit 1")
    args = parser.parse_args()

    # Load the pipeline. The directory above this script's directory is put
    # on sys.path so the project-local ``pipeline`` module can be imported.
    sys.path.insert(0, str(Path(__file__).parent.parent))
    from pipeline import DamagePipeline

    pipeline = DamagePipeline(
        damage_weights=args.weights,
        parts_weights=args.parts_weights,
        severity_weights=args.severity_weights,
        cost_table=args.cost_table,
    )

    cases = load_test_cases(args.cases)
    print(f"\n=== Regression Test: {len(cases['cases'])} case ===\n")

    results = []
    failed = 0
    skipped = 0
    for case in cases["cases"]:
        case_id = case["id"]
        image_path = Path(case["image"])
        if not image_path.exists():
            # Skipped cases are excluded from the pass rate, so count them
            # to make a silently shrinking suite visible in the summary.
            print(f"[ATLA] {case_id}: goruntu bulunamadi: {image_path}")
            skipped += 1
            continue

        # A single broken case must not abort the whole run: any exception
        # is recorded as an ERROR result and counted as a failure.
        try:
            actual = run_inference(pipeline, image_path)
            errors = compare_to_expected(actual, case.get("expected", {}),
                                         tolerance=case.get("tolerance", {}))
        except Exception as e:
            print(f"[ERR ] {case_id}: {e}")
            failed += 1
            results.append({
                "case_id": case_id,
                "status": "ERROR",
                "error": str(e),
            })
            continue

        status = "PASS" if not errors else "FAIL"
        if errors:
            failed += 1
        print(f"[{status}] {case_id}: {case.get('description', '')}")
        for e in errors:
            print(f" {e}")
        results.append({
            "case_id": case_id,
            "status": status,
            "actual": actual,
            "expected": case.get("expected"),
            "errors": errors,
        })

    # Summary. max(total, 1) avoids a division by zero when no case ran.
    total = len(results)
    pass_rate = (total - failed) / max(total, 1)
    print("\n=== Sonuc ===")
    print(f"Toplam: {total}")
    print(f"Gecen: {total - failed}")
    print(f"Kalan: {failed}")
    print(f"Atlanan: {skipped}")
    print(f"Oran: %{pass_rate * 100:.1f}")

    baseline_path = Path(args.baseline_file)

    # Save this run as the baseline.
    if args.save_baseline:
        # parents=True so a nested, not-yet-existing directory also works.
        baseline_path.parent.mkdir(parents=True, exist_ok=True)
        with open(baseline_path, "w", encoding="utf-8") as f:
            json.dump({
                "weights": args.weights,
                "pass_rate": pass_rate,
                "results": results,
            }, f, indent=2)
        print(f"Baseline kaydedildi: {args.baseline_file}")

    # Compare with the stored baseline.
    if args.compare_baseline:
        if baseline_path.exists():
            with open(baseline_path, "r", encoding="utf-8") as f:
                baseline = json.load(f)
            baseline_rate = baseline.get("pass_rate", 0)
            delta = pass_rate - baseline_rate
            print(f"\nBaseline pass_rate: %{baseline_rate * 100:.1f}")
            print(f"Delta: %{delta * 100:+.1f}")
            if delta < -args.fail_threshold:
                print(f"\nREGRESYON: %{abs(delta)*100:.1f} dustuk (esik %{args.fail_threshold*100:.1f})")
                sys.exit(1)
        else:
            # Do not skip silently: tell the user the comparison that was
            # requested did not happen.
            print(f"\n[UYARI] Baseline bulunamadi, karsilastirma atlandi: {args.baseline_file}")

    # Absolute gate for CI: fail when too many cases failed, except when the
    # run is only recording a baseline.
    if failed > 0 and not args.save_baseline and pass_rate < (1 - args.fail_threshold):
        sys.exit(1)


if __name__ == "__main__":
    main()