#!/usr/bin/env python3
"""Analyze image quality loss from normalization (without actual processing)."""

import csv
from pathlib import Path


def _parse_dimension(value):
    """Return ``value`` as a positive int, or 0 when it is missing or malformed.

    CSV cells arrive as strings (or ``None`` when a row is shorter than the
    header), so anything that does not parse as a positive integer is mapped
    to 0, which callers treat as "dimension unknown".
    """
    try:
        number = int(value)
    except (TypeError, ValueError):
        return 0
    return max(number, 0)


def main(csv_file=None):
    """Print a report on the impact of resizing images to 224x224.

    Reads a metadata CSV with ``detected_width``, ``detected_height`` and
    ``category`` columns. No image is opened: the upscale ratio is computed
    from the recorded dimensions only, as ``224 / min(width, height)``.
    Rows whose dimensions are missing, malformed or non-positive are left out
    of every group and average, and their count is reported.

    Args:
        csv_file: Path to the metadata CSV. When ``None`` the default
            ``output/quality_filtered/metadata/contextual_images_filtered.csv``
            (relative to the current working directory) is used.

    Raises:
        OSError: If the CSV file cannot be opened.
    """
    print("=" * 70)
    print("ANÁLISIS: IMPACTO DE NORMALIZAR IMÁGENES A 224×224 PARA SPAI")
    print("=" * 70)
    print()

    # Load original metadata with dimensions
    if csv_file is None:
        csv_file = "output/quality_filtered/metadata/contextual_images_filtered.csv"
    # newline="" lets the csv module do its own newline handling.
    with open(Path(csv_file), newline="") as f:
        rows = list(csv.DictReader(f))

    # Group by upscaling factor
    groups = {
        "downscale_only": [],  # Downscale only (good)
        "moderate_upscale": [],  # Up to 1.5x (acceptable)
        "significant_upscale": [],  # 1.5-2.5x (some quality loss)
        "heavy_upscale": [],  # >2.5x (significant loss)
    }

    ratios = []  # unrounded upscale ratios of every analyzable image
    quality_scores = []
    categories = {}  # category -> {"ratios": [...], "scores": [...]}
    skipped = 0

    for row in rows:
        w = _parse_dimension(row.get("detected_width"))
        h = _parse_dimension(row.get("detected_height"))

        if not w or not h:
            skipped += 1
            continue

        # For square fit in 224x224, use the smaller dimension
        upscale_ratio = 224 / min(w, h)

        # Quality score (0-1, higher better)
        quality_score = min(1.0 / upscale_ratio, 1.0)

        ratios.append(upscale_ratio)
        quality_scores.append(quality_score)

        # Empty or missing category cells are grouped under "?" so the
        # category names stay sortable strings.
        cat = row.get("category") or "?"
        cat_stats = categories.setdefault(cat, {"ratios": [], "scores": []})
        cat_stats["ratios"].append(upscale_ratio)
        cat_stats["scores"].append(quality_score)

        img_info = {
            "size": f"{w}x{h}",
            "upscale_ratio": round(upscale_ratio, 2),
            "quality_score": round(quality_score, 2),
        }

        if upscale_ratio <= 1.0:
            groups["downscale_only"].append(img_info)
        elif upscale_ratio <= 1.5:
            groups["moderate_upscale"].append(img_info)
        elif upscale_ratio <= 2.5:
            groups["significant_upscale"].append(img_info)
        else:
            groups["heavy_upscale"].append(img_info)

    print(f"IMÁGENES ANALIZADAS: {len(rows)}")
    if skipped:
        # Explains why the group percentages below do not add up to 100%.
        print(f"   ({skipped} omitidas: dimensiones ausentes o inválidas)")
    print()

    # Print grouped analysis
    print("CATEGORÍAS DE IMPACTO AL NORMALIZAR A 224×224:\n")

    labels = {
        "downscale_only": "✅ DOWNSCALE ONLY (No quality loss, mejor)",
        "moderate_upscale": "🟢 MODERATE UPSCALE (1.0-1.5x, aceptable)",
        "significant_upscale": "🟡 SIGNIFICANT UPSCALE (1.5-2.5x, pérdida moderada)",
        "heavy_upscale": "🔴 HEAVY UPSCALE (>2.5x, pérdida significativa)",
    }

    for key, label in labels.items():
        members = groups[key]
        count = len(members)
        pct = 100 * count / len(rows) if rows else 0
        print(f"{label}")
        print(f"   {count} imágenes ({pct:.1f}%)")

        # Show up to three examples, smallest upscale ratio first
        for ex in sorted(members, key=lambda x: x["upscale_ratio"])[:3]:
            print(f"     • {ex['size']:15} → 224×224 (ratio: {ex['upscale_ratio']}x, score: {ex['quality_score']})")
        print()

    # Summary statistics
    print("=" * 70)
    print("ESTADÍSTICAS GLOBALES:\n")

    # Average only over images that were actually analyzed; dividing by
    # len(rows) would deflate the mean whenever rows were skipped.
    avg_ratio = sum(ratios) / len(ratios) if ratios else 1.0
    avg_quality = sum(quality_scores) / len(quality_scores) if quality_scores else 1.0

    print(f"Promedio de upscale ratio: {avg_ratio:.2f}x")
    print(f"Promedio de quality score: {avg_quality:.2f}/1.0")
    print()

    # Recommendation
    print("=" * 70)
    print("RECOMENDACIÓN:\n")

    heavy_count = len(groups["heavy_upscale"])
    if heavy_count == 0:
        print("✅ TODAS LAS IMÁGENES SON ACEPTABLES para SPAI sin pre-procesamiento")
        print("   La mayoría serán downscaled o moderadamente upscaled → mínima pérdida")
        print("   RECOMENDACIÓN: Usar directamente sin normalizar")
    elif heavy_count <= len(rows) * 0.1:
        print(f"✅ CASI TODAS LAS IMÁGENES SON ACEPTABLES ({heavy_count} problemáticas)")
        print(f"   RECOMENDACIÓN: Considerar filtrar solo las {heavy_count} muy pequeñas")
    else:
        print(f"⚠️  ALGUNAS IMÁGENES REQUIEREN UPSCALING ({heavy_count} muy pequeñas)")
        print("   RECOMENDACIÓN: Pre-procesamiento opcional para mejor calidad")

    print()
    print("=" * 70)

    # Distribution by category (same analyzable images as the groups above)
    print("\nDISTRIBUCIÓN POR CATEGORÍA:\n")

    for cat in sorted(categories):
        cat_ratios = categories[cat]["ratios"]
        cat_scores = categories[cat]["scores"]
        avg_cat_ratio = sum(cat_ratios) / len(cat_ratios)
        avg_cat_quality = sum(cat_scores) / len(cat_scores)

        print(f"  {cat}:")
        print(f"    {len(cat_ratios)} imágenes")
        print(f"    Avg upscale ratio: {avg_cat_ratio:.2f}x")
        print(f"    Avg quality score: {avg_cat_quality:.2f}/1.0")
        print()


# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()