| |
| """Analyze image quality loss from normalization (without actual processing).""" |
|
|
| import csv |
| from pathlib import Path |
|
|
|
|
def main():
    """Estimate the quality impact of normalizing images to 224x224 for SPAI.

    Reads the filtered-image metadata CSV, computes for every image the
    upscale ratio its smaller side would need to reach 224 px, buckets the
    images by severity, and prints global, per-bucket, and per-category
    statistics plus a final recommendation. Purely analytical: no image
    pixel data is read or modified.

    Reads:  output/quality_filtered/metadata/contextual_images_filtered.csv
            (relative to the current working directory)
    Raises: FileNotFoundError if the CSV is absent.
    """
    print("=" * 70)
    print("ANÁLISIS: IMPACTO DE NORMALIZAR IMÁGENES A 224×224 PARA SPAI")
    print("=" * 70)
    print()

    csv_file = Path("output/quality_filtered/metadata/contextual_images_filtered.csv")
    with open(csv_file) as f:
        rows = list(csv.DictReader(f))

    print(f"IMÁGENES ANALIZADAS: {len(rows)}\n")

    # Buckets keyed by how aggressively the image must be upscaled.
    groups = {
        "downscale_only": [],
        "moderate_upscale": [],
        "significant_upscale": [],
        "heavy_upscale": [],
    }

    quality_scores = []
    upscale_ratios = []  # one entry per scored image; used for the global average

    for row in rows:
        w = int(row.get("detected_width", 0))
        h = int(row.get("detected_height", 0))

        # Rows without usable dimensions cannot be scored — skip them.
        if not w or not h:
            continue

        # The smaller side is the binding constraint when resizing to a square.
        smaller_dim = min(w, h)
        upscale_ratio = 224 / smaller_dim
        upscale_ratios.append(upscale_ratio)

        # 1.0 = downscale only (no loss); below 1.0 detail must be invented.
        quality_score = min(1.0 / upscale_ratio, 1.0)
        quality_scores.append(quality_score)

        img_info = {
            "name": Path(row.get("filtered_path", "")).name,
            "size": f"{w}x{h}",
            "mp": round(w * h / 1e6, 3),
            "upscale_ratio": round(upscale_ratio, 2),
            "quality_score": round(quality_score, 2),
            "category": row.get("category", "?"),
            "original_dimensions": (w, h),
        }

        if upscale_ratio <= 1.0:
            groups["downscale_only"].append(img_info)
        elif upscale_ratio <= 1.5:
            groups["moderate_upscale"].append(img_info)
        elif upscale_ratio <= 2.5:
            groups["significant_upscale"].append(img_info)
        else:
            groups["heavy_upscale"].append(img_info)

    print("CATEGORÍAS DE IMPACTO AL NORMALIZAR A 224×224:\n")

    labels = {
        "downscale_only": "✅ DOWNSCALE ONLY (No quality loss, mejor)",
        "moderate_upscale": "🟢 MODERATE UPSCALE (1.0-1.5x, aceptable)",
        "significant_upscale": "🟡 SIGNIFICANT UPSCALE (1.5-2.5x, pérdida moderada)",
        "heavy_upscale": "🔴 HEAVY UPSCALE (>2.5x, pérdida significativa)",
    }

    for key, label in labels.items():
        count = len(groups[key])
        # Percentage is over ALL rows (including unscorable ones) on purpose:
        # it describes the share of the whole dataset.
        pct = 100 * count / len(rows) if rows else 0
        print(f"{label}")
        print(f" {count} imágenes ({pct:.1f}%)")

        if groups[key]:
            # Show up to three least-affected examples of this bucket.
            examples = sorted(groups[key], key=lambda x: x["upscale_ratio"])[:3]
            for ex in examples:
                print(f" • {ex['size']:15} → 224×224 (ratio: {ex['upscale_ratio']}x, score: {ex['quality_score']})")
        print()

    print("=" * 70)
    print("ESTADÍSTICAS GLOBALES:\n")

    # BUGFIX: average over the images that were actually scored. The old code
    # flattened every bucket with sum(groups.values(), []) (quadratic) and
    # divided by len(rows), which deflated the average whenever rows lacked
    # detected dimensions and were skipped during scoring.
    avg_ratio = sum(upscale_ratios) / len(upscale_ratios) if upscale_ratios else 1
    avg_quality = sum(quality_scores) / len(quality_scores) if quality_scores else 1.0

    print(f"Promedio de upscale ratio: {avg_ratio:.2f}x")
    print(f"Promedio de quality score: {avg_quality:.2f}/1.0")
    print()

    print("=" * 70)
    print("RECOMENDACIÓN:\n")

    heavy_count = len(groups["heavy_upscale"])
    if heavy_count == 0:
        print("✅ TODAS LAS IMÁGENES SON ACEPTABLES para SPAI sin pre-procesamiento")
        print(" La mayoría serán downscaled o moderadamente upscaled → mínima pérdida")
        print(" RECOMENDACIÓN: Usar directamente sin normalizar")
    elif heavy_count <= len(rows) * 0.1:
        print(f"✅ CASI TODAAS LAS IMÁGENES SON ACEPTABLES ({heavy_count} problemáticas)".replace("TODAAS", "TODAS"))
        # BUGFIX: this literal was missing its f-prefix, so the raw text
        # "{heavy_count}" was printed instead of the number.
        print(f" RECOMENDACIÓN: Considerar filtrar solo las {heavy_count} muy pequeñas")
    else:
        print(f"⚠️ ALGUNAS IMÁGENES REQUIEREN UPSCALING ({heavy_count} muy pequeñas)")
        print(" RECOMENDACIÓN: Pre-procesamiento opcional para mejor calidad")

    print()
    print("=" * 70)

    print("\nDISTRIBUCIÓN POR CATEGORÍA:\n")

    # Second pass: aggregate the same upscale ratios per CSV category.
    # Unscorable rows contribute a neutral ratio of 1 here (as before).
    categories = {}
    for row in rows:
        cat = row.get("category", "?")
        w = int(row.get("detected_width", 0))
        h = int(row.get("detected_height", 0))

        smaller_dim = min(w, h) if w and h else 0
        ratio = 224 / smaller_dim if smaller_dim else 1

        if cat not in categories:
            categories[cat] = {"count": 0, "ratios": []}

        categories[cat]["count"] += 1
        categories[cat]["ratios"].append(ratio)

    for cat in sorted(categories.keys()):
        stats = categories[cat]
        avg_cat_ratio = sum(stats["ratios"]) / len(stats["ratios"])
        avg_cat_quality = sum(min(1 / r, 1) for r in stats["ratios"]) / len(stats["ratios"])

        print(f" {cat}:")
        print(f" {stats['count']} imágenes")
        print(f" Avg upscale ratio: {avg_cat_ratio:.2f}x")
        print(f" Avg quality score: {avg_cat_quality:.2f}/1.0")
        print()
|
|
|
|
# Run the analysis only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|
|