Spaces:

VOIDER
/

image-evaluation-tool

Runtime error

App Files Files Community

VOIDER committed on Jul 19, 2025

Commit

4cc53ed

verified ·

1 Parent(s): 995f137

Delete scoring.pyutils

Browse files

Files changed (1) hide show

scoring.pyutils/scoring.py +0 -77

scoring.pyutils/scoring.py DELETED Viewed

@@ -1,77 +0,0 @@
import logging
import math

import numpy as np  # noqa: F401  (kept: existing file-level import)

logger = logging.getLogger(__name__)

# Component weights used when prompt metadata is available; they sum to 1.0.
_WEIGHTS_WITH_PROMPT = {
    'quality': 0.25,       # 25% - Technical quality
    'aesthetics': 0.35,    # 35% - Visual appeal (highest weight)
    'prompt': 0.25,        # 25% - Prompt following
    'ai_detection': 0.15,  # 15% - Authenticity (inverted detection score)
}

# Without a prompt, the 25% prompt weight is split evenly between quality
# and aesthetics (12.5% each); the weights still sum to 1.0.
_WEIGHTS_WITHOUT_PROMPT = {
    'quality': 0.375,      # 25% + 12.5% from prompt
    'aesthetics': 0.475,   # 35% + 12.5% from prompt
    'ai_detection': 0.15,  # 15% - Authenticity
}


def _clamp_score(name, value, upper):
    """Return *value* limited to ``[0.0, upper]``; raise ValueError on NaN."""
    # NaN must be rejected explicitly: every comparison with NaN is False, so
    # max(0.0, min(upper, nan)) would silently evaluate to ``upper``.
    if math.isnan(value):
        raise ValueError(f"{name} is NaN")
    return max(0.0, min(upper, value))


def calculate_final_score(
    quality_score: float,
    aesthetics_score: float,
    prompt_score: float,
    ai_detection_score: float,
    has_prompt: bool = True
) -> float:
    """
    Calculate weighted composite score for image evaluation.

    Out-of-range inputs are clamped to their documented range. The AI
    detection probability is inverted and rescaled to 0-10 so that a low
    probability of AI generation contributes a high score.

    Args:
        quality_score: Technical image quality (0-10)
        aesthetics_score: Visual appeal score (0-10)
        prompt_score: Prompt adherence score (0-10). Ignored (and not
            validated) when ``has_prompt`` is False.
        ai_detection_score: AI generation probability (0-1)
        has_prompt: Whether prompt metadata is available

    Returns:
        Final composite score (0-10). Returns 0.0 if any used input is
        invalid (non-numeric or NaN); the error is logged, never raised.
    """
    try:
        # Validate and clamp the inputs that are always used.
        quality = _clamp_score('quality_score', quality_score, 10.0)
        aesthetics = _clamp_score('aesthetics_score', aesthetics_score, 10.0)
        ai_probability = _clamp_score(
            'ai_detection_score', ai_detection_score, 1.0
        )

        # Invert and scale the AI detection score to a 0-10 range:
        # a low AI detection probability (good) results in a high score.
        inverted_ai_score = (1 - ai_probability) * 10

        if has_prompt:
            weights = _WEIGHTS_WITH_PROMPT
            # Only validate the prompt score when it actually contributes,
            # so a placeholder value cannot zero out a prompt-less result.
            prompt = _clamp_score('prompt_score', prompt_score, 10.0)
            score = (
                quality * weights['quality'] +
                aesthetics * weights['aesthetics'] +
                prompt * weights['prompt'] +
                inverted_ai_score * weights['ai_detection']
            )
        else:
            weights = _WEIGHTS_WITHOUT_PROMPT
            score = (
                quality * weights['quality'] +
                aesthetics * weights['aesthetics'] +
                inverted_ai_score * weights['ai_detection']
            )

        # Ensure final score is within the valid 0-10 range.
        final_score = max(0.0, min(10.0, score))
        logger.debug("Score calculation - Final: %.2f", final_score)
        return final_score
    except Exception as e:
        # Deliberate boundary: callers rely on this function never raising.
        logger.exception("Error calculating final score: %s", e)
        return 0.0  # Return 0.0 on error to clearly indicate failure