Spaces:

satyaki-mitra
/

ImageForensics-AI

Sleeping

App Files Files Community

satyaki-mitra committed on Dec 23, 2025

Commit

f142520

1 Parent(s): 676e2fe

EXIF Analysis and Watermark Analysis added

Browse files

Files changed (18) hide show

app.py +11 -40
config/constants.py +119 -1
config/schemas.py +58 -18
config/settings.py +4 -0
decision_builders/decision_policy.py +146 -0
evidence_analyzers/evidence_aggregator.py +193 -0
evidence_analyzers/exif_analyzer.py +375 -0
evidence_analyzers/watermark_analyzer.py +463 -0
features/batch_processor.py +65 -33
features/detailed_result_maker.py +320 -233
metrics/color_analyzer.py +18 -14
metrics/{aggregator.py → signal_aggregator.py} +68 -10
notebooks/.ipynb_checkpoints/Unified_Dataset_Builder-checkpoint.ipynb +73 -9
notebooks/Unified_Dataset_Builder.ipynb +115 -47
reporter/csv_reporter.py +122 -318
reporter/json_reporter.py +107 -181
reporter/pdf_reporter.py +256 -773
ui/index.html +272 -156

app.py CHANGED Viewed

@@ -8,7 +8,6 @@ from typing import List
 from typing import Dict
 from pathlib import Path
 from fastapi import File
-from typing import Optional
 from fastapi import Request
 from fastapi import FastAPI
 from fastapi import UploadFile
@@ -24,7 +23,6 @@ from utils.validators import ImageValidator
 from fastapi.staticfiles import StaticFiles
 from utils.helpers import generate_unique_id
 from reporter.csv_reporter import CSVReporter
-from reporter.pdf_reporter import PDFReporter
 from config.schemas import BatchAnalysisResult
 from reporter.json_reporter import JSONReporter
 from utils.image_processor import ImageProcessor
@@ -63,12 +61,10 @@ image_validator   = ImageValidator()
 image_processor   = ImageProcessor()
 threshold_manager = ThresholdManager()
-threshold_manager = threshold_manager
 batch_processor   = BatchProcessor(threshold_manager = threshold_manager)
 json_reporter     = JSONReporter()
 csv_reporter      = CSVReporter()
-pdf_reporter      = PDFReporter()
 UPLOAD_DIR        = settings.UPLOAD_DIR
 CACHE_DIR         = settings.CACHE_DIR
@@ -157,21 +153,21 @@ async def analyze_single_image(file: UploadFile = File(...)):
     image_id   = generate_unique_id()
     image_path = UPLOAD_DIR / f"{image_id}_{file.filename}"
-    image_validator.validate_image(file_path = image_path,
-                                   filename  = file.filename,
-                                   file_size = file.size,
-                                  )
     try:
         with open(image_path, "wb") as f:
             shutil.copyfileobj(file.file, f)
         image                  = image_processor.load_image(image_path)
         # image is a NumPy array → shape = (H, W, C) or (H, W)
         height, width          = image.shape[:2]
-        result: AnalysisResult = batch_processor.process_single(image      = image_path,
                                                                 filename   = file.filename,
                                                                 image_size = (width, height),
                                                                )
@@ -210,15 +206,15 @@ async def analyze_batch(files: List[UploadFile] = File(...)):
             with open(path, "wb") as f:
                 shutil.copyfileobj(file.file, f)
-            image         = image_processor.load_image(path)
-            height, width = image.shape[:2]
             image_validator.validate_image(file_path = path,
                                            filename  = file.filename,
                                            file_size = file.size,
                                           )
             image_entries.append({"path"     : path,
                                   "filename" : file.filename,
                                   "size"     : (width, height),
@@ -297,6 +293,8 @@ def export_csv(batch_id: str):
     # Clean up the file after sending
     path.unlink(missing_ok = True)
     return Response(content    = content,
                     media_type = "text/csv",
@@ -306,33 +304,6 @@ def export_csv(batch_id: str):
                    )
-@app.api_route("/report/pdf/{batch_id}", methods = ["GET", "POST"])
-def export_pdf(batch_id: str):
-    session = SESSION_STORE.get(batch_id)
-    if (not session or ("result" not in session)):
-        raise HTTPException(status_code = 404,
-                            detail      = "Batch result not found",
-                           )
-    path = pdf_reporter.export_batch(session["result"])
-    # Read the file and send it as a download
-    with open(path, "rb") as f:
-        content = f.read()
-    # Clean up the file after sending
-    path.unlink(missing_ok = True)
-    return Response(content    = content,
-                    media_type = "application/pdf",
-                    headers    = {"Content-Disposition" : f"attachment; filename=ai_screener_report_{batch_id}.pdf",
-                                  "Content-Type"        : "application/pdf"
-                                 }
-                   )
 # ==================== MAIN ====================
 if __name__ == "__main__":
     # Explicit startup log (forces log file creation)

 from typing import Dict
 from pathlib import Path
 from fastapi import File
 from fastapi import Request
 from fastapi import FastAPI
 from fastapi import UploadFile
 from fastapi.staticfiles import StaticFiles
 from utils.helpers import generate_unique_id
 from reporter.csv_reporter import CSVReporter
 from config.schemas import BatchAnalysisResult
 from reporter.json_reporter import JSONReporter
 from utils.image_processor import ImageProcessor
 image_processor   = ImageProcessor()
 threshold_manager = ThresholdManager()
 batch_processor   = BatchProcessor(threshold_manager = threshold_manager)
 json_reporter     = JSONReporter()
 csv_reporter      = CSVReporter()
 UPLOAD_DIR        = settings.UPLOAD_DIR
 CACHE_DIR         = settings.CACHE_DIR
     image_id   = generate_unique_id()
     image_path = UPLOAD_DIR / f"{image_id}_{file.filename}"
     try:
         with open(image_path, "wb") as f:
             shutil.copyfileobj(file.file, f)
+        image_validator.validate_image(file_path = image_path,
+                                       filename  = file.filename,
+                                       file_size = file.size,
+                                      )
         image                  = image_processor.load_image(image_path)
         # image is a NumPy array → shape = (H, W, C) or (H, W)
         height, width          = image.shape[:2]
+        result: AnalysisResult = batch_processor.process_single(image_path = image_path,
                                                                 filename   = file.filename,
                                                                 image_size = (width, height),
                                                                )
             with open(path, "wb") as f:
                 shutil.copyfileobj(file.file, f)
             image_validator.validate_image(file_path = path,
                                            filename  = file.filename,
                                            file_size = file.size,
                                           )
+            image         = image_processor.load_image(path)
+            height, width = image.shape[:2]
             image_entries.append({"path"     : path,
                                   "filename" : file.filename,
                                   "size"     : (width, height),
     # Clean up the file after sending
     path.unlink(missing_ok = True)
+    SESSION_STORE.pop(batch_id, None)
     return Response(content    = content,
                     media_type = "text/csv",
                    )
 # ==================== MAIN ====================
 if __name__ == "__main__":
     # Explicit startup log (forces log file creation)

config/constants.py CHANGED Viewed

@@ -5,7 +5,7 @@ from dataclasses import dataclass
 class DetectionStatus(str, Enum):
     """
-    Overall detection status
     """
     LIKELY_AUTHENTIC = "LIKELY_AUTHENTIC"
     REVIEW_REQUIRED  = "REVIEW_REQUIRED"
@@ -41,6 +41,37 @@ class MetricType(str, Enum):
     COLOR     = "color"
 # Signal thresholds
 SIGNAL_THRESHOLDS          = {SignalStatus.FLAGGED : 0.7,
@@ -314,6 +345,79 @@ class ColorAnalysisParams:
                               )
 # Singleton instances for parameter classes
 GRADIENT_FIELD_PCA_PARAMS = GradientFieldPCAParams()
@@ -321,5 +425,19 @@ FREQUENCY_ANALYSIS_PARAMS = FrequencyAnalysisParams()
 NOISE_ANALYSIS_PARAMS     = NoiseAnalysisParams()
 TEXTURE_ANALYSIS_PARAMS   = TextureAnalysisParams()
 COLOR_ANALYSIS_PARAMS     = ColorAnalysisParams()

 class DetectionStatus(str, Enum):
     """
+    Binary status derived from ensemble score only: FinalDecision supersedes this once decision policy is applied
     """
     LIKELY_AUTHENTIC = "LIKELY_AUTHENTIC"
     REVIEW_REQUIRED  = "REVIEW_REQUIRED"
     COLOR     = "color"
+class EvidenceType(str, Enum):
+    EXIF       = "exif"
+    WATERMARK  = "watermark"
+class EvidenceStrength(str, Enum):
+    """
+    Ordered by increasing certainty: WEAK < MODERATE < STRONG < CONCLUSIVE
+    """
+    WEAK        = "weak"        # heuristic, non-binding
+    MODERATE    = "moderate"    # strong hint, not cryptographic
+    STRONG      = "strong"      # vendor watermark, strong signal
+    CONCLUSIVE  = "conclusive"  # cryptographic / signed proof
+class EvidenceDirection(str, Enum):
+    """
+    What this evidence supports
+    """
+    AI_GENERATED  = "ai_generated"
+    AUTHENTIC     = "authentic"
+    INDETERMINATE = "indeterminate"
+class FinalDecision(str, Enum):
+    MOSTLY_AUTHENTIC       = "mostly_authentic"
+    AUTHENTIC_BUT_REVIEW   = "authentic_but_review"
+    SUSPICIOUS_AI_LIKELY   = "suspicious_ai_likely"
+    CONFIRMED_AI_GENERATED = "confirmed_ai_generated"
 # Signal thresholds
 SIGNAL_THRESHOLDS          = {SignalStatus.FLAGGED : 0.7,
                               )
+@dataclass(frozen = True)
+class SignalConfidenceParams:
+    """
+    Parameters for Tier-1 signal confidence calculation
+    """
+    # Agreement (variance-based confidence)
+    VARIANCE_NORM                  : float = 0.10
+    # Distance-from-threshold confidence
+    DISTANCE_NORM                  : float = 0.30
+    # Fallback when metric confidence is missing
+    DEFAULT_RELIABILITY_CONFIDENCE : float = 0.60
+    # Weighting of confidence components (must sum to 1.0)
+    AGREEMENT_WEIGHT               : float = 0.40
+    RELIABILITY_WEIGHT             : float = 0.30
+    DISTANCE_WEIGHT                : float = 0.30
+@dataclass(frozen = True)
+class WatermarkAnalysisParams:
+    """
+    Parameters for heuristic watermark detection
+    """
+    # Confidence thresholds
+    STRONG_CONFIDENCE_THRESHOLD : float = 0.85
+    CONFIDENCE_CAP              : float = 0.95
+    # Wavelet-domain thresholds
+    HF_ENERGY_RATIO_THRESHOLD   : float = 0.18
+    KURTOSIS_THRESHOLD          : float = 7.5
+    PERIODICITY_THRESHOLD       : float = 0.8
+    HF_ENERGY_RATIO_NORM        : float = 0.4
+    KURTOSIS_NORM_FACTOR        : float = 15.0
+    PEAK_STD_MULTIPLIER         : float = 3.0
+    # Frequency-domain thresholds
+    MIN_ANOMALOUS_BANDS         : int   = 2
+    SPECTRAL_SYMMETRY_THRESHOLD : float = 0.6
+    PEAK_RATIO_THRESHOLD        : float = 0.05
+    # LSB steganography thresholds
+    LSB_ENTROPY_THRESHOLD       : float = 0.72
+    CHI_SQUARE_THRESHOLD        : float = 20.0
+    RUNS_SCORE_THRESHOLD        : float = 0.6
+    LSB_ENTROPY_NORM_BASE       : float = 0.5
+    LSB_ENTROPY_NORM_RANGE      : float = 0.5
+    CHI_SQUARE_NORM_FACTOR      : float = 50.0
+@dataclass(frozen = True)
+class ExifAnalysisParams:
+    """
+    Parameters for EXIF metadata analysis
+    """
+    # Confidence values
+    MISSING_EXIF_CONFIDENCE            : float = 0.5
+    AI_FINGERPRINT_CONFIDENCE          : float = 0.9
+    CAMERA_BASE_CONFIDENCE             : float = 0.7
+    CAMERA_WITH_LENS_CONFIDENCE        : float = 0.75
+    SUSPICIOUS_CAMERA_CONFIDENCE       : float = 0.4
+    TIMESTAMP_INCONSISTENCY_CONFIDENCE : float = 0.4
+    MISSING_PHOTO_METADATA_CONFIDENCE  : float = 0.5
+    SUSPICIOUS_TIMESTAMP_CONFIDENCE    : float = 0.3
+    # Thresholds
+    TIMESTAMP_DELTA_THRESHOLD          : float = 5.0    # seconds
+    MIN_VALID_YEAR                     : int   = 1990   # before digital cameras
+    MAX_FUTURE_YEARS                   : int   = 1      # how many years in future is valid
 # Singleton instances for parameter classes
 GRADIENT_FIELD_PCA_PARAMS = GradientFieldPCAParams()
 NOISE_ANALYSIS_PARAMS     = NoiseAnalysisParams()
 TEXTURE_ANALYSIS_PARAMS   = TextureAnalysisParams()
 COLOR_ANALYSIS_PARAMS     = ColorAnalysisParams()
+SIGNAL_CONFIDENCE_PARAMS  = SignalConfidenceParams()
+# Singleton instances for evidence analysis classes
+WATERMARK_ANALYSIS_PARAMS = WatermarkAnalysisParams()
+EXIF_ANALYSIS_PARAMS      = ExifAnalysisParams()
+# Evidence Strength ordering
+EVIDENCE_STRENGTH_ORDER   = {EvidenceStrength.WEAK       : 1,
+                             EvidenceStrength.MODERATE   : 2,
+                             EvidenceStrength.STRONG     : 3,
+                             EvidenceStrength.CONCLUSIVE : 4,
+                            }
+MIN_EVIDENCE_CONFIDENCE   = 0.6

config/schemas.py CHANGED Viewed

@@ -6,10 +6,15 @@ from typing import Optional
 from datetime import datetime
 from pydantic import BaseModel
 from config.constants import MetricType
 from config.constants import SignalStatus
 from config.constants import DetectionStatus
 class MetricResult(BaseModel):
     """
     Raw metric output for explainability and reporting
@@ -52,29 +57,63 @@ class DetectionSignal(BaseModel):
                                  }
 class AnalysisResult(BaseModel):
     """
     Single image analysis result
     """
-    filename        : str
-    overall_score   : float                          = Field(..., ge = 0.0, le = 1.0)
-    status          : DetectionStatus
-    confidence      : int                            = Field(..., ge = 0, le = 100, description = "Confidence percentage")
-    signals         : List[DetectionSignal]
-    metric_results  : Dict[MetricType, MetricResult]
-    processing_time : float                          = Field(..., description = "Processing time in seconds")
-    timestamp       : datetime                       = Field(default_factory = datetime.now)
-    image_size      : tuple[int, int]                = Field(..., description = "Width x Height")
-    model_config                                     =  {"json_schema_extra" : {"example" : {"filename"        : "photo_001.jpg",
-                                                                                             "overall_score"   : 0.73,
-                                                                                             "status"          : "REVIEW_REQUIRED",
-                                                                                             "confidence"      : 73,
-                                                                                             "signals"         : [],
-                                                                                             "processing_time" : 2.34,
-                                                                                             "image_size"      : [1920, 1080]
-                                                                                            }
-                                                                               }
                                                         }
@@ -91,6 +130,7 @@ class BatchAnalysisResult(BaseModel):
     timestamp             : datetime             = Field(default_factory = datetime.now)
 class APIResponse(BaseModel):
     """
     Standard API response wrapper

 from datetime import datetime
 from pydantic import BaseModel
 from config.constants import MetricType
+from config.constants import EvidenceType
 from config.constants import SignalStatus
+from config.constants import FinalDecision
 from config.constants import DetectionStatus
+from config.constants import EvidenceStrength
+from config.constants import EvidenceDirection
+# Metric-Level Structures
 class MetricResult(BaseModel):
     """
     Raw metric output for explainability and reporting
                                  }
+# Evidence-Level Structures
+class EvidenceResult(BaseModel):
+    """
+    Declarative evidence extracted from image metadata, watermarking, or cryptographic provenance systems
+    """
+    source       : EvidenceType            = Field(..., description = "Evidence source type (exif, watermark, c2pa)")
+    finding      : str                     = Field(..., description = "Human-readable description of the evidence")
+    direction    : EvidenceDirection       = Field(..., description = "What this evidence supports")
+    strength     : EvidenceStrength        = Field(..., description = "How strong or reliable this evidence is")
+    confidence   : Optional[float]         = Field(None, ge = 0.0, le = 1.0, description = "Confidence in the evidence extraction itself")
+    details      : Dict                    = Field(default_factory = dict, description = "Raw extracted fields or technical metadata")
+    analyzer     : str                     = Field(..., description = "Analyzer that produced this evidence (exif_analyzer, watermark_analyzer, etc.)")
+    timestamp    : datetime                = Field(default_factory = datetime.now)
+    model_config                           = {"json_schema_extra": {"example" : {"source"     : "watermark",
+                                                                                 "finding"    : "Midjourney v6 watermark detected",
+                                                                                 "direction"  : "ai_generated",
+                                                                                 "strength"   : "strong",
+                                                                                 "confidence" : 0.92,
+                                                                                 "details"    : {"watermark_type" : "DWT",
+                                                                                                 "vendor"         : "Midjourney",
+                                                                                                 "version"        : "v6"
+                                                                                                },
+                                                                                 "analyzer"   : "watermark_analyzer"
+                                                                                }
+                                                                   }
+                                             }
+# Analysis-Level Structures
 class AnalysisResult(BaseModel):
     """
     Single image analysis result
     """
+    filename             : str
+    overall_score        : float                          = Field(..., ge = 0.0, le = 1.0)
+    status               : DetectionStatus
+    final_decision       : Optional[FinalDecision]        = Field(None, description = "Authoritative decision after evidence-first policy evaluation")
+    decision_explanation : Optional[str]                  = Field(None, description = "Human-readable explanation of final decision")
+    confidence           : int                            = Field(..., ge = 0, le = 100, description = "Confidence percentage")
+    signals              : List[DetectionSignal]
+    metric_results       : Dict[MetricType, MetricResult]
+    evidence             : List[EvidenceResult]           = Field(default_factory = list, description = "Declarative evidence extracted before decision policy")
+    processing_time      : float                          = Field(..., description = "Processing time in seconds")
+    timestamp            : datetime                       = Field(default_factory = datetime.now)
+    image_size           : tuple[int, int]                = Field(..., description = "Width x Height")
+    model_config                                          =  {"json_schema_extra" : {"example" : {"filename"        : "photo_001.jpg",
+                                                                                                  "overall_score"   : 0.73,
+                                                                                                  "status"          : "REVIEW_REQUIRED",
+                                                                                                  "confidence"      : 73,
+                                                                                                  "signals"         : [],
+                                                                                                  "evidence"        : [],
+                                                                                                  "processing_time" : 2.34,
+                                                                                                  "image_size"      : [1920, 1080]
+                                                                                                 }
+                                                                                    }
                                                         }
     timestamp             : datetime             = Field(default_factory = datetime.now)
+# API Wrappers
 class APIResponse(BaseModel):
     """
     Standard API response wrapper

config/settings.py CHANGED Viewed

@@ -46,6 +46,10 @@ class Settings(BaseSettings):
     PROCESSING_TIMEOUT  : int      = 30
     PARALLEL_PROCESSING : bool     = True
     MAX_WORKERS         : int      = 4
     # Paths
     BASE_DIR            : Path     = Path(__file__).parent.parent

     PROCESSING_TIMEOUT  : int      = 30
     PARALLEL_PROCESSING : bool     = True
     MAX_WORKERS         : int      = 4
+    METRIC_WORKERS      : int      = 4
+    EVIDENCE_WORKERS    : int      = 2
+    METRIC_TIMEOUT      : float    = 5.0
+    EVIDENCE_TIMEOUT    : float    = 5.0
     # Paths
     BASE_DIR            : Path     = Path(__file__).parent.parent

decision_builders/decision_policy.py ADDED Viewed

	@@ -0,0 +1,146 @@

+# Dependencies
+from typing import List
+from utils.logger import get_logger
+from config.schemas import AnalysisResult
+from config.schemas import EvidenceResult
+from config.constants import FinalDecision
+from config.constants import DetectionStatus
+from config.constants import EvidenceStrength
+from config.constants import EvidenceDirection
+from config.constants import MIN_EVIDENCE_CONFIDENCE
+from config.constants import EVIDENCE_STRENGTH_ORDER
+# Setup Logging
+logger = get_logger(__name__)
+class DecisionPolicy:
+    """
+    Evidence-first decision policy
+    Responsibilities:
+    -----------------
+    - Apply authoritative rules over Tier-1 metrics
+    - Resolve conflicting evidence safely
+    - Produce a final, explainable decision
+    - Never perform probabilistic inference
+    """
+    def apply(self, analysis: AnalysisResult) -> AnalysisResult:
+        """
+        Apply final decision policy on analysis result
+        """
+        try:
+            evidence = analysis.evidence or []
+            if not evidence:
+                analysis.final_decision       = self._decide_from_metrics(status = analysis.status)
+                analysis.decision_explanation = ("No declarative evidence detected. Final decision is based on metric analysis.")
+                return analysis
+            logger.info(f"Applying decision policy on {len(evidence)} evidence items")
+            decision, explanation         = self._decide_from_evidence(evidence        = evidence,
+                                                                       fallback_status = analysis.status,
+                                                                      )
+            analysis.final_decision       = decision
+            analysis.decision_explanation = explanation
+            logger.info(f"Final decision resolved: {decision.value}")
+            return analysis
+        except Exception as e:
+            # Decision policy must NEVER break pipeline
+            logger.error(f"Decision policy failed: {e}")
+            analysis.final_decision = FinalDecision.AUTHENTIC_BUT_REVIEW
+            analysis.decision_explanation = ("Decision policy encountered an internal error. Image requires manual review.")
+            return analysis
+    def _decide_from_evidence(self, evidence: List[EvidenceResult], fallback_status: DetectionStatus) -> tuple[FinalDecision, str]:
+        """
+        Resolve decision from Tier-2 evidence with explanation
+        """
+        # Storage for evidences
+        ai_evidence         = list()
+        authentic_evidence  = list()
+        indeterminate_count = sum(1 for e in evidence if (e.direction == EvidenceDirection.INDETERMINATE))
+        for item in evidence:
+            if (item.direction == EvidenceDirection.AI_GENERATED):
+                ai_evidence.append(item)
+            elif (item.direction == EvidenceDirection.AUTHENTIC):
+                authentic_evidence.append(item)
+            else:
+                logger.debug(f"Indeterminate evidence detected and excluded from decisive rules: {item.finding} ({item.analyzer})")
+        strongest_ai        = self._strongest(evidence = ai_evidence)
+        strongest_authentic = self._strongest(evidence = authentic_evidence)
+        # Rule 1: CONCLUSIVE AI
+        if (strongest_ai and (strongest_ai.strength == EvidenceStrength.CONCLUSIVE) and ((strongest_ai.confidence or 0.0) >= MIN_EVIDENCE_CONFIDENCE)):
+            return (FinalDecision.CONFIRMED_AI_GENERATED,
+                    f"Conclusive evidence detected ({strongest_ai.finding}). This evidence provides cryptographic or authoritative proof of AI generation.",
+                   )
+        # Rule 2: Strong AI evidence
+        if (strongest_ai and (strongest_ai.strength == EvidenceStrength.STRONG) and ((strongest_ai.confidence or 0.0) >= MIN_EVIDENCE_CONFIDENCE)):
+            if (strongest_authentic and (strongest_authentic.strength in (EvidenceStrength.STRONG, EvidenceStrength.CONCLUSIVE))):
+                return (FinalDecision.AUTHENTIC_BUT_REVIEW,
+                        "Strong evidence exists for both AI generation and authenticity. Conflicting high-authority signals require human review.",
+                       )
+            return (FinalDecision.SUSPICIOUS_AI_LIKELY,
+                    f"Strong AI-related evidence detected ({strongest_ai.finding}). This evidence overrides metric-based assessment.")
+        # Rule 3: Strong authentic evidence
+        if (strongest_authentic and (strongest_authentic.strength in (EvidenceStrength.STRONG, EvidenceStrength.CONCLUSIVE))):
+            return (FinalDecision.MOSTLY_AUTHENTIC,
+                    f"Strong authenticity evidence detected ({strongest_authentic.finding}). Image is assessed as mostly authentic."
+                   )
+        # Rule 4: Conflicting moderate evidence
+        if (strongest_ai and strongest_authentic):
+            return (FinalDecision.AUTHENTIC_BUT_REVIEW,
+                    "Moderate evidence exists for both AI generation and authenticity. Evidence is inconclusive and requires manual review.")
+        if (indeterminate_count >= 2):
+            return (FinalDecision.AUTHENTIC_BUT_REVIEW,
+                    "Multiple indeterminate evidence sources detected. Evidence is inconclusive and requires manual review."
+                   )
+        # Rule 5: Weak / indeterminate evidence
+        decision = self._decide_from_metrics(status = fallback_status)
+        return (decision,
+                "No high-authority evidence detected. Final decision is derived from metric-based analysis.",
+               )
+    def _decide_from_metrics(self, status: DetectionStatus) -> FinalDecision:
+        """
+        Convert Tier-1 metric status into final decision
+        """
+        if (status == DetectionStatus.REVIEW_REQUIRED):
+            return FinalDecision.SUSPICIOUS_AI_LIKELY
+        return FinalDecision.MOSTLY_AUTHENTIC
+    def _strongest(self, evidence: List[EvidenceResult]) -> EvidenceResult | None:
+        """
+        Return strongest evidence by strength, then confidence
+        """
+        if not evidence:
+            return None
+        return max(evidence,
+                   key = lambda item: (EVIDENCE_STRENGTH_ORDER.get(item.strength, 0), item.confidence or 0.0),
+                  )

evidence_analyzers/evidence_aggregator.py ADDED Viewed

	@@ -0,0 +1,193 @@

+# Dependencies
+import time
+from typing import List
+from pathlib import Path
+from utils.logger import get_logger
+from config.settings import settings
+from config.schemas import EvidenceResult
+from concurrent.futures import TimeoutError
+from concurrent.futures import as_completed
+from config.constants import EvidenceStrength
+from config.constants import EvidenceDirection
+from concurrent.futures import ThreadPoolExecutor
+from config.constants import EVIDENCE_STRENGTH_ORDER
+from evidence_analyzers.exif_analyzer import ExifAnalyzer
+from evidence_analyzers.watermark_analyzer import WatermarkAnalyzer
+# Setup Logging
+logger = get_logger(__name__)
+class EvidenceAggregator:
+    """
+    Tier-2 Evidence Orchestrator
+    Responsibilities:
+    -----------------
+    - Execute all evidence analyzers safely
+    - Collect declarative evidence only (no inference)
+    - Deduplicate overlapping findings
+    - Rank evidence by authority & reliability
+    - Remain forward-compatible with new evidence sources
+    """
+    def __init__(self):
+        """
+        Initialize all Tier-2 analyzers
+        NOTE:
+        -----
+        Any new analyzer (C2PA, camera fingerprinting, sensor PRNU) must be added here explicitly
+        """
+        self.exif_analyzer      = ExifAnalyzer()
+        self.watermark_analyzer = WatermarkAnalyzer()
+        self._analyzers         = (self.exif_analyzer,
+                                   self.watermark_analyzer,
+                                  )
+        logger.info("EvidenceAggregator initialized with analyzers: "
+                    f"{[a.__class__.__name__ for a in self._analyzers]}")
+    def analyze(self, image_path: Path) -> List[EvidenceResult]:
+        """
+        Run Tier-2 evidence extraction pipeline
+        Arguments:
+        ----------
+            image_path   {Path}  : Path to image file
+        Returns:
+        --------
+                { list }         : Ordered, deduplicated evidence
+        """
+        # Small, bounded executor for Tier-2 (I/O oriented)
+        max_workers        = min(len(self._analyzers), settings.EVIDENCE_WORKERS or 2)
+        logger.info(f"Starting Tier-2 evidence analysis: {image_path}")
+        evidence_collected = list()
+        with ThreadPoolExecutor(max_workers = max_workers) as executor:
+            futures = {executor.submit(analyzer.analyze, image_path = image_path): {"analyzer": analyzer, "start": time.time()} for analyzer in self._analyzers}
+            for future in as_completed(futures):
+                meta     = futures[future]
+                analyzer = meta["analyzer"]
+                start    = meta["start"]
+                try:
+                    results = future.result(timeout = settings.EVIDENCE_TIMEOUT)
+                    logger.debug(f"{analyzer.__class__.__name__} completed in {time.time()-start:.2f}s")
+                    if results:
+                        evidence_collected.extend(results)
+                        logger.debug(f"{analyzer.__class__.__name__} returned {len(results)} evidence items")
+                    else:
+                        logger.debug(f"{analyzer.__class__.__name__} returned no evidence")
+                except TimeoutError:
+                    logger.warning(f"{analyzer.__class__.__name__} timed out")
+                except Exception as e:
+                    logger.error(f"{analyzer.__class__.__name__} failed: {e}")
+        if not evidence_collected:
+            logger.info("No Tier-2 evidence detected")
+            return []
+        # Normalize, deduplicate & rank
+        evidence = self._deduplicate(evidence = evidence_collected)
+        evidence = self._rank_evidence(evidence = evidence)
+        logger.info(f"Tier-2 evidence finalized: {len(evidence)} items")
+        return evidence
+    def _deduplicate(self, evidence: List[EvidenceResult]) -> List[EvidenceResult]:
+        """
+        Deduplicate evidence items
+        Strategy:
+        ---------
+        - Same analyzer
+        - Same semantic finding
+        - Same direction
+        Keeps the strongest / highest confidence instance
+        """
+        unique_map = dict()
+        for item in evidence:
+            key = (item.analyzer, item.finding, item.direction)
+            if key not in unique_map:
+                unique_map[key] = item
+                continue
+            existing               = unique_map[key]
+            existing_strength_rank = self._strength_rank(strength = existing.strength)
+            item_strength_rank     = self._strength_rank(strength = item.strength)
+            # Prefer stronger evidence
+            if  (item_strength_rank > existing_strength_rank):
+                unique_map[key] = item
+                continue
+            # Prefer higher confidence if strength equal
+            if (item_strength_rank == existing_strength_rank):
+                if (item.confidence or 0.0) > (existing.confidence or 0.0):
+                    unique_map[key] = item
+        deduped = list(unique_map.values())
+        logger.debug(f"Deduplicated evidence: {len(evidence)} → {len(deduped)}")
+        return deduped
+    def _rank_evidence(self, evidence: List[EvidenceResult]) -> List[EvidenceResult]:
+        """
+        Rank evidence by authority
+        Ranking precedence:
+        -------------------
+        1. Direction (AI > AUTHENTIC > INDETERMINATE)
+        2. Strength  (CONCLUSIVE > STRONG > MODERATE > WEAK)
+        3. Confidence (higher wins)
+        """
+        def priority(e: EvidenceResult) -> tuple:
+            return (self._direction_rank(direction = e.direction),
+                    self._strength_rank(strength = e.strength),
+                    e.confidence or 0.0,
+                   )
+        ranked = sorted(evidence, key = priority, reverse = True)
+        logger.debug("Evidence ranking completed")
+        return ranked
+    @staticmethod
+    def _direction_rank(direction: EvidenceDirection) -> int:
+        """
+        Evidence direction priority
+        """
+        return {EvidenceDirection.AI_GENERATED  : 3,
+                EvidenceDirection.AUTHENTIC     : 2,
+                EvidenceDirection.INDETERMINATE : 1,
+               }.get(direction, 0)
+    @staticmethod
+    def _strength_rank(strength: EvidenceStrength) -> int:
+        """
+        Evidence strength priority
+        """
+        return EVIDENCE_STRENGTH_ORDER.get(strength, 0)

evidence_analyzers/exif_analyzer.py ADDED Viewed

	@@ -0,0 +1,375 @@

+# Dependencies
+from PIL import Image
+from typing import List
+from typing import Dict
+from pathlib import Path
+from PIL import ExifTags
+from typing import Optional
+from datetime import datetime
+from utils.logger import get_logger
+from config.constants import EvidenceType
+from config.schemas import EvidenceResult
+from config.constants import EvidenceStrength
+from config.constants import EvidenceDirection
+from config.constants import EXIF_ANALYSIS_PARAMS
+# Setup Logging
+logger = get_logger(__name__)
+class ExifAnalyzer:
+    """
+    EXIF analyzer produces declarative evidence only: No probabilistic inference
+    """
+    AI_SOFTWARE_FINGERPRINTS = {"sdxl",
+                                "dall-e",
+                                "dall·e",
+                                "openai",
+                                "imagen",
+                                "runway",
+                                "comfyui",
+                                "firefly",
+                                "novelai",
+                                "craiyon",
+                                "leonardo",
+                                "midjourney",
+                                "adobe sensei",
+                                "automatic1111",
+                                "waifu diffusion",
+                                "stable diffusion",
+                                "bing image creator",
+                               }
+    CAMERA_FIELDS            = {"Make",
+                                "Model",
+                                "LensModel",
+                               }
+    TIME_FIELDS              = {"DateTime",
+                                "DateTimeOriginal",
+                                "DateTimeDigitized",
+                               }
+    AI_INDICATOR_FIELDS      = {"Artist",
+                                "Software",
+                                "XPComment",
+                                "UserComment",
+                                "ImageDescription",
+                                "ProcessingSoftware",
+                               }
+    TIMESTAMP_FORMATS        = ["%Y:%m:%d %H:%M:%S",
+                                "%Y-%m-%d %H:%M:%S",
+                                "%Y:%m:%d %H:%M:%S.%f",
+                               ]
+    PHOTO_METADATA_FIELDS    = {"ISO",
+                                "FNumber",
+                                "FocalLength",
+                                "ExposureTime",
+                                "ISOSpeedRatings",
+                               }
+    SUSPICIOUS_PATTERNS      = {"unknown",
+                                "none",
+                                "camera",
+                                "test",
+                                "generic",
+                                "placeholder",
+                                "sample",
+                               }
+    def analyze(self, image_path: Path) -> List[EvidenceResult]:
+        evidence = list()
+        logger.debug(f"Starting EXIF analysis for {image_path}")
+        try:
+            image    = Image.open(fp = image_path, mode = "r")
+            exif_raw = image.getexif()
+            if not exif_raw:
+                logger.info("No EXIF metadata found")
+                evidence.append(self._missing_exif())
+                return evidence
+            exif = self._normalize_exif(exif_raw = exif_raw)
+            logger.debug(f"Normalized EXIF fields: {list(exif.keys())}")
+            evidence.extend(self._analyze_ai_indicators(exif = exif))
+            evidence.extend(self._analyze_camera_presence(exif = exif))
+            evidence.extend(self._analyze_timestamp_consistency(exif = exif))
+            evidence.extend(self._analyze_suspicious_combinations(exif = exif))
+        except Exception as e:
+            logger.exception("EXIF parsing failed")
+            evidence.append(EvidenceResult(source     = EvidenceType.EXIF,
+                                           finding    = "EXIF parsing failed",
+                                           direction  = EvidenceDirection.INDETERMINATE,
+                                           strength   = EvidenceStrength.WEAK,
+                                           confidence = 0.0,
+                                           details    = {"error": str(e)},
+                                           analyzer   = "exif_analyzer",
+                                          )
+                           )
+        logger.debug(f"EXIF analysis completed with {len(evidence)} evidence items")
+        return evidence
+    def _normalize_exif(self, exif_raw) -> Dict[str, str]:
+        """
+        Normalize EXIF tags to human-readable names
+        """
+        normalized_exif = dict()
+        for tag, value in exif_raw.items():
+            tag_name = ExifTags.TAGS.get(tag, tag)
+            # Convert value to string: handling bytes
+            if isinstance(value, bytes):
+                try:
+                    value_str = value.decode('utf-8', errors = 'ignore')
+                except:
+                    value_str = str(value)
+            else:
+                value_str = str(value)
+            normalized_exif[tag_name] = value_str
+        return normalized_exif
+    def _missing_exif(self) -> EvidenceResult:
+        """
+        Missing EXIF is suspicious but not conclusive
+        """
+        missing_exif = EvidenceResult(source     = EvidenceType.EXIF,
+                                      finding    = "No EXIF metadata present (common in AI images and processed web images)",
+                                      direction  = EvidenceDirection.INDETERMINATE,
+                                      strength   = EvidenceStrength.WEAK,
+                                      confidence = EXIF_ANALYSIS_PARAMS.MISSING_EXIF_CONFIDENCE,
+                                      details    = {"note": "Missing EXIF alone is not conclusive"},
+                                      analyzer   = "exif_analyzer",
+                                     )
+        return missing_exif
+    def _analyze_ai_indicators(self, exif: Dict[str, str]) -> List[EvidenceResult]:
+        """
+        Check multiple EXIF fields for AI tool indicators
+        """
+        evidence = list()
+        for field_name in self.AI_INDICATOR_FIELDS:
+            field_value = exif.get(field_name, "").lower()
+            if not field_value:
+                continue
+            logger.debug(f"Checking {field_name}: {field_value}")
+            for fingerprint in self.AI_SOFTWARE_FINGERPRINTS:
+                if (fingerprint in field_value):
+                    logger.warning(f"AI software fingerprint detected in {field_name}: {fingerprint}")
+                    evidence.append(EvidenceResult(source     = EvidenceType.EXIF,
+                                                   finding    = f"EXIF {field_name} tag matches known AI tool: '{field_value}'",
+                                                   direction  = EvidenceDirection.AI_GENERATED,
+                                                   strength   = EvidenceStrength.STRONG,
+                                                   confidence = EXIF_ANALYSIS_PARAMS.AI_FINGERPRINT_CONFIDENCE,
+                                                   details    = {"field"       : field_name,
+                                                                 "value"       : field_value,
+                                                                 "fingerprint" : fingerprint
+                                                                },
+                                                   analyzer   = "exif_analyzer",
+                                                  )
+                                   )
+                    break
+        return evidence
+    def _analyze_camera_presence(self, exif: Dict[str, str]) -> List[EvidenceResult]:
+        """
+        Analyze camera metadata for authenticity indicators
+        """
+        evidence   = list()
+        make       = exif.get("Make")
+        model      = exif.get("Model")
+        lens       = exif.get("LensModel")
+        if (make and model):
+            logger.debug(f"Camera metadata found: {make} {model}")
+            confidence = EXIF_ANALYSIS_PARAMS.CAMERA_BASE_CONFIDENCE
+            details    = {"make": make, "model": model}
+            if lens:
+                confidence = EXIF_ANALYSIS_PARAMS.CAMERA_WITH_LENS_CONFIDENCE
+                details["lens"] = lens
+                logger.debug(f"Lens metadata found: {lens}")
+            if self._is_suspicious_camera_data(make = make, model = model):
+                logger.warning(f"Suspicious camera metadata: {make} {model}")
+                evidence.append(EvidenceResult(source     = EvidenceType.EXIF,
+                                               finding    = f"Suspicious camera metadata detected: {make} {model}",
+                                               direction  = EvidenceDirection.INDETERMINATE,
+                                               strength   = EvidenceStrength.WEAK,
+                                               confidence = EXIF_ANALYSIS_PARAMS.SUSPICIOUS_CAMERA_CONFIDENCE,
+                                               details    = details,
+                                               analyzer   = "exif_analyzer",
+                                              )
+                               )
+            else:
+                evidence.append(EvidenceResult(source     = EvidenceType.EXIF,
+                                               finding    = f"Camera metadata present: {make} {model}",
+                                               direction  = EvidenceDirection.AUTHENTIC,
+                                               strength   = EvidenceStrength.MODERATE,
+                                               confidence = confidence,
+                                               details    = details,
+                                               analyzer   = "exif_analyzer",
+                                              )
+                               )
+        else:
+            logger.info("No camera metadata present")
+        return evidence
+    def _is_suspicious_camera_data(self, make: str, model: str) -> bool:
+        """
+        Check if camera data looks fake or suspicious
+        """
+        make_lower  = make.lower()
+        model_lower = model.lower()
+        for pattern in self.SUSPICIOUS_PATTERNS:
+            if ((pattern in make_lower) or (pattern in model_lower)):
+                return True
+        return False
+    def _analyze_timestamp_consistency(self, exif: Dict[str, str]) -> List[EvidenceResult]:
+        """
+        Check for timestamp inconsistencies
+        """
+        timestamps = dict()
+        for field in self.TIME_FIELDS:
+            if (field not in exif):
+                continue
+            parsed_time = self._parse_timestamp(timestamp_str = exif[field])
+            if parsed_time:
+                timestamps[field] = parsed_time
+        if (len(timestamps) < 2):
+            return []
+        time_values   = list(timestamps.values())
+        delta         = max(time_values) - min(time_values)
+        delta_seconds = delta.total_seconds()
+        logger.debug(f"Timestamp delta: {delta_seconds} seconds across {len(timestamps)} fields")
+        if (delta_seconds > EXIF_ANALYSIS_PARAMS.TIMESTAMP_DELTA_THRESHOLD):
+            logger.warning(f"Inconsistent EXIF timestamps detected: {delta_seconds}s delta")
+            return [EvidenceResult(source     = EvidenceType.EXIF,
+                                   finding    = f"Inconsistent EXIF timestamps ({delta_seconds:.1f}s difference)",
+                                   direction  = EvidenceDirection.INDETERMINATE,
+                                   strength   = EvidenceStrength.WEAK,
+                                   confidence = EXIF_ANALYSIS_PARAMS.TIMESTAMP_INCONSISTENCY_CONFIDENCE,
+                                   details    = {"delta_seconds" : delta_seconds,
+                                                 "timestamps"    : {k: v.isoformat() for k, v in timestamps.items()},
+                                                },
+                                   analyzer   = "exif_analyzer",
+                                  )
+                   ]
+        return []
+    def _parse_timestamp(self, timestamp_str: str) -> Optional[datetime]:
+        """
+        Parse timestamp with multiple format attempts
+        """
+        for fmt in self.TIMESTAMP_FORMATS:
+            try:
+                return datetime.strptime(timestamp_str, fmt)
+            except (ValueError, TypeError):
+                continue
+        logger.debug(f"Could not parse timestamp: {timestamp_str}")
+        return None
+    def _analyze_suspicious_combinations(self, exif: Dict[str, str]) -> List[EvidenceResult]:
+        """
+        Detect suspicious combinations of EXIF data
+        """
+        evidence = list()
+        has_camera         = exif.get("Make") and exif.get("Model")
+        has_photo_metadata = any([exif.get(field) for field in self.PHOTO_METADATA_FIELDS])
+        if (has_camera and not has_photo_metadata):
+            logger.warning("Camera metadata present but missing photographic settings")
+            evidence.append(EvidenceResult(source     = EvidenceType.EXIF,
+                                           finding    = "Camera identified but photographic metadata missing (suspicious)",
+                                           direction  = EvidenceDirection.INDETERMINATE,
+                                           strength   = EvidenceStrength.WEAK,
+                                           confidence = EXIF_ANALYSIS_PARAMS.MISSING_PHOTO_METADATA_CONFIDENCE,
+                                           details    = {"has_camera"        : True,
+                                                         "missing_settings"  : list(self.PHOTO_METADATA_FIELDS),
+                                                        },
+                                           analyzer   = "exif_analyzer",
+                                          )
+                           )
+        for field in self.TIME_FIELDS:
+            if (field not in exif):
+                continue
+            timestamp = self._parse_timestamp(timestamp_str = exif[field])
+            if (timestamp and self._is_suspicious_timestamp(dt = timestamp)):
+                logger.warning(f"Suspicious timestamp detected: {timestamp}")
+                evidence.append(EvidenceResult(source     = EvidenceType.EXIF,
+                                               finding    = f"Suspicious timestamp pattern in {field}",
+                                               direction  = EvidenceDirection.INDETERMINATE,
+                                               strength   = EvidenceStrength.WEAK,
+                                               confidence = EXIF_ANALYSIS_PARAMS.SUSPICIOUS_TIMESTAMP_CONFIDENCE,
+                                               details    = {"field"     : field,
+                                                             "timestamp" : timestamp.isoformat(),
+                                                             "reason"    : "Suspiciously round time (midnight or all zeros)",
+                                                            },
+                                               analyzer   = "exif_analyzer",
+                                              )
+                               )
+                break
+        return evidence
+    def _is_suspicious_timestamp(self, dt: datetime) -> bool:
+        """
+        Check if timestamp looks fake: too perfect/round
+        """
+        if ((dt.hour == 0) and (dt.minute == 0) and (dt.second == 0)):
+            return True
+        if (dt.year < EXIF_ANALYSIS_PARAMS.MIN_VALID_YEAR):
+            return True
+        if (dt.year > datetime.now().year + EXIF_ANALYSIS_PARAMS.MAX_FUTURE_YEARS):
+            return True
+        return False

evidence_analyzers/watermark_analyzer.py ADDED Viewed

	@@ -0,0 +1,463 @@

+# Dependencies
+import pywt
+import cv2
+import numpy as np
+from typing import List
+from typing import Tuple
+from pathlib import Path
+from scipy import fftpack
+from scipy.stats import entropy
+from utils.logger import get_logger
+from scipy.signal import correlate2d
+from config.schemas import EvidenceResult
+from config.constants import EvidenceType
+from config.constants import EvidenceStrength
+from config.constants import EvidenceDirection
+from utils.image_processor import ImageProcessor
+from config.constants import WATERMARK_ANALYSIS_PARAMS
+# Setup Logging
+logger = get_logger(__name__)
+class WatermarkAnalyzer:
+    """
+    Generic watermark detector using signal processing techniques:
+    - Detects invisible watermarks through frequency domain analysis
+    - wavelet decomposition, and statistical anomalies - vendor agnostic
+    """
+    def __init__(self):
+        self.image_processor = ImageProcessor()
+    def analyze(self, image_path: Path) -> List[EvidenceResult]:
+        logger.debug(f"Starting watermark analysis for {image_path}")
+        evidence = list()
+        image    = self.image_processor.load_image(image_path)
+        evidence.extend(self._detect_wavelet_watermarks(image = image))
+        evidence.extend(self._detect_frequency_watermarks(image = image))
+        evidence.extend(self._detect_lsb_steganography(image = image))
+        logger.debug(f"Watermark analysis completed with {len(evidence)} findings")
+        if not evidence:
+            return []
+        return evidence
+    def _detect_wavelet_watermarks(self, image: np.ndarray) -> List[EvidenceResult]:
+        """
+        Detect watermarks embedded in wavelet domain
+        - Many invisible watermarks modify high-frequency wavelet coefficients
+        - This is a general technique used by multiple AI generators
+        """
+        logger.debug("Checking for wavelet-domain watermarks")
+        try:
+            # Convert to grayscale if needed
+            if (len(image.shape) == 3):
+                gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
+            else:
+                gray = image.copy()
+            # Multi-level wavelet decomposition
+            coeffs2                = pywt.dwt2(gray, 'haar')
+            cA, (cH, cV, cD)       = coeffs2
+            # Analyze statistical properties of high-frequency subbands: Watermarks create anomalous energy distributions
+            # Calculate sub-band energies
+            energy_approx          = np.var(cA)
+            energy_h               = np.var(cH)
+            energy_v               = np.var(cV)
+            energy_d               = np.var(cD)
+            total_hf_energy        = energy_h + energy_v + energy_d
+            total_energy           = energy_approx + total_hf_energy
+            if (total_energy == 0):
+                return []
+            # High-frequency energy ratio
+            hf_ratio               = total_hf_energy / total_energy
+            # Watermarks increase high-frequency energy beyond natural levels:
+            # - Natural images : ~0.05-0.15
+            # - Watermarked    : ~0.20-0.40
+            anomalous_energy       = hf_ratio > WATERMARK_ANALYSIS_PARAMS.HF_ENERGY_RATIO_THRESHOLD
+            # Check for statistical anomalies in coefficient distribution: watermarks create non-Gaussian distributions
+            kurtosis_h             = self._calculate_kurtosis(data = cH)
+            kurtosis_v             = self._calculate_kurtosis(data = cV)
+            kurtosis_d             = self._calculate_kurtosis(data = cD)
+            avg_kurtosis           = (kurtosis_h + kurtosis_v + kurtosis_d) / 3
+            # Natural images: kurtosis ~3-6, Watermarked: often >8
+            anomalous_distribution = avg_kurtosis > WATERMARK_ANALYSIS_PARAMS.KURTOSIS_THRESHOLD
+            # Check for periodic patterns (grid-based embedding)
+            periodicity_score      = self._detect_periodicity(cH, cV, cD)
+            # Combined detection
+            detected               =  (anomalous_energy and anomalous_distribution) or ((periodicity_score > WATERMARK_ANALYSIS_PARAMS.PERIODICITY_THRESHOLD) and anomalous_energy)
+            if detected:
+                confidence = self._calculate_confidence([hf_ratio / WATERMARK_ANALYSIS_PARAMS.HF_ENERGY_RATIO_NORM,
+                                                         min(avg_kurtosis / WATERMARK_ANALYSIS_PARAMS.KURTOSIS_NORM_FACTOR, 1.0),
+                                                         periodicity_score
+                                                       ])
+                is_strong  = (confidence >= WATERMARK_ANALYSIS_PARAMS.STRONG_CONFIDENCE_THRESHOLD)
+                direction  = (EvidenceDirection.AI_GENERATED if is_strong else EvidenceDirection.INDETERMINATE)
+                strength   = (EvidenceStrength.STRONG if is_strong else EvidenceStrength.MODERATE)
+                logger.warning(f"Heuristic watermark pattern detected in wavelet domain: (confidence: {confidence:.2f})")
+                return [EvidenceResult(source     = EvidenceType.WATERMARK,
+                                       finding    = "Statistical patterns consistent with invisible watermarking or steganographic embedding detected",
+                                       direction  = direction,
+                                       strength   = strength,
+                                       confidence = confidence,
+                                       details    = {"method"               : "wavelet_analysis",
+                                                     "note"                 : "Heuristic detection; not cryptographic or vendor watermark verification",
+                                                     "high_frequency_ratio" : float(hf_ratio),
+                                                     "avg_kurtosis"         : float(avg_kurtosis),
+                                                     "periodicity_score"    : float(periodicity_score),
+                                                     "wavelet_type"         : "haar"
+                                                    },
+                                        analyzer  = "watermark_analyzer",
+                                       )
+                       ]
+        except Exception as e:
+            logger.error(f"Error in wavelet watermark detection: {e}")
+        return []
+    def _detect_frequency_watermarks(self, image: np.ndarray) -> List[EvidenceResult]:
+        """
+        Detect watermarks in frequency domain using FFT analysis: Watermarks often add imperceptible patterns in specific frequency bands
+        """
+        logger.debug("Checking for frequency-domain watermarks")
+        try:
+            # Convert to grayscale
+            if (len(image.shape) == 3):
+                gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
+            else:
+                gray = image.copy()
+            # 2D FFT
+            fft                = fftpack.fft2(gray)
+            fft_shift          = fftpack.fftshift(fft)
+            magnitude          = np.abs(fft_shift)
+            # Log scale for better visualization of weak signals
+            magnitude_log      = np.log1p(magnitude)
+            # Analyze frequency spectrum
+            h, w               = magnitude_log.shape
+            center_y, center_x = h // 2, w // 2
+            # Check mid to high frequency bands (common watermark location): Divide spectrum into radial bands
+            band_anomalies     = list()
+            frequency_bands    = [(0.2, 0.4),  # Mid-low frequencies
+                                  (0.4, 0.6),  # Mid frequencies
+                                  (0.6, 0.8),  # Mid-high frequencies
+                                 ]
+            for low, high in frequency_bands:
+                mask        = self._create_radial_mask(magnitude_log.shape, low, high)
+                band_values = magnitude_log[mask]
+                if (band_values.size == 0):
+                    continue
+                # Statistical analysis of band
+                band_mean   = np.mean(band_values)
+                band_std    = np.std(band_values)
+                # Detect anomalous peaks (watermark signatures)
+                threshold   = band_mean + WATERMARK_ANALYSIS_PARAMS.PEAK_STD_MULTIPLIER * band_std
+                peaks       = np.sum(band_values > threshold)
+                peak_ratio  = peaks / band_values.size
+                if (peak_ratio > WATERMARK_ANALYSIS_PARAMS.PEAK_RATIO_THRESHOLD):
+                    # More than 5% anomalous values
+                    band_anomalies.append({'band'       : (low, high),
+                                           'peak_ratio' : float(peak_ratio),
+                                           'peak_count' : int(peaks)
+                                         })
+            # Check for symmetric patterns (common in structured watermarks)
+            symmetry_score = self._check_spectral_symmetry(magnitude = magnitude_log)
+            detected       = ((len(band_anomalies) >= WATERMARK_ANALYSIS_PARAMS.MIN_ANOMALOUS_BANDS) and
+                              (symmetry_score >  WATERMARK_ANALYSIS_PARAMS.SPECTRAL_SYMMETRY_THRESHOLD))
+            if detected:
+                confidence = self._calculate_confidence([min(len(band_anomalies) / 3, 1.0),
+                                                         symmetry_score
+                                                       ])
+                is_strong  = (confidence >= WATERMARK_ANALYSIS_PARAMS.STRONG_CONFIDENCE_THRESHOLD)
+                direction  = (EvidenceDirection.AI_GENERATED if is_strong else EvidenceDirection.INDETERMINATE)
+                strength   = (EvidenceStrength.STRONG if is_strong else EvidenceStrength.MODERATE)
+                logger.warning(f"Heuristic watermark pattern detected in Frequency-domain: (confidence: {confidence:.2f})")
+                return [EvidenceResult(source     = EvidenceType.WATERMARK,
+                                       finding    = "Statistical patterns consistent with invisible watermarking or steganographic embedding detected",
+                                       direction  = direction,
+                                       strength   = strength,
+                                       confidence = confidence,
+                                       details    = {"method"          : "frequency_analysis",
+                                                     "note"            : "Heuristic detection; not cryptographic or vendor watermark verification",
+                                                     "anomalous_bands" : len(band_anomalies),
+                                                     "band_details"    : band_anomalies,
+                                                     "symmetry_score"  : float(symmetry_score),
+                                                    },
+                                       analyzer   = "watermark_analyzer",
+                                      )
+                       ]
+        except Exception as e:
+            logger.error(f"Error in frequency watermark detection: {e}")
+        return []
+    def _detect_lsb_steganography(self, image: np.ndarray) -> List[EvidenceResult]:
+        """
+        Detect steganographic watermarks using LSB (Least Significant Bit) analysis.
+        Many watermarking schemes embed data in the LSB planes.
+        """
+        logger.debug("Checking for LSB steganography")
+        try:
+            # Analyze all color channels
+            if (len(image.shape) == 3):
+                channels = cv2.split(image)
+            else:
+                channels = [image]
+            channel_results = list()
+            for idx, channel in enumerate(channels):
+                # Extract bit planes
+                lsb_plane   = channel & 1         # LSB
+                msb_plane   = (channel >> 7) & 1  # MSB for comparison
+                # Calculate entropy
+                lsb_entropy = self._shannon_entropy(lsb_plane)
+                msb_entropy = self._shannon_entropy(msb_plane)
+                # Chi-square test for randomness
+                chi_square  = self._chi_square_test(lsb_plane)
+                # Run test for detecting non-random patterns
+                runs        = self._runs_test(lsb_plane)
+                channel_results.append({'channel'     : idx,
+                                        'lsb_entropy' : float(lsb_entropy),
+                                        'msb_entropy' : float(msb_entropy),
+                                        'chi_square'  : float(chi_square),
+                                        'runs_score'  : float(runs)
+                                      })
+            # Average results across channels
+            avg_lsb_entropy    = np.mean([r['lsb_entropy'] for r in channel_results])
+            avg_chi_square     = np.mean([r['chi_square'] for r in channel_results])
+            avg_runs           = np.mean([r['runs_score'] for r in channel_results])
+            # Detection criteria:
+            # - High LSB entropy (>0.72) indicates embedded data
+            # - High chi-square indicates non-uniform distribution
+            # - Runs test indicates structured patterns
+            suspicious_entropy = (avg_lsb_entropy > WATERMARK_ANALYSIS_PARAMS.LSB_ENTROPY_THRESHOLD)
+            suspicious_chi     = (avg_chi_square > WATERMARK_ANALYSIS_PARAMS.CHI_SQUARE_THRESHOLD)
+            suspicious_runs    = (avg_runs > WATERMARK_ANALYSIS_PARAMS.RUNS_SCORE_THRESHOLD)
+            detected           = (suspicious_entropy and (suspicious_chi or suspicious_runs))
+            if detected:
+                # Determine strength based on confidence
+                confidence = self._calculate_confidence([min((avg_lsb_entropy - WATERMARK_ANALYSIS_PARAMS.LSB_ENTROPY_NORM_BASE) / WATERMARK_ANALYSIS_PARAMS.LSB_ENTROPY_NORM_RANGE, 1.0),
+                                                         min(avg_chi_square / WATERMARK_ANALYSIS_PARAMS.CHI_SQUARE_NORM_FACTOR, 1.0),
+                                                         avg_runs
+                                                       ])
+                is_strong  = (confidence >= WATERMARK_ANALYSIS_PARAMS.STRONG_CONFIDENCE_THRESHOLD)
+                direction  = (EvidenceDirection.AI_GENERATED if is_strong else EvidenceDirection.INDETERMINATE)
+                strength   = (EvidenceStrength.STRONG if is_strong else EvidenceStrength.MODERATE)
+                logger.warning(f"Heuristic watermark pattern detected in LSB steganography-domain: (confidence: {confidence:.2f})")
+                return [EvidenceResult(source     = EvidenceType.WATERMARK,
+                                       finding    = "Statistical patterns consistent with invisible watermarking or steganographic embedding detected",
+                                       direction  = direction,
+                                       strength   = strength,
+                                       confidence = confidence,
+                                       details    = {"method"          : "lsb_analysis",
+                                                     "note"            : "Heuristic detection; not cryptographic or vendor watermark verification",
+                                                     "avg_lsb_entropy" : float(avg_lsb_entropy),
+                                                     "avg_chi_square"  : float(avg_chi_square),
+                                                     "avg_runs_score"  : float(avg_runs),
+                                                     "avg_msb_entropy" : float(np.mean([r["msb_entropy"] for r in channel_results])),
+                                                     "channel_results" : channel_results
+                                                    },
+                                       analyzer   = "watermark_analyzer",
+                                      )
+                       ]
+        except Exception as e:
+            logger.error(f"Error in LSB steganography detection: {e}")
+        return []
+    def _calculate_kurtosis(self, data: np.ndarray) -> float:
+        """
+        Calculate kurtosis: measure of distribution tailedness
+        """
+        data_flat = data.flatten()
+        mean      = np.mean(data_flat)
+        std       = np.std(data_flat)
+        if (std == 0):
+            return 0.0
+        normalized = (data_flat - mean) / std
+        return float(np.mean(normalized ** 4))
+    def _detect_periodicity(self, *coeffs) -> float:
+        """
+        Detect periodic patterns in coefficients: grid-based watermarks
+        """
+        try:
+            scores = list()
+            for coeff in coeffs:
+                # Apply autocorrelation
+                autocorr         = correlate2d(coeff, coeff, mode = 'same')
+                max_val          = np.max(autocorr)
+                if (max_val == 0):
+                    continue
+                autocorr         = autocorr / max_val
+                # Look for secondary peaks (indicating periodicity)
+                center           = tuple(s // 2 for s in autocorr.shape)
+                # Remove center peak
+                autocorr[center] = 0
+                max_secondary    = np.max(autocorr)
+                scores.append(max_secondary)
+            return float(np.mean(scores))
+        except:
+            return 0.0
+    def _create_radial_mask(self, shape: Tuple[int, int], inner_ratio: float, outer_ratio: float) -> np.ndarray:
+        """
+        Create radial mask for frequency analysis
+        """
+        h, w               = shape
+        center_y, center_x = h // 2, w // 2
+        max_radius         = min(center_y, center_x)
+        y, x               = np.ogrid[:h, :w]
+        distances          = np.sqrt((y - center_y)**2 + (x - center_x)**2)
+        mask               = (distances >= inner_ratio * max_radius) & (distances < outer_ratio * max_radius)
+        return mask
+    def _check_spectral_symmetry(self, magnitude: np.ndarray) -> float:
+        """
+        Check for symmetric patterns in frequency spectrum
+        """
+        try:
+            h, w        = magnitude.shape
+            left_half   = magnitude[:, :w//2]
+            right_half  = np.fliplr(magnitude[:, w//2:])
+            # Ensure same size
+            min_width   = min(left_half.shape[1], right_half.shape[1])
+            left_half   = left_half[:, :min_width]
+            right_half  = right_half[:, :min_width]
+            # Calculate correlation
+            correlation = np.corrcoef(left_half.flatten(), right_half.flatten())[0, 1]
+            return float(abs(correlation)) if not np.isnan(correlation) else 0.0
+        except:
+            return 0.0
+    def _shannon_entropy(self, data: np.ndarray) -> float:
+        """
+        Calculate Shannon entropy
+        """
+        values, counts = np.unique(data.flatten(), return_counts = True)
+        probabilities  = counts / counts.sum()
+        return float(entropy(probabilities, base=2))
+    def _chi_square_test(self, data: np.ndarray) -> float:
+        """
+        Chi-square test for uniformity
+        """
+        values, counts = np.unique(data.flatten(), return_counts = True)
+        expected       = len(data.flatten()) / len(values)
+        chi_square     = np.sum((counts - expected) ** 2 / expected)
+        return float(chi_square)
+    def _runs_test(self, data: np.ndarray) -> float:
+        """
+        Runs test for randomness: normalized score
+        """
+        flat          = data.flatten()
+        median        = np.median(flat)
+        runs          = np.sum(np.abs(np.diff((flat > median).astype(int))))
+        expected_runs = len(flat) / 2
+        if (expected_runs == 0):
+            return 0.0
+        return float(min(runs / expected_runs, 1.0))
+    def _calculate_confidence(self, scores: List[float]) -> float:
+        """
+        Calculate overall confidence from multiple scores
+        """
+        valid_scores = [score for score in scores if ((isinstance(score, (int, float))) and (not np.isnan(score)))]
+        if not valid_scores:
+            return 0.0
+        confidence = np.mean(valid_scores)
+        # Cap at 0.95
+        return float(min(max(confidence, 0.0), WATERMARK_ANALYSIS_PARAMS.CONFIDENCE_CAP))

features/batch_processor.py CHANGED Viewed

@@ -1,20 +1,25 @@
 # Dependencies
 import time
 from typing import List
 from typing import Dict
 from typing import Tuple
 from pathlib import Path
 from typing import Callable
 from utils.logger import get_logger
 from config.settings import settings
 from config.schemas import AnalysisResult
 from concurrent.futures import TimeoutError
 from concurrent.futures import as_completed
 from config.constants import DetectionStatus
 from config.schemas import BatchAnalysisResult
-from metrics.aggregator import MetricsAggregator
 from concurrent.futures import ThreadPoolExecutor
 from features.threshold_manager import ThresholdManager
 # Setup Logging
@@ -37,18 +42,24 @@ class BatchProcessor:
         Initialize Batch Processor
         """
         # Instantiate threshold manager
-        self.threshold_manager = threshold_manager
-        # Initialize aggregator
-        self.aggregator        = MetricsAggregator(threshold_manager = threshold_manager)
         # Fix number of workers
-        self.max_workers       = settings.MAX_WORKERS if settings.PARALLEL_PROCESSING else 1
         logger.info(f"BatchProcessor initialized with max_workers={self.max_workers}, parallel={settings.PARALLEL_PROCESSING}")
-    def process_batch(self, image_files: List[Dict[str, any]], on_progress: Callable[[int, int, str], None] | None = None) -> BatchAnalysisResult:
         """
         Process multiple images with automatic parallel/sequential switching
@@ -231,17 +242,26 @@ class BatchProcessor:
             { AnalysisResult }   : Analysis result or None on error
         """
         try:
-            return self.aggregator.analyze_image(image_path = image_path,
-                                                 filename   = filename,
-                                                 image_size = image_size,
-                                                )
         except Exception as e:
             logger.error(f"Failed to process {filename}: {e}", exc_info = True)
             return None
-    def _calculate_summary(self, results: List[AnalysisResult], total: int) -> Dict[str, int]:
         """
         Calculate summary statistics from results
@@ -256,28 +276,38 @@ class BatchProcessor:
             { dict }         : Summary statistics
         """
         # Calculate processing stats
-        likely_authentic = sum(1 for r in results if (r.status == DetectionStatus.LIKELY_AUTHENTIC))
-        review_required  = sum(1 for r in results if (r.status == DetectionStatus.REVIEW_REQUIRED))
-        processed        = len(results)
-        failed           = total - processed
-        success_rate     = int((processed / total * 100) if (total > 0) else 0)
         # Calculate average scores
-        avg_score        = sum(r.overall_score for r in results) / len(results) if results else 0.0
-        avg_confidence   = sum(r.confidence for r in results) / len(results) if results else 0
-        avg_proc_time    = sum(r.processing_time for r in results) / len(results) if results else 0.0
-        return {"likely_authentic" : likely_authentic,
-                "review_required"  : review_required,
-                "success_rate"     : success_rate,
-                "processed"        : processed,
-                "failed"           : failed,
-                "avg_score"        : round(avg_score, 3),
-                "avg_confidence"   : int(avg_confidence),
-                "avg_proc_time"    : round(avg_proc_time, 2),
-               }
     def _create_empty_batch_result(self) -> BatchAnalysisResult:
         """
@@ -291,9 +321,11 @@ class BatchProcessor:
                                    processed             = 0,
                                    failed                = 0,
                                    results               = [],
-                                   summary               = {"likely_authentic" : 0,
-                                                            "review_required"  : 0,
-                                                            "success_rate"     : 0,
                                                            },
                                    total_processing_time = 0.0,
                                   )

 # Dependencies
 import time
+from typing import Any
 from typing import List
 from typing import Dict
 from typing import Tuple
 from pathlib import Path
 from typing import Callable
+from collections import Counter
 from utils.logger import get_logger
 from config.settings import settings
 from config.schemas import AnalysisResult
+from config.constants import FinalDecision
 from concurrent.futures import TimeoutError
 from concurrent.futures import as_completed
 from config.constants import DetectionStatus
 from config.schemas import BatchAnalysisResult
 from concurrent.futures import ThreadPoolExecutor
+from metrics.signal_aggregator import SignalAggregator
 from features.threshold_manager import ThresholdManager
+from decision_builders.decision_policy import DecisionPolicy
+from evidence_analyzers.evidence_aggregator import EvidenceAggregator
 # Setup Logging
         Initialize Batch Processor
         """
         # Instantiate threshold manager
+        self.threshold_manager   = threshold_manager
+        # Initialize signal aggregators
+        self.aggregator          = SignalAggregator(threshold_manager = threshold_manager)
+        # Initialize evidence-based aggregator
+        self.evidence_aggregator = EvidenceAggregator()
+        # Initialize decision-policy engine
+        self.decision_policy     = DecisionPolicy()
         # Fix number of workers
+        self.max_workers         = settings.MAX_WORKERS if settings.PARALLEL_PROCESSING else 1
         logger.info(f"BatchProcessor initialized with max_workers={self.max_workers}, parallel={settings.PARALLEL_PROCESSING}")
+    def process_batch(self, image_files: List[Dict[str, Any]], on_progress: Callable[[int, int, str], None] | None = None) -> BatchAnalysisResult:
         """
         Process multiple images with automatic parallel/sequential switching
             { AnalysisResult }   : Analysis result or None on error
         """
         try:
+            # Tier-1 Signal
+            analysis              = self.aggregator.analyze_image(image_path = image_path,
+                                                                  filename   = filename,
+                                                                  image_size = image_size,
+                                                                 )
+            # Tier-2 evidence
+            analysis.evidence     = self.evidence_aggregator.analyze(image_path = image_path)
+            # Final decision
+            final_analysis_result = self.decision_policy.apply(analysis = analysis)
+            return final_analysis_result
         except Exception as e:
             logger.error(f"Failed to process {filename}: {e}", exc_info = True)
             return None
+    def _calculate_summary(self, results: List[AnalysisResult], total: int) -> Dict[str, Any]:
         """
         Calculate summary statistics from results
             { dict }         : Summary statistics
         """
         # Calculate processing stats
+        processed             = len(results)
+        failed                = total - processed
+        success_rate          = int((processed / total * 100) if total > 0 else 0)
+        # Count final decisions safely
+        decision_counts       = Counter(result.final_decision.value for result in results)
         # Calculate average scores
+        avg_score             = sum(r.overall_score for r in results) / processed if results else 0.0
+        avg_confidence        = sum(r.confidence for r in results) / processed if results else 0
+        avg_proc_time         = sum(r.processing_time for r in results) / processed if results else 0.0
+        # Final decision distribution
+        decision_distribution = {FinalDecision.CONFIRMED_AI_GENERATED.value : decision_counts.get(FinalDecision.CONFIRMED_AI_GENERATED.value, 0),
+                                 FinalDecision.SUSPICIOUS_AI_LIKELY.value   : decision_counts.get(FinalDecision.SUSPICIOUS_AI_LIKELY.value, 0),
+                                 FinalDecision.AUTHENTIC_BUT_REVIEW.value   : decision_counts.get(FinalDecision.AUTHENTIC_BUT_REVIEW.value, 0),
+                                 FinalDecision.MOSTLY_AUTHENTIC.value       : decision_counts.get(FinalDecision.MOSTLY_AUTHENTIC.value, 0),
+                                }
+        summary               = {"processed"      : processed,
+                                 "failed"         : failed,
+                                 "success_rate"   : success_rate,
+                                 "avg_score"      : round(avg_score, 3),
+                                 "avg_confidence" : int(avg_confidence),
+                                 "avg_proc_time"  : round(avg_proc_time, 2),
+                                }
+        # Merge the decision distribution counts into the summary dict
+        summary.update(decision_distribution)
+        return summary
     def _create_empty_batch_result(self) -> BatchAnalysisResult:
         """
                                    processed             = 0,
                                    failed                = 0,
                                    results               = [],
+                                   summary               = {FinalDecision.CONFIRMED_AI_GENERATED.value : 0,
+                                                            FinalDecision.SUSPICIOUS_AI_LIKELY.value   : 0,
+                                                            FinalDecision.AUTHENTIC_BUT_REVIEW.value   : 0,
+                                                            FinalDecision.MOSTLY_AUTHENTIC.value       : 0,
+                                                            "success_rate"                             : 0,
                                                            },
                                    total_processing_time = 0.0,
                                   )

features/detailed_result_maker.py CHANGED Viewed

@@ -2,11 +2,16 @@
 import pandas as pd
 from typing import Dict
 from typing import List
-from typing import Optional
 from utils.logger import get_logger
 from config.constants import MetricType
 from config.constants import SignalStatus
 from config.schemas import AnalysisResult
 from config.constants import SIGNAL_THRESHOLDS
@@ -16,40 +21,59 @@ logger = get_logger(__name__)
 class DetailedResultMaker:
     """
-    Extract and format detailed analysis results for UI and reporting
     Purpose:
     --------
-    - Extracts all intermediate metrics from MetricResult objects
-    - Formats data for tabular display in UI
-    - Provides rich metadata for PDF/CSV reports
-    - No re-computation - just data extraction and formatting
     Output Formats:
     ---------------
-    1. Structured dictionaries for UI
-    2. Pandas DataFrames for reports
-    3. Hierarchical JSON for API
     """
     def __init__(self, signal_thresholds: dict | None = None):
         """
         Initialize Detailed Result Maker
         """
-        self.metric_display_names = {MetricType.GRADIENT  : "Gradient-Field PCA",
-                                     MetricType.FREQUENCY : "Frequency Domain (FFT)",
-                                     MetricType.NOISE     : "Noise Pattern Analysis",
-                                     MetricType.TEXTURE   : "Texture Statistics",
-                                     MetricType.COLOR     : "Color Distribution",
-                                    }
-        self.signal_thresholds    = signal_thresholds or SIGNAL_THRESHOLDS
         logger.debug("DetailedResultMaker initialized")
     def extract_detailed_results(self, analysis_result: AnalysisResult) -> Dict:
         """
-        Extract all detailed results from AnalysisResult
         Arguments:
         ----------
@@ -57,24 +81,35 @@ class DetailedResultMaker:
         Returns:
         --------
-            { dict }                           : Comprehensive detailed results
         """
         logger.debug(f"Extracting detailed results for: {analysis_result.filename}")
-        detailed = {"filename"         : analysis_result.filename,
-                    "overall_summary"  : self._extract_overall_summary(analysis_result = analysis_result),
-                    "metrics_detailed" : self._extract_all_metrics(analysis_result = analysis_result),
-                    "metadata"         : self._extract_metadata(analysis_result = analysis_result),
                    }
-        logger.debug(f"Extracted {len(detailed['metrics_detailed'])} metric details")
         return detailed
     def create_detailed_table(self, analysis_result: AnalysisResult) -> pd.DataFrame:
         """
-        Create detailed metrics table as DataFrame
         Arguments:
         ----------
@@ -82,58 +117,204 @@ class DetailedResultMaker:
         Returns:
         --------
-            { DataFrame }                      : Tabular detailed results
         """
         rows = list()
         for metric_type, metric_result in analysis_result.metric_results.items():
             display_name = self.metric_display_names.get(metric_type, metric_type.value)
-            row          = {"Metric"      : display_name,
-                            "Score"       : round(metric_result.score, 3),
-                            "Confidence"  : round(metric_result.confidence, 3) if metric_result.confidence is not None else "N/A",
-                            "Status"      : self._score_to_status(score = metric_result.score),
                            }
-            # Add key details from each metric
             details      = self._extract_key_details(metric_type   = metric_type,
                                                      metric_result = metric_result,
                                                     )
-            row.update(details)
-            rows.append(row)
-        # Dump rows into a pandas dataframe for structured result
-        dataframe = pd.DataFrame(data = rows)
-        logger.debug(f"Created detailed table with {len(dataframe)} rows, {len(dataframe.columns)} columns")
         return dataframe
-    def create_report_data(self, analysis_result: AnalysisResult) -> Dict:
         """
-        Create rich data structure for report generation
-        Arguments:
-        ----------
-            analysis_result { AnalysisResult } : Complete analysis result
-        Returns:
-        --------
-            { dict }                           : Report-ready data structure
         """
-        report_data = {"header"             : self._create_report_header(analysis_result = analysis_result),
-                       "overall_assessment" : self._create_overall_assessment(analysis_result = analysis_result),
-                       "metric_breakdown"   : self._create_metric_breakdown(analysis_result = analysis_result),
-                       "forensic_details"   : self._create_forensic_details(analysis_result = analysis_result),
-                       "recommendations"    : self._create_recommendations(analysis_result = analysis_result),
-                      }
-        logger.debug(f"Created report data for: {analysis_result.filename}")
-        return report_data
     def _extract_overall_summary(self, analysis_result: AnalysisResult) -> Dict:
         """
@@ -162,34 +343,54 @@ class DetailedResultMaker:
                              "display_name"   : self.metric_display_names.get(metric_type, metric_type.value),
                              "score"          : round(metric_result.score, 3),
                              "confidence"     : round(metric_result.confidence, 3) if metric_result.confidence is not None else None,
-                             "status"         : self._score_to_status(score = metric_result.score),
                              "details"        : metric_result.details or {},
-                             "interpretation" : self._interpret_metric(metric_type   = metric_type,
-                                                                       metric_result = metric_result,
-                                                                      ),
                             }
             metrics_detailed.append(metric_detail)
         # Sort by score (highest first)
-        metrics_detailed.sort(key = lambda x: x['score'], reverse = True)
         return metrics_detailed
     def _extract_metadata(self, analysis_result: AnalysisResult) -> Dict:
         """
-        Extract processing metadata
         """
-        return {"total_metrics"   : len(analysis_result.metric_results),
-                "flagged_metrics" : sum(1 for s in analysis_result.signals if s.status.value == 'flagged'),
-                "warning_metrics" : sum(1 for s in analysis_result.signals if s.status.value == 'warning'),
-                "passed_metrics"  : sum(1 for s in analysis_result.signals if s.status.value == 'passed'),
-                "avg_confidence"  : self._calculate_avg_confidence(analysis_result = analysis_result),
-               }
-    def _extract_key_details(self, metric_type: MetricType, metric_result) -> Dict:
         """
         Extract key details specific to each metric type
         """
@@ -201,25 +402,26 @@ class DetailedResultMaker:
                    }
         elif (metric_type == MetricType.FREQUENCY):
-            return {"HF_Ratio"        : details.get('hf_ratio', 'N/A'),
-                    "HF_Anomaly"      : details.get('hf_anomaly', 'N/A'),
-                    "Spectrum_Bins"   : details.get('spectrum_bins', 'N/A'),
                    }
         elif (metric_type == MetricType.NOISE):
-            return {"Mean_Noise"      : details.get('mean_noise', 'N/A'),
-                    "CV"              : details.get('cv', 'N/A'),
-                    "Patches_Valid"   : details.get('patches_valid', 'N/A'),
                    }
         elif (metric_type == MetricType.TEXTURE):
-            return {"Smooth_Ratio"    : details.get('smooth_ratio', 'N/A'),
-                    "Contrast_Mean"   : details.get('contrast_mean', 'N/A'),
-                    "Patches_Used"    : details.get('patches_used', 'N/A'),
                    }
         elif (metric_type == MetricType.COLOR):
             sat_stats = details.get('saturation_stats', {})
             return {"Mean_Saturation" : sat_stats.get('mean_saturation', 'N/A'),
                     "High_Sat_Ratio"  : sat_stats.get('high_sat_ratio', 'N/A'),
                    }
@@ -227,42 +429,41 @@ class DetailedResultMaker:
         return {}
-    def _interpret_metric(self, metric_type: MetricType, metric_result) -> str:
         """
         Provide human-readable interpretation of metric result
         """
-        score   = metric_result.score
         details = metric_result.details or {}
         if (metric_type == MetricType.GRADIENT):
             eig_ratio = details.get('eigenvalue_ratio')
             if eig_ratio:
-                return f"Eigenvalue ratio of {eig_ratio:.3f} ({'high' if eig_ratio > 0.85 else 'low'} alignment)"
             return "Gradient structure analysis"
-        elif (metric_type == MetricType.FREQUENCY):
             hf_ratio = details.get('hf_ratio')
             if hf_ratio:
-                return f"High-freq ratio: {hf_ratio:.3f} ({'elevated' if hf_ratio > 0.35 else 'low' if hf_ratio < 0.08 else 'normal'})"
             return "Frequency spectrum analysis"
         elif (metric_type == MetricType.NOISE):
             mean_noise = details.get('mean_noise')
             if mean_noise:
-                return f"Mean noise: {mean_noise:.2f} ({'low' if mean_noise < 1.5 else 'normal'})"
             return "Noise pattern analysis"
         elif (metric_type == MetricType.TEXTURE):
             smooth_ratio = details.get('smooth_ratio')
             if smooth_ratio is not None:
-                return f"Smooth regions: {smooth_ratio:.1%} ({'excessive' if smooth_ratio > 0.4 else 'normal'})"
             return "Texture variation analysis"
@@ -271,161 +472,23 @@ class DetailedResultMaker:
             mean_sat  = sat_stats.get('mean_saturation')
             if mean_sat:
-                return f"Mean saturation: {mean_sat:.2f} ({'high' if mean_sat > 0.65 else 'normal'})"
             return "Color distribution analysis"
         return "Analysis complete"
-    def _create_report_header(self, analysis_result: AnalysisResult) -> Dict:
-        """
-        Create report header section
-        """
-        return {"filename"        : analysis_result.filename,
-                "analysis_date"   : analysis_result.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
-                "image_size"      : f"{analysis_result.image_size[0]} × {analysis_result.image_size[1]} pixels",
-                "processing_time" : f"{analysis_result.processing_time:.2f} seconds",
-               }
-    def _create_overall_assessment(self, analysis_result: AnalysisResult) -> Dict:
-        """
-        Create overall assessment section
-        """
-        return {"status"       : analysis_result.status.value,
-                "score"        : round(analysis_result.overall_score * 100, 1),
-                "confidence"   : analysis_result.confidence,
-                "verdict"      : "REVIEW REQUIRED" if analysis_result.status.value == "REVIEW_REQUIRED" else "LIKELY AUTHENTIC",
-                "risk_level"   : self._calculate_risk_level(score = analysis_result.overall_score),
-               }
-    def _create_metric_breakdown(self, analysis_result: AnalysisResult) -> List[Dict]:
         """
-        Create detailed metric breakdown for report
-        """
-        breakdown = list()
-        for signal in analysis_result.signals:
-            metric_result = analysis_result.metric_results.get(signal.metric_type)
-            item          = {"metric"       : signal.name,
-                             "score"        : f"{signal.score * 100:.1f}%",
-                             "status"       : signal.status.value.upper(),
-                             "confidence"   : f"{metric_result.confidence * 100:.1f}%" if metric_result.confidence else "N/A",
-                             "explanation"  : signal.explanation,
-                             "key_findings" : self.extract_key_findings(metric_type   = signal.metric_type,
-                                                                        metric_result = metric_result,
-                                                                       ),
-                            }
-            breakdown.append(item)
-        return breakdown
-    def _create_forensic_details(self, analysis_result: AnalysisResult) -> Dict:
-        """
-        Create forensic details section
-        """
-        forensic = dict()
-        for metric_type, metric_result in analysis_result.metric_results.items():
-            metric_name           = self.metric_display_names.get(metric_type, metric_type.value)
-            forensic[metric_name] = metric_result.details or {"note": "No detailed forensics available"}
-        return forensic
-    def _create_recommendations(self, analysis_result: AnalysisResult) -> Dict:
-        """
-        Create recommendations section
-        """
-        score = analysis_result.overall_score
-        if (score >= 0.85):
-            return {"action"      : "Immediate manual verification required",
-                    "priority"    : "HIGH",
-                    "next_steps"  : ["Forensic analysis", "Reverse image search", "Metadata inspection", "Expert review"],
-                    "confidence"  : "Very high likelihood of AI generation",
-                   }
-        elif (score >= 0.70):
-            return {"action"      : "Manual verification recommended",
-                    "priority"    : "MEDIUM",
-                    "next_steps"  : ["Visual inspection", "Compare with authentic samples", "Check source provenance"],
-                    "confidence"  : "High likelihood of AI generation",
-                   }
-        elif (score >= 0.50):
-            return {"action"      : "Optional review suggested",
-                    "priority"    : "LOW",
-                    "next_steps"  : ["May be edited photo", "Verify image source", "Check for inconsistencies"],
-                    "confidence"  : "Moderate indicators present",
-                   }
-        else:
-            return {"action"      : "No immediate action required",
-                    "priority"    : "NONE",
-                    "next_steps"  : ["Proceed with normal workflow"],
-                    "confidence"  : "Low likelihood of AI generation",
-                   }
-    def _score_to_status(self, score: float) -> str:
-        """
-        Convert score to status label
-        """
-        if (score >= self.signal_thresholds[SignalStatus.FLAGGED]):
-            return "FLAGGED"
-        elif (score >= self.signal_thresholds[SignalStatus.WARNING]):
-            return "WARNING"
-        else:
-            return "PASSED"
-    def _calculate_avg_confidence(self, analysis_result: AnalysisResult) -> float:
-        """
-        Calculate average confidence across all metrics
-        """
-        confidences = [mr.confidence for mr in analysis_result.metric_results.values() if mr.confidence is not None]
-        return round(sum(confidences) / len(confidences), 3) if confidences else 0.0
-    def _calculate_risk_level(self, score: float) -> str:
-        """
-        Calculate risk level from score
-        """
-        if (score >= 0.85):
-            return "CRITICAL"
-        elif (score >= 0.70):
-            return "HIGH"
-        elif (score >= 0.50):
-            return "MEDIUM"
-        else:
-            return "LOW"
-    def extract_key_findings(self, metric_type: MetricType, metric_result) -> List[str]:
-        """
-        Extract human-readable key forensic findings for a given metric used by:
-        - Detailed UI views
-        - CSV reports
-        - JSON reports
         """
         findings = list()
         details  = metric_result.details or {}
         if (metric_type == MetricType.GRADIENT):
             eig_ratio = details.get('eigenvalue_ratio')
             if eig_ratio:
                 findings.append(f"Eigenvalue ratio: {eig_ratio:.3f}")
@@ -436,17 +499,18 @@ class DetailedResultMaker:
         elif (metric_type == MetricType.FREQUENCY):
             hf_ratio = details.get('hf_ratio')
             if hf_ratio:
                 findings.append(f"High-frequency ratio: {hf_ratio:.3f}")
             roughness = details.get('roughness')
             if roughness:
                 findings.append(f"Spectral roughness: {roughness:.3f}")
         elif (metric_type == MetricType.NOISE):
             mean_noise = details.get('mean_noise')
             if mean_noise:
                 findings.append(f"Mean noise level: {mean_noise:.2f}")
@@ -457,7 +521,7 @@ class DetailedResultMaker:
         elif (metric_type == MetricType.TEXTURE):
             smooth_ratio = details.get('smooth_ratio')
             if smooth_ratio:
                 findings.append(f"Smooth patches: {smooth_ratio:.1%}")
@@ -478,4 +542,27 @@ class DetailedResultMaker:
             if high_sat:
                 findings.append(f"High saturation pixels: {high_sat:.1%}")
-        return findings if findings else ["Analysis complete"]

 import pandas as pd
 from typing import Dict
 from typing import List
 from utils.logger import get_logger
 from config.constants import MetricType
+from config.schemas import MetricResult
+from config.constants import EvidenceType
 from config.constants import SignalStatus
 from config.schemas import AnalysisResult
+from config.schemas import EvidenceResult
+from config.constants import FinalDecision
+from config.constants import EvidenceStrength
+from config.constants import EvidenceDirection
 from config.constants import SIGNAL_THRESHOLDS
 class DetailedResultMaker:
     """
+    Extract and format detailed analysis results for reporting
     Purpose:
     --------
+    - Extracts ALL data from AnalysisResult (metrics + evidence + decision)
+    - Formats data into unified dictionaries/DataFrames
+    - Provides structured data for reporters (JSON/CSV/PDF)
+    - NO re-computation - pure data extraction and formatting
+    Data Sources:
+    -------------
+    1. Final Decision (from DecisionPolicy)
+    2. Evidence Results (from EvidenceAggregator)
+    3. Metric Results (from SignalAggregator)
+    4. Metadata (timestamps, processing info)
     Output Formats:
     ---------------
+    1. Structured dictionaries for reporters
+    2. Pandas DataFrames for tabular reports
+    3. Hierarchical JSON-ready structures
     """
     def __init__(self, signal_thresholds: dict | None = None):
         """
         Initialize Detailed Result Maker
         """
+        self.metric_display_names   = {MetricType.GRADIENT  : "Gradient-Field PCA",
+                                       MetricType.FREQUENCY : "Frequency Domain (FFT)",
+                                       MetricType.NOISE     : "Noise Pattern Analysis",
+                                       MetricType.TEXTURE   : "Texture Statistics",
+                                       MetricType.COLOR     : "Color Distribution",
+                                      }
+        self.evidence_display_names = {EvidenceType.EXIF      : "EXIF Metadata",
+                                       EvidenceType.WATERMARK : "Watermark Detection",
+                                      }
+        self.decision_labels        = {FinalDecision.CONFIRMED_AI_GENERATED : "🔴 CONFIRMED AI GENERATED",
+                                       FinalDecision.SUSPICIOUS_AI_LIKELY   : "🟠 SUSPICIOUS - AI LIKELY",
+                                       FinalDecision.AUTHENTIC_BUT_REVIEW   : "🟡 AUTHENTIC BUT REVIEW",
+                                       FinalDecision.MOSTLY_AUTHENTIC       : "🟢 MOSTLY AUTHENTIC",
+                                      }
+        self.signal_thresholds      = signal_thresholds or SIGNAL_THRESHOLDS
         logger.debug("DetailedResultMaker initialized")
     def extract_detailed_results(self, analysis_result: AnalysisResult) -> Dict:
         """
+        Extract ALL detailed results from AnalysisResult into unified dictionary
+        This is the MAIN extraction method - reporters call this!
         Arguments:
         ----------
         Returns:
         --------
+                   { dict }                    : Comprehensive detailed results containing:
+                                                 - final_decision (from DecisionPolicy)
+                                                 - evidence_summary (from EvidenceAggregator)
+                                                 - evidence_detailed (all evidence items)
+                                                 - overall_summary (basic info)
+                                                 - metrics_detailed (all metric results)
+                                                 - metadata (stats and counts)
         """
         logger.debug(f"Extracting detailed results for: {analysis_result.filename}")
+        detailed = {"filename"          : analysis_result.filename,
+                    "final_decision"    : self._extract_final_decision(analysis_result),
+                    "evidence_summary"  : self._extract_evidence_summary(analysis_result),
+                    "evidence_detailed" : self._extract_all_evidence(analysis_result),
+                    "overall_summary"   : self._extract_overall_summary(analysis_result),
+                    "metrics_detailed"  : self._extract_all_metrics(analysis_result),
+                    "metadata"          : self._extract_metadata(analysis_result),
                    }
+        logger.debug(f"Extracted {len(detailed['evidence_detailed'])} evidence items, {len(detailed['metrics_detailed'])} metric details")
         return detailed
     def create_detailed_table(self, analysis_result: AnalysisResult) -> pd.DataFrame:
         """
+        Create detailed table as DataFrame (for CSV export)
+        Includes: Decision + Evidence + Metrics in hierarchical order
         Arguments:
         ----------
         Returns:
         --------
+                      { DataFrame }            : Tabular detailed results
         """
         rows = list()
+        # Final Decision (if available)
+        if analysis_result.final_decision:
+            decision_row = {"Type"        : "FINAL DECISION",
+                            "Name"        : self.decision_labels.get(analysis_result.final_decision, analysis_result.final_decision.value),
+                            "Score"       : "N/A",
+                            "Confidence"  : f"{analysis_result.confidence}%",
+                            "Status"      : analysis_result.final_decision.value.upper(),
+                            "Explanation" : analysis_result.decision_explanation or "See evidence and metrics below",
+                           }
+            rows.append(decision_row)
+        # Evidence (if any)
+        if analysis_result.evidence:
+            for evidence in analysis_result.evidence:
+                source_key   = evidence.source.value if hasattr(evidence.source, "value") else str(evidence.source)
+                evidence_row = {"Type"        : "EVIDENCE",
+                                "Name"        : f"{self.evidence_display_names.get(source_key, source_key)} - {evidence.analyzer}",
+                                "Score"       : f"{evidence.confidence:.2f}" if evidence.confidence is not None else "N/A",
+                                "Confidence"  : f"{int(evidence.confidence * 100)}%" if evidence.confidence is not None else "N/A",
+                                "Status"      : self._evidence_to_status_label(evidence),
+                                "Explanation" : evidence.finding,
+                               }
+                rows.append(evidence_row)
+        # Metrics
         for metric_type, metric_result in analysis_result.metric_results.items():
             display_name = self.metric_display_names.get(metric_type, metric_type.value)
+            metric_row   = {"Type"       : "METRIC",
+                            "Name"       : display_name,
+                            "Score"      : round(metric_result.score, 3),
+                            "Confidence" : f"{round(metric_result.confidence * 100)}%" if metric_result.confidence is not None else "N/A",
+                            "Status"     : self._score_to_status(metric_result.score),
                            }
+            # Add key details
             details      = self._extract_key_details(metric_type   = metric_type,
                                                      metric_result = metric_result,
                                                     )
+            metric_row.update(details)
+            rows.append(metric_row)
+        dataframe = pd.DataFrame(data=rows)
+        logger.debug(f"Created detailed table with {len(dataframe)} rows")
         return dataframe
+    def _extract_final_decision(self, analysis_result: AnalysisResult) -> Dict:
+        """
+        Extract final decision information from DecisionPolicy
         """
+        if not analysis_result.final_decision:
+            return {"decision"    : None,
+                    "label"       : "⚪ No Decision",
+                    "explanation" : "Decision policy not applied",
+                    "confidence"  : 0,
+                    "based_on"    : "Unknown",
+                   }
+        final_decision = {"decision"    : analysis_result.final_decision.value,
+                          "label"       : self.decision_labels.get(analysis_result.final_decision, analysis_result.final_decision.value),
+                          "explanation" : analysis_result.decision_explanation or "No explanation provided",
+                          "confidence"  : analysis_result.confidence,
+                          "based_on"    : self._determine_decision_basis(analysis_result),
+                         }
+        return final_decision
+    def _determine_decision_basis(self, analysis_result: AnalysisResult) -> str:
+        """
+        Determine what the decision was based on
+        """
+        if not analysis_result.evidence:
+            return "Statistical metrics only"
+        # Check for strong evidence
+        strong_evidence = [item for item in analysis_result.evidence if item.strength in (EvidenceStrength.STRONG, EvidenceStrength.CONCLUSIVE)]
+        if strong_evidence:
+            evidence_types = {item.source.value if hasattr(item.source, "value") else str(item.source) for item in strong_evidence}
+            return f"Strong evidence (Tier 2): {', '.join(evidence_types)}"
+        return "Combination of evidence and metrics (Tier 2 + Tier 1)"
+    def _extract_evidence_summary(self, analysis_result: AnalysisResult) -> Dict:
+        """
+        Extract high-level evidence summary
+        """
+        if not analysis_result.evidence:
+            return {"total_evidence"      : 0,
+                    "ai_evidence_count"   : 0,
+                    "auth_evidence_count" : 0,
+                    "strongest_evidence"  : None,
+                   }
+        ai_evidence   = [item for item in analysis_result.evidence if (item.direction == EvidenceDirection.AI_GENERATED)]
+        auth_evidence = [item for item in analysis_result.evidence if (item.direction == EvidenceDirection.AUTHENTIC)]
+        # Find strongest evidence
+        strongest     = max(analysis_result.evidence,
+                            key = lambda item: (self._strength_to_rank(item.strength), item.confidence or 0.0)
+                           )
+        return {"total_evidence"      : len(analysis_result.evidence),
+                "ai_evidence_count"   : len(ai_evidence),
+                "auth_evidence_count" : len(auth_evidence),
+                "strongest_evidence"  : {"source"     : strongest.source.value,
+                                         "direction"  : strongest.direction.value,
+                                         "strength"   : strongest.strength.value,
+                                         "finding"    : strongest.finding,
+                                         "confidence" : strongest.confidence,
+                                        },
+               }
+    def _extract_all_evidence(self, analysis_result: AnalysisResult) -> List[Dict]:
+        """
+        Extract detailed information for all evidence items
         """
+        if not analysis_result.evidence:
+            return []
+        evidence_detailed = list()
+        for evidence in analysis_result.evidence:
+            timestamp         = getattr(evidence, "timestamp", None)
+            evidence_detail = {"source"       : evidence.source.value,
+                               "display_name" : self.evidence_display_names.get(evidence.source.value if hasattr(evidence.source, "value") else str(evidence.source), str(evidence.source)),
+                               "finding"      : evidence.finding,
+                               "direction"    : evidence.direction.value,
+                               "strength"     : evidence.strength.value,
+                               "confidence"   : evidence.confidence,
+                               "analyzer"     : evidence.analyzer,
+                               "details"      : evidence.details,
+                               "status_label" : self._evidence_to_status_label(evidence),
+                               "timestamp"    : timestamp.isoformat() if timestamp else None,
+                              }
+            evidence_detailed.append(evidence_detail)
+        return evidence_detailed
+    def _evidence_to_status_label(self, evidence: EvidenceResult) -> str:
+        """
+        Convert evidence to human-readable status label
+        """
+        if (evidence.direction == EvidenceDirection.AI_GENERATED):
+            if (evidence.strength == EvidenceStrength.CONCLUSIVE):
+                return "🔴 CONCLUSIVE AI"
+            elif (evidence.strength == EvidenceStrength.STRONG):
+                return "🔴 STRONG AI"
+            elif (evidence.strength == EvidenceStrength.MODERATE):
+                return "🟠 MODERATE AI"
+            else:
+                return "🟡 WEAK AI"
+        elif (evidence.direction == EvidenceDirection.AUTHENTIC):
+            if (evidence.strength in (EvidenceStrength.STRONG, EvidenceStrength.CONCLUSIVE)):
+                return "🟢 STRONG AUTHENTIC"
+            elif (evidence.strength == EvidenceStrength.MODERATE):
+                return "🟢 MODERATE AUTHENTIC"
+            else:
+                return "🟡 WEAK AUTHENTIC"
+        else:
+            # INDETERMINATE
+            return "⚪ INDETERMINATE"
+    def _strength_to_rank(self, strength: EvidenceStrength) -> int:
+        """
+        Convert strength to numeric rank for sorting
+        """
+        return {EvidenceStrength.CONCLUSIVE : 4,
+                EvidenceStrength.STRONG     : 3,
+                EvidenceStrength.MODERATE   : 2,
+                EvidenceStrength.WEAK       : 1,
+               }.get(strength, 0)
     def _extract_overall_summary(self, analysis_result: AnalysisResult) -> Dict:
         """
                              "display_name"   : self.metric_display_names.get(metric_type, metric_type.value),
                              "score"          : round(metric_result.score, 3),
                              "confidence"     : round(metric_result.confidence, 3) if metric_result.confidence is not None else None,
+                             "status"         : self._score_to_status(metric_result.score),
                              "details"        : metric_result.details or {},
+                             "interpretation" : self._interpret_metric(metric_type, metric_result),
+                             "key_findings"   : self.extract_key_findings(metric_type, metric_result),
                             }
             metrics_detailed.append(metric_detail)
         # Sort by score (highest first)
+        metrics_detailed.sort(key     = lambda x: x['score'],
+                              reverse = True,
+                             )
         return metrics_detailed
     def _extract_metadata(self, analysis_result: AnalysisResult) -> Dict:
         """
+        Extract processing metadata and statistics
         """
+        metadata = {"total_metrics"   : len(analysis_result.metric_results),
+                    "flagged_metrics" : sum(1 for s in analysis_result.signals if s.status == SignalStatus.FLAGGED),
+                    "warning_metrics" : sum(1 for s in analysis_result.signals if s.status == SignalStatus.WARNING),
+                    "passed_metrics"  : sum(1 for s in analysis_result.signals if s.status == SignalStatus.PASSED),
+                    "avg_confidence"  : self._calculate_avg_confidence(analysis_result),
+                   }
+        # Evidence stats (if available)
+        if analysis_result.evidence:
+            metadata["total_evidence"]  = len(analysis_result.evidence)
+            metadata["ai_evidence"]     = sum(1 for e in analysis_result.evidence if e.direction == EvidenceDirection.AI_GENERATED)
+            metadata["auth_evidence"]   = sum(1 for e in analysis_result.evidence if e.direction == EvidenceDirection.AUTHENTIC)
+            metadata["strong_evidence"] = sum(1 for e in analysis_result.evidence if e.strength in (EvidenceStrength.STRONG, EvidenceStrength.CONCLUSIVE))
+        else:
+            metadata["total_evidence"]  = 0
+            metadata["ai_evidence"]     = 0
+            metadata["auth_evidence"]   = 0
+            metadata["strong_evidence"] = 0
+        # Decision info
+        metadata["has_final_decision"] = analysis_result.final_decision is not None
+        metadata["decision_value"]     = analysis_result.final_decision.value if analysis_result.final_decision else None
+        return metadata
+    def _extract_key_details(self, metric_type: MetricType, metric_result: MetricResult) -> Dict:
         """
         Extract key details specific to each metric type
         """
                    }
         elif (metric_type == MetricType.FREQUENCY):
+            return {"HF_Ratio"      : details.get('hf_ratio', 'N/A'),
+                    "HF_Anomaly"    : details.get('hf_anomaly', 'N/A'),
+                    "Spectrum_Bins" : details.get('spectrum_bins', 'N/A'),
                    }
         elif (metric_type == MetricType.NOISE):
+            return {"Mean_Noise"    : details.get('mean_noise', 'N/A'),
+                    "CV"            : details.get('cv', 'N/A'),
+                    "Patches_Valid" : details.get('patches_valid', 'N/A'),
                    }
         elif (metric_type == MetricType.TEXTURE):
+            return {"Smooth_Ratio"  : details.get('smooth_ratio', 'N/A'),
+                    "Contrast_Mean" : details.get('contrast_mean', 'N/A'),
+                    "Patches_Used"  : details.get('patches_used', 'N/A'),
                    }
         elif (metric_type == MetricType.COLOR):
             sat_stats = details.get('saturation_stats', {})
             return {"Mean_Saturation" : sat_stats.get('mean_saturation', 'N/A'),
                     "High_Sat_Ratio"  : sat_stats.get('high_sat_ratio', 'N/A'),
                    }
         return {}
+    def _interpret_metric(self, metric_type: MetricType, metric_result: MetricResult) -> str:
         """
         Provide human-readable interpretation of metric result
         """
         details = metric_result.details or {}
         if (metric_type == MetricType.GRADIENT):
             eig_ratio = details.get('eigenvalue_ratio')
             if eig_ratio:
+                return f"Eigenvalue ratio of {eig_ratio:.3f} ({'high' if (eig_ratio > 0.85) else 'low'} alignment)"
             return "Gradient structure analysis"
+        elif( metric_type == MetricType.FREQUENCY):
             hf_ratio = details.get('hf_ratio')
             if hf_ratio:
+                return f"High-freq ratio: {hf_ratio:.3f} ({'elevated' if (hf_ratio > 0.35) else 'low' if (hf_ratio < 0.08) else 'normal'})"
             return "Frequency spectrum analysis"
         elif (metric_type == MetricType.NOISE):
             mean_noise = details.get('mean_noise')
             if mean_noise:
+                return f"Mean noise: {mean_noise:.2f} ({'low' if (mean_noise < 1.5) else 'normal'})"
             return "Noise pattern analysis"
         elif (metric_type == MetricType.TEXTURE):
             smooth_ratio = details.get('smooth_ratio')
             if smooth_ratio is not None:
+                return f"Smooth regions: {smooth_ratio:.1%} ({'excessive' if (smooth_ratio > 0.4) else 'normal'})"
             return "Texture variation analysis"
             mean_sat  = sat_stats.get('mean_saturation')
             if mean_sat:
+                return f"Mean saturation: {mean_sat:.2f} ({'high' if (mean_sat > 0.65) else 'normal'})"
             return "Color distribution analysis"
         return "Analysis complete"
+    def extract_key_findings(self, metric_type: MetricType, metric_result: MetricResult) -> List[str]:
         """
+        Extract human-readable key forensic findings for reporters
         """
         findings = list()
         details  = metric_result.details or {}
         if (metric_type == MetricType.GRADIENT):
             eig_ratio = details.get('eigenvalue_ratio')
             if eig_ratio:
                 findings.append(f"Eigenvalue ratio: {eig_ratio:.3f}")
         elif (metric_type == MetricType.FREQUENCY):
             hf_ratio = details.get('hf_ratio')
             if hf_ratio:
                 findings.append(f"High-frequency ratio: {hf_ratio:.3f}")
             roughness = details.get('roughness')
             if roughness:
                 findings.append(f"Spectral roughness: {roughness:.3f}")
         elif (metric_type == MetricType.NOISE):
             mean_noise = details.get('mean_noise')
             if mean_noise:
                 findings.append(f"Mean noise level: {mean_noise:.2f}")
         elif (metric_type == MetricType.TEXTURE):
             smooth_ratio = details.get('smooth_ratio')
             if smooth_ratio:
                 findings.append(f"Smooth patches: {smooth_ratio:.1%}")
             if high_sat:
                 findings.append(f"High saturation pixels: {high_sat:.1%}")
+        return findings if findings else ["Analysis complete"]
+    def _score_to_status(self, score: float) -> str:
+        """
+        Convert score to status label
+        """
+        if (score >= self.signal_thresholds[SignalStatus.FLAGGED]):
+            return "FLAGGED"
+        elif (score >= self.signal_thresholds[SignalStatus.WARNING]):
+            return "WARNING"
+        else:
+            return "PASSED"
+    def _calculate_avg_confidence(self, analysis_result: AnalysisResult) -> float:
+        """
+        Calculate average confidence across all metrics
+        """
+        confidences = [mr.confidence for mr in analysis_result.metric_results.values() if mr.confidence is not None]
+        return round(sum(confidences) / len(confidences), 3) if confidences else 0.0

metrics/color_analyzer.py CHANGED Viewed

@@ -110,34 +110,38 @@ class ColorAnalyzer:
         --------
             { np.ndarray }     : HSV image (H in [0, 360], S and V in [0, 1])
         """
-        r, g, b = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
-        maxc    = np.maximum(np.maximum(r, g), b)
-        minc    = np.minimum(np.minimum(r, g), b)
-        delta   = maxc - minc
         # Value
-        v       = maxc
         # Saturation
-        s       = np.where(maxc != 0, delta / maxc, 0)
         # Hue
-        h       = np.zeros_like(maxc)
         # Red is max
-        mask    = (maxc == r) & (delta != 0)
-        h[mask] = 60 * (((g[mask] - b[mask]) / delta[mask]) % 6)
         # Green is max
-        mask    = (maxc == g) & (delta != 0)
-        h[mask] = 60 * (((b[mask] - r[mask]) / delta[mask]) + 2)
         # Blue is max
-        mask    = (maxc == b) & (delta != 0)
-        h[mask] = 60 * (((r[mask] - g[mask]) / delta[mask]) + 4)
-        hsv     = np.stack([h, s, v], axis = 2)
         return hsv

         --------
             { np.ndarray }     : HSV image (H in [0, 360], S and V in [0, 1])
         """
+        r, g, b         = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
+        maxc            = np.maximum(np.maximum(r, g), b)
+        minc            = np.minimum(np.minimum(r, g), b)
+        delta           = maxc - minc
         # Value
+        v               = maxc
         # Saturation
+        s               = np.zeros_like(maxc, dtype = np.float32)
+        nonzero_mask    = maxc > 0
+        s[nonzero_mask] = delta[nonzero_mask] / maxc[nonzero_mask]
         # Hue
+        h               = np.zeros_like(maxc)
         # Red is max
+        mask            = (maxc == r) & (delta > 0)
+        h[mask]         = 60.0 * (((g[mask] - b[mask]) / delta[mask]) % 6.0)
         # Green is max
+        mask            = (maxc == g) & (delta > 0)
+        h[mask]         = 60.0 * (((b[mask] - r[mask]) / delta[mask]) + 2)
         # Blue is max
+        mask            = (maxc == b) & (delta > 0)
+        h[mask]         = 60.0 * (((r[mask] - g[mask]) / delta[mask]) + 4)
+        hsv             = np.stack([h, s, v], axis = 2)
+        hsv             = np.nan_to_num(hsv, nan = 0.0, posinf = 0.0, neginf = 0.0)
         return hsv

metrics/{aggregator.py → signal_aggregator.py} RENAMED Viewed

@@ -1,4 +1,5 @@
 # Dependencies
 import time
 import numpy as np
 from typing import List
@@ -11,18 +12,22 @@ from config.constants import MetricType
 from config.constants import SignalStatus
 from config.schemas import AnalysisResult
 from config.schemas import DetectionSignal
 from config.constants import DetectionStatus
 from config.constants import SIGNAL_THRESHOLDS
 from utils.image_processor import ImageProcessor
 from config.constants import METRIC_EXPLANATIONS
 from metrics.noise_analyzer import NoiseAnalyzer
 from metrics.color_analyzer import ColorAnalyzer
 from metrics.texture_analyzer import TextureAnalyzer
 from features.threshold_manager import ThresholdManager
 from config.constants import IMAGE_RESIZE_MAX_DIMENSION
 from metrics.frequency_analyzer import FrequencyAnalyzer
 from metrics.gradient_field_pca import GradientFieldPCADetector
 # Suppress NumPy warning
 np.seterr(divide  = 'ignore',
           invalid = 'ignore',
@@ -33,12 +38,12 @@ np.seterr(divide  = 'ignore',
 logger = get_logger(__name__)
-class MetricsAggregator:
     """
-    Main detector that orchestrates all detection methods
-    Combines multiple unsupervised metrics:
-    ----------------------------------------
     1. Gradient-Field PCA
     2. Frequency Domain Analysis (FFT)
     3. Noise Pattern Analysis
@@ -74,6 +79,11 @@ class MetricsAggregator:
         # Get metric weights either from runtime UI or default to settings
         self.weights                    = (self.threshold_manager.get_metric_weights() if self.threshold_manager else settings.get_metric_weights())
         logger.info(f"Metric weights: {self.weights}")
@@ -117,6 +127,11 @@ class MetricsAggregator:
             # Determine status
             status          = self._determine_status(overall_score = overall_score)
             # Calculate processing time
             processing_time = time.time() - start_time
@@ -125,7 +140,7 @@ class MetricsAggregator:
             result          = AnalysisResult(filename        = filename,
                                              overall_score   = overall_score,
                                              status          = status,
-                                             confidence      = int(overall_score * 100),
                                              signals         = signals,
                                              metric_results  = metric_results,
                                              processing_time = processing_time,
@@ -154,18 +169,26 @@ class MetricsAggregator:
                   { dict }       : Dictionary mapping MetricType to MetricResult
         """
         metric_results = dict()
-        # Run eaach detector one by one
         for metric_type, (detector_name, detector) in self.detector_registry.items():
             try:
-                result                      = detector.detect(image = image)
                 result.metric_type          = metric_type
                 metric_results[metric_type] = result
                 logger.debug(f"{detector_name} | {metric_type.value} | score={result.score:.3f} | confidence={result.confidence:.3f}")
             except Exception as e:
-                logger.error(f"{detector.__class__.__name__} failed: {e}")
                 # Same Failure Score by all metrics with same confidence
                 metric_results[metric_type] = MetricResult(metric_type = metric_type,
@@ -285,4 +308,39 @@ class MetricsAggregator:
             return DetectionStatus.REVIEW_REQUIRED
         else:
-            return DetectionStatus.LIKELY_AUTHENTIC

 # Dependencies
+import os
 import time
 import numpy as np
 from typing import List
 from config.constants import SignalStatus
 from config.schemas import AnalysisResult
 from config.schemas import DetectionSignal
+from concurrent.futures import as_completed
 from config.constants import DetectionStatus
 from config.constants import SIGNAL_THRESHOLDS
 from utils.image_processor import ImageProcessor
 from config.constants import METRIC_EXPLANATIONS
 from metrics.noise_analyzer import NoiseAnalyzer
 from metrics.color_analyzer import ColorAnalyzer
+from concurrent.futures import ThreadPoolExecutor
 from metrics.texture_analyzer import TextureAnalyzer
+from config.constants import SIGNAL_CONFIDENCE_PARAMS
 from features.threshold_manager import ThresholdManager
 from config.constants import IMAGE_RESIZE_MAX_DIMENSION
 from metrics.frequency_analyzer import FrequencyAnalyzer
 from metrics.gradient_field_pca import GradientFieldPCADetector
 # Suppress NumPy warning
 np.seterr(divide  = 'ignore',
           invalid = 'ignore',
 logger = get_logger(__name__)
+class SignalAggregator:
     """
+    Main detector that orchestrates all detection signals
+    Combines multiple unsupervised metric signals:
+    ----------------------------------------------
     1. Gradient-Field PCA
     2. Frequency Domain Analysis (FFT)
     3. Noise Pattern Analysis
         # Get metric weights either from runtime UI or default to settings
         self.weights                    = (self.threshold_manager.get_metric_weights() if self.threshold_manager else settings.get_metric_weights())
+        # Initialize shared ThreadPoolExecutor (CPU-safe)
+        max_workers                     = min(settings.METRIC_WORKERS or len(self.detector_registry), os.cpu_count() or 4)
+        self.executor                   = ThreadPoolExecutor(max_workers = max_workers)
         logger.info(f"Metric weights: {self.weights}")
             # Determine status
             status          = self._determine_status(overall_score = overall_score)
+            # Calculate confidence
+            confidence      = self._calculate_confidence(metric_results = metric_results,
+                                                         overall_score  = overall_score,
+                                                        )
             # Calculate processing time
             processing_time = time.time() - start_time
             result          = AnalysisResult(filename        = filename,
                                              overall_score   = overall_score,
                                              status          = status,
+                                             confidence      = confidence,
                                              signals         = signals,
                                              metric_results  = metric_results,
                                              processing_time = processing_time,
                   { dict }       : Dictionary mapping MetricType to MetricResult
         """
         metric_results = dict()
+        futures        = dict()
+        # Submit all detectors
         for metric_type, (detector_name, detector) in self.detector_registry.items():
+            futures[self.executor.submit(detector.detect, image = image)] = (metric_type, detector_name)
+        # Collect results as they complete
+        for future in as_completed(futures):
+            metric_type, detector_name = futures[future]
             try:
+                result                      = future.result(timeout = settings.METRIC_TIMEOUT)
                 result.metric_type          = metric_type
                 metric_results[metric_type] = result
                 logger.debug(f"{detector_name} | {metric_type.value} | score={result.score:.3f} | confidence={result.confidence:.3f}")
             except Exception as e:
+                logger.error(f"{detector_name} failed: {e}")
                 # Same Failure Score by all metrics with same confidence
                 metric_results[metric_type] = MetricResult(metric_type = metric_type,
             return DetectionStatus.REVIEW_REQUIRED
         else:
+            return DetectionStatus.LIKELY_AUTHENTIC
+    def _calculate_confidence(self, metric_results: dict[MetricType, MetricResult], overall_score: float) -> int:
+        """
+        Tier-1 confidence calculator based on:
+        - metric agreement
+        - metric reliability
+        - decision boundary distance
+        """
+        scores                 = [result.score for result in metric_results.values()]
+        score_variance         = np.var(scores)
+        # If all metrics failed, confidence must be low
+        if all(isinstance(result.details, dict) and "error" in result.details for result in metric_results.values()):
+            return int(SIGNAL_CONFIDENCE_PARAMS.MIN_CONFIDENCE * 100)
+        # Agreement confidence
+        agreement_confidence   = 1.0 - min(score_variance / SIGNAL_CONFIDENCE_PARAMS.VARIANCE_NORM, 1.0)
+        # Reliability confidence
+        confidences            = [result.confidence for result in metric_results.values() if result.confidence is not None]
+        reliability_confidence = float(np.mean(confidences)) if confidences else SIGNAL_CONFIDENCE_PARAMS.DEFAULT_RELIABILITY_CONFIDENCE
+        # Distance confidence
+        review_threshold       = (self.threshold_manager.get_review_threshold() if self.threshold_manager else settings.REVIEW_THRESHOLD)
+        distance_confidence    = min(abs(overall_score - review_threshold) / SIGNAL_CONFIDENCE_PARAMS.DISTANCE_NORM, 1.0)
+        logger.debug(f"Confidence breakdown | agreement={agreement_confidence:.2f}, reliability={reliability_confidence:.2f}, distance={distance_confidence:.2f}")
+        confidence             = (SIGNAL_CONFIDENCE_PARAMS.AGREEMENT_WEIGHT * agreement_confidence +
+                                  SIGNAL_CONFIDENCE_PARAMS.RELIABILITY_WEIGHT * reliability_confidence +
+                                  SIGNAL_CONFIDENCE_PARAMS.DISTANCE_WEIGHT * distance_confidence
+                                 )
+        return int(np.clip(confidence, 0.0, 1.0) * 100)

notebooks/.ipynb_checkpoints/Unified_Dataset_Builder-checkpoint.ipynb CHANGED Viewed

@@ -84,7 +84,7 @@
     "# ===============================\n",
     "# Directory Configuration\n",
     "# ===============================\n",
-    "BASE_DIR       = Path(\"tests/dataset\")\n",
     "AI_DIR         = BASE_DIR / \"ai\"\n",
     "REAL_DIR       = BASE_DIR / \"real\"\n",
     "RAW_DIR        = BASE_DIR / \"raw_downloads\"\n",
@@ -166,16 +166,13 @@
     "                  }]\n",
     "        \n",
     "\n",
-    "REAL_DATASETS   = [{\"name\"      : \"mscoco_2017\",\n",
-    "                    \"hf_id\"     : \"shunk031/MSCOCO\",\n",
-    "                    \"hf_kwargs\" : {\"year\": 2017,\n",
-    "                                   \"coco_task\": \"instances\"\n",
-    "                                  },\n",
     "                    \"split\"     : \"train\",\n",
     "                    \"image_key\" : \"image\",\n",
     "                    \"label\"     : \"real\",\n",
     "                    \"family\"    : \"photographic\",\n",
-    "                    \"streaming\" : False\n",
     "                  }]\n",
     "\n",
     "# Kaggle datasets (public, non-scraped)\n",
@@ -355,7 +352,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "dd8ef771-f39f-4d9d-8eaf-626ecc211141",
    "metadata": {},
    "outputs": [
@@ -371,7 +368,74 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      " 46%|████████████████████████████████████████████████████████████████▊                                                                           | 463/1000 [02:43<04:08,  2.16it/s]"
      ]
     }
    ],

     "# ===============================\n",
     "# Directory Configuration\n",
     "# ===============================\n",
+    "BASE_DIR       = Path(\"../tests/dataset\")\n",
     "AI_DIR         = BASE_DIR / \"ai\"\n",
     "REAL_DIR       = BASE_DIR / \"real\"\n",
     "RAW_DIR        = BASE_DIR / \"raw_downloads\"\n",
     "                  }]\n",
     "        \n",
     "\n",
+    "REAL_DATASETS   = [{\"name\"      : \"imagenette\",\n",
+    "                    \"hf_id\"     : \"frgfm/imagenette\",\n",
+    "                    \"config\"    : \"320px\",\n",
     "                    \"split\"     : \"train\",\n",
     "                    \"image_key\" : \"image\",\n",
     "                    \"label\"     : \"real\",\n",
     "                    \"family\"    : \"photographic\",\n",
     "                  }]\n",
     "\n",
     "# Kaggle datasets (public, non-scraped)\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "id": "dd8ef771-f39f-4d9d-8eaf-626ecc211141",
    "metadata": {},
    "outputs": [
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [05:31<00:00,  3.01it/s]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "▶ Loading HF dataset: imagenette\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6e436d2fc4374bff9d76dc2534b752b8",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading builder script: 0.00B [00:00, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "791d872b01a44cbb908ddbad43f20a42",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading metadata: 0.00B [00:00, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "36b6ebafe8e4436e9cc3a4bf38a36bda",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading readme: 0.00B [00:00, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "ename": "ValueError",
+     "evalue": "Config name is missing.\nPlease pick one among the available configs: ['full_size', '320px', '160px']\nExample of usage:\n\t`load_dataset('imagenette', 'full_size')`",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[7], line 46\u001b[0m\n\u001b[1;32m     42\u001b[0m \u001b[38;5;66;03m# ===============================\u001b[39;00m\n\u001b[1;32m     43\u001b[0m \u001b[38;5;66;03m# Entry Point\u001b[39;00m\n\u001b[1;32m     44\u001b[0m \u001b[38;5;66;03m# ===============================\u001b[39;00m\n\u001b[1;32m     45\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__main__\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m---> 46\u001b[0m     \u001b[43mmain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+      "Cell \u001b[0;32mIn[7], line 13\u001b[0m, in \u001b[0;36mmain\u001b[0;34m()\u001b[0m\n\u001b[1;32m     10\u001b[0m     process_hf_dataset(ds, AI_DIR, writer)\n\u001b[1;32m     12\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ds \u001b[38;5;129;01min\u001b[39;00m REAL_DATASETS:\n\u001b[0;32m---> 13\u001b[0m     \u001b[43mprocess_hf_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mds\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mREAL_DIR\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwriter\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     15\u001b[0m \u001b[38;5;66;03m# Kaggle datasets\u001b[39;00m\n\u001b[1;32m     16\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ds \u001b[38;5;129;01min\u001b[39;00m KAGGLE_DATASETS:\n",
+      "Cell \u001b[0;32mIn[4], line 4\u001b[0m, in \u001b[0;36mprocess_hf_dataset\u001b[0;34m(ds_cfg, root_dir, writer)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mprocess_hf_dataset\u001b[39m(ds_cfg, root_dir, writer):\n\u001b[1;32m      2\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m▶ Loading HF dataset: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mds_cfg[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 4\u001b[0m     ds      \u001b[38;5;241m=\u001b[39m \u001b[43mload_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mds_cfg\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhf_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      5\u001b[0m \u001b[43m                           \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mds_cfg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhf_kwargs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      6\u001b[0m \u001b[43m                           \u001b[49m\u001b[43mname\u001b[49m\u001b[43m      \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mds_cfg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mconfig\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      7\u001b[0m \u001b[43m                           
\u001b[49m\u001b[43msplit\u001b[49m\u001b[43m     \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mds_cfg\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msplit\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      8\u001b[0m \u001b[43m                           \u001b[49m\u001b[43mstreaming\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mds_cfg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstreaming\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      9\u001b[0m \u001b[43m                          \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     11\u001b[0m     out_dir \u001b[38;5;241m=\u001b[39m root_dir \u001b[38;5;241m/\u001b[39m ds_cfg[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m     12\u001b[0m     out_dir\u001b[38;5;241m.\u001b[39mmkdir(parents\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/load.py:2129\u001b[0m, in \u001b[0;36mload_dataset\u001b[0;34m(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, token, use_auth_token, task, streaming, num_proc, storage_options, **config_kwargs)\u001b[0m\n\u001b[1;32m   2124\u001b[0m verification_mode \u001b[38;5;241m=\u001b[39m VerificationMode(\n\u001b[1;32m   2125\u001b[0m     (verification_mode \u001b[38;5;129;01mor\u001b[39;00m VerificationMode\u001b[38;5;241m.\u001b[39mBASIC_CHECKS) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m save_infos \u001b[38;5;28;01melse\u001b[39;00m VerificationMode\u001b[38;5;241m.\u001b[39mALL_CHECKS\n\u001b[1;32m   2126\u001b[0m )\n\u001b[1;32m   2128\u001b[0m \u001b[38;5;66;03m# Create a dataset builder\u001b[39;00m\n\u001b[0;32m-> 2129\u001b[0m builder_instance \u001b[38;5;241m=\u001b[39m \u001b[43mload_dataset_builder\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   2130\u001b[0m \u001b[43m    \u001b[49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2131\u001b[0m \u001b[43m    \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2132\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2133\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2134\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2135\u001b[0m \u001b[43m    
\u001b[49m\u001b[43mfeatures\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfeatures\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2136\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2137\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2138\u001b[0m \u001b[43m    \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2139\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2140\u001b[0m \u001b[43m    \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2141\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mconfig_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2142\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2144\u001b[0m \u001b[38;5;66;03m# Return iterable dataset in case of streaming\u001b[39;00m\n\u001b[1;32m   2145\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m streaming:\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/load.py:1852\u001b[0m, in \u001b[0;36mload_dataset_builder\u001b[0;34m(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, token, use_auth_token, storage_options, **config_kwargs)\u001b[0m\n\u001b[1;32m   1850\u001b[0m builder_cls \u001b[38;5;241m=\u001b[39m get_dataset_builder_class(dataset_module, dataset_name\u001b[38;5;241m=\u001b[39mdataset_name)\n\u001b[1;32m   1851\u001b[0m \u001b[38;5;66;03m# Instantiate the dataset builder\u001b[39;00m\n\u001b[0;32m-> 1852\u001b[0m builder_instance: DatasetBuilder \u001b[38;5;241m=\u001b[39m \u001b[43mbuilder_cls\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1853\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1854\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdataset_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdataset_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1855\u001b[0m \u001b[43m    \u001b[49m\u001b[43mconfig_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1856\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1857\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1858\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mhash\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mhash\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1859\u001b[0m \u001b[43m    \u001b[49m\u001b[43minfo\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minfo\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1860\u001b[0m \u001b[43m    
\u001b[49m\u001b[43mfeatures\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfeatures\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1861\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1862\u001b[0m \u001b[43m    \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1863\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mbuilder_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1864\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mconfig_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1865\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1867\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m builder_instance\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/builder.py:373\u001b[0m, in \u001b[0;36mDatasetBuilder.__init__\u001b[0;34m(self, cache_dir, dataset_name, config_name, hash, base_path, info, features, token, use_auth_token, repo_id, data_files, data_dir, storage_options, writer_batch_size, name, **config_kwargs)\u001b[0m\n\u001b[1;32m    371\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data_dir \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    372\u001b[0m     config_kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata_dir\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m data_dir\n\u001b[0;32m--> 373\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig_id \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_create_builder_config\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    374\u001b[0m \u001b[43m    \u001b[49m\u001b[43mconfig_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    375\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcustom_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfeatures\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    376\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mconfig_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    377\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    379\u001b[0m \u001b[38;5;66;03m# prepare info: DatasetInfo are a standardized dataclass across all datasets\u001b[39;00m\n\u001b[1;32m    380\u001b[0m \u001b[38;5;66;03m# Prefill datasetinfo\u001b[39;00m\n\u001b[1;32m    381\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m info \u001b[38;5;129;01mis\u001b[39;00m 
\u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    382\u001b[0m     \u001b[38;5;66;03m# TODO FOR PACKAGED MODULES IT IMPORTS DATA FROM src/packaged_modules which doesn't make sense\u001b[39;00m\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/builder.py:525\u001b[0m, in \u001b[0;36mDatasetBuilder._create_builder_config\u001b[0;34m(self, config_name, custom_features, **config_kwargs)\u001b[0m\n\u001b[1;32m    523\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mBUILDER_CONFIGS) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m    524\u001b[0m     example_of_usage \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mload_dataset(\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdataset_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mBUILDER_CONFIGS[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 525\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    526\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mConfig name is missing.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    527\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mPlease pick one among the available configs: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlist\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuilder_configs\u001b[38;5;241m.\u001b[39mkeys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    528\u001b[0m         \u001b[38;5;241m+\u001b[39m 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mExample of usage:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;124m`\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexample_of_usage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m`\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    529\u001b[0m     )\n\u001b[1;32m    530\u001b[0m builder_config \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mBUILDER_CONFIGS[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m    531\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\n\u001b[1;32m    532\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo config specified, defaulting to the single config: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdataset_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbuilder_config\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    533\u001b[0m )\n",
+      "\u001b[0;31mValueError\u001b[0m: Config name is missing.\nPlease pick one among the available configs: ['full_size', '320px', '160px']\nExample of usage:\n\t`load_dataset('imagenette', 'full_size')`"
      ]
     }
    ],

notebooks/Unified_Dataset_Builder.ipynb CHANGED Viewed

@@ -84,7 +84,7 @@
     "# ===============================\n",
     "# Directory Configuration\n",
     "# ===============================\n",
-    "BASE_DIR       = Path(\"tests/dataset\")\n",
     "AI_DIR         = BASE_DIR / \"ai\"\n",
     "REAL_DIR       = BASE_DIR / \"real\"\n",
     "RAW_DIR        = BASE_DIR / \"raw_downloads\"\n",
@@ -150,7 +150,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
    "id": "74106705-e2d6-411c-8193-8e02f5ee0fdc",
    "metadata": {},
    "outputs": [],
@@ -166,16 +166,13 @@
     "                  }]\n",
     "        \n",
     "\n",
-    "REAL_DATASETS   = [{\"name\"      : \"mscoco_2017\",\n",
-    "                    \"hf_id\"     : \"shunk031/MSCOCO\",\n",
-    "                    \"hf_kwargs\" : {\"year\": 2017,\n",
-    "                                   \"coco_task\": \"instances\"\n",
-    "                                  },\n",
     "                    \"split\"     : \"train\",\n",
     "                    \"image_key\" : \"image\",\n",
     "                    \"label\"     : \"real\",\n",
     "                    \"family\"    : \"photographic\",\n",
-    "                    \"streaming\" : False\n",
     "                  }]\n",
     "\n",
     "# Kaggle datasets (public, non-scraped)\n",
@@ -204,7 +201,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
    "id": "a9ea5276-65bb-49f5-a656-c00ceeb1f4d3",
    "metadata": {},
    "outputs": [],
@@ -269,7 +266,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
    "id": "c6eca5e6-0469-4af6-8af8-afe3036cb0a8",
    "metadata": {},
    "outputs": [],
@@ -305,7 +302,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
    "id": "b648832e-5025-4851-af21-382051167a04",
    "metadata": {},
    "outputs": [],
@@ -355,7 +352,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
    "id": "dd8ef771-f39f-4d9d-8eaf-626ecc211141",
    "metadata": {},
    "outputs": [
@@ -371,7 +368,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [05:24<00:00,  3.08it/s]\n"
      ]
     },
     {
@@ -379,13 +376,34 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "▶ Loading HF dataset: mscoco_2017\n"
      ]
     },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "15b93e24384a49da9e46dceda9bc3f6b",
        "version_major": 2,
        "version_minor": 0
       },
@@ -399,51 +417,101 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e34eabfceb61496ebbd9336c9ed060f3",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
-       "Downloading data:   0%|          | 0.00/19.3G [00:00<?, ?B/s]"
       ]
      },
      "metadata": {},
      "output_type": "display_data"
     },
     {
-     "ename": "KeyboardInterrupt",
-     "evalue": "",
      "output_type": "error",
      "traceback": [
       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[7], line 46\u001b[0m\n\u001b[1;32m     42\u001b[0m \u001b[38;5;66;03m# ===============================\u001b[39;00m\n\u001b[1;32m     43\u001b[0m \u001b[38;5;66;03m# Entry Point\u001b[39;00m\n\u001b[1;32m     44\u001b[0m \u001b[38;5;66;03m# ===============================\u001b[39;00m\n\u001b[1;32m     45\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__main__\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m---> 46\u001b[0m     \u001b[43mmain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
-      "Cell \u001b[0;32mIn[7], line 13\u001b[0m, in \u001b[0;36mmain\u001b[0;34m()\u001b[0m\n\u001b[1;32m     10\u001b[0m     process_hf_dataset(ds, AI_DIR, writer)\n\u001b[1;32m     12\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ds \u001b[38;5;129;01min\u001b[39;00m REAL_DATASETS:\n\u001b[0;32m---> 13\u001b[0m     \u001b[43mprocess_hf_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mds\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mREAL_DIR\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwriter\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     15\u001b[0m \u001b[38;5;66;03m# Kaggle datasets\u001b[39;00m\n\u001b[1;32m     16\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ds \u001b[38;5;129;01min\u001b[39;00m KAGGLE_DATASETS:\n",
-      "Cell \u001b[0;32mIn[4], line 4\u001b[0m, in \u001b[0;36mprocess_hf_dataset\u001b[0;34m(ds_cfg, root_dir, writer)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mprocess_hf_dataset\u001b[39m(ds_cfg, root_dir, writer):\n\u001b[1;32m      2\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m▶ Loading HF dataset: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mds_cfg[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 4\u001b[0m     ds      \u001b[38;5;241m=\u001b[39m \u001b[43mload_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mds_cfg\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhf_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      5\u001b[0m \u001b[43m                           \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mds_cfg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhf_kwargs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      6\u001b[0m \u001b[43m                           \u001b[49m\u001b[43mname\u001b[49m\u001b[43m      \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mds_cfg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mconfig\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      7\u001b[0m \u001b[43m                           
\u001b[49m\u001b[43msplit\u001b[49m\u001b[43m     \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mds_cfg\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msplit\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      8\u001b[0m \u001b[43m                           \u001b[49m\u001b[43mstreaming\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mds_cfg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstreaming\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      9\u001b[0m \u001b[43m                          \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     11\u001b[0m     out_dir \u001b[38;5;241m=\u001b[39m root_dir \u001b[38;5;241m/\u001b[39m ds_cfg[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m     12\u001b[0m     out_dir\u001b[38;5;241m.\u001b[39mmkdir(parents\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/load.py:2153\u001b[0m, in \u001b[0;36mload_dataset\u001b[0;34m(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, token, use_auth_token, task, streaming, num_proc, storage_options, **config_kwargs)\u001b[0m\n\u001b[1;32m   2150\u001b[0m try_from_hf_gcs \u001b[38;5;241m=\u001b[39m path \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m _PACKAGED_DATASETS_MODULES\n\u001b[1;32m   2152\u001b[0m \u001b[38;5;66;03m# Download and prepare data\u001b[39;00m\n\u001b[0;32m-> 2153\u001b[0m \u001b[43mbuilder_instance\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_and_prepare\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   2154\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2155\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2156\u001b[0m \u001b[43m    \u001b[49m\u001b[43mverification_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverification_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2157\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtry_from_hf_gcs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtry_from_hf_gcs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2158\u001b[0m \u001b[43m    \u001b[49m\u001b[43mnum_proc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnum_proc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2159\u001b[0m \u001b[43m    \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2160\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   
2162\u001b[0m \u001b[38;5;66;03m# Build dataset for splits\u001b[39;00m\n\u001b[1;32m   2163\u001b[0m keep_in_memory \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m   2164\u001b[0m     keep_in_memory \u001b[38;5;28;01mif\u001b[39;00m keep_in_memory \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m is_small_dataset(builder_instance\u001b[38;5;241m.\u001b[39minfo\u001b[38;5;241m.\u001b[39mdataset_size)\n\u001b[1;32m   2165\u001b[0m )\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/builder.py:954\u001b[0m, in \u001b[0;36mDatasetBuilder.download_and_prepare\u001b[0;34m(self, output_dir, download_config, download_mode, verification_mode, ignore_verifications, try_from_hf_gcs, dl_manager, base_path, use_auth_token, file_format, max_shard_size, num_proc, storage_options, **download_and_prepare_kwargs)\u001b[0m\n\u001b[1;32m    952\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m num_proc \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    953\u001b[0m         prepare_split_kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnum_proc\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m num_proc\n\u001b[0;32m--> 954\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_download_and_prepare\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    955\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdl_manager\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdl_manager\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    956\u001b[0m \u001b[43m        \u001b[49m\u001b[43mverification_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverification_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    957\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mprepare_split_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    958\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdownload_and_prepare_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    959\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    960\u001b[0m \u001b[38;5;66;03m# Sync info\u001b[39;00m\n\u001b[1;32m    961\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minfo\u001b[38;5;241m.\u001b[39mdataset_size \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28msum\u001b[39m(split\u001b[38;5;241m.\u001b[39mnum_bytes \u001b[38;5;28;01mfor\u001b[39;00m split \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minfo\u001b[38;5;241m.\u001b[39msplits\u001b[38;5;241m.\u001b[39mvalues())\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/builder.py:1717\u001b[0m, in \u001b[0;36mGeneratorBasedBuilder._download_and_prepare\u001b[0;34m(self, dl_manager, verification_mode, **prepare_splits_kwargs)\u001b[0m\n\u001b[1;32m   1716\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_download_and_prepare\u001b[39m(\u001b[38;5;28mself\u001b[39m, dl_manager, verification_mode, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mprepare_splits_kwargs):\n\u001b[0;32m-> 1717\u001b[0m     \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_download_and_prepare\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1718\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdl_manager\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1719\u001b[0m \u001b[43m        \u001b[49m\u001b[43mverification_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1720\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcheck_duplicate_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverification_mode\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mVerificationMode\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mBASIC_CHECKS\u001b[49m\n\u001b[1;32m   1721\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mverification_mode\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mVerificationMode\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mALL_CHECKS\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1722\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mprepare_splits_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1723\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/builder.py:1027\u001b[0m, in \u001b[0;36mDatasetBuilder._download_and_prepare\u001b[0;34m(self, dl_manager, verification_mode, **prepare_split_kwargs)\u001b[0m\n\u001b[1;32m   1025\u001b[0m split_dict \u001b[38;5;241m=\u001b[39m SplitDict(dataset_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdataset_name)\n\u001b[1;32m   1026\u001b[0m split_generators_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_split_generators_kwargs(prepare_split_kwargs)\n\u001b[0;32m-> 1027\u001b[0m split_generators \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_split_generators\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdl_manager\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43msplit_generators_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1029\u001b[0m \u001b[38;5;66;03m# Checksums verification\u001b[39;00m\n\u001b[1;32m   1030\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m verification_mode \u001b[38;5;241m==\u001b[39m VerificationMode\u001b[38;5;241m.\u001b[39mALL_CHECKS \u001b[38;5;129;01mand\u001b[39;00m dl_manager\u001b[38;5;241m.\u001b[39mrecord_checksums:\n",
-      "File \u001b[0;32m~/.cache/huggingface/modules/datasets_modules/datasets/shunk031--MSCOCO/9a9d3cb1e5e1927e03f5448bc4e3dd95d17101d142ba4b94d6973770757f535f/MSCOCO.py:977\u001b[0m, in \u001b[0;36mMsCocoDataset._split_generators\u001b[0;34m(self, dl_manager)\u001b[0m\n\u001b[1;32m    976\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_split_generators\u001b[39m(\u001b[38;5;28mself\u001b[39m, dl_manager: ds\u001b[38;5;241m.\u001b[39mDownloadManager):\n\u001b[0;32m--> 977\u001b[0m     file_paths \u001b[38;5;241m=\u001b[39m \u001b[43mdl_manager\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_and_extract\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_URLS\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43myear\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    979\u001b[0m     imgs \u001b[38;5;241m=\u001b[39m file_paths[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mimages\u001b[39m\u001b[38;5;124m\"\u001b[39m]  \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m    980\u001b[0m     anns \u001b[38;5;241m=\u001b[39m file_paths[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mannotations\u001b[39m\u001b[38;5;124m\"\u001b[39m]  \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/download/download_manager.py:565\u001b[0m, in \u001b[0;36mDownloadManager.download_and_extract\u001b[0;34m(self, url_or_urls)\u001b[0m\n\u001b[1;32m    549\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mdownload_and_extract\u001b[39m(\u001b[38;5;28mself\u001b[39m, url_or_urls):\n\u001b[1;32m    550\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Download and extract given `url_or_urls`.\u001b[39;00m\n\u001b[1;32m    551\u001b[0m \n\u001b[1;32m    552\u001b[0m \u001b[38;5;124;03m    Is roughly equivalent to:\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    563\u001b[0m \u001b[38;5;124;03m        extracted_path(s): `str`, extracted paths of given URL(s).\u001b[39;00m\n\u001b[1;32m    564\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m--> 565\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mextract(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl_or_urls\u001b[49m\u001b[43m)\u001b[49m)\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/download/download_manager.py:428\u001b[0m, in \u001b[0;36mDownloadManager.download\u001b[0;34m(self, url_or_urls)\u001b[0m\n\u001b[1;32m    425\u001b[0m download_func \u001b[38;5;241m=\u001b[39m partial(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_download, download_config\u001b[38;5;241m=\u001b[39mdownload_config)\n\u001b[1;32m    427\u001b[0m start_time \u001b[38;5;241m=\u001b[39m datetime\u001b[38;5;241m.\u001b[39mnow()\n\u001b[0;32m--> 428\u001b[0m downloaded_path_or_paths \u001b[38;5;241m=\u001b[39m \u001b[43mmap_nested\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    429\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdownload_func\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    430\u001b[0m \u001b[43m    \u001b[49m\u001b[43murl_or_urls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    431\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmap_tuple\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m    432\u001b[0m \u001b[43m    \u001b[49m\u001b[43mnum_proc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnum_proc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    433\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdisable_tqdm\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mis_progress_bar_enabled\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    434\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdesc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mDownloading data files\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    435\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    436\u001b[0m duration \u001b[38;5;241m=\u001b[39m 
datetime\u001b[38;5;241m.\u001b[39mnow() \u001b[38;5;241m-\u001b[39m start_time\n\u001b[1;32m    437\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloading took \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mduration\u001b[38;5;241m.\u001b[39mtotal_seconds()\u001b[38;5;250m \u001b[39m\u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;241m60\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m min\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/py_utils.py:464\u001b[0m, in \u001b[0;36mmap_nested\u001b[0;34m(function, data_struct, dict_only, map_list, map_tuple, map_numpy, num_proc, parallel_min_length, types, disable_tqdm, desc)\u001b[0m\n\u001b[1;32m    462\u001b[0m     num_proc \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m    463\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m num_proc \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m num_proc \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(iterable) \u001b[38;5;241m<\u001b[39m parallel_min_length:\n\u001b[0;32m--> 464\u001b[0m     mapped \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m    465\u001b[0m         _single_map_nested((function, obj, types, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m    466\u001b[0m         \u001b[38;5;28;01mfor\u001b[39;00m obj \u001b[38;5;129;01min\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mtqdm(iterable, disable\u001b[38;5;241m=\u001b[39mdisable_tqdm, desc\u001b[38;5;241m=\u001b[39mdesc)\n\u001b[1;32m    467\u001b[0m     ]\n\u001b[1;32m    468\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    469\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m warnings\u001b[38;5;241m.\u001b[39mcatch_warnings():\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/py_utils.py:465\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    462\u001b[0m     num_proc \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m    463\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m num_proc \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m num_proc \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(iterable) \u001b[38;5;241m<\u001b[39m parallel_min_length:\n\u001b[1;32m    464\u001b[0m     mapped \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m--> 465\u001b[0m         \u001b[43m_single_map_nested\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtypes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    466\u001b[0m         \u001b[38;5;28;01mfor\u001b[39;00m obj \u001b[38;5;129;01min\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mtqdm(iterable, disable\u001b[38;5;241m=\u001b[39mdisable_tqdm, desc\u001b[38;5;241m=\u001b[39mdesc)\n\u001b[1;32m    467\u001b[0m     ]\n\u001b[1;32m    468\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    469\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m warnings\u001b[38;5;241m.\u001b[39mcatch_warnings():\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/py_utils.py:382\u001b[0m, in \u001b[0;36m_single_map_nested\u001b[0;34m(args)\u001b[0m\n\u001b[1;32m    380\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mtqdm(pbar_iterable, disable\u001b[38;5;241m=\u001b[39mdisable_tqdm, position\u001b[38;5;241m=\u001b[39mrank, unit\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mobj\u001b[39m\u001b[38;5;124m\"\u001b[39m, desc\u001b[38;5;241m=\u001b[39mpbar_desc) \u001b[38;5;28;01mas\u001b[39;00m pbar:\n\u001b[1;32m    381\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_struct, \u001b[38;5;28mdict\u001b[39m):\n\u001b[0;32m--> 382\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m {k: _single_map_nested((function, v, types, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m)) \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m pbar}\n\u001b[1;32m    383\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    384\u001b[0m         mapped \u001b[38;5;241m=\u001b[39m [_single_map_nested((function, v, types, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m)) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m pbar]\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/py_utils.py:382\u001b[0m, in \u001b[0;36m<dictcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    380\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mtqdm(pbar_iterable, disable\u001b[38;5;241m=\u001b[39mdisable_tqdm, position\u001b[38;5;241m=\u001b[39mrank, unit\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mobj\u001b[39m\u001b[38;5;124m\"\u001b[39m, desc\u001b[38;5;241m=\u001b[39mpbar_desc) \u001b[38;5;28;01mas\u001b[39;00m pbar:\n\u001b[1;32m    381\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_struct, \u001b[38;5;28mdict\u001b[39m):\n\u001b[0;32m--> 382\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m {k: \u001b[43m_single_map_nested\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtypes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m pbar}\n\u001b[1;32m    383\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    384\u001b[0m         mapped \u001b[38;5;241m=\u001b[39m [_single_map_nested((function, v, types, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m)) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m pbar]\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/py_utils.py:367\u001b[0m, in \u001b[0;36m_single_map_nested\u001b[0;34m(args)\u001b[0m\n\u001b[1;32m    365\u001b[0m \u001b[38;5;66;03m# Singleton first to spare some computation\u001b[39;00m\n\u001b[1;32m    366\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_struct, \u001b[38;5;28mdict\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_struct, types):\n\u001b[0;32m--> 367\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunction\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata_struct\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    369\u001b[0m \u001b[38;5;66;03m# Reduce logging to keep things readable in multiprocessing with tqdm\u001b[39;00m\n\u001b[1;32m    370\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m rank \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mget_verbosity() \u001b[38;5;241m<\u001b[39m logging\u001b[38;5;241m.\u001b[39mWARNING:\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/download/download_manager.py:454\u001b[0m, in \u001b[0;36mDownloadManager._download\u001b[0;34m(self, url_or_filename, download_config)\u001b[0m\n\u001b[1;32m    451\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_relative_path(url_or_filename):\n\u001b[1;32m    452\u001b[0m     \u001b[38;5;66;03m# append the relative path to the base_path\u001b[39;00m\n\u001b[1;32m    453\u001b[0m     url_or_filename \u001b[38;5;241m=\u001b[39m url_or_path_join(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_base_path, url_or_filename)\n\u001b[0;32m--> 454\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcached_path\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl_or_filename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/file_utils.py:182\u001b[0m, in \u001b[0;36mcached_path\u001b[0;34m(url_or_filename, download_config, **download_kwargs)\u001b[0m\n\u001b[1;32m    178\u001b[0m     url_or_filename \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(url_or_filename)\n\u001b[1;32m    180\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_remote_url(url_or_filename):\n\u001b[1;32m    181\u001b[0m     \u001b[38;5;66;03m# URL, so get it from the cache (downloading if necessary)\u001b[39;00m\n\u001b[0;32m--> 182\u001b[0m     output_path \u001b[38;5;241m=\u001b[39m \u001b[43mget_from_cache\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    183\u001b[0m \u001b[43m        \u001b[49m\u001b[43murl_or_filename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    184\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    185\u001b[0m \u001b[43m        \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    186\u001b[0m \u001b[43m        \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    187\u001b[0m \u001b[43m        \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    188\u001b[0m \u001b[43m        \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    189\u001b[0m \u001b[43m    
    \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    190\u001b[0m \u001b[43m        \u001b[49m\u001b[43muse_etag\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muse_etag\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    191\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmax_retries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    192\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    193\u001b[0m \u001b[43m        \u001b[49m\u001b[43mignore_url_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mignore_url_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    194\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    195\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdownload_desc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_desc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    196\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    197\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(url_or_filename):\n\u001b[1;32m    198\u001b[0m     \u001b[38;5;66;03m# File, and it 
exists.\u001b[39;00m\n\u001b[1;32m    199\u001b[0m     output_path \u001b[38;5;241m=\u001b[39m url_or_filename\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/file_utils.py:644\u001b[0m, in \u001b[0;36mget_from_cache\u001b[0;34m(url, cache_dir, force_download, proxies, etag_timeout, resume_download, user_agent, local_files_only, use_etag, max_retries, token, use_auth_token, ignore_url_params, storage_options, download_desc)\u001b[0m\n\u001b[1;32m    642\u001b[0m         fsspec_get(url, temp_file, storage_options\u001b[38;5;241m=\u001b[39mstorage_options, desc\u001b[38;5;241m=\u001b[39mdownload_desc)\n\u001b[1;32m    643\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 644\u001b[0m         \u001b[43mhttp_get\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    645\u001b[0m \u001b[43m            \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    646\u001b[0m \u001b[43m            \u001b[49m\u001b[43mtemp_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    647\u001b[0m \u001b[43m            \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    648\u001b[0m \u001b[43m            \u001b[49m\u001b[43mresume_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    649\u001b[0m \u001b[43m            \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    650\u001b[0m \u001b[43m            \u001b[49m\u001b[43mcookies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcookies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    651\u001b[0m \u001b[43m            \u001b[49m\u001b[43mmax_retries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    652\u001b[0m \u001b[43m            \u001b[49m\u001b[43mdesc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_desc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    653\u001b[0m 
\u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    655\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstoring \u001b[39m\u001b[38;5;132;01m{\u001b[39;00murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m in cache at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcache_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    656\u001b[0m shutil\u001b[38;5;241m.\u001b[39mmove(temp_file\u001b[38;5;241m.\u001b[39mname, cache_path)\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/file_utils.py:419\u001b[0m, in \u001b[0;36mhttp_get\u001b[0;34m(url, temp_file, proxies, resume_size, headers, cookies, timeout, max_retries, desc)\u001b[0m\n\u001b[1;32m    410\u001b[0m total \u001b[38;5;241m=\u001b[39m resume_size \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mint\u001b[39m(content_length) \u001b[38;5;28;01mif\u001b[39;00m content_length \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    411\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mtqdm(\n\u001b[1;32m    412\u001b[0m     unit\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mB\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m    413\u001b[0m     unit_scale\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    417\u001b[0m     disable\u001b[38;5;241m=\u001b[39m\u001b[38;5;129;01mnot\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mis_progress_bar_enabled(),\n\u001b[1;32m    418\u001b[0m ) \u001b[38;5;28;01mas\u001b[39;00m progress:\n\u001b[0;32m--> 419\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m chunk \u001b[38;5;129;01min\u001b[39;00m response\u001b[38;5;241m.\u001b[39miter_content(chunk_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1024\u001b[39m):\n\u001b[1;32m    420\u001b[0m         progress\u001b[38;5;241m.\u001b[39mupdate(\u001b[38;5;28mlen\u001b[39m(chunk))\n\u001b[1;32m    421\u001b[0m         temp_file\u001b[38;5;241m.\u001b[39mwrite(chunk)\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/requests/models.py:816\u001b[0m, in \u001b[0;36mResponse.iter_content.<locals>.generate\u001b[0;34m()\u001b[0m\n\u001b[1;32m    814\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m    815\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 816\u001b[0m         \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw\u001b[38;5;241m.\u001b[39mstream(chunk_size, decode_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m    817\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m ProtocolError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m    818\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m ChunkedEncodingError(e)\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/urllib3/response.py:1091\u001b[0m, in \u001b[0;36mHTTPResponse.stream\u001b[0;34m(self, amt, decode_content)\u001b[0m\n\u001b[1;32m   1089\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1090\u001b[0m     \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_fp_closed(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m-> 1091\u001b[0m         data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mamt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1093\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m data:\n\u001b[1;32m   1094\u001b[0m             \u001b[38;5;28;01myield\u001b[39;00m data\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/urllib3/response.py:980\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt, decode_content, cache_content)\u001b[0m\n\u001b[1;32m    977\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m amt:\n\u001b[1;32m    978\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer\u001b[38;5;241m.\u001b[39mget(amt)\n\u001b[0;32m--> 980\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raw_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    982\u001b[0m flush_decoder \u001b[38;5;241m=\u001b[39m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m (amt \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m data)\n\u001b[1;32m    984\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m data \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/urllib3/response.py:904\u001b[0m, in \u001b[0;36mHTTPResponse._raw_read\u001b[0;34m(self, amt, read1)\u001b[0m\n\u001b[1;32m    901\u001b[0m fp_closed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclosed\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m    903\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_error_catcher():\n\u001b[0;32m--> 904\u001b[0m     data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mread1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mread1\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m fp_closed \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    905\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m amt \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m data:\n\u001b[1;32m    906\u001b[0m         \u001b[38;5;66;03m# Platform-specific: Buggy versions of Python.\u001b[39;00m\n\u001b[1;32m    907\u001b[0m         \u001b[38;5;66;03m# Close the connection when no data is returned\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    912\u001b[0m         \u001b[38;5;66;03m# not properly close the connection in all cases. 
There is\u001b[39;00m\n\u001b[1;32m    913\u001b[0m         \u001b[38;5;66;03m# no harm in redundantly calling close.\u001b[39;00m\n\u001b[1;32m    914\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mclose()\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/urllib3/response.py:887\u001b[0m, in \u001b[0;36mHTTPResponse._fp_read\u001b[0;34m(self, amt, read1)\u001b[0m\n\u001b[1;32m    884\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread1(amt) \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread1()\n\u001b[1;32m    885\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    886\u001b[0m     \u001b[38;5;66;03m# StringIO doesn't like amt=None\u001b[39;00m\n\u001b[0;32m--> 887\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread()\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/http/client.py:466\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt)\u001b[0m\n\u001b[1;32m    463\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m amt \u001b[38;5;241m>\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength:\n\u001b[1;32m    464\u001b[0m     \u001b[38;5;66;03m# clip the read to the \"end of response\"\u001b[39;00m\n\u001b[1;32m    465\u001b[0m     amt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength\n\u001b[0;32m--> 466\u001b[0m s \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    467\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m s \u001b[38;5;129;01mand\u001b[39;00m amt:\n\u001b[1;32m    468\u001b[0m     \u001b[38;5;66;03m# Ideally, we would raise IncompleteRead if the content-length\u001b[39;00m\n\u001b[1;32m    469\u001b[0m     \u001b[38;5;66;03m# wasn't satisfied, but it might break compatibility.\u001b[39;00m\n\u001b[1;32m    470\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_close_conn()\n",
-      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/socket.py:717\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m    715\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m    716\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 717\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    718\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m    719\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
      ]
     }
    ],

     "# ===============================\n",
     "# Directory Configuration\n",
     "# ===============================\n",
+    "BASE_DIR       = Path(\"../tests/dataset\")\n",
     "AI_DIR         = BASE_DIR / \"ai\"\n",
     "REAL_DIR       = BASE_DIR / \"real\"\n",
     "RAW_DIR        = BASE_DIR / \"raw_downloads\"\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 8,
    "id": "74106705-e2d6-411c-8193-8e02f5ee0fdc",
    "metadata": {},
    "outputs": [],
     "                  }]\n",
     "        \n",
     "\n",
+    "REAL_DATASETS   = [{\"name\"      : \"imagenette\",\n",
+    "                    \"hf_id\"     : \"frgfm/imagenette\",\n",
+    "                    \"config\"    : \"320px\",\n",
     "                    \"split\"     : \"train\",\n",
     "                    \"image_key\" : \"image\",\n",
     "                    \"label\"     : \"real\",\n",
     "                    \"family\"    : \"photographic\",\n",
     "                  }]\n",
     "\n",
     "# Kaggle datasets (public, non-scraped)\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 9,
    "id": "a9ea5276-65bb-49f5-a656-c00ceeb1f4d3",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 10,
    "id": "c6eca5e6-0469-4af6-8af8-afe3036cb0a8",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 11,
    "id": "b648832e-5025-4851-af21-382051167a04",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 12,
    "id": "dd8ef771-f39f-4d9d-8eaf-626ecc211141",
    "metadata": {},
    "outputs": [
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [05:30<00:00,  3.02it/s]\n"
      ]
     },
     {
      "output_type": "stream",
      "text": [
       "\n",
+      "▶ Loading HF dataset: imagenette\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using the latest cached version of the module from /Users/itobuz/.cache/huggingface/modules/datasets_modules/datasets/frgfm--imagenette/38929285b8abcae5c1305418e9d8fea5dd6b189bbbd22caba5f5537c7fa0f01f (last modified on Mon Dec 22 15:06:36 2025) since it couldn't be found locally at frgfm/imagenette., or remotely on the Hugging Face Hub.\n"
      ]
     },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "ce74ba00790b49fab546616010a4952d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data:   0%|          | 0.00/342M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "dcadae012cfa492f8e94ea1662cb8102",
        "version_major": 2,
        "version_minor": 0
       },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3a0d3ab402554b7489a01f883e4d6572",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
+       "Downloading data: 0.00B [00:00, ?B/s]"
       ]
      },
      "metadata": {},
      "output_type": "display_data"
     },
     {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "9e65e45076104352951a4a71bd8d6da7",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data: 0.00B [00:00, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d714e865ab2a43dcb43a33dcf0df2be7",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Generating train split:   0%|          | 0/9469 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "49d1d87ad10548eebf9ba2709dbda441",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Generating validation split:   0%|          | 0/3925 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      " 11%|██████████████▋                                                                                                                            | 1000/9469 [02:30<21:13,  6.65it/s]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "⬇ Downloading Kaggle dataset: tristanzhang32/ai-generated-images-vs-real-images\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\n",
+      "  File \"/Users/itobuz/.conda/envs/mvp_env/bin/kaggle\", line 7, in <module>\n",
+      "    sys.exit(main())\n",
+      "  File \"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/kaggle/cli.py\", line 68, in main\n",
+      "    out = args.func(**command_args)\n",
+      "  File \"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/kaggle/api/kaggle_api_extended.py\", line 1741, in dataset_download_cli\n",
+      "    with self.build_kaggle_client() as kaggle:\n",
+      "  File \"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/kaggle/api/kaggle_api_extended.py\", line 688, in build_kaggle_client\n",
+      "    username=self.config_values['username'],\n",
+      "KeyError: 'username'\n"
+     ]
+    },
+    {
+     "ename": "CalledProcessError",
+     "evalue": "Command '['kaggle', 'datasets', 'download', 'tristanzhang32/ai-generated-images-vs-real-images', '-p', '../tests/dataset/raw_downloads/ai_vs_real', '--unzip']' returned non-zero exit status 1.",
      "output_type": "error",
      "traceback": [
       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mCalledProcessError\u001b[0m                        Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[12], line 46\u001b[0m\n\u001b[1;32m     42\u001b[0m \u001b[38;5;66;03m# ===============================\u001b[39;00m\n\u001b[1;32m     43\u001b[0m \u001b[38;5;66;03m# Entry Point\u001b[39;00m\n\u001b[1;32m     44\u001b[0m \u001b[38;5;66;03m# ===============================\u001b[39;00m\n\u001b[1;32m     45\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__main__\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m---> 46\u001b[0m     \u001b[43mmain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+      "Cell \u001b[0;32mIn[12], line 18\u001b[0m, in \u001b[0;36mmain\u001b[0;34m()\u001b[0m\n\u001b[1;32m     16\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ds \u001b[38;5;129;01min\u001b[39;00m KAGGLE_DATASETS:\n\u001b[1;32m     17\u001b[0m     raw_path \u001b[38;5;241m=\u001b[39m RAW_DIR \u001b[38;5;241m/\u001b[39m ds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m---> 18\u001b[0m     \u001b[43mdownload_kaggle_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mds\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mkaggle_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mraw_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     20\u001b[0m     \u001b[38;5;66;03m# AI images\u001b[39;00m\n\u001b[1;32m     21\u001b[0m     ingest_image_folder(src_dir \u001b[38;5;241m=\u001b[39m raw_path \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mai\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m     22\u001b[0m                         out_dir \u001b[38;5;241m=\u001b[39m AI_DIR \u001b[38;5;241m/\u001b[39m ds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m     23\u001b[0m                         writer  \u001b[38;5;241m=\u001b[39m writer,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     26\u001b[0m                         source  \u001b[38;5;241m=\u001b[39m ds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m     27\u001b[0m                        )\n",
+      "Cell \u001b[0;32mIn[10], line 10\u001b[0m, in \u001b[0;36mdownload_kaggle_dataset\u001b[0;34m(kaggle_id, out_dir)\u001b[0m\n\u001b[1;32m      6\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m      8\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m⬇ Downloading Kaggle dataset: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkaggle_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 10\u001b[0m \u001b[43msubprocess\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mkaggle\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdatasets\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdownload\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m     11\u001b[0m \u001b[43m                \u001b[49m\u001b[43mkaggle_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     12\u001b[0m \u001b[43m                \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m-p\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mout_dir\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     13\u001b[0m \u001b[43m                \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m--unzip\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m     14\u001b[0m \u001b[43m               \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     15\u001b[0m \u001b[43m               \u001b[49m\u001b[43mcheck\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m     16\u001b[0m \u001b[43m              \u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/subprocess.py:526\u001b[0m, in \u001b[0;36mrun\u001b[0;34m(input, capture_output, timeout, check, *popenargs, **kwargs)\u001b[0m\n\u001b[1;32m    524\u001b[0m     retcode \u001b[38;5;241m=\u001b[39m process\u001b[38;5;241m.\u001b[39mpoll()\n\u001b[1;32m    525\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m check \u001b[38;5;129;01mand\u001b[39;00m retcode:\n\u001b[0;32m--> 526\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m CalledProcessError(retcode, process\u001b[38;5;241m.\u001b[39margs,\n\u001b[1;32m    527\u001b[0m                                  output\u001b[38;5;241m=\u001b[39mstdout, stderr\u001b[38;5;241m=\u001b[39mstderr)\n\u001b[1;32m    528\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m CompletedProcess(process\u001b[38;5;241m.\u001b[39margs, retcode, stdout, stderr)\n",
+      "\u001b[0;31mCalledProcessError\u001b[0m: Command '['kaggle', 'datasets', 'download', 'tristanzhang32/ai-generated-images-vs-real-images', '-p', '../tests/dataset/raw_downloads/ai_vs_real', '--unzip']' returned non-zero exit status 1."
      ]
     }
    ],

reporter/csv_reporter.py CHANGED Viewed

@@ -7,8 +7,8 @@ from utils.logger import get_logger
 from config.settings import settings
 from config.constants import MetricType
 from config.schemas import AnalysisResult
 from utils.helpers import generate_unique_id
-from config.constants import DetectionStatus
 from config.schemas import BatchAnalysisResult
 from features.detailed_result_maker import DetailedResultMaker
@@ -19,444 +19,248 @@ logger = get_logger(__name__)
 class CSVReporter:
     """
-    Professional CSV report generator
-    Features:
-    ---------
-    - Single image detailed reports
-    - Batch summary reports with statistics
-    - Detailed forensic data export
-    - Excel-compatible formatting
-    - UTF-8 encoding with BOM for international compatibility
     """
     def __init__(self):
         """
         Initialize CSV Reporter
         """
         self.detailed_maker = DetailedResultMaker()
         logger.debug("CSVReporter initialized")
     def export_batch_summary(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
         """
-        Export batch analysis summary as CSV
-        Arguments:
-        ----------
-            batch_result { BatchAnalysisResult } : Complete batch analysis result
-            output_dir   { Path }                : Output directory (defaults to settings.REPORTS_DIR)
-        Returns:
-        --------
-                       { Path }                  : Path to generated CSV file
         """
         output_dir  = output_dir or settings.REPORTS_DIR
         report_id   = generate_unique_id()
         filename    = f"batch_summary_{report_id}.csv"
         output_path = output_dir / filename
         logger.info(f"Generating batch summary CSV: {filename}")
         try:
             with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
                 writer = csv.writer(f)
-                # Report Header
-                self._write_report_header(writer      = writer,
-                                          report_type = "Batch Analysis Summary",
                                           timestamp   = batch_result.timestamp,
                                          )
-                # Batch Statistics
-                self._write_batch_statistics(writer       = writer,
-                                             batch_result = batch_result,
-                                            )
-                # Main Results Table
                 self._write_batch_results_table(writer       = writer,
                                                 batch_result = batch_result,
                                                )
-                # Footer
                 self._write_footer(writer = writer)
             logger.info(f"Batch summary CSV generated: {output_path}")
             return output_path
         except Exception as e:
             logger.error(f"Failed to generate batch summary CSV: {e}")
             raise
     def export_batch_detailed(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
         """
-        Export detailed batch analysis with forensic data
-        Arguments:
-        ----------
-            batch_result { BatchAnalysisResult } : Complete batch analysis result
-            output_dir   { Path }                : Output directory (defaults to settings.REPORTS_DIR)
-        Returns:
-        --------
-                      { Path }                   : Path to generated CSV file
         """
         output_dir  = output_dir or settings.REPORTS_DIR
         report_id   = generate_unique_id()
         filename    = f"batch_detailed_{report_id}.csv"
         output_path = output_dir / filename
         logger.info(f"Generating detailed batch CSV: {filename}")
         try:
             with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
                 writer = csv.writer(f)
-                # Report Header
-                self._write_report_header(writer      = writer,
                                           report_type = "Detailed Batch Analysis",
                                           timestamp   = batch_result.timestamp,
                                          )
-                # Process each image with full details
                 for idx, result in enumerate(batch_result.results, 1):
-                    self._write_detailed_image_section(writer        = writer,
-                                                       result        = result,
-                                                       image_number  = idx,
-                                                       total_images  = batch_result.processed,
                                                       )
-                    # Add separator between images
                     if (idx < batch_result.processed):
                         writer.writerow([])
                         writer.writerow(['=' * 100])
                         writer.writerow([])
-                # Footer
                 self._write_footer(writer = writer)
             logger.info(f"Detailed batch CSV generated: {output_path}")
-            return output_path
         except Exception as e:
             logger.error(f"Failed to generate detailed batch CSV: {e}")
             raise
     def export_single_detailed(self, result: AnalysisResult, output_dir: Optional[Path] = None) -> Path:
         """
-        Export single image detailed analysis as CSV
-        Arguments:
-        ----------
-            result     { AnalysisResult } : Single image analysis result
-            output_dir { Path }           : Output directory (defaults to settings.REPORTS_DIR)
-        Returns:
-        --------
-                     { Path }             : Path to generated CSV file
         """
         output_dir  = output_dir or settings.REPORTS_DIR
         report_id   = generate_unique_id()
         filename    = f"single_analysis_{report_id}.csv"
         output_path = output_dir / filename
         logger.info(f"Generating single image CSV: {filename}")
         try:
             with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
                 writer = csv.writer(f)
-                # Report Header
-                self._write_report_header(writer     = writer,
                                           report_type = "Single Image Analysis",
                                           timestamp   = result.timestamp,
                                          )
-                # Image Details
-                self._write_detailed_image_section(writer       = writer,
                                                    result       = result,
                                                    image_number = 1,
                                                    total_images = 1,
                                                   )
-                # Footer
                 self._write_footer(writer = writer)
             logger.info(f"Single image CSV generated: {output_path}")
-            return output_path
         except Exception as e:
             logger.error(f"Failed to generate single image CSV: {e}")
             raise
-    def export_metrics_comparison(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
-        """
-        Export metrics comparison table across all images
-        Arguments:
-        ----------
-            batch_result { BatchAnalysisResult } : Complete batch analysis result
-            output_dir   { Path }                : Output directory (defaults to settings.REPORTS_DIR)
-        Returns:
-        --------
-                       { Path }                  : Path to generated CSV file
-        """
-        output_dir  = output_dir or settings.REPORTS_DIR
-        report_id   = generate_unique_id()
-        filename    = f"metrics_comparison_{report_id}.csv"
-        output_path = output_dir / filename
-        logger.info(f"Generating metrics comparison CSV: {filename}")
-        try:
-            with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
-                writer = csv.writer(f)
-                # Report Header
-                self._write_report_header(writer     = writer,
-                                          report_type = "Metrics Comparison",
-                                          timestamp   = batch_result.timestamp,
-                                         )
-                # Comparison Table Header
-                writer.writerow(['Metrics Comparison Across All Images'])
-                writer.writerow([])
-                header = ['Filename',
-                          'Overall Score',
-                          'Analysis Status',
-                          'Gradient Analysis Score',
-                          'Gradient Analysis Confidence',
-                          'Frequency Analysis Score',
-                          'Frequency Analysis Confidence',
-                          'Noise Analysis Score',
-                          'Noise Analysis Confidence',
-                          'Texture Analysis Score',
-                          'Texture Analysis Confidence',
-                          'Color Analysis Score',
-                          'Color Analysis Confidence',
-                          'Processing Time',
-                         ]
-                writer.writerow(header)
-                # Data rows
-                for result in batch_result.results:
-                    row = [result.filename,
-                           f"{result.overall_score:.3f}",
-                           result.status.value,
-                          ]
-                    # Add each metric's score and confidence
-                    for metric_type in [MetricType.GRADIENT, MetricType.FREQUENCY, MetricType.NOISE, MetricType.TEXTURE, MetricType.COLOR]:
-                        metric_result = result.metric_results.get(metric_type)
-                        if metric_result:
-                            row.append(f"{metric_result.score:.3f}")
-                            row.append(f"{metric_result.confidence:.3f}" if metric_result.confidence is not None else "N/A")
-                        else:
-                            row.extend(["N/A", "N/A"])
-                    row.append(f"{result.processing_time:.2f}s")
-                    writer.writerow(row)
-                # Footer
-                writer.writerow([])
-                self._write_footer(writer = writer)
-            logger.info(f"Metrics comparison CSV generated: {output_path}")
-            return output_path
-        except Exception as e:
-            logger.error(f"Failed to generate metrics comparison CSV: {e}")
-            raise
     def _write_report_header(self, writer, report_type: str, timestamp: datetime) -> None:
-        """
-        Write CSV report header
-        """
         writer.writerow(['=' * 100])
         writer.writerow([f'AI Image Screener - {report_type}'])
         writer.writerow([f'Generated: {timestamp.strftime("%Y-%m-%d %H:%M:%S")}'])
         writer.writerow([f'Version: {settings.VERSION}'])
         writer.writerow(['=' * 100])
         writer.writerow([])
-    def _write_batch_statistics(self, writer, batch_result: BatchAnalysisResult) -> None:
-        """
-        Write batch statistics section
-        """
-        writer.writerow(['BATCH STATISTICS'])
         writer.writerow([])
-        stats = [['Total Images', batch_result.total_images],
-                 ['Successfully Processed', batch_result.processed],
-                 ['Failed', batch_result.failed],
-                 ['Success Rate', f"{batch_result.summary.get('success_rate', 0)}%"],
-                 ['' , ''],
-                 ['Likely Authentic', batch_result.summary.get('likely_authentic', 0)],
-                 ['Review Required', batch_result.summary.get('review_required', 0)],
-                 ['', ''],
-                 ['Average Score', f"{batch_result.summary.get('avg_score', 0):.3f}"],
-                 ['Average Confidence', f"{batch_result.summary.get('avg_confidence', 0)}%"],
-                 ['Total Processing Time', f"{batch_result.total_processing_time:.2f}s"],
-                 ['Average Time per Image', f"{batch_result.summary.get('avg_proc_time', 0):.2f}s"],
-                ]
-        for row in stats:
             writer.writerow(row)
         writer.writerow([])
         writer.writerow(['=' * 100])
         writer.writerow([])
     def _write_batch_results_table(self, writer, batch_result: BatchAnalysisResult) -> None:
-        """
-        Write batch results main table
-        """
         writer.writerow(['ANALYSIS RESULTS'])
         writer.writerow([])
-        # Table Header
-        header = ['Filename',
-                  'Image Size',
-                  'Analysis Status',
-                  'Overall Score',
-                  'Analysis Confidence (%)',
-                  'Top Warning Signals',
-                  'Recommendation',
-                  'Processing Time (s)',
                  ]
         writer.writerow(header)
-        # Data rows
         for result in batch_result.results:
-            # Get top warning signals
-            top_signals = [s.name for s in result.signals if s.status.value in ['flagged', 'warning']][:2]
-            signals_str = "; ".join(top_signals) if top_signals else "All tests passed"
-            # Recommendation
-            if (result.status == DetectionStatus.REVIEW_REQUIRED):
-                recommendation = "Manual verification recommended"
-            else:
-                recommendation = "No further action needed"
-            row = [result.filename,
-                   f"{result.image_size[0]}×{result.image_size[1]}",
-                   result.status.value,
-                   f"{result.overall_score:.3f}",
-                   f"{result.confidence}%",
-                   signals_str,
-                   recommendation,
-                   f"{result.processing_time:.2f}",
-                  ]
-            writer.writerow(row)
         writer.writerow([])
     def _write_detailed_image_section(self, writer, result: AnalysisResult, image_number: int, total_images: int) -> None:
-        """
-        Write detailed section for single image
-        """
         writer.writerow([f'IMAGE {image_number} OF {total_images}'])
         writer.writerow([])
-        # Basic Information
-        writer.writerow(['BASIC INFORMATION'])
-        writer.writerow(['Filename', result.filename])
-        writer.writerow(['Status', result.status.value])
-        writer.writerow(['Overall Score', f"{result.overall_score:.3f}"])
         writer.writerow(['Confidence', f"{result.confidence}%"])
-        writer.writerow(['Image Size', f"{result.image_size[0]}×{result.image_size[1]}"])
-        writer.writerow(['Processing Time', f"{result.processing_time:.2f}s"])
-        writer.writerow(['Timestamp', result.timestamp.isoformat()])
-        writer.writerow([])
-        # Detection Signals
-        writer.writerow(['DETECTION SIGNALS'])
         writer.writerow([])
-        writer.writerow(['Metric Name', 'Metric Score', 'Analysis Status', 'Metric Confidence', 'Metric Explanation'])
         for signal in result.signals:
-            metric_result  = result.metric_results.get(signal.metric_type)
-            confidence_str = f"{metric_result.confidence:.3f}" if metric_result.confidence is not None else "N/A"
             writer.writerow([signal.name,
                              f"{signal.score:.3f}",
-                             signal.status.value.upper(),
-                             confidence_str,
-                             signal.explanation.replace("\n", " "),
                            ])
-        writer.writerow([])
-        # Detailed Forensics
-        writer.writerow(['FORENSIC DETAILS'])
-        writer.writerow([])
-        for metric_type in MetricType:
-            metric_result = result.metric_results.get(metric_type)
-            if not metric_result:
-                continue
-            metric_name = self.detailed_maker.metric_display_names.get(metric_type, metric_type.value)
-            writer.writerow([f'--- {metric_name} ---'])
-            writer.writerow(['Score', f"{metric_result.score:.3f}"])
-            writer.writerow(['Confidence', f"{metric_result.confidence:.3f}" if metric_result.confidence is not None else "N/A"])
-            # Write details
-            if metric_result.details:
-                for key, value in metric_result.details.items():
-                    if isinstance(value, dict):
-                        writer.writerow([f"  {key}:", ""])
-                        for sub_key, sub_value in value.items():
-                            writer.writerow([f"    {sub_key}", str(sub_value)])
-                    else:
-                        writer.writerow([f"  {key}", str(value)])
-            writer.writerow([])
-        # Recommendation
-        writer.writerow(['RECOMMENDATION'])
-        writer.writerow([])
-        if (result.status == DetectionStatus.REVIEW_REQUIRED):
-            writer.writerow(['Action', 'Manual verification recommended'])
-            writer.writerow(['Priority', 'HIGH' if (result.overall_score >= 0.85) else 'MEDIUM'])
-            writer.writerow(['Next Steps', 'Forensic analysis, reverse image search, metadata inspection'])
-        else:
-            writer.writerow(['Action', 'No immediate action needed'])
-            writer.writerow(['Priority', 'LOW'])
-            writer.writerow(['Next Steps', 'Proceed with normal workflow'])
         writer.writerow([])
     def _write_footer(self, writer) -> None:
-        """
-        Write CSV report footer
-        """
         writer.writerow(['=' * 100])
         writer.writerow(['Report generated by AI Image Screener'])
-        writer.writerow(['For questions or support, contact: support@aiimagescreener.com'])
-        writer.writerow(['DISCLAIMER: Results are indicative and should be verified manually for critical applications'])
         writer.writerow(['=' * 100])

 from config.settings import settings
 from config.constants import MetricType
 from config.schemas import AnalysisResult
+from config.constants import FinalDecision
 from utils.helpers import generate_unique_id
 from config.schemas import BatchAnalysisResult
 from features.detailed_result_maker import DetailedResultMaker
 class CSVReporter:
     """
     CSV report generator

     Guarantees:
     -----------
     - FinalDecision is authoritative
     - Metrics are informational only
     - Evidence-first reporting
     - Audit-safe CSV structure

     Every export method writes one UTF-8 (with BOM) CSV file into the requested
     directory and returns the path of the file it wrote.
     """
     def __init__(self):
         """
         Initialize CSV Reporter
         """
         self.detailed_maker = DetailedResultMaker()
         logger.debug("CSVReporter initialized")

     def export_batch_summary(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
         """
         Export batch decision summary as CSV

         Arguments:
         ----------
             batch_result { BatchAnalysisResult } : Batch result to summarise

             output_dir   { Path }                : Output directory (defaults to settings.REPORTS_DIR)

         Returns:
         --------
                        { Path }                  : Path to generated CSV file
         """
         output_path = self._prepare_output_path(output_dir = output_dir,
                                                 prefix     = "batch_summary",
                                                )
         logger.info(f"Generating batch summary CSV: {output_path.name}")
         try:
             with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
                 writer = csv.writer(f)
                 self._write_report_header(writer,
                                           report_type = "Batch Decision Summary",
                                           timestamp   = batch_result.timestamp,
                                          )
                 self._write_batch_decision_statistics(writer       = writer,
                                                       batch_result = batch_result,
                                                      )
                 self._write_batch_results_table(writer       = writer,
                                                 batch_result = batch_result,
                                                )
                 self._write_footer(writer = writer)
             logger.info(f"Batch summary CSV generated: {output_path}")
             return output_path
         except Exception as e:
             # Log at the reporting boundary, then let the caller decide what to do
             logger.error(f"Failed to generate batch summary CSV: {e}")
             raise

     def export_batch_detailed(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
         """
         Export detailed batch forensic CSV

         Arguments:
         ----------
             batch_result { BatchAnalysisResult } : Batch result whose per-image results are written

             output_dir   { Path }                : Output directory (defaults to settings.REPORTS_DIR)

         Returns:
         --------
                        { Path }                  : Path to generated CSV file
         """
         output_path = self._prepare_output_path(output_dir = output_dir,
                                                 prefix     = "batch_detailed",
                                                )
         logger.info(f"Generating detailed batch CSV: {output_path.name}")
         # Number the sections by what is actually iterated, so the "IMAGE i OF n"
         # banner and the separators stay correct even if the processed counter
         # and the results list ever disagree
         results      = batch_result.results
         total_images = len(results)
         try:
             with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
                 writer = csv.writer(f)
                 self._write_report_header(writer,
                                           report_type = "Detailed Batch Analysis",
                                           timestamp   = batch_result.timestamp,
                                          )
                 for idx, result in enumerate(results, 1):
                     self._write_detailed_image_section(writer,
                                                        result       = result,
                                                        image_number = idx,
                                                        total_images = total_images,
                                                       )
                     # Separator between images, but not after the last one
                     if (idx < total_images):
                         writer.writerow([])
                         writer.writerow(['=' * 100])
                         writer.writerow([])
                 self._write_footer(writer = writer)
             logger.info(f"Detailed batch CSV generated: {output_path}")
             return output_path
         except Exception as e:
             logger.error(f"Failed to generate detailed batch CSV: {e}")
             raise

     def export_single_detailed(self, result: AnalysisResult, output_dir: Optional[Path] = None) -> Path:
         """
         Export single image detailed CSV

         Arguments:
         ----------
             result     { AnalysisResult } : Single image analysis result

             output_dir { Path }           : Output directory (defaults to settings.REPORTS_DIR)

         Returns:
         --------
                      { Path }             : Path to generated CSV file
         """
         output_path = self._prepare_output_path(output_dir = output_dir,
                                                 prefix     = "single_analysis",
                                                )
         logger.info(f"Generating single image CSV: {output_path.name}")
         try:
             with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
                 writer = csv.writer(f)
                 self._write_report_header(writer,
                                           report_type = "Single Image Analysis",
                                           timestamp   = result.timestamp,
                                          )
                 self._write_detailed_image_section(writer,
                                                    result       = result,
                                                    image_number = 1,
                                                    total_images = 1,
                                                   )
                 self._write_footer(writer = writer)
             logger.info(f"Single image CSV generated: {output_path}")
             return output_path
         except Exception as e:
             logger.error(f"Failed to generate single image CSV: {e}")
             raise

     def _prepare_output_path(self, output_dir: Optional[Path], prefix: str) -> Path:
         """
         Resolve the target directory, make sure it exists and build a unique CSV path

         Creating the directory here keeps the CSV exporters from failing with
         FileNotFoundError when the reports directory has not been created yet.
         """
         target_dir = Path(output_dir or settings.REPORTS_DIR)
         target_dir.mkdir(parents = True, exist_ok = True)
         return target_dir / f"{prefix}_{generate_unique_id()}.csv"

     @staticmethod
     def _flatten(text: Optional[str]) -> str:
         """
         Collapse a possibly missing, possibly multi-line text into one CSV-friendly line
         """
         return (text or '').replace("\n", " ")

     def _write_report_header(self, writer, report_type: str, timestamp: datetime) -> None:
         """
         Write the banner: title, generation time and application version
         """
         writer.writerow(['=' * 100])
         writer.writerow([f'AI Image Screener - {report_type}'])
         writer.writerow([f'Generated: {timestamp.strftime("%Y-%m-%d %H:%M:%S")}'])
         writer.writerow([f'Version: {settings.VERSION}'])
         writer.writerow(['=' * 100])
         writer.writerow([])

     def _write_batch_decision_statistics(self, writer, batch_result: BatchAnalysisResult) -> None:
         """
         Write batch counters followed by one count row per FinalDecision value
         """
         writer.writerow(['BATCH DECISION STATISTICS'])
         writer.writerow([])
         summary = batch_result.summary or {}
         rows    = [['Total Images', batch_result.total_images],
                    ['Processed', batch_result.processed],
                    ['Failed', batch_result.failed],
                    ['Success Rate', f"{summary.get('success_rate', 0)}%"],
                    ['', ''],
                   ]
         # Decisions missing from the summary are reported as zero
         rows.extend([decision.value, summary.get(decision.value, 0)] for decision in FinalDecision)
         rows.append(['Total Processing Time', f"{batch_result.total_processing_time:.2f}s"])
         writer.writerows(rows)
         writer.writerow([])
         writer.writerow(['=' * 100])
         writer.writerow([])

     def _write_batch_results_table(self, writer, batch_result: BatchAnalysisResult) -> None:
         """
         Write one table row per image: decision first, score as informational only
         """
         writer.writerow(['ANALYSIS RESULTS'])
         writer.writerow([])
         header = ['Filename',
                   'Final Decision',
                   'Decision Confidence (%)',
                   'Overall Score (informational)',
                   'Decision Explanation',
                   'Processing Time (s)',
                  ]
         writer.writerow(header)
         for result in batch_result.results:
             writer.writerow([result.filename,
                              result.final_decision.value,
                              f"{result.confidence}%",
                              f"{result.overall_score:.3f}",
                              self._flatten(result.decision_explanation),
                              f"{result.processing_time:.2f}",
                            ])
         writer.writerow([])

     def _write_detailed_image_section(self, writer, result: AnalysisResult, image_number: int, total_images: int) -> None:
         """
         Write the decision, evidence and informational metric signals of one image

         The evidence section is omitted entirely when the result carries no evidence.
         """
         writer.writerow([f'IMAGE {image_number} OF {total_images}'])
         writer.writerow([])
         # Decision Summary
         writer.writerow(['FINAL DECISION'])
         writer.writerow(['Decision', result.final_decision.value])
         writer.writerow(['Confidence', f"{result.confidence}%"])
         # Flattened like the batch table, so every record stays on one physical line
         writer.writerow(['Explanation', self._flatten(result.decision_explanation)])
         writer.writerow([])
         # Evidence Summary
         if result.evidence:
             writer.writerow(['EVIDENCE SUMMARY'])
             writer.writerow(['Source', 'Direction', 'Strength', 'Confidence', 'Finding'])
             for e in result.evidence:
                 writer.writerow([e.source.value,
                                  e.direction.value,
                                  e.strength.value,
                                  f"{e.confidence:.3f}" if e.confidence is not None else 'N/A',
                                  self._flatten(e.finding),
                                ])
             writer.writerow([])
         # Metric Signals (Informational)
         writer.writerow(['METRIC SIGNALS (INFORMATIONAL)'])
         writer.writerow(['Metric', 'Score', 'Status', 'Confidence'])
         for signal in result.signals:
             # A signal may have no matching metric result; report its confidence as N/A
             metric_result = result.metric_results.get(signal.metric_type)
             writer.writerow([signal.name,
                              f"{signal.score:.3f}",
                              signal.status.value,
                              f"{metric_result.confidence:.3f}" if (metric_result and metric_result.confidence is not None) else 'N/A',
                            ])
         writer.writerow([])

     def _write_footer(self, writer) -> None:
         """
         Write the closing banner and disclaimer
         """
         writer.writerow(['=' * 100])
         writer.writerow(['Report generated by AI Image Screener'])
         writer.writerow(['DISCLAIMER: Statistical signals are non-decisional'])
         writer.writerow(['Final decisions are policy-based and auditable'])
         writer.writerow(['=' * 100])

reporter/json_reporter.py CHANGED Viewed

@@ -8,6 +8,7 @@ from datetime import datetime
 from utils.logger import get_logger
 from config.settings import settings
 from config.schemas import AnalysisResult
 from utils.helpers import generate_unique_id
 from config.schemas import BatchAnalysisResult
 from features.detailed_result_maker import DetailedResultMaker
@@ -19,39 +20,27 @@ logger = get_logger(__name__)
 class JSONReporter:
     """
-    Professional JSON report generator
-    Features:
-    ---------
-    - Machine-readable structured format
-    - API-friendly output
-    - Complete data preservation
-    - Pretty-printed for readability
-    - Nested structure for complex data
     """
     def __init__(self):
         """
         Initialize JSON Reporter
         """
         self.detailed_maker = DetailedResultMaker()
         logger.debug("JSONReporter initialized")
     def export_batch(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None, include_detailed: bool = True) -> Path:
         """
         Export batch analysis as JSON
-        Arguments:
-        ----------
-            batch_result     { BatchAnalysisResult } : Complete batch analysis result
-            output_dir       { Path }                : Output directory (defaults to settings.REPORTS_DIR)
-            include_detailed { bool }                : Include detailed forensic data
-        Returns:
-        --------
-                        { Path }                     : Path to generated JSON file
         """
         output_dir  = output_dir or settings.REPORTS_DIR
         report_id   = generate_unique_id()
@@ -59,47 +48,33 @@ class JSONReporter:
         output_path = output_dir / filename
         output_dir.mkdir(parents = True, exist_ok = True)
         logger.info(f"Generating batch JSON: {filename}")
         try:
-            # Build JSON structure
             data = self._build_batch_json(batch_result     = batch_result,
                                           include_detailed = include_detailed,
                                          )
-            # Write to file
             with open(output_path, 'w', encoding = 'utf-8') as f:
-                json.dump(obj          = data,
-                          fp           = f,
-                          indent       = 4,
-                          ensure_ascii = False,
                           default      = str,
                          )
             logger.info(f"Batch JSON generated: {output_path}")
             return output_path
         except Exception as e:
             logger.error(f"Failed to generate batch JSON: {e}")
             raise
-    def export_single(self, result: AnalysisResult, output_dir: Optional[Path] = None, include_detailed: bool = True) -> Path:
         """
         Export single image analysis as JSON
-        Arguments:
-        ----------
-            result           { AnalysisResult } : Single image analysis result
-            output_dir            { Path }      : Output directory (defaults to settings.REPORTS_DIR)
-            include_detailed      { bool }      : Include detailed forensic data
-        Returns:
-        --------
-                      { Path }                  : Path to generated JSON file
         """
         output_dir  = output_dir or settings.REPORTS_DIR
         report_id   = generate_unique_id()
@@ -107,27 +82,24 @@ class JSONReporter:
         output_path = output_dir / filename
         output_dir.mkdir(parents = True, exist_ok = True)
         logger.info(f"Generating single image JSON: {filename}")
         try:
-            # Build JSON structure
             data = self._build_single_json(result           = result,
                                            include_detailed = include_detailed,
                                           )
-            # Write to file
             with open(output_path, 'w', encoding = 'utf-8') as f:
-                json.dump(obj          = data,
-                          fp           = f,
-                          indent       = 4,
-                          ensure_ascii = False,
                           default      = str,
                          )
             logger.info(f"Single image JSON generated: {output_path}")
             return output_path
         except Exception as e:
             logger.error(f"Failed to generate single image JSON: {e}")
             raise
@@ -135,15 +107,7 @@ class JSONReporter:
     def export_api_response(self, result: AnalysisResult) -> Dict:
         """
-        Generate API-friendly JSON response (in-memory, no file)
-        Arguments:
-        ----------
-            result { AnalysisResult } : Analysis result
-        Returns:
-        --------
-                   { dict }           : API response dictionary
         """
         return {"success"   : True,
                 "timestamp" : datetime.now().isoformat(),
@@ -158,36 +122,23 @@ class JSONReporter:
         """
         Build complete batch JSON structure
         """
-        data = {"report_metadata" : self._build_metadata(report_type = "Batch Analysis",
                                                          timestamp   = batch_result.timestamp,
                                                         ),
-                "batch_summary"   : self._build_batch_summary(batch_result = batch_result),
-                "results"         : [],
                }
-        # Add each image result
-        for result in batch_result.results:
-            image_data = self._build_image_data(result           = result,
-                                                include_detailed = include_detailed,
-                                               )
-            data["results"].append(image_data)
-        return data
     def _build_single_json(self, result: AnalysisResult, include_detailed: bool) -> Dict:
         """
         Build single image JSON structure
         """
-        data = {"report_metadata" : self._build_metadata(report_type = "Single Image Analysis",
                                                          timestamp   = result.timestamp,
                                                         ),
-                "analysis"        : self._build_image_data(result           = result,
-                                                           include_detailed = include_detailed,
-                                                          ),
                }
-        return data
     def _build_metadata(self, report_type: str, timestamp: datetime) -> Dict:
@@ -204,17 +155,20 @@ class JSONReporter:
     def _build_batch_summary(self, batch_result: BatchAnalysisResult) -> Dict:
         """
-        Build batch summary section
         """
         return {"total_images"          : batch_result.total_images,
                 "processed"             : batch_result.processed,
                 "failed"                : batch_result.failed,
-                "success_rate"          : batch_result.summary.get('success_rate', 0),
-                "statistics"            : {"likely_authentic" : batch_result.summary.get('likely_authentic', 0),
-                                           "review_required"  : batch_result.summary.get('review_required', 0),
-                                           "avg_score"        : batch_result.summary.get('avg_score', 0.0),
-                                           "avg_confidence"   : batch_result.summary.get('avg_confidence', 0),
-                                           "avg_proc_time"    : batch_result.summary.get('avg_proc_time', 0.0),
                                           },
                 "total_processing_time" : round(batch_result.total_processing_time, 2),
                }
@@ -222,128 +176,100 @@ class JSONReporter:
     def _build_image_data(self, result: AnalysisResult, include_detailed: bool) -> Dict:
         """
-        Build complete image data structure
         """
-        image_data = {"filename"     : result.filename,
-                      "status"       : result.status.value,
-                      "overall"      : {"score"           : round(result.overall_score, 3),
-                                        "confidence"      : result.confidence,
-                                        "interpretation"  : self._interpret_score(score = result.overall_score),
-                                       },
-                      "image_info"   : {"size"            : {"width"  : result.image_size[0],
-                                                             "height" : result.image_size[1],
-                                                            },
-                                        "processing_time" : round(result.processing_time, 2),
-                                        "timestamp"       : result.timestamp.isoformat(),
-                                       },
-                      "signals"      : self._build_signals_data(result = result),
                      }
-        # Add detailed forensics if requested
         if include_detailed:
-            image_data["forensics"]       = self._build_forensics_data(result = result)
-            image_data["recommendations"] = self._build_recommendations(result = result)
         return image_data
     def _build_signals_data(self, result: AnalysisResult) -> List[Dict]:
         """
-        Build signals data structure
         """
         signals = list()
         for signal in result.signals:
             metric_result = result.metric_results.get(signal.metric_type)
-            signal_data   = {"metric_name" : signal.name,
-                             "metric_type" : signal.metric_type.value,
-                             "score"       : round(signal.score, 3),
-                             "status"      : signal.status.value,
-                             "confidence"  : round(metric_result.confidence, 3) if (metric_result and metric_result.confidence is not None) else None,
-                             "explanation" : signal.explanation,
-                            }
-            signals.append(signal_data)
         return signals
     def _build_forensics_data(self, result: AnalysisResult) -> Dict:
         """
-        Build detailed forensics data structure
         """
         forensics = dict()
         for metric_type, metric_result in result.metric_results.items():
-            metric_name                  = self.detailed_maker.metric_display_names.get(metric_type, metric_type.value)
-            forensics[metric_type.value] = {"display_name" : metric_name,
                                             "score"        : round(metric_result.score, 3),
-                                            "confidence"   : round(metric_result.confidence, 3) if (metric_result and metric_result.confidence is not None) else None,
                                             "details"      : metric_result.details or {},
-                                            "key_findings" : self.detailed_maker.extract_key_findings(metric_type   = metric_type,
-                                                                                                      metric_result = metric_result,
-                                                                                                     ),
                                            }
         return forensics
     def _build_recommendations(self, result: AnalysisResult) -> Dict:
         """
-        Build recommendations structure
         """
-        score = result.overall_score
-        if (score >= 0.85):
-            return {"action"      : "Immediate manual verification required",
-                    "priority"    : "HIGH",
-                    "risk_level"  : "CRITICAL",
-                    "next_steps"  : ["Forensic analysis", "Reverse image search", "Metadata inspection"],
-                    "confidence"  : "Very high likelihood of AI generation",
                    }
-        elif (score >= 0.70):
-            return {"action"      : "Manual verification recommended",
-                    "priority"    : "MEDIUM",
-                    "risk_level"  : "HIGH",
-                    "next_steps"  : ["Visual inspection", "Compare with authentic samples"],
-                    "confidence"  : "High likelihood of AI generation",
                    }
-        elif (score >= 0.50):
-            return {"action"      : "Optional review suggested",
-                    "priority"    : "LOW",
-                    "risk_level"  : "MEDIUM",
-                    "next_steps"  : ["Verify image source", "Check for inconsistencies"],
-                    "confidence"  : "Moderate indicators present",
                    }
-        else:
-            return {"action"      : "No immediate action required",
-                    "priority"    : "NONE",
-                    "risk_level"  : "LOW",
-                    "next_steps"  : ["Proceed with normal workflow"],
-                    "confidence"  : "Low likelihood of AI generation",
                    }
-    def _interpret_score(self, score: float) -> str:
-        """
-        Interpret score for human readability
-        """
-        if (score >= 0.85):
-            return "Very high suspicion"
-        elif (score >= 0.70):
-            return "High suspicion"
-        elif (score >= 0.50):
-            return "Moderate suspicion"
-        elif (score >= 0.30):
-            return "Low suspicion"
-        else:
-            return "Very low suspicion"

 from utils.logger import get_logger
 from config.settings import settings
 from config.schemas import AnalysisResult
+from config.constants import FinalDecision
 from utils.helpers import generate_unique_id
 from config.schemas import BatchAnalysisResult
 from features.detailed_result_maker import DetailedResultMaker
 class JSONReporter:
     """
+    JSON report generator
+    Guarantees:
+    -----------
+    - FinalDecision is authoritative
+    - Metrics are informational only
+    - Evidence-first interpretation
+    - Audit-safe output
     """
     def __init__(self) -> None:
         """
         Initialize JSON Reporter

         Creates the DetailedResultMaker helper that the forensic section relies on
         to look up metric display names and to extract key findings
         """
         self.detailed_maker = DetailedResultMaker()
         logger.debug("JSONReporter initialized")
     def export_batch(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None, include_detailed: bool = True) -> Path:
         """
         Export batch analysis as JSON

         Builds the batch document with _build_batch_json and writes it as UTF-8
         JSON (4-space indent, non-ASCII characters kept as-is) under output_dir,
         falling back to settings.REPORTS_DIR when no directory is given. The
         directory is created if it does not exist.

         Returns the path of the written file. Any exception raised while building
         or writing the report is logged and then re-raised unchanged.

         NOTE(review): `filename` is read below but never assigned in the lines
         visible here, while `report_id` is assigned and never used - presumably a
         line deriving `filename` from `report_id` is missing from this view; confirm
         against the full file.
         """
         output_dir  = output_dir or settings.REPORTS_DIR
         report_id   = generate_unique_id()
         output_path = output_dir / filename
         output_dir.mkdir(parents = True, exist_ok = True)
         logger.info(f"Generating batch JSON: {filename}")
         try:
             data = self._build_batch_json(batch_result     = batch_result,
                                           include_detailed = include_detailed,
                                          )
             with open(output_path, 'w', encoding = 'utf-8') as f:
                 # default = str : values json cannot encode natively are written as str(value)
+                json.dump(obj          = data,
+                          fp           = f,
+                          indent       = 4,
+                          ensure_ascii = False,
                           default      = str,
                          )
             logger.info(f"Batch JSON generated: {output_path}")
             return output_path
         except Exception as e:
             # Record the failure, then propagate the original exception to the caller
             logger.error(f"Failed to generate batch JSON: {e}")
             raise
+    def export_single(self, result: AnalysisResult, output_dir: Optional[Path] = None, include_detailed: bool = True,
+                     ) -> Path:
         """
         Export single image analysis as JSON

         Builds the single-image document with _build_single_json and writes it as
         UTF-8 JSON (4-space indent, non-ASCII characters kept as-is) under
         output_dir, falling back to settings.REPORTS_DIR when no directory is
         given. The directory is created if it does not exist.

         Returns the path of the written file. Any exception raised while building
         or writing the report is logged and then re-raised unchanged.

         NOTE(review): `filename` is read below but never assigned in the lines
         visible here, while `report_id` is assigned and never used - presumably a
         line deriving `filename` from `report_id` is missing from this view; confirm
         against the full file.
         """
         output_dir  = output_dir or settings.REPORTS_DIR
         report_id   = generate_unique_id()
         output_path = output_dir / filename
         output_dir.mkdir(parents = True, exist_ok = True)
         logger.info(f"Generating single image JSON: {filename}")
         try:
             data = self._build_single_json(result           = result,
                                            include_detailed = include_detailed,
                                           )
             with open(output_path, 'w', encoding = 'utf-8') as f:
                 # default = str : values json cannot encode natively are written as str(value)
+                json.dump(obj          = data,
+                          fp           = f,
+                          indent       = 4,
+                          ensure_ascii = False,
                           default      = str,
                          )
             logger.info(f"Single image JSON generated: {output_path}")
             return output_path
         except Exception as e:
             # Record the failure, then propagate the original exception to the caller
             logger.error(f"Failed to generate single image JSON: {e}")
             raise
     def export_api_response(self, result: AnalysisResult) -> Dict:
         """
+        Generate API-friendly JSON response
         """
         return {"success"   : True,
                 "timestamp" : datetime.now().isoformat(),
         """
         Build complete batch JSON structure
         """
+        return {"report_metadata" : self._build_metadata(report_type = "Batch Analysis",
                                                          timestamp   = batch_result.timestamp,
                                                         ),
+                "batch_summary"   : self._build_batch_summary(batch_result),
+                "results"         : [self._build_image_data(result, include_detailed) for result in batch_result.results],
                }
     def _build_single_json(self, result: AnalysisResult, include_detailed: bool) -> Dict:
         """
         Build single image JSON structure
         """
+        return {"report_metadata" : self._build_metadata(report_type = "Single Image Analysis",
                                                          timestamp   = result.timestamp,
                                                         ),
+                "analysis"        : self._build_image_data(result, include_detailed),
                }
     def _build_metadata(self, report_type: str, timestamp: datetime) -> Dict:
     def _build_batch_summary(self, batch_result: BatchAnalysisResult) -> Dict:
         """
         Summarise a batch run (decision-aware)

         Reports the volume counters, the success rate, how many images landed in
         each final decision, and the total processing time rounded to 2 decimals.
         A missing or empty `summary` on the batch result makes every summary-backed
         figure fall back to 0
         """
         summary_lookup = batch_result.summary or {}

         # Each decision's string value is the key looked up in the summary mapping
         decision_keys  = (FinalDecision.CONFIRMED_AI_GENERATED.value,
                           FinalDecision.SUSPICIOUS_AI_LIKELY.value,
                           FinalDecision.AUTHENTIC_BUT_REVIEW.value,
                           FinalDecision.MOSTLY_AUTHENTIC.value,
                          )

         batch_summary                  = dict()
         batch_summary["total_images"]  = batch_result.total_images
         batch_summary["processed"]     = batch_result.processed
         batch_summary["failed"]        = batch_result.failed
         batch_summary["success_rate"]  = summary_lookup.get("success_rate", 0)

         distribution                   = dict()
         for decision_key in decision_keys:
             distribution[decision_key] = summary_lookup.get(decision_key, 0)

         batch_summary["decision_distribution"] = distribution
         batch_summary["total_processing_time"] = round(batch_result.total_processing_time, 2)

         return batch_summary
     def _build_image_data(self, result: AnalysisResult, include_detailed: bool) -> Dict:
         """
+        Build complete image data structure (decision-first)
         """
+        image_data = {"filename"   : result.filename,
+                      "decision"   : {"value"        : result.final_decision.value if result.final_decision else None,
+                                      "confidence"   : result.confidence,
+                                      "explanation"  : result.decision_explanation,
+                                     },
+                      "overall"    : {"score"      : round(result.overall_score, 3),
+                                      "note"       : "Statistical score (non-authoritative)",
+                                     },
+                      "image_info" : {"size"            : {"width"  : result.image_size[0],
+                                                           "height" : result.image_size[1],
+                                                          },
+                                      "processing_time" : round(result.processing_time, 2),
+                                      "timestamp"       : result.timestamp.isoformat(),
+                                     },
+                      "signals"    : self._build_signals_data(result),
                      }
         if include_detailed:
+            image_data["forensics"]       = self._build_forensics_data(result)
+            image_data["recommendations"] = self._build_recommendations(result)
         return image_data
     def _build_signals_data(self, result: AnalysisResult) -> List[Dict]:
         """
+        Build Tier-1 signal data (informational)
         """
         signals = list()
         for signal in result.signals:
             metric_result = result.metric_results.get(signal.metric_type)
+            signals.append({"metric_name" : signal.name,
+                            "metric_type" : signal.metric_type.value,
+                            "score"       : round(signal.score, 3),
+                            "status"      : signal.status.value,
+                            "confidence"  : round(metric_result.confidence, 3) if (metric_result and metric_result.confidence is not None) else None,
+                            "explanation" : signal.explanation,
+                           })
         return signals
     def _build_forensics_data(self, result: AnalysisResult) -> Dict:
         """
+        Build forensic metric details
         """
         forensics = dict()
         for metric_type, metric_result in result.metric_results.items():
+            forensics[metric_type.value] = {"display_name" : self.detailed_maker.metric_display_names.get(metric_type, metric_type.value),
                                             "score"        : round(metric_result.score, 3),
+                                            "confidence"   : round(metric_result.confidence, 3) if metric_result.confidence is not None else None,
                                             "details"      : metric_result.details or {},
+                                            "key_findings" : self.detailed_maker.extract_key_findings(metric_type, metric_result),
                                            }
         return forensics
     def _build_recommendations(self, result: AnalysisResult) -> Dict:
         """
         Build recommendations (decision-driven, not score-driven)

         Maps the result's final decision to an action, a priority and a list of
         next steps. A decision that matches none of the four known outcomes
         (including a missing decision) yields the "Decision unavailable" entry
         """
         decision = result.final_decision

         # Ordered from most to least severe; compared with == exactly as listed
         playbook = ((FinalDecision.CONFIRMED_AI_GENERATED,
                      "Block or flag image immediately",
                      "CRITICAL",
                      ["Audit source", "Apply AI-content policy"],
                     ),
                     (FinalDecision.SUSPICIOUS_AI_LIKELY,
                      "Manual review required",
                      "HIGH",
                      ["Human inspection", "Cross-check metadata"],
                     ),
                     (FinalDecision.AUTHENTIC_BUT_REVIEW,
                      "Optional human review",
                      "MEDIUM",
                      ["Spot-check authenticity"],
                     ),
                     (FinalDecision.MOSTLY_AUTHENTIC,
                      "No action required",
                      "LOW",
                      ["Proceed normally"],
                     ),
                    )

         for candidate, action, priority, next_steps in playbook:
             if (decision == candidate):
                 return {"action"     : action,
                         "priority"   : priority,
                         "next_steps" : next_steps,
                        }

         return {"action"     : "Decision unavailable",
                 "priority"   : "UNKNOWN",
                 "next_steps" : [],
                }

reporter/pdf_reporter.py CHANGED Viewed

@@ -1,24 +1,28 @@
 # Dependencies
 from pathlib import Path
-from typing import Optional, List, Dict, Any
 from datetime import datetime
 from utils.logger import get_logger
 from config.settings import settings
-from reportlab.platypus import Table, Spacer, Paragraph, PageBreak, Image as RLImage
-from reportlab.lib import colors
-from reportlab.lib.pagesizes import A4, LETTER
-from reportlab.lib.enums import TA_LEFT, TA_RIGHT, TA_CENTER, TA_JUSTIFY
 from reportlab.platypus import TableStyle
 from config.schemas import AnalysisResult
 from utils.helpers import generate_unique_id
-from config.constants import DetectionStatus
 from config.schemas import BatchAnalysisResult
-from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
 from reportlab.platypus import SimpleDocTemplate
 from features.detailed_result_maker import DetailedResultMaker
-from reportlab.lib.units import inch
-from reportlab.pdfgen import canvas
-import textwrap
 # Setup Logging
@@ -27,817 +31,296 @@ logger = get_logger(__name__)
 class PDFReporter:
     """
-    Professional-Grade PDF Report Generator for AI Image Analysis
-    Features:
-    ---------
-    - Comprehensive single image reports with full forensic details
-    - Multi-page batch reports with executive summary
-    - Enhanced visual hierarchy and color coding
-    - Detailed metric breakdowns with explanations
-    - Professional formatting and layout
-    - Statistical summaries and insights
     """
-    # Enhanced Color Scheme
-    COLOR_PRIMARY = colors.HexColor('#0D47A1')        # Deep Blue
-    COLOR_SUCCESS = colors.HexColor('#1B5E20')        # Dark Green
-    COLOR_WARNING = colors.HexColor('#E65100')        # Deep Orange
-    COLOR_DANGER = colors.HexColor('#B71C1C')         # Dark Red
-    COLOR_INFO = colors.HexColor('#01579B')           # Light Blue
-    COLOR_NEUTRAL = colors.HexColor('#424242')        # Dark Grey
-    COLOR_HEADER_BG = colors.HexColor('#1565C0')      # Blue
-    COLOR_SUBHEADER_BG = colors.HexColor('#1976D2')   # Lighter Blue
-    COLOR_ALT_ROW = colors.HexColor('#F5F5F5')        # Light Grey
-    COLOR_LIGHT_BLUE = colors.HexColor('#E3F2FD')     # Very Light Blue
-    COLOR_LIGHT_GREEN = colors.HexColor('#E8F5E9')    # Very Light Green
-    COLOR_LIGHT_ORANGE = colors.HexColor('#FFF3E0')   # Very Light Orange
-    COLOR_LIGHT_RED = colors.HexColor('#FFEBEE')      # Very Light Red
     def __init__(self):
         self.detailed_maker = DetailedResultMaker()
-        self.styles = self._build_styles()
-        logger.debug("Enhanced PDFReporter initialized")
     def export_single(self, result: AnalysisResult, output_dir: Optional[Path] = None) -> Path:
-        """Export comprehensive single image analysis report"""
-        output_dir = output_dir or settings.REPORTS_DIR
-        output_dir.mkdir(parents=True, exist_ok=True)
-        report_id = generate_unique_id()
-        filename = f"ai_screener_report_{report_id}.pdf"
         output_path = output_dir / filename
-        logger.info(f"Generating comprehensive single image PDF: {filename}")
-        doc = SimpleDocTemplate(
-            str(output_path),
-            pagesize=LETTER,
-            rightMargin=30,
-            leftMargin=30,
-            topMargin=20,
-            bottomMargin=35
-        )
-        story = []
-        self._add_professional_header(story, "AI Image Analysis Report")
-        self._add_executive_summary_single(story, result)
         story.append(PageBreak())
-        self._add_detailed_metrics_analysis(story, result)
-        story.append(PageBreak())
-        self._add_forensic_breakdown(story, result)
-        self._add_recommendations(story, result)
-        self._add_professional_footer(story)
-        doc.build(story, onFirstPage=self._add_watermark, onLaterPages=self._add_watermark)
-        logger.info(f"Single image report generated: {output_path}")
         return output_path
     def export_batch(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
-        """Export comprehensive batch analysis report"""
-        output_dir = output_dir or settings.REPORTS_DIR
-        output_dir.mkdir(parents=True, exist_ok=True)
-        report_id = generate_unique_id()
-        filename = f"ai_screener_report_{report_id}.pdf"
         output_path = output_dir / filename
-        num_images = len(batch_result.results)
-        logger.info(f"Generating batch PDF report: {filename} ({num_images} images)")
-        doc = SimpleDocTemplate(
-            str(output_path),
-            pagesize=LETTER,
-            rightMargin=30,
-            leftMargin=30,
-            topMargin=20,
-            bottomMargin=35
-        )
-        story = []
-        self._add_professional_header(story, "Batch Image Analysis Report")
-        self._add_batch_executive_summary(story, batch_result)
-        story.append(PageBreak())
-        self._add_batch_overview_table(story, batch_result.results)
-        story.append(PageBreak())
-        self._add_batch_metrics_analysis(story, batch_result.results)
         story.append(PageBreak())
-        self._add_individual_results_summary(story, batch_result.results)
-        self._add_batch_recommendations(story, batch_result)
-        self._add_professional_footer(story)
-        doc.build(story, onFirstPage=self._add_watermark, onLaterPages=self._add_watermark)
-        logger.info(f"Batch report generated: {output_path}")
         return output_path
     def _build_styles(self):
-        """Build comprehensive style definitions"""
         styles = getSampleStyleSheet()
         styles.add(ParagraphStyle(
-            name='ReportTitle',
             fontSize=18,
             textColor=self.COLOR_PRIMARY,
-            alignment=TA_CENTER,
-            spaceAfter=4,
-            spaceBefore=2,
-            fontName='Helvetica-Bold'
-        ))
-        styles.add(ParagraphStyle(
-            name='ReportSubtitle',
-            fontSize=10,
-            textColor=self.COLOR_NEUTRAL,
-            alignment=TA_CENTER,
-            spaceAfter=6,
-            fontName='Helvetica'
-        ))
-        styles.add(ParagraphStyle(
-            name='SectionTitle',
-            fontSize=13,
-            textColor=self.COLOR_PRIMARY,
-            spaceBefore=10,
-            spaceAfter=6,
             fontName='Helvetica-Bold'
         ))
-        styles.add(ParagraphStyle(
-            name='SectionHeader',
-            fontSize=11,
-            textColor=self.COLOR_PRIMARY,
-            spaceBefore=8,
-            spaceAfter=5,
-            fontName='Helvetica-Bold'
-        ))
-        styles.add(ParagraphStyle(
-            name='SubHeader',
-            fontSize=9.5,
-            textColor=self.COLOR_PRIMARY,
-            spaceBefore=5,
-            spaceAfter=3,
-            fontName='Helvetica-Bold'
-        ))
         styles.add(ParagraphStyle(
-            name='CustomBodyText',
-            fontSize=9,
-            leading=12,
-            alignment=TA_JUSTIFY,
             spaceAfter=6
         ))
         styles.add(ParagraphStyle(
-            name='TableCell',
             fontSize=8,
-            leading=10
-        ))
-        styles.add(ParagraphStyle(
-            name='TableCellSmall',
-            fontSize=7.5,
-            leading=9
         ))
         styles.add(ParagraphStyle(
-            name='TableHeader',
-            fontSize=8.5,
-            textColor=colors.white,
             fontName='Helvetica-Bold',
-            leading=10,
-            alignment=TA_CENTER
         ))
         styles.add(ParagraphStyle(
-            name='Footer',
-            fontSize=7.5,
-            textColor=colors.grey,
-            alignment=TA_CENTER,
-            spaceAfter=2
         ))
         styles.add(ParagraphStyle(
-            name='Timestamp',
-            fontSize=8,
-            textColor=self.COLOR_NEUTRAL,
-            alignment=TA_CENTER,
-            spaceAfter=8
         ))
-        return styles
-    def _add_watermark(self, canvas, doc):
-        """Add professional watermark"""
-        canvas.saveState()
-        canvas.setFont('Helvetica-Bold', 70)
-        canvas.setFillColorRGB(0.85, 0.85, 0.85, alpha=0.15)
-        canvas.rotate(45)
-        canvas.drawString(2.5*inch, -0.5*inch, "AI IMAGE SCREENER")
-        canvas.restoreState()
-    def _add_professional_header(self, story, title: str):
-        """Professional header with branding"""
-        story.append(Paragraph("🔍 AI IMAGE SCREENER", self.styles['ReportTitle']))
-        story.append(Spacer(1, 3))
-        timestamp_text = f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} | Version: {settings.VERSION}"
-        story.append(Paragraph(timestamp_text, self.styles['Timestamp']))
-        story.append(Paragraph(title, self.styles['SectionTitle']))
-        story.append(Spacer(1, 10))
-    def _add_executive_summary_single(self, story, result: AnalysisResult):
-        """Executive summary for single image"""
-        story.append(Paragraph("Executive Summary", self.styles['SectionTitle']))
-        story.append(Spacer(1, 5))
-        # Key findings box
-        status_color = self.COLOR_DANGER if result.status == DetectionStatus.REVIEW_REQUIRED else self.COLOR_SUCCESS
-        status_bg = self.COLOR_LIGHT_RED if result.status == DetectionStatus.REVIEW_REQUIRED else self.COLOR_LIGHT_GREEN
-        status_text = "⚠️ REVIEW REQUIRED" if result.status == DetectionStatus.REVIEW_REQUIRED else "✅ LIKELY AUTHENTIC"
-        key_findings = [
-            [Paragraph("<b>Overall Assessment</b>", self.styles['TableHeader'])],
-            [Paragraph(f"<font size=12 color='{status_color.hexval()}'><b>{status_text}</b></font>", self.styles['CustomBodyText'])],
-            [Paragraph(f"<b>Confidence:</b> {result.confidence}%", self.styles['CustomBodyText'])],
-            [Paragraph(f"<b>Overall Score:</b> {result.overall_score:.4f}", self.styles['CustomBodyText'])]
-        ]
-        findings_table = Table(key_findings, colWidths=[530])
-        findings_table.setStyle(TableStyle([
-            ('BACKGROUND', (0, 0), (-1, 0), self.COLOR_INFO),
-            ('BACKGROUND', (0, 1), (-1, -1), status_bg),
-            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
-            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
-            ('LEFTPADDING', (0, 0), (-1, -1), 12),
-            ('RIGHTPADDING', (0, 0), (-1, -1), 12),
-            ('TOPPADDING', (0, 0), (-1, -1), 8),
-            ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
-            ('BOX', (0, 0), (-1, -1), 1.5, self.COLOR_PRIMARY)
-        ]))
-        story.append(findings_table)
-        story.append(Spacer(1, 12))
-        # Image information
-        story.append(Paragraph("Image Information", self.styles['SectionHeader']))
-        info_data = [
-            [Paragraph("<b>Property</b>", self.styles['TableHeader']),
-             Paragraph("<b>Value</b>", self.styles['TableHeader'])],
-            [Paragraph("Filename", self.styles['TableCell']),
-             Paragraph(result.filename, self.styles['TableCell'])],
-            [Paragraph("Dimensions", self.styles['TableCell']),
-             Paragraph(f"{result.image_size[0]} × {result.image_size[1]} pixels", self.styles['TableCell'])],
-            [Paragraph("Aspect Ratio", self.styles['TableCell']),
-             Paragraph(f"{result.image_size[0]/result.image_size[1]:.2f}:1", self.styles['TableCell'])],
-            [Paragraph("Processing Time", self.styles['TableCell']),
-             Paragraph(f"{result.processing_time:.3f} seconds", self.styles['TableCell'])],
-            [Paragraph("Analysis Date", self.styles['TableCell']),
-             Paragraph(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), self.styles['TableCell'])]
-        ]
-        info_table = Table(info_data, colWidths=[180, 350])
-        info_table.setStyle(self._get_standard_table_style(len(info_data)))
-        story.append(info_table)
-        story.append(Spacer(1, 12))
-        # Detection signals summary
-        story.append(Paragraph("Detection Signals Summary", self.styles['SectionHeader']))
-        flagged = sum(1 for s in result.signals if s.status.value == 'flagged')
-        warning = sum(1 for s in result.signals if s.status.value == 'warning')
-        passed = sum(1 for s in result.signals if s.status.value == 'passed')
-        signals_data = [
-            [Paragraph("<b>Status</b>", self.styles['TableHeader']),
-             Paragraph("<b>Count</b>", self.styles['TableHeader']),
-             Paragraph("<b>Percentage</b>", self.styles['TableHeader'])],
-            [Paragraph("🔴 Flagged", self.styles['TableCell']),
-             Paragraph(f"<font color='red'><b>{flagged}</b></font>", self.styles['TableCell']),
-             Paragraph(f"{flagged/len(result.signals)*100:.1f}%", self.styles['TableCell'])],
-            [Paragraph("🟡 Warning", self.styles['TableCell']),
-             Paragraph(f"<font color='orange'><b>{warning}</b></font>", self.styles['TableCell']),
-             Paragraph(f"{warning/len(result.signals)*100:.1f}%", self.styles['TableCell'])],
-            [Paragraph("🟢 Passed", self.styles['TableCell']),
-             Paragraph(f"<font color='green'><b>{passed}</b></font>", self.styles['TableCell']),
-             Paragraph(f"{passed/len(result.signals)*100:.1f}%", self.styles['TableCell'])]
-        ]
-        signals_table = Table(signals_data, colWidths=[200, 165, 165])
-        signals_table.setStyle(self._get_standard_table_style(len(signals_data)))
-        story.append(signals_table)
-    def _add_detailed_metrics_analysis(self, story, result: AnalysisResult):
-        """Comprehensive metrics analysis"""
-        story.append(Paragraph("Detailed Metrics Analysis", self.styles['SectionTitle']))
-        story.append(Spacer(1, 8))
-        # All detection signals with full details
-        story.append(Paragraph("Detection Signals Breakdown", self.styles['SectionHeader']))
-        signal_data = [
-            [Paragraph("<b>Metric</b>", self.styles['TableHeader']),
-             Paragraph("<b>Score</b>", self.styles['TableHeader']),
-             Paragraph("<b>Status</b>", self.styles['TableHeader']),
-             Paragraph("<b>Explanation</b>", self.styles['TableHeader'])]
-        ]
-        for signal in result.signals:
-            status_badge = self._get_status_badge_html(signal.status.value)
-            # Wrap long explanations
-            explanation = signal.explanation
-            if len(explanation) > 120:
-                explanation = explanation[:120] + "..."
-            signal_data.append([
-                Paragraph(f"<b>{signal.name}</b>", self.styles['TableCell']),
-                Paragraph(f"{signal.score:.4f}", self.styles['TableCell']),
-                Paragraph(status_badge, self.styles['TableCell']),
-                Paragraph(explanation, self.styles['TableCellSmall'])
-            ])
-        signal_table = Table(signal_data, colWidths=[120, 60, 80, 270])
-        signal_table.setStyle(self._get_signal_table_style(len(signal_data)))
-        story.append(signal_table)
-    def _add_forensic_breakdown(self, story, result: AnalysisResult):
-        """Detailed forensic analysis breakdown"""
-        story.append(Paragraph("Forensic Analysis Breakdown", self.styles['SectionTitle']))
-        story.append(Spacer(1, 8))
-        for metric_type, metric_result in result.metric_results.items():
-            metric_name = self.detailed_maker.metric_display_names.get(metric_type, metric_type.value)
-            details = metric_result.details or {}
-            # Skip if error
-            if 'error' in details:
-                continue
-            story.append(Paragraph(metric_name, self.styles['SectionHeader']))
-            # Metric overview
-            overview_data = [
-                [Paragraph("<b>Property</b>", self.styles['TableHeader']),
-                 Paragraph("<b>Value</b>", self.styles['TableHeader'])],
-                [Paragraph("Score", self.styles['TableCell']),
-                 Paragraph(f"<b>{metric_result.score:.4f}</b>", self.styles['TableCell'])],
-                [Paragraph("Confidence", self.styles['TableCell']),
-                 Paragraph(f"{metric_result.confidence:.4f}" if metric_result.confidence else "N/A", self.styles['TableCell'])],
-                [Paragraph("Status", self.styles['TableCell']),
-                 Paragraph(self._get_metric_status_html(metric_result.score), self.styles['TableCell'])]
-            ]
-            overview_table = Table(overview_data, colWidths=[130, 400])
-            overview_table.setStyle(self._get_standard_table_style(len(overview_data)))
-            story.append(overview_table)
-            story.append(Spacer(1, 5))
-            # Detailed parameters
-            if details and len(details) > 0:
-                story.append(Paragraph("Detailed Parameters:", self.styles['SubHeader']))
-                param_data = [[Paragraph("<b>Parameter</b>", self.styles['TableHeader']),
-                              Paragraph("<b>Value</b>", self.styles['TableHeader'])]]
-                for key, value in details.items():
-                    if key in ['error', 'reason']:
-                        continue
-                    if isinstance(value, dict):
-                        for sub_key, sub_value in value.items():
-                            if sub_key not in ['reason', 'error']:
-                                formatted_value = self._format_value(sub_value)
-                                param_data.append([
-                                    Paragraph(f"  └─ {sub_key}", self.styles['TableCellSmall']),
-                                    Paragraph(formatted_value, self.styles['TableCellSmall'])
-                                ])
-                    else:
-                        formatted_value = self._format_value(value)
-                        param_data.append([
-                            Paragraph(key, self.styles['TableCell']),
-                            Paragraph(formatted_value, self.styles['TableCell'])
-                        ])
-                param_table = Table(param_data, colWidths=[200, 330])
-                param_table.setStyle(TableStyle([
-                    ('BACKGROUND', (0, 0), (-1, 0), self.COLOR_SUBHEADER_BG),
-                    ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
-                    ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
-                    ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, self.COLOR_ALT_ROW]),
-                    ('VALIGN', (0, 0), (-1, -1), 'TOP'),
-                    ('LEFTPADDING', (0, 0), (-1, -1), 8),
-                    ('RIGHTPADDING', (0, 0), (-1, -1), 8),
-                    ('TOPPADDING', (0, 0), (-1, -1), 4),
-                    ('BOTTOMPADDING', (0, 0), (-1, -1), 4)
-                ]))
-                story.append(param_table)
-            story.append(Spacer(1, 8))
-    def _add_recommendations(self, story, result: AnalysisResult):
-        """Add actionable recommendations"""
-        story.append(Paragraph("Recommendations & Next Steps", self.styles['SectionTitle']))
-        story.append(Spacer(1, 8))
-        if result.status == DetectionStatus.REVIEW_REQUIRED:
-            rec_text = """
-            <b>⚠️ MANUAL REVIEW REQUIRED</b><br/>
-            This image has been flagged for manual review based on multiple detection signals.
-            Recommended actions:<br/>
-            • Conduct visual inspection by trained personnel<br/>
-            • Cross-reference with source verification<br/>
-            • Consider additional forensic analysis if high stakes<br/>
-            • Document findings for audit trail
-            """
-            rec_color = self.COLOR_LIGHT_RED
-            border_color = self.COLOR_DANGER
-        else:
-            rec_text = """
-            <b>✅ NO IMMEDIATE ACTION REQUIRED</b><br/>
-            This image appears to be authentic based on current analysis. However:<br/>
-            • Continue monitoring for evolving AI techniques<br/>
-            • Consider periodic re-screening for critical assets<br/>
-            • Maintain chain of custody documentation<br/>
-            • Stay updated on latest detection methodologies
-            """
-            rec_color = self.COLOR_LIGHT_GREEN
-            border_color = self.COLOR_SUCCESS
-        rec_table = Table([[Paragraph(rec_text, self.styles['CustomBodyText'])]], colWidths=[530])
-        rec_table.setStyle(TableStyle([
-            ('BACKGROUND', (0, 0), (-1, -1), rec_color),
-            ('BOX', (0, 0), (-1, -1), 2, border_color),
-            ('LEFTPADDING', (0, 0), (-1, -1), 15),
-            ('RIGHTPADDING', (0, 0), (-1, -1), 15),
-            ('TOPPADDING', (0, 0), (-1, -1), 12),
-            ('BOTTOMPADDING', (0, 0), (-1, -1), 12)
-        ]))
-        story.append(rec_table)
-    def _add_batch_executive_summary(self, story, batch_result: BatchAnalysisResult):
-        """Executive summary for batch analysis"""
-        story.append(Paragraph("Executive Summary", self.styles['SectionTitle']))
-        story.append(Spacer(1, 8))
-        # Key metrics
-        summary_data = [
-            [Paragraph("<b>Metric</b>", self.styles['TableHeader']),
-             Paragraph("<b>Value</b>", self.styles['TableHeader']),
-             Paragraph("<b>Details</b>", self.styles['TableHeader'])],
-            [Paragraph("Total Images", self.styles['TableCell']),
-             Paragraph(f"<b>{batch_result.total_images}</b>", self.styles['TableCell']),
-             Paragraph("Images submitted for analysis", self.styles['TableCellSmall'])],
-            [Paragraph("Successfully Processed", self.styles['TableCell']),
-             Paragraph(f"<font color='green'><b>{batch_result.processed}</b></font>", self.styles['TableCell']),
-             Paragraph(f"{batch_result.summary.get('success_rate', 0)}% success rate", self.styles['TableCellSmall'])],
-            [Paragraph("Failed", self.styles['TableCell']),
-             Paragraph(f"<font color='red'><b>{batch_result.failed}</b></font>", self.styles['TableCell']),
-             Paragraph("Processing errors encountered", self.styles['TableCellSmall'])],
-            [Paragraph("Likely Authentic", self.styles['TableCell']),
-             Paragraph(f"<font color='green'><b>{batch_result.summary.get('likely_authentic', 0)}</b></font>", self.styles['TableCell']),
-             Paragraph("Images passing authenticity checks", self.styles['TableCellSmall'])],
-            [Paragraph("Review Required", self.styles['TableCell']),
-             Paragraph(f"<font color='red'><b>{batch_result.summary.get('review_required', 0)}</b></font>", self.styles['TableCell']),
-             Paragraph("Images flagged for manual review", self.styles['TableCellSmall'])],
-            [Paragraph("Average Score", self.styles['TableCell']),
-             Paragraph(f"<b>{batch_result.summary.get('avg_score', 0):.4f}</b>", self.styles['TableCell']),
-             Paragraph("Mean authenticity score across batch", self.styles['TableCellSmall'])],
-            [Paragraph("Average Processing Time", self.styles['TableCell']),
-             Paragraph(f"<b>{batch_result.summary.get('avg_proc_time', 0):.3f}s</b>", self.styles['TableCell']),
-             Paragraph("Per-image processing duration", self.styles['TableCellSmall'])],
-        ]
-        summary_table = Table(summary_data, colWidths=[150, 130, 250])
-        summary_table.setStyle(self._get_standard_table_style(len(summary_data)))
-        story.append(summary_table)
-    def _add_batch_overview_table(self, story, results: List[AnalysisResult]):
-        """Comprehensive batch overview"""
-        story.append(Paragraph("Batch Overview Matrix", self.styles['SectionTitle']))
-        story.append(Spacer(1, 8))
-        header = [
-            Paragraph("<b>#</b>", self.styles['TableHeader']),
-            Paragraph("<b>Filename</b>", self.styles['TableHeader']),
-            Paragraph("<b>Image Size</b>", self.styles['TableHeader']),
-            Paragraph("<b>Score</b>", self.styles['TableHeader']),
-            Paragraph("<b>Status</b>", self.styles['TableHeader']),
-            Paragraph("<b>Top Signal</b>", self.styles['TableHeader']),
-            Paragraph("<b>Time(s)</b>", self.styles['TableHeader'])
-        ]
-        data = [header]
-        for idx, result in enumerate(results, 1):
-            top_signal = max(result.signals, key=lambda s: s.score)
-            status_badge = self._get_status_badge_short(result.status.value)
-            data.append([
-                Paragraph(str(idx), self.styles['TableCell']),
-                Paragraph(result.filename, self.styles['TableCellSmall']),
-                Paragraph(f"{result.image_size[0]}×{result.image_size[1]}", self.styles['TableCellSmall']),
-                Paragraph(f"<b>{result.overall_score:.3f}</b>", self.styles['TableCell']),
-                Paragraph(status_badge, self.styles['TableCellSmall']),
-                Paragraph(f"{top_signal.name}: {top_signal.score:.2f}", self.styles['TableCellSmall']),
-                Paragraph(f"{result.processing_time:.2f}", self.styles['TableCell'])
-            ])
-        table = Table(data, colWidths=[25, 155, 65, 50, 70, 120, 45])
-        table.setStyle(self._get_pivot_table_style(len(data)))
-        story.append(table)
-    def _add_batch_metrics_analysis(self, story, results: List[AnalysisResult]):
-        """Detailed metrics analysis for batch"""
-        story.append(Paragraph("Metric-wise Analysis", self.styles['SectionTitle']))
-        story.append(Spacer(1, 8))
-        metric_configs = {
-            'gradient': {
-                'name': 'Gradient-Field PCA Analysis',
-                'keys': ['eigenvalue_ratio', 'gradient_vectors_sampled'],
-                'labels': ['Eigenvalue\nRatio', 'Vectors\nSampled']
-            },
-            'frequency': {
-                'name': 'Frequency Domain Analysis (FFT)',
-                'keys': ['hf_ratio', 'roughness', 'spectral_deviation'],
-                'labels': ['HF Ratio', 'Roughness', 'Spec.\nDeviation']
-            },
-            'noise': {
-                'name': 'Noise Pattern Analysis',
-                'keys': ['mean_noise', 'cv', 'patches_valid'],
-                'labels': ['Mean Noise', 'CV', 'Patches\nValid']
-            },
-            'texture': {
-                'name': 'Texture Statistical Analysis',
-                'keys': ['smooth_ratio', 'contrast_mean', 'entropy_mean'],
-                'labels': ['Smooth\nRatio', 'Mean\nContrast', 'Mean\nEntropy']
-            },
-            'color': {
-                'name': 'Color Distribution Analysis',
-                'keys': ['saturation_stats.mean_saturation', 'saturation_stats.high_sat_ratio'],
-                'labels': ['Mean\nSaturation', 'High Saturation\nRatio']
-            }
-        }
-        for metric_key, config in metric_configs.items():
-            story.append(Paragraph(config['name'], self.styles['SectionHeader']))
-            # Build header
-            header = [
-                Paragraph("<b>#</b>", self.styles['TableHeader']),
-                Paragraph("<b>Filename</b>", self.styles['TableHeader']),
-                Paragraph("<b>Score</b>", self.styles['TableHeader']),
-                Paragraph("<b>Confidence</b>", self.styles['TableHeader'])
-            ]
-            for label in config['labels']:
-                header.append(Paragraph(f"<b>{label}</b>", self.styles['TableHeader']))
-            data = [header]
-            for idx, result in enumerate(results, 1):
-                metric_result = result.metric_results.get(metric_key)
-                if not metric_result:
-                    continue
-                details = metric_result.details or {}
-                row = [
-                    Paragraph(str(idx), self.styles['TableCellSmall']),
-                    Paragraph(result.filename, self.styles['TableCellSmall']),
-                    Paragraph(f"<b>{metric_result.score:.3f}</b>", self.styles['TableCellSmall']),
-                    Paragraph(f"{metric_result.confidence:.2f}" if metric_result.confidence else "N/A",
-                             self.styles['TableCellSmall'])
-                ]
-                # Extract values
-                for key in config['keys']:
-                    value = self._extract_nested_value(details, key)
-                    formatted_value = self._format_value(value, decimal_places=3)
-                    row.append(Paragraph(formatted_value, self.styles['TableCellSmall']))
-                data.append(row)
-            # Dynamic column widths
-            num_detail_cols = len(config['labels'])
-            detail_col_width = (530 - 25 - 140 - 45 - 35) // num_detail_cols
-            col_widths = [25, 140, 45, 35] + [detail_col_width] * num_detail_cols
-            table = Table(data, colWidths=col_widths)
-            table.setStyle(self._get_pivot_table_style(len(data)))
-            story.append(table)
-            story.append(Spacer(1, 10))
-    def _add_individual_results_summary(self, story, results: List[AnalysisResult]):
-        """Individual image summaries in batch"""
-        story.append(Paragraph("Individual Image Summaries", self.styles['SectionTitle']))
-        story.append(Spacer(1, 8))
-        for idx, result in enumerate(results, 1):
-            if idx > 1:
-                story.append(Spacer(1, 12))
-            story.append(Paragraph(f"Image {idx}: {result.filename}", self.styles['SectionHeader']))
-            # Quick stats
-            quick_data = [
-                [Paragraph("<b>Property</b>", self.styles['TableHeader']),
-                 Paragraph("<b>Value</b>", self.styles['TableHeader'])],
-                [Paragraph("Score", self.styles['TableCell']),
-                 Paragraph(f"<b>{result.overall_score:.4f}</b>", self.styles['TableCell'])],
-                [Paragraph("Status", self.styles['TableCell']),
-                 Paragraph(self._get_status_badge_html(result.status.value), self.styles['TableCell'])],
-                [Paragraph("Confidence", self.styles['TableCell']),
-                 Paragraph(f"{result.confidence}%", self.styles['TableCell'])],
-                [Paragraph("Dimensions", self.styles['TableCell']),
-                 Paragraph(f"{result.image_size[0]} × {result.image_size[1]}", self.styles['TableCell'])],
-            ]
-            quick_table = Table(quick_data, colWidths=[120, 410])
-            quick_table.setStyle(self._get_standard_table_style(len(quick_data)))
-            story.append(quick_table)
-            story.append(Spacer(1, 5))
-            # Top 3 signals
-            story.append(Paragraph("Top Detection Signals:", self.styles['SubHeader']))
-            top_signals = sorted(result.signals, key=lambda s: s.score, reverse=True)[:3]
-            signal_data = [[
-                Paragraph("<b>Signal</b>", self.styles['TableHeader']),
-                Paragraph("<b>Score</b>", self.styles['TableHeader']),
-                Paragraph("<b>Status</b>", self.styles['TableHeader'])
-            ]]
-            for signal in top_signals:
-                signal_data.append([
-                    Paragraph(signal.name, self.styles['TableCellSmall']),
-                    Paragraph(f"{signal.score:.3f}", self.styles['TableCellSmall']),
-                    Paragraph(self._get_status_badge_html(signal.status.value), self.styles['TableCellSmall'])
-                ])
-            signal_table = Table(signal_data, colWidths=[200, 165, 165])
-            signal_table.setStyle(self._get_standard_table_style(len(signal_data)))
-            story.append(signal_table)
-    def _add_batch_recommendations(self, story, batch_result: BatchAnalysisResult):
-        """Batch-level recommendations"""
-        story.append(Paragraph("Batch Analysis Recommendations", self.styles['SectionTitle']))
-        story.append(Spacer(1, 8))
-        review_count = batch_result.summary.get('review_required', 0)
-        total = batch_result.total_images
-        if review_count > 0:
-            rec_text = f"""
-            <b>⚠️ ACTION REQUIRED</b><br/>
-            {review_count} out of {total} images require manual review ({review_count/total*100:.1f}%).<br/>
-            <br/>
-            <b>Recommended Actions:</b><br/>
-            • Prioritize high-risk images for immediate review<br/>
-            • Assign qualified personnel for verification<br/>
-            • Document review findings and decisions<br/>
-            • Consider additional forensic analysis for flagged images<br/>
-            • Update screening protocols based on findings
-            """
-            rec_color = self.COLOR_LIGHT_ORANGE
-            border_color = self.COLOR_WARNING
-        else:
-            rec_text = f"""
-            <b>✅ BATCH PASSED SCREENING</b><br/>
-            All {total} images appear to be authentic based on current analysis.<br/>
-            <br/>
-            <b>Recommended Actions:</b><br/>
-            • Archive results for audit trail<br/>
-            • Maintain periodic re-screening schedule<br/>
-            • Monitor for evolving AI generation techniques<br/>
-            • Update detection models regularly<br/>
-            • Document chain of custody
-            """
-            rec_color = self.COLOR_LIGHT_GREEN
-            border_color = self.COLOR_SUCCESS
-        rec_table = Table([[Paragraph(rec_text, self.styles['CustomBodyText'])]], colWidths=[530])
-        rec_table.setStyle(TableStyle([
-            ('BACKGROUND', (0, 0), (-1, -1), rec_color),
-            ('BOX', (0, 0), (-1, -1), 2, border_color),
-            ('LEFTPADDING', (0, 0), (-1, -1), 15),
-            ('RIGHTPADDING', (0, 0), (-1, -1), 15),
-            ('TOPPADDING', (0, 0), (-1, -1), 12),
-            ('BOTTOMPADDING', (0, 0), (-1, -1), 12)
-        ]))
-        story.append(rec_table)
-    def _add_professional_footer(self, story):
-        """Professional footer with disclaimers"""
-        story.append(Spacer(1, 15))
-        disclaimer_lines = [
-            "⚠️ <b>DISCLAIMER</b>: This report provides probabilistic screening results based on current AI detection methodologies, not definitive verdicts.",
-            "Results should be manually verified for critical applications. False positive rate: ~10-20%. Accuracy may vary with image quality and AI generation techniques.",
-            "This analysis should be used as one component of a comprehensive verification process, not as the sole basis for decision-making.",
-            "© 2025 AI Image Screener | Confidential Report | For Authorized Use Only"
-        ]
-        for line in disclaimer_lines:
-            story.append(Paragraph(line, self.styles['Footer']))
-            story.append(Spacer(1, 2))
-    # Helper methods
-    def _get_standard_table_style(self, num_rows):
-        """Standard table styling"""
-        return TableStyle([
-            ('BACKGROUND', (0, 0), (-1, 0), self.COLOR_HEADER_BG),
-            ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
-            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
-            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
-            ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, self.COLOR_ALT_ROW]),
-            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
-            ('LEFTPADDING', (0, 0), (-1, -1), 8),
-            ('RIGHTPADDING', (0, 0), (-1, -1), 8),
-            ('TOPPADDING', (0, 0), (-1, -1), 5),
-            ('BOTTOMPADDING', (0, 0), (-1, -1), 5)
-        ])
-    def _get_signal_table_style(self, num_rows):
-        """Signal table styling with color coding"""
         return TableStyle([
-            ('BACKGROUND', (0, 0), (-1, 0), self.COLOR_HEADER_BG),
             ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
-            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
             ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
             ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, self.COLOR_ALT_ROW]),
             ('VALIGN', (0, 0), (-1, -1), 'TOP'),
             ('LEFTPADDING', (0, 0), (-1, -1), 6),
             ('RIGHTPADDING', (0, 0), (-1, -1), 6),
-            ('TOPPADDING', (0, 0), (-1, -1), 5),
-            ('BOTTOMPADDING', (0, 0), (-1, -1), 5)
         ])
-    def _get_pivot_table_style(self, num_rows):
-        """Pivot table styling"""
-        return TableStyle([
-            ('BACKGROUND', (0, 0), (-1, 0), self.COLOR_HEADER_BG),
-            ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
-            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
-            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
-            ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, self.COLOR_ALT_ROW]),
-            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
-            ('ALIGN', (0, 0), (0, -1), 'CENTER'),
-            ('LEFTPADDING', (0, 0), (-1, -1), 4),
-            ('RIGHTPADDING', (0, 0), (-1, -1), 4),
-            ('TOPPADDING', (0, 0), (-1, -1), 4),
-            ('BOTTOMPADDING', (0, 0), (-1, -1), 4)
-        ])
-    def _get_status_badge_html(self, status: str) -> str:
-        """Generate status badge HTML"""
-        if status == "REVIEW_REQUIRED" or status == "flagged":
-            return "<font color='#B71C1C'><b>🔴 FLAGGED</b></font>"
-        elif status == "warning":
-            return "<font color='#E65100'><b>🟡 WARNING</b></font>"
-        else:
-            return "<font color='#1B5E20'><b>🟢 PASSED</b></font>"
-    def _get_status_badge_short(self, status: str) -> str:
-        """Short status badge"""
-        if status == "REVIEW_REQUIRED":
-            return "<font color='#B71C1C'><b>⚠️ REVIEW REQUIRED</b></font>"
-        else:
-            return "<font color='#1B5E20'><b>✓ LIKELY AUTHENTIC</b></font>"
-    def _get_metric_status_html(self, score: float) -> str:
-        """Metric status based on score"""
-        if score > 0.7:
-            return "<font color='#B71C1C'><b>High Risk</b></font>"
-        elif score > 0.5:
-            return "<font color='#E65100'><b>Moderate Risk</b></font>"
-        else:
-            return "<font color='#1B5E20'><b>Low Risk</b></font>"
-    def _format_value(self, value: Any, decimal_places: int = 4) -> str:
-        """Format value for display"""
-        if value is None or (isinstance(value, dict) and 'reason' in value):
-            return "N/A"
-        elif isinstance(value, float):
-            return f"{value:.{decimal_places}f}"
-        elif isinstance(value, (int, str, bool)):
-            return str(value)
-        else:
-            return "N/A"
-    def _extract_nested_value(self, details: dict, key: str) -> Any:
-        """Extract nested dictionary values"""
-        if '.' in key:
-            parts = key.split('.')
-            value = details
-            for part in parts:
-                if isinstance(value, dict):
-                    value = value.get(part, None)
-                else:
-                    return None
-            return value
-        else:
-            return details.get(key, None)

 # Dependencies
+from typing import Any
+from typing import List
 from pathlib import Path
+from typing import Optional
 from datetime import datetime
+from reportlab.lib import colors
 from utils.logger import get_logger
 from config.settings import settings
+from reportlab.platypus import Table
+from reportlab.lib.units import inch
+from reportlab.platypus import Spacer
+from reportlab.platypus import Paragraph
+from reportlab.platypus import PageBreak
 from reportlab.platypus import TableStyle
 from config.schemas import AnalysisResult
+from config.constants import FinalDecision
+from reportlab.lib.pagesizes import LETTER
 from utils.helpers import generate_unique_id
+from config.constants import EvidenceStrength
 from config.schemas import BatchAnalysisResult
+from reportlab.lib.styles import ParagraphStyle
 from reportlab.platypus import SimpleDocTemplate
+from reportlab.lib.styles import getSampleStyleSheet
 from features.detailed_result_maker import DetailedResultMaker
 # Setup Logging
 class PDFReporter:
     """
+    PDF Report Generator
+    Guarantees:
+    -----------
+    - FinalDecision is authoritative
+    - Evidence-first explanations
+    - Metrics are informational only
+    - Audit-safe and regulator-ready
     """
+    COLOR_PRIMARY = colors.HexColor('#0D47A1')
+    COLOR_SUCCESS = colors.HexColor('#1B5E20')
+    COLOR_WARNING = colors.HexColor('#E65100')
+    COLOR_DANGER  = colors.HexColor('#B71C1C')
+    COLOR_HEADER  = colors.HexColor('#1565C0')
+    COLOR_ALT_ROW = colors.HexColor('#F5F5F5')
     def __init__(self):
+        """
+        Set up the collaborators shared by every export call
+
+        - detailed_maker : supplies metric display names and key findings for the metrics table
+        - styles         : paragraph stylesheet produced by _build_styles()
+        """
         self.detailed_maker = DetailedResultMaker()
+        self.styles         = self._build_styles()
+        logger.debug("PDFReporter initialized")
     def export_single(self, result: AnalysisResult, output_dir: Optional[Path] = None) -> Path:
+        """
+        Render a standalone PDF report for one analysed image
+
+        Arguments:
+        ----------
+            result     { AnalysisResult } : Analysis result to render
+
+            output_dir { Path }           : Target directory; settings.REPORTS_DIR is used when omitted
+
+        Returns:
+        --------
+            { Path } : Location of the generated PDF file
+        """
+        target_dir  = output_dir or settings.REPORTS_DIR
+        target_dir.mkdir(parents = True, exist_ok = True)
+        pdf_name    = f"ai_screener_report_{generate_unique_id()}.pdf"
+        output_path = target_dir / pdf_name
+        logger.info(f"Generating single image PDF: {pdf_name}")
+        page_setup  = {"pagesize"     : LETTER,
+                       "rightMargin"  : 30,
+                       "leftMargin"   : 30,
+                       "topMargin"    : 20,
+                       "bottomMargin" : 35,
+                      }
+        document    = SimpleDocTemplate(str(output_path), **page_setup)
+        flowables   = []
+        # Page 1: banner and authoritative decision
+        self._add_header(flowables, "AI Image Analysis Report")
+        self._add_single_executive_summary(flowables, result)
+        flowables.append(PageBreak())
+        # Page 2: evidence backing the decision
+        self._add_evidence_section(flowables, result)
+        flowables.append(PageBreak())
+        # Page 3: informational metric signals and disclaimer
+        self._add_metrics_section(flowables, result)
+        self._add_footer(flowables)
+        document.build(flowables)
         return output_path
     def export_batch(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
+        """
+        Render one PDF covering a whole batch: a batch summary page followed by one section per image
+
+        Arguments:
+        ----------
+            batch_result { BatchAnalysisResult } : Batch outcome whose `results` are rendered in order
+
+            output_dir   { Path }                : Target directory; settings.REPORTS_DIR is used when omitted
+
+        Returns:
+        --------
+            { Path } : Location of the generated PDF file
+        """
+        target_dir  = output_dir or settings.REPORTS_DIR
+        target_dir.mkdir(parents = True, exist_ok = True)
+        pdf_name    = f"ai_screener_batch_{generate_unique_id()}.pdf"
+        output_path = target_dir / pdf_name
+        logger.info(f"Generating batch PDF: {pdf_name}")
+        page_setup  = {"pagesize"     : LETTER,
+                       "rightMargin"  : 30,
+                       "leftMargin"   : 30,
+                       "topMargin"    : 20,
+                       "bottomMargin" : 35,
+                      }
+        document    = SimpleDocTemplate(str(output_path), **page_setup)
+        flowables   = []
+        self._add_header(flowables, "Batch Image Analysis Report")
+        self._add_batch_summary(flowables, batch_result)
+        flowables.append(PageBreak())
+        image_count = len(batch_result.results)
+        for position, image_result in enumerate(batch_result.results, 1):
+            self._add_single_executive_summary(flowables, image_result, index=position)
+            self._add_evidence_section(flowables, image_result)
+            self._add_metrics_section(flowables, image_result)
+            # Separate images with a page break, but do not leave a blank page after the last one
+            if (position < image_count):
+                flowables.append(PageBreak())
+        self._add_footer(flowables)
+        document.build(flowables)
         return output_path
+    def _add_header(self, story, title: str):
+        """
+        Append the report banner: product name, report title, and a generation-time / version line
+        """
+        brand_line = Paragraph("AI IMAGE SCREENER", self.styles['Title'])
+        title_line = Paragraph(title, self.styles['Subtitle'])
+        generated  = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        meta_line  = Paragraph(f"Generated: {generated} | Version: {settings.VERSION}", self.styles['Meta'])
+        for flowable in (brand_line, title_line, meta_line, Spacer(1, 12)):
+            story.append(flowable)
+    def _add_single_executive_summary(self, story, result: AnalysisResult, index: Optional[int] = None):
+        """
+        Append the decision summary table (final decision, confidence, explanation) for one image
+
+        Arguments:
+        ----------
+            story  { list }           : Flowable list being assembled; mutated in place
+
+            result { AnalysisResult } : Source of final_decision, confidence and decision_explanation
+
+            index  { int }            : Position of the image inside a batch; appended to the heading when given
+        """
+        # Function-scope import keeps the method self-contained (stdlib only)
+        from xml.sax.saxutils import escape
+        title = "Executive Summary"
+        # Test against None explicitly so a supplied index is never dropped by truthiness
+        if index is not None:
+            title += f" — Image {index}"
+        story.append(Paragraph(title, self.styles['Section']))
+        decision    = result.final_decision.value if result.final_decision else "UNDECIDED"
+        color       = self._decision_color(result.final_decision)
+        # A plain string in a Table cell is drawn on a single line and overflows the column.
+        # Wrapping the free-text explanation in a Paragraph lets it flow inside the 390pt cell.
+        # The text is escaped because Paragraph interprets XML-style markup.
+        explanation = Paragraph(escape(str(result.decision_explanation or "—")), self.styles['Body'])
+        table       = Table([["Final Decision", decision],
+                             ["Confidence", f"{result.confidence}%"],
+                             ["Explanation", explanation],
+                            ],
+                            colWidths = [140, 390]
+                           )
+        table.setStyle(TableStyle([('BACKGROUND', (0, 0), (-1, 0), self.COLOR_HEADER),
+                                   ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
+                                   ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
+                                   ('BACKGROUND', (0, 1), (-1, -1), color),
+                                   ('VALIGN', (0, 0), (-1, -1), 'TOP'),
+                                   ('LEFTPADDING', (0, 0), (-1, -1), 8),
+                                   ('RIGHTPADDING', (0, 0), (-1, -1), 8),
+                                 ])
+                      )
+        story.append(table)
+        story.append(Spacer(1, 10))
+    def _add_evidence_section(self, story, result: AnalysisResult):
+        """
+        Append the evidence table (source, direction, strength, confidence, finding)
+
+        When the result carries no evidence, a short explanatory paragraph is appended instead of a table
+
+        Arguments:
+        ----------
+            story  { list }           : Flowable list being assembled; mutated in place
+
+            result { AnalysisResult } : Source of the `evidence` items
+        """
+        # Function-scope import keeps the method self-contained (stdlib only)
+        from xml.sax.saxutils import escape
+        story.append(Paragraph("Evidence Assessment", self.styles['Section']))
+        if not result.evidence:
+            story.append(Paragraph("No declarative evidence detected. Decision derived from Tier-1 metrics.", self.styles['Body']))
+            return
+        rows = [["Source", "Direction", "Strength", "Confidence", "Finding"]]
+        for e in result.evidence:
+            # Only a missing confidence is "N/A"; 0.0 is a real value and must be printed
+            confidence = f"{e.confidence:.2f}" if e.confidence is not None else "N/A"
+            # Free text does not wrap as a plain string, so wrap it in a Paragraph (escaped for Paragraph markup)
+            finding    = Paragraph(escape(str(e.finding)), self.styles['Body']) if e.finding else ""
+            rows.append([e.source.value,
+                         e.direction.value,
+                         e.strength.value,
+                         confidence,
+                         finding
+                       ])
+        table = Table(rows, colWidths = [70, 80, 80, 70, 230])
+        table.setStyle(self._standard_table_style())
+        story.append(table)
+        story.append(Spacer(1, 10))
+    def _add_metrics_section(self, story, result: AnalysisResult):
+        """
+        Append the informational metric table (metric, score, confidence, notes)
+
+        Arguments:
+        ----------
+            story  { list }           : Flowable list being assembled; mutated in place
+
+            result { AnalysisResult } : Source of `metric_results`
+        """
+        # Function-scope import keeps the method self-contained (stdlib only)
+        from xml.sax.saxutils import escape
+        story.append(Paragraph("Metric Signals (Informational)", self.styles['Section']))
+        rows = [["Metric", "Score", "Confidence", "Notes"]]
+        for mt, mr in result.metric_results.items():
+            # Only a missing confidence is "N/A"; 0.0 is a real value and must be printed
+            confidence = f"{mr.confidence:.3f}" if mr.confidence is not None else "N/A"
+            notes      = ", ".join(self.detailed_maker.extract_key_findings(mt, mr))
+            # Joined findings can exceed the 210pt column; a Paragraph wraps where a plain string overflows
+            notes_cell = Paragraph(escape(notes), self.styles['Body']) if notes else ""
+            rows.append([
+                self.detailed_maker.metric_display_names.get(mt, mt.value),
+                f"{mr.score:.3f}",
+                confidence,
+                notes_cell
+            ])
+        table = Table(rows, colWidths=[180, 70, 80, 210])
+        table.setStyle(self._standard_table_style())
+        story.append(table)
+        story.append(Spacer(1, 10))
+    def _add_batch_summary(self, story, batch_result: BatchAnalysisResult):
+        """
+        Append the batch overview: processing counters followed by one row per FinalDecision value
+        """
+        story.append(Paragraph("Batch Decision Summary", self.styles['Section']))
+        summary      = batch_result.summary
+        success_rate = summary.get('success_rate', 0)
+        rows         = [["Total Images", batch_result.total_images],
+                        ["Processed", batch_result.processed],
+                        ["Failed", batch_result.failed],
+                        ["Success Rate", f"{success_rate}%"],
+                       ]
+        # One count row per possible decision; decisions absent from the summary count as 0
+        rows.extend([decision.value, summary.get(decision.value, 0)] for decision in FinalDecision)
+        table = Table(rows, colWidths=[220, 310])
+        table.setStyle(self._standard_table_style())
+        story.append(table)
+        story.append(Spacer(1, 10))
+    def _add_footer(self, story):
+        """
+        Append the closing disclaimer, preceded by a vertical gap
+        """
+        disclaimer = ("DISCLAIMER: Metric scores are non-decisional. "
+                      "Final decisions are evidence- and policy-based.")
+        story.extend([Spacer(1, 15),
+                      Paragraph(disclaimer, self.styles['Footer']),
+                     ])
+    # ------------------------------------------------------------------
+    # STYLES & HELPERS
+    # ------------------------------------------------------------------
     def _build_styles(self):
+        """
+        Build the paragraph stylesheet used by every section of the report
+
+        The sample stylesheet already contains a style named 'Title', and StyleSheet1.add() raises
+        KeyError for a name that is already registered. Each custom style therefore replaces any
+        same-named sample style instead of being added on top of it.
+
+        Returns:
+        --------
+            { StyleSheet1 } : Sample stylesheet plus Title, Subtitle, Meta, Section, Body and Footer
+        """
         styles = getSampleStyleSheet()
+        custom_styles = [ParagraphStyle(name      = 'Title',
+                                        fontSize  = 18,
+                                        alignment = 1,
+                                        textColor = self.COLOR_PRIMARY,
+                                        fontName  = 'Helvetica-Bold',
+                                       ),
+                         ParagraphStyle(name       = 'Subtitle',
+                                        fontSize   = 12,
+                                        alignment  = 1,
+                                        spaceAfter = 6,
+                                       ),
+                         ParagraphStyle(name       = 'Meta',
+                                        fontSize   = 8,
+                                        alignment  = 1,
+                                        spaceAfter = 10,
+                                        textColor  = colors.grey,
+                                       ),
+                         ParagraphStyle(name        = 'Section',
+                                        fontSize    = 13,
+                                        fontName    = 'Helvetica-Bold',
+                                        spaceBefore = 10,
+                                        spaceAfter  = 6,
+                                       ),
+                         ParagraphStyle(name       = 'Body',
+                                        fontSize   = 9,
+                                        spaceAfter = 6,
+                                       ),
+                         ParagraphStyle(name      = 'Footer',
+                                        fontSize  = 7,
+                                        alignment = 1,
+                                        textColor = colors.grey,
+                                       ),
+                        ]
+        for style in custom_styles:
+            # Drop a built-in style of the same name first; add() refuses to overwrite
+            styles.byName.pop(style.name, None)
+            styles.add(style)
+        return styles
+    def _standard_table_style(self):
+        """
+        Return the shared table look: coloured first row, grey grid, alternating body rows, top-aligned cells
+        """
+        # Cell ranges are (col, row) pairs; -1 addresses the last column / row
+        first_row  = ((0, 0), (-1, 0))
+        body_rows  = ((0, 1), (-1, -1))
+        every_cell = ((0, 0), (-1, -1))
+        commands   = [('BACKGROUND', *first_row, self.COLOR_HEADER),
+                      ('TEXTCOLOR', *first_row, colors.white),
+                      ('GRID', *every_cell, 0.5, colors.grey),
+                      ('ROWBACKGROUNDS', *body_rows, [colors.white, self.COLOR_ALT_ROW]),
+                      ('VALIGN', *every_cell, 'TOP'),
+                      ('LEFTPADDING', *every_cell, 6),
+                      ('RIGHTPADDING', *every_cell, 6),
+                     ]
+        return TableStyle(commands)
+    def _decision_color(self, decision: Optional[FinalDecision]):
+        """
+        Map a final decision to the background tint of its summary rows; anything unmatched (including None) is white
+        """
+        # Ordered (decision, hex tint) pairs, compared with == in this order
+        tints = ((FinalDecision.CONFIRMED_AI_GENERATED, '#FFEBEE'),
+                 (FinalDecision.SUSPICIOUS_AI_LIKELY, '#FFF3E0'),
+                 (FinalDecision.AUTHENTIC_BUT_REVIEW, '#E3F2FD'),
+                 (FinalDecision.MOSTLY_AUTHENTIC, '#E8F5E9'),
+                )
+        for candidate, hex_code in tints:
+            if decision == candidate:
+                return colors.HexColor(hex_code)
+        return colors.white

ui/index.html CHANGED Viewed

@@ -657,6 +657,12 @@
             border: 1px solid rgba(214, 158, 46, 0.3);
         }
         .score-indicator {
             display: flex;
             align-items: center;
@@ -1039,13 +1045,29 @@
                         <h3>Multi-Signal Detection</h3>
                         <p>Five independent statistical detectors with weighted ensemble aggregation</p>
                     </div>
                     <div class="feature-card">
                         <div class="feature-icon">
                             <i class="fas fa-file-export"></i>
                         </div>
                         <h3>Comprehensive Reports</h3>
-                        <p>Export results in CSV, JSON, and PDF formats for integration and documentation</p>
                     </div>
                     <div class="feature-card">
@@ -1215,6 +1237,72 @@
                             </div>
                         </div>
                     </div>
                 </div>
             </div>
@@ -1305,9 +1393,6 @@
                             <button class="action-button secondary-action" id="exportCsvBtn">
                                 <i class="fas fa-file-csv"></i> CSV
                             </button>
-                            <button class="action-button secondary-action" id="exportPdfBtn">
-                                <i class="fas fa-file-pdf"></i> PDF
-                            </button>
                             <button class="action-button secondary-action" id="exportJsonBtn">
                                 <i class="fas fa-file-code"></i> JSON
                             </button>
@@ -1385,8 +1470,7 @@
         const HEALTH_ENDPOINT = '/health';
         const BATCH_PROGRESS_ENDPOINT = '/batch';
         const CSV_REPORT_ENDPOINT = '/report/csv';
-        const PDF_REPORT_ENDPOINT = '/report/pdf';
         // Global state
         let files = [];
         let fileDataUrls = {};
@@ -1419,7 +1503,6 @@
         const resultsTableBody = document.getElementById('resultsTableBody');
         const noResultsRow = document.getElementById('noResultsRow');
         const exportCsvBtn = document.getElementById('exportCsvBtn');
-        const exportPdfBtn = document.getElementById('exportPdfBtn');
         const exportJsonBtn = document.getElementById('exportJsonBtn');
         const toggleDetailedAnalysis = document.getElementById('toggleDetailedAnalysis');
         const detailedAnalysisIcon = document.getElementById('detailedAnalysisIcon');
@@ -1498,7 +1581,6 @@
             // Export
             exportCsvBtn.addEventListener('click', exportCsv);
-            exportPdfBtn.addEventListener('click', exportPdf);
             exportJsonBtn.addEventListener('click', exportJson);
             // Detailed analysis toggle
@@ -1832,6 +1914,46 @@
             }, 1000);
         }
         function displayResults() {
             if (!batchResults) {
                 console.error('No results to display:', batchResults);
@@ -1855,7 +1977,8 @@
                 const filename = resultData.filename || 'Unknown';
                 const overallScore = resultData.overall_score || 0;
-                const status = resultData.status || 'LIKELY_AUTHENTIC';
                 const confidence = resultData.confidence || 0;
                 const imageSize = resultData.image_size || [0, 0];
                 const signals = resultData.signals || [];
@@ -1863,21 +1986,13 @@
                 const scorePercent = Math.round(overallScore * 100);
                 let scoreClass = 'score-low';
-                let scoreWidth = '30%';
-                if (scorePercent >= 70) {
-                    scoreClass = 'score-high';
-                    scoreWidth = '90%';
-                } else if (scorePercent >= 50) {
-                    scoreClass = 'score-medium';
-                    scoreWidth = '60%';
-                }
                 const flaggedCount = signals.filter(s => s.status === 'flagged').length;
                 const warningCount = signals.filter(s => s.status === 'warning').length;
-                // Format status for display (remove underscores)
-                const displayStatus = status.replace(/_/g, ' ');
                 // Get thumbnail
                 const thumbnailSrc = fileDataUrls[filename] || 'data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" width="40" height="40" viewBox="0 0 40 40"><rect width="40" height="40" fill="%23f0f0f0"/></svg>';
@@ -1894,8 +2009,8 @@
                         </div>
                     </td>
                     <td>
-                        <span class="status-badge ${status === 'LIKELY_AUTHENTIC' ? 'status-authentic' : 'status-review'}" style="white-space: nowrap;">
-                            ${displayStatus}
                         </span>
                     </td>
                     <td>
@@ -1945,153 +2060,190 @@
         }
         function updateSummary(batchResult) {
-            const total = batchResult.total_images || 0;
-            const processed = batchResult.processed || batchResult.results?.length || 0;
-            const failed = batchResult.failed || 0;
-            let likelyAuthentic = 0;
-            let reviewRequired = 0;
-            if (batchResult.results) {
-                batchResult.results.forEach(result => {
-                    const resultData = result;
-                    const status = resultData.status || 'LIKELY_AUTHENTIC';
-                    if (status === 'LIKELY_AUTHENTIC') {
-                        likelyAuthentic++;
-                    } else if (status === 'REVIEW_REQUIRED') {
-                        reviewRequired++;
-                    }
-                });
             }
-            resultsSummary.innerHTML = `
-                <div class="summary-card">
-                    <div class="summary-value">${processed}</div>
-                    <div class="summary-label">Total Processed</div>
-                </div>
-                <div class="summary-card">
-                    <div class="summary-value">${likelyAuthentic}</div>
-                    <div class="summary-label">Likely Authentic</div>
-                </div>
-                <div class="summary-card">
-                    <div class="summary-value">${reviewRequired}</div>
-                    <div class="summary-label">Review Required</div>
-                </div>
-                <div class="summary-card">
-                    <div class="summary-value">${failed}</div>
-                    <div class="summary-label">Failed</div>
-                </div>
-            `;
         }
         function showDetailedAnalysis(index) {
             if (!batchResults || !batchResults.results || !batchResults.results[index]) return;
             selectedImageIndex = index;
-            const result = batchResults.results[index];
-            const resultData = result;
-            const filename = resultData.filename || 'Unknown';
-            const overallScore = resultData.overall_score || 0;
-            const status = resultData.status || 'LIKELY_AUTHENTIC';
-            const confidence = resultData.confidence || 0;
-            const imageSize = resultData.image_size || [0, 0];
-            const processingTime = resultData.processing_time || 0;
-            const signals = resultData.signals || [];
-            const scorePercent = Math.round(overallScore * 100);
-            const displayStatus = status.replace(/_/g, ' ');
-            // Ensure detailed analysis is expanded
             detailedAnalysisContent.classList.add('show');
             detailedAnalysisIcon.classList.remove('fa-chevron-down');
             detailedAnalysisIcon.classList.add('fa-chevron-up');
-            document.getElementById('detailedAnalysisContent').scrollIntoView({
                 behavior: 'smooth',
                 block: 'start'
             });
-            // Build signals HTML
             let signalsHtml = '';
-            if (signals && signals.length > 0) {
                 signals.forEach(signal => {
                     let statusClass = 'signal-passed';
                     if (signal.status === 'warning') statusClass = 'signal-warning';
                     if (signal.status === 'flagged') statusClass = 'signal-flagged';
                     const signalScore = Math.round((signal.score || 0) * 100);
                     signalsHtml += `
                         <div class="signal-card">
                             <div class="signal-header">
                                 <strong>${signal.name || 'Unknown Metric'}</strong>
                                 <span class="signal-badge ${statusClass}">${signal.status}</span>
                             </div>
-                            <p style="font-size: 0.875rem; margin-bottom: 0.5rem; color: var(--text-light);">
                                 ${signal.explanation || 'No explanation available.'}
                             </p>
-                            <div style="display: flex; justify-content: space-between; align-items: center;">
-                                <div style="font-size: 0.75rem; color: var(--text-light);">
-                                    Score: ${signalScore}%
-                                </div>
                             </div>
                         </div>
                     `;
                 });
             } else {
-                signalsHtml = '<p class="text-center" style="color: var(--text-light);">No detection signals available.</p>';
             }
             detailedAnalysisContent.innerHTML = `
-                <div style="margin-bottom: 1.5rem;">
-                    <div style="display: flex; align-items: center; gap: 1rem; margin-bottom: 1rem;">
-                        <img src="${fileDataUrls[filename] || 'data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" width="60" height="60" viewBox="0 0 60 60"><rect width="60" height="60" fill="%23f0f0f0"/></svg>'}"
-                             alt="${filename}"
-                             style="width: 60px; height: 60px; object-fit: cover; border-radius: 0.5rem; border: 1px solid var(--border);">
                         <div>
-                            <h4 style="margin-bottom: 0.25rem;">${filename}</h4>
-                            <div style="font-size: 0.875rem; color: var(--text-light);">
                                 ${imageSize[0]} × ${imageSize[1]} • ${processingTime.toFixed(2)}s
                             </div>
                         </div>
                     </div>
-                    <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 1rem; margin-bottom: 1.5rem;">
-                        <div style="text-align: center; padding: 1rem; background-color: #f8fafc; border-radius: 0.5rem;">
-                            <div style="font-size: 1.5rem; font-weight: 700; color: ${scorePercent >= 70 ? '#e53e3e' : scorePercent >= 50 ? '#d69e2e' : '#38a169'};">${scorePercent}%</div>
-                            <div style="font-size: 0.875rem; color: var(--text-light);">Score</div>
                         </div>
-                        <div style="text-align: center; padding: 1rem; background-color: #f8fafc; border-radius: 0.5rem;">
-                            <div style="font-size: 1.5rem; font-weight: 700; color: ${displayStatus.includes('REVIEW') ? '#d69e2e' : '#38a169'};">${displayStatus}</div>
-                            <div style="font-size: 0.875rem; color: var(--text-light);">Verdict</div>
                         </div>
-                        <div style="text-align: center; padding: 1rem; background-color: #f8fafc; border-radius: 0.5rem;">
-                            <div style="font-size: 1.5rem; font-weight: 700;">${confidence}%</div>
-                            <div style="font-size: 0.875rem; color: var(--text-light);">Confidence</div>
                         </div>
                     </div>
                 </div>
-                <h4 style="margin-bottom: 1rem;">Detection Signals</h4>
                 <div class="signal-grid">
                     ${signalsHtml}
                 </div>
-                <div class="signal-card" style="margin-top: 1.5rem; background-color: ${displayStatus.includes('REVIEW') ? 'rgba(214, 158, 46, 0.1)' : 'rgba(56, 161, 105, 0.1)'}; border-color: ${displayStatus.includes('REVIEW') ? 'rgba(214, 158, 46, 0.3)' : 'rgba(56, 161, 105, 0.3)'};">
                     <div class="signal-header">
-                        <strong>Recommendation</strong>
-                    </div>
-                    <p style="margin-bottom: 0.5rem;">
-                        ${displayStatus.includes('REVIEW') ? 'Manual verification recommended' : 'No immediate action required'}
-                    </p>
-                    <div style="font-size: 0.875rem; color: var(--text-light);">
-                        Confidence: ${confidence}% likelihood of ${displayStatus.includes('REVIEW') ? 'AI generation' : 'authenticity'}
                     </div>
                 </div>
             `;
         }
         // Export functions
         async function exportCsv() {
             if (!currentBatchId) {
@@ -2129,42 +2281,6 @@
             }
         }
-        async function exportPdf() {
-            if (!currentBatchId) {
-                showToast('No analysis results to export.', 'warning');
-                return;
-            }
-            showLoading(true);
-            try {
-                // Using GET request since backend now accepts both GET and POST
-                const response = await fetch(`${PDF_REPORT_ENDPOINT}/${currentBatchId}`);
-                if (response.ok) {
-                    // Get the blob data
-                    const blob = await response.blob();
-                    // Create download link
-                    const downloadLink = document.createElement('a');
-                    downloadLink.href = URL.createObjectURL(blob);
-                    downloadLink.download = `ai_screener_report_${currentBatchId}.pdf`;
-                    document.body.appendChild(downloadLink);
-                    downloadLink.click();
-                    document.body.removeChild(downloadLink);
-                    showToast('PDF report downloaded successfully.', 'success');
-                } else {
-                    showToast('Failed to generate PDF report.', 'error');
-                }
-            } catch (error) {
-                console.error('PDF export failed:', error);
-                showToast('PDF export failed. Please try again.', 'error');
-            } finally {
-                showLoading(false);
-            }
-        }
         async function exportJson() {
             if (!batchResults) {
                 showToast('No analysis results to export.', 'warning');
@@ -2192,7 +2308,7 @@
                 showLoading(false);
             }
         }
         // Reset functions
         function resetUI() {
             analyzeBtn.disabled = false;

             border: 1px solid rgba(214, 158, 46, 0.3);
         }
+        .status-danger {
+            background-color: rgba(229, 62, 62, 0.1);
+            color: var(--danger);
+            border: 1px solid rgba(229, 62, 62, 0.3);
+        }
         .score-indicator {
             display: flex;
             align-items: center;
                         <h3>Multi-Signal Detection</h3>
                         <p>Five independent statistical detectors with weighted ensemble aggregation</p>
                     </div>
+                    <div class="feature-card">
+                        <div class="feature-icon">
+                            <i class="fas fa-puzzle-piece"></i>
+                        </div>
+                        <h3>Evidence Analysis</h3>
+                        <p>Aggregates detection signals and metadata into structured evidence, resolving conflicts and supporting explainable, non-scoring decisions</p>
+                    </div>
+                    <div class="feature-card">
+                        <div class="feature-icon">
+                            <i class="fas fa-balance-scale"></i>
+                        </div>
+                        <h3>Decision Policy</h3>
+                        <p>Applies deterministic rules over metrics and evidence to produce a review-aware final verdict, prioritizing safety and auditability</p>
+                    </div>
                     <div class="feature-card">
                         <div class="feature-icon">
                             <i class="fas fa-file-export"></i>
                         </div>
                         <h3>Comprehensive Reports</h3>
+                        <p>Export results in CSV and JSON formats for integration and documentation</p>
                     </div>
                     <div class="feature-card">
                             </div>
                         </div>
                     </div>
+                    <div class="metric-card">
+                        <div class="metric-header">
+                            <div class="metric-icon" style="background: linear-gradient(135deg, #2b6cb0 0%, #63b3ed 100%);">
+                                <i class="fas fa-camera-retro"></i>
+                            </div>
+                            <div>
+                                <div class="metric-title">EXIF Analyzer</div>
+                            </div>
+                            <span class="metric-weight">Auxiliary Metric</span>
+                        </div>
+                        <p class="metric-description">
+                            Analyzes image metadata for presence, completeness, and plausibility.
+                            Real camera images usually contain coherent EXIF data, while AI-generated
+                            images often lack metadata or contain inconsistent fields.
+                        </p>
+                        <div class="metric-details">
+                            <div class="detail-item">
+                                <span class="detail-label">Detection Method</span>
+                                <span class="detail-value">Metadata consistency analysis</span>
+                            </div>
+                            <div class="detail-item">
+                                <span class="detail-label">Signals</span>
+                                <span class="detail-value">Missing, stripped, or malformed EXIF</span>
+                            </div>
+                            <div class="detail-item">
+                                <span class="detail-label">Sensitivity</span>
+                                <span class="detail-value">Medium</span>
+                            </div>
+                        </div>
+                    </div>
+                    <div class="metric-card">
+                        <div class="metric-header">
+                            <div class="metric-icon" style="background: linear-gradient(135deg, #6b46c1 0%, #b794f4 100%);">
+                                <i class="fas fa-fingerprint"></i>
+                            </div>
+                            <div>
+                                <div class="metric-title">Watermark Analyzer</div>
+                            </div>
+                            <span class="metric-weight">Auxiliary Metric</span>
+                        </div>
+                        <p class="metric-description">
+                            Detects known and statistical watermark patterns embedded by generative
+                            models. Includes checks for frequency-domain artifacts and spatial
+                            regularities associated with AI watermarking techniques.
+                        </p>
+                        <div class="metric-details">
+                            <div class="detail-item">
+                                <span class="detail-label">Detection Method</span>
+                                <span class="detail-value">Pattern & frequency-domain analysis</span>
+                            </div>
+                            <div class="detail-item">
+                                <span class="detail-label">Signals</span>
+                                <span class="detail-value">Model-specific watermark artifacts</span>
+                            </div>
+                            <div class="detail-item">
+                                <span class="detail-label">Sensitivity</span>
+                                <span class="detail-value">Low–Medium (model-dependent)</span>
+                            </div>
+                        </div>
+                    </div>
                 </div>
             </div>
                             <button class="action-button secondary-action" id="exportCsvBtn">
                                 <i class="fas fa-file-csv"></i> CSV
                             </button>
                             <button class="action-button secondary-action" id="exportJsonBtn">
                                 <i class="fas fa-file-code"></i> JSON
                             </button>
         const HEALTH_ENDPOINT = '/health';
         const BATCH_PROGRESS_ENDPOINT = '/batch';
         const CSV_REPORT_ENDPOINT = '/report/csv';
         // Global state
         let files = [];
         let fileDataUrls = {};
         const resultsTableBody = document.getElementById('resultsTableBody');
         const noResultsRow = document.getElementById('noResultsRow');
         const exportCsvBtn = document.getElementById('exportCsvBtn');
         const exportJsonBtn = document.getElementById('exportJsonBtn');
         const toggleDetailedAnalysis = document.getElementById('toggleDetailedAnalysis');
         const detailedAnalysisIcon = document.getElementById('detailedAnalysisIcon');
             // Export
             exportCsvBtn.addEventListener('click', exportCsv);
             exportJsonBtn.addEventListener('click', exportJson);
             // Detailed analysis toggle
             }, 1000);
         }
+        function decisionMeta(decision) {
+            switch (decision) {
+                case 'MOSTLY_AUTHENTIC':
+                    return {
+                        label: 'Mostly Authentic',
+                        badgeClass: 'status-authentic',
+                        recommendation: 'No immediate action required'
+                    };
+                case 'AUTHENTIC_BUT_REVIEW':
+                    return {
+                        label: 'Authentic (Review Suggested)',
+                        badgeClass: 'status-review',
+                        recommendation: 'Optional human review'
+                    };
+                case 'SUSPICIOUS_AI_LIKELY':
+                    return {
+                        label: 'Suspicious (AI Likely)',
+                        badgeClass: 'status-review',
+                        recommendation: 'Manual verification recommended'
+                    };
+                case 'CONFIRMED_AI_GENERATED':
+                    return {
+                        label: 'Confirmed AI Generated',
+                        badgeClass: 'status-danger',
+                        recommendation: 'Block or audit required'
+                    };
+                default:
+                    console.warn('Unknown decision:', decision);
+                    return {
+                    label: decision,
+                    badgeClass: 'status-review',
+                    recommendation: 'Manual review required'
+                };
+            }
+        }
         function displayResults() {
             if (!batchResults) {
                 console.error('No results to display:', batchResults);
                 const filename = resultData.filename || 'Unknown';
                 const overallScore = resultData.overall_score || 0;
+                const decision = resultData.final_decision;
+                const meta = decisionMeta(decision);
                 const confidence = resultData.confidence || 0;
                 const imageSize = resultData.image_size || [0, 0];
                 const signals = resultData.signals || [];
                 const scorePercent = Math.round(overallScore * 100);
                 let scoreClass = 'score-low';
+                if (scorePercent >= 70) scoreClass = 'score-high';
+                else if (scorePercent >= 50) scoreClass = 'score-medium';
+                const scoreWidth = `${Math.min(scorePercent, 100)}%`;
                 const flaggedCount = signals.filter(s => s.status === 'flagged').length;
                 const warningCount = signals.filter(s => s.status === 'warning').length;
                 // Get thumbnail
                 const thumbnailSrc = fileDataUrls[filename] || 'data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" width="40" height="40" viewBox="0 0 40 40"><rect width="40" height="40" fill="%23f0f0f0"/></svg>';
                         </div>
                     </td>
                     <td>
+                        <span class="status-badge ${meta.badgeClass}">
+                            ${meta.label}
                         </span>
                     </td>
                     <td>
         }
         function updateSummary(batchResult) {
+            if (!batchResult || !Array.isArray(batchResult.results)) {
+                resultsSummary.innerHTML = '';
+                return;
             }
+            // Count decisions exactly as returned by backend
+            const counts = {};
+            batchResult.results.forEach(result => {
+                const status = result.final_decision;
+                if (!status) return;
+                counts[status] = (counts[status] || 0) + 1;
+            });
+            // Stable, policy-aligned display order
+            const ORDER = [
+                'CONFIRMED_AI_GENERATED',
+                'SUSPICIOUS_AI_LIKELY',
+                'AUTHENTIC_BUT_REVIEW',
+                'MOSTLY_AUTHENTIC'
+            ];
+            resultsSummary.innerHTML = ORDER
+                .filter(status => counts[status])
+                .map(status => {
+                    const meta = decisionMeta(status);
+                    return `
+                        <div class="summary-card">
+                            <div class="summary-value">${counts[status]}</div>
+                            <div class="summary-label">${meta.label}</div>
+                        </div>
+                    `;
+                })
+                .join('');
         }
         function showDetailedAnalysis(index) {
             if (!batchResults || !batchResults.results || !batchResults.results[index]) return;
             selectedImageIndex = index;
+            const result          = batchResults.results[index];
+            const filename        = result.filename || 'Unknown';
+            const overallScore    = result.overall_score || 0;
+            const decision        = result.final_decision;
+            const meta            = decisionMeta(decision);
+            const confidence      = result.confidence != null ? Math.round(result.confidence) : 0;
+            const imageSize       = result.image_size || [0, 0];
+            const processingTime  = result.processing_time || 0;
+            const signals         = result.signals || [];
+            const evidence        = result.evidence || [];
+            const scorePercent    = Math.round(overallScore * 100);
+            /* ---------- Expand panel ---------- */
             detailedAnalysisContent.classList.add('show');
             detailedAnalysisIcon.classList.remove('fa-chevron-down');
             detailedAnalysisIcon.classList.add('fa-chevron-up');
+            detailedAnalysisContent.scrollIntoView({
                 behavior: 'smooth',
                 block: 'start'
             });
+            /* ---------- Signals (Tier-1 Metrics) ---------- */
             let signalsHtml = '';
+            if (signals.length > 0) {
                 signals.forEach(signal => {
                     let statusClass = 'signal-passed';
                     if (signal.status === 'warning') statusClass = 'signal-warning';
                     if (signal.status === 'flagged') statusClass = 'signal-flagged';
                     const signalScore = Math.round((signal.score || 0) * 100);
                     signalsHtml += `
                         <div class="signal-card">
                             <div class="signal-header">
                                 <strong>${signal.name || 'Unknown Metric'}</strong>
                                 <span class="signal-badge ${statusClass}">${signal.status}</span>
                             </div>
+                            <p style="font-size:0.875rem;color:var(--text-light);margin-bottom:0.5rem;">
                                 ${signal.explanation || 'No explanation available.'}
                             </p>
+                            <div style="font-size:0.75rem;color:var(--text-light);">
+                                Score: ${signalScore}%
                             </div>
                         </div>
                     `;
                 });
             } else {
+                signalsHtml = `
+                    <p class="text-center" style="color:var(--text-light);">
+                        No detection signals available.
+                    </p>
+                `;
             }
+            /* ---------- Evidence (Tier-2 Declarative Evidence) ---------- */
+            let evidenceHtml = '';
+            if (evidence.length > 0) {
+                evidence.forEach(ev => {
+                    let badgeClass = 'signal-passed';
+                    if (ev.direction === 'AI_GENERATED') badgeClass = 'signal-flagged';
+                    if (ev.direction === 'INDETERMINATE') badgeClass = 'signal-warning';
+                    evidenceHtml += `
+                        <div class="signal-card" style="background:#f1f5f9;">
+                            <div class="signal-header">
+                                <strong>${ev.source.toUpperCase()}</strong>
+                                <span class="signal-badge ${badgeClass}">
+                                    ${ev.strength}
+                                </span>
+                            </div>
+                            <p style="font-size:0.875rem;margin-bottom:0.5rem;">
+                                ${ev.finding}
+                            </p>
+                            <div style="font-size:0.75rem;color:#64748b;">
+                                Analyzer: ${ev.analyzer}
+                                ${ev.confidence != null ? ` • Confidence: ${Math.round(ev.confidence * 100)}%` : ''}
+                            </div>
+                        </div>
+                    `;
+                });
+            } else {
+                evidenceHtml = `
+                    <p class="text-center" style="color:var(--text-light);">
+                        No declarative evidence detected.
+                    </p>
+                `;
+            }
+            /* ---------- Render ---------- */
             detailedAnalysisContent.innerHTML = `
+                <div style="margin-bottom:1.5rem;">
+                    <div style="display:flex;align-items:center;gap:1rem;margin-bottom:1rem;">
+                        <img src="${fileDataUrls[filename] || 'data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" width="60" height="60"><rect width="60" height="60" fill="%23f0f0f0"/></svg>'}"
+                            alt="${filename}"
+                            style="width:60px;height:60px;object-fit:cover;border-radius:0.5rem;border:1px solid var(--border);">
                         <div>
+                            <h4 style="margin-bottom:0.25rem;">${filename}</h4>
+                            <div style="font-size:0.875rem;color:var(--text-light);">
                                 ${imageSize[0]} × ${imageSize[1]} • ${processingTime.toFixed(2)}s
                             </div>
                         </div>
                     </div>
+                    <div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(150px,1fr));gap:1rem;">
+                        <div style="text-align:center;padding:1rem;background:#f8fafc;border-radius:0.5rem;">
+                            <div style="font-size:1.5rem;font-weight:700;">${scorePercent}%</div>
+                            <div style="font-size:0.875rem;color:var(--text-light);">Score</div>
                         </div>
+                        <div style="text-align:center;padding:1rem;background:#f8fafc;border-radius:0.5rem;">
+                            <div style="font-size:1.25rem;font-weight:700;">${meta.label}</div>
+                            <div style="font-size:0.875rem;color:var(--text-light);">Verdict</div>
                         </div>
+                        <div style="text-align:center;padding:1rem;background:#f8fafc;border-radius:0.5rem;">
+                            <div style="font-size:1.5rem;font-weight:700;">${confidence}%</div>
+                            <div style="font-size:0.875rem;color:var(--text-light);">Confidence</div>
                         </div>
                     </div>
                 </div>
+                <h4 style="margin-bottom:0.75rem;">Detection Signals</h4>
                 <div class="signal-grid">
                     ${signalsHtml}
                 </div>
+                <h4 style="margin:1.5rem 0 0.75rem;">Evidence</h4>
+                <div class="signal-grid">
+                    ${evidenceHtml}
+                </div>
+                <div class="signal-card" style="margin-top:1.5rem;">
                     <div class="signal-header">
+                        <strong>Decision Explanation</strong>
                     </div>
+                    <p>${result.decision_explanation || meta.recommendation}</p>
                 </div>
             `;
         }
         // Export functions
         // CSV export handler (presumably triggered from the UI export control — confirm against the caller).
         // NOTE(review): this view is a collapsed diff excerpt; the guard body and any
         // code after the guard are not visible here, so verify against the full file.
         async function exportCsv() {
             // Guard branch taken when currentBatchId is falsy (no batch id is set).
             if (!currentBatchId) {
             }
         }
         // JSON export handler (presumably triggered from the UI export control — confirm against the caller).
         // NOTE(review): this view is a collapsed diff excerpt; lines between and after the
         // visible statements may be elided, so verify control flow against the full file.
         async function exportJson() {
             // Guard branch taken when batchResults is falsy: there is nothing to export.
             if (!batchResults) {
                 // Surface a warning-level toast to the user.
                 showToast('No analysis results to export.', 'warning');
                 // Passes false to showLoading (presumably hides the loading indicator — confirm).
                 showLoading(false);
             }
         }
         // Reset functions
         function resetUI() {
             analyzeBtn.disabled = false;