Spaces:
Sleeping
Sleeping
Commit ·
f142520
1
Parent(s): 676e2fe
EXIF Analysis and Watermark Analysis added
Browse files- app.py +11 -40
- config/constants.py +119 -1
- config/schemas.py +58 -18
- config/settings.py +4 -0
- decision_builders/decision_policy.py +146 -0
- evidence_analyzers/evidence_aggregator.py +193 -0
- evidence_analyzers/exif_analyzer.py +375 -0
- evidence_analyzers/watermark_analyzer.py +463 -0
- features/batch_processor.py +65 -33
- features/detailed_result_maker.py +320 -233
- metrics/color_analyzer.py +18 -14
- metrics/{aggregator.py → signal_aggregator.py} +68 -10
- notebooks/.ipynb_checkpoints/Unified_Dataset_Builder-checkpoint.ipynb +73 -9
- notebooks/Unified_Dataset_Builder.ipynb +115 -47
- reporter/csv_reporter.py +122 -318
- reporter/json_reporter.py +107 -181
- reporter/pdf_reporter.py +256 -773
- ui/index.html +272 -156
app.py
CHANGED
|
@@ -8,7 +8,6 @@ from typing import List
|
|
| 8 |
from typing import Dict
|
| 9 |
from pathlib import Path
|
| 10 |
from fastapi import File
|
| 11 |
-
from typing import Optional
|
| 12 |
from fastapi import Request
|
| 13 |
from fastapi import FastAPI
|
| 14 |
from fastapi import UploadFile
|
|
@@ -24,7 +23,6 @@ from utils.validators import ImageValidator
|
|
| 24 |
from fastapi.staticfiles import StaticFiles
|
| 25 |
from utils.helpers import generate_unique_id
|
| 26 |
from reporter.csv_reporter import CSVReporter
|
| 27 |
-
from reporter.pdf_reporter import PDFReporter
|
| 28 |
from config.schemas import BatchAnalysisResult
|
| 29 |
from reporter.json_reporter import JSONReporter
|
| 30 |
from utils.image_processor import ImageProcessor
|
|
@@ -63,12 +61,10 @@ image_validator = ImageValidator()
|
|
| 63 |
image_processor = ImageProcessor()
|
| 64 |
|
| 65 |
threshold_manager = ThresholdManager()
|
| 66 |
-
threshold_manager = threshold_manager
|
| 67 |
batch_processor = BatchProcessor(threshold_manager = threshold_manager)
|
| 68 |
|
| 69 |
json_reporter = JSONReporter()
|
| 70 |
csv_reporter = CSVReporter()
|
| 71 |
-
pdf_reporter = PDFReporter()
|
| 72 |
|
| 73 |
UPLOAD_DIR = settings.UPLOAD_DIR
|
| 74 |
CACHE_DIR = settings.CACHE_DIR
|
|
@@ -157,21 +153,21 @@ async def analyze_single_image(file: UploadFile = File(...)):
|
|
| 157 |
image_id = generate_unique_id()
|
| 158 |
image_path = UPLOAD_DIR / f"{image_id}_{file.filename}"
|
| 159 |
|
| 160 |
-
image_validator.validate_image(file_path = image_path,
|
| 161 |
-
filename = file.filename,
|
| 162 |
-
file_size = file.size,
|
| 163 |
-
)
|
| 164 |
-
|
| 165 |
try:
|
| 166 |
with open(image_path, "wb") as f:
|
| 167 |
shutil.copyfileobj(file.file, f)
|
| 168 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
image = image_processor.load_image(image_path)
|
| 170 |
|
| 171 |
# image is a NumPy array → shape = (H, W, C) or (H, W)
|
| 172 |
height, width = image.shape[:2]
|
| 173 |
|
| 174 |
-
result: AnalysisResult = batch_processor.process_single(
|
| 175 |
filename = file.filename,
|
| 176 |
image_size = (width, height),
|
| 177 |
)
|
|
@@ -210,15 +206,15 @@ async def analyze_batch(files: List[UploadFile] = File(...)):
|
|
| 210 |
|
| 211 |
with open(path, "wb") as f:
|
| 212 |
shutil.copyfileobj(file.file, f)
|
| 213 |
-
|
| 214 |
-
image = image_processor.load_image(path)
|
| 215 |
-
height, width = image.shape[:2]
|
| 216 |
|
| 217 |
image_validator.validate_image(file_path = path,
|
| 218 |
filename = file.filename,
|
| 219 |
file_size = file.size,
|
| 220 |
)
|
| 221 |
|
|
|
|
|
|
|
|
|
|
| 222 |
image_entries.append({"path" : path,
|
| 223 |
"filename" : file.filename,
|
| 224 |
"size" : (width, height),
|
|
@@ -297,6 +293,8 @@ def export_csv(batch_id: str):
|
|
| 297 |
|
| 298 |
# Clean up the file after sending
|
| 299 |
path.unlink(missing_ok = True)
|
|
|
|
|
|
|
| 300 |
|
| 301 |
return Response(content = content,
|
| 302 |
media_type = "text/csv",
|
|
@@ -306,33 +304,6 @@ def export_csv(batch_id: str):
|
|
| 306 |
)
|
| 307 |
|
| 308 |
|
| 309 |
-
@app.api_route("/report/pdf/{batch_id}", methods = ["GET", "POST"])
|
| 310 |
-
def export_pdf(batch_id: str):
|
| 311 |
-
session = SESSION_STORE.get(batch_id)
|
| 312 |
-
|
| 313 |
-
if (not session or ("result" not in session)):
|
| 314 |
-
raise HTTPException(status_code = 404,
|
| 315 |
-
detail = "Batch result not found",
|
| 316 |
-
)
|
| 317 |
-
|
| 318 |
-
path = pdf_reporter.export_batch(session["result"])
|
| 319 |
-
|
| 320 |
-
# Read the file and send it as a download
|
| 321 |
-
with open(path, "rb") as f:
|
| 322 |
-
content = f.read()
|
| 323 |
-
|
| 324 |
-
# Clean up the file after sending
|
| 325 |
-
path.unlink(missing_ok = True)
|
| 326 |
-
|
| 327 |
-
return Response(content = content,
|
| 328 |
-
media_type = "application/pdf",
|
| 329 |
-
headers = {"Content-Disposition" : f"attachment; filename=ai_screener_report_{batch_id}.pdf",
|
| 330 |
-
"Content-Type" : "application/pdf"
|
| 331 |
-
}
|
| 332 |
-
)
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
# ==================== MAIN ====================
|
| 337 |
if __name__ == "__main__":
|
| 338 |
# Explicit startup log (forces log file creation)
|
|
|
|
| 8 |
from typing import Dict
|
| 9 |
from pathlib import Path
|
| 10 |
from fastapi import File
|
|
|
|
| 11 |
from fastapi import Request
|
| 12 |
from fastapi import FastAPI
|
| 13 |
from fastapi import UploadFile
|
|
|
|
| 23 |
from fastapi.staticfiles import StaticFiles
|
| 24 |
from utils.helpers import generate_unique_id
|
| 25 |
from reporter.csv_reporter import CSVReporter
|
|
|
|
| 26 |
from config.schemas import BatchAnalysisResult
|
| 27 |
from reporter.json_reporter import JSONReporter
|
| 28 |
from utils.image_processor import ImageProcessor
|
|
|
|
| 61 |
image_processor = ImageProcessor()
|
| 62 |
|
| 63 |
threshold_manager = ThresholdManager()
|
|
|
|
| 64 |
batch_processor = BatchProcessor(threshold_manager = threshold_manager)
|
| 65 |
|
| 66 |
json_reporter = JSONReporter()
|
| 67 |
csv_reporter = CSVReporter()
|
|
|
|
| 68 |
|
| 69 |
UPLOAD_DIR = settings.UPLOAD_DIR
|
| 70 |
CACHE_DIR = settings.CACHE_DIR
|
|
|
|
| 153 |
image_id = generate_unique_id()
|
| 154 |
image_path = UPLOAD_DIR / f"{image_id}_{file.filename}"
|
| 155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
try:
|
| 157 |
with open(image_path, "wb") as f:
|
| 158 |
shutil.copyfileobj(file.file, f)
|
| 159 |
|
| 160 |
+
image_validator.validate_image(file_path = image_path,
|
| 161 |
+
filename = file.filename,
|
| 162 |
+
file_size = file.size,
|
| 163 |
+
)
|
| 164 |
+
|
| 165 |
image = image_processor.load_image(image_path)
|
| 166 |
|
| 167 |
# image is a NumPy array → shape = (H, W, C) or (H, W)
|
| 168 |
height, width = image.shape[:2]
|
| 169 |
|
| 170 |
+
result: AnalysisResult = batch_processor.process_single(image_path = image_path,
|
| 171 |
filename = file.filename,
|
| 172 |
image_size = (width, height),
|
| 173 |
)
|
|
|
|
| 206 |
|
| 207 |
with open(path, "wb") as f:
|
| 208 |
shutil.copyfileobj(file.file, f)
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
image_validator.validate_image(file_path = path,
|
| 211 |
filename = file.filename,
|
| 212 |
file_size = file.size,
|
| 213 |
)
|
| 214 |
|
| 215 |
+
image = image_processor.load_image(path)
|
| 216 |
+
height, width = image.shape[:2]
|
| 217 |
+
|
| 218 |
image_entries.append({"path" : path,
|
| 219 |
"filename" : file.filename,
|
| 220 |
"size" : (width, height),
|
|
|
|
| 293 |
|
| 294 |
# Clean up the file after sending
|
| 295 |
path.unlink(missing_ok = True)
|
| 296 |
+
SESSION_STORE.pop(batch_id, None)
|
| 297 |
+
|
| 298 |
|
| 299 |
return Response(content = content,
|
| 300 |
media_type = "text/csv",
|
|
|
|
| 304 |
)
|
| 305 |
|
| 306 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
# ==================== MAIN ====================
|
| 308 |
if __name__ == "__main__":
|
| 309 |
# Explicit startup log (forces log file creation)
|
config/constants.py
CHANGED
|
@@ -5,7 +5,7 @@ from dataclasses import dataclass
|
|
| 5 |
|
| 6 |
class DetectionStatus(str, Enum):
|
| 7 |
"""
|
| 8 |
-
|
| 9 |
"""
|
| 10 |
LIKELY_AUTHENTIC = "LIKELY_AUTHENTIC"
|
| 11 |
REVIEW_REQUIRED = "REVIEW_REQUIRED"
|
|
@@ -41,6 +41,37 @@ class MetricType(str, Enum):
|
|
| 41 |
COLOR = "color"
|
| 42 |
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
# Signal thresholds
|
| 46 |
SIGNAL_THRESHOLDS = {SignalStatus.FLAGGED : 0.7,
|
|
@@ -314,6 +345,79 @@ class ColorAnalysisParams:
|
|
| 314 |
)
|
| 315 |
|
| 316 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
|
| 318 |
# Singleton instances for parameter classes
|
| 319 |
GRADIENT_FIELD_PCA_PARAMS = GradientFieldPCAParams()
|
|
@@ -321,5 +425,19 @@ FREQUENCY_ANALYSIS_PARAMS = FrequencyAnalysisParams()
|
|
| 321 |
NOISE_ANALYSIS_PARAMS = NoiseAnalysisParams()
|
| 322 |
TEXTURE_ANALYSIS_PARAMS = TextureAnalysisParams()
|
| 323 |
COLOR_ANALYSIS_PARAMS = ColorAnalysisParams()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
|
|
|
|
|
|
| 5 |
|
| 6 |
class DetectionStatus(str, Enum):
|
| 7 |
"""
|
| 8 |
+
Binary status derived from ensemble score only: FinalDecision supersedes this once decision policy is applied
|
| 9 |
"""
|
| 10 |
LIKELY_AUTHENTIC = "LIKELY_AUTHENTIC"
|
| 11 |
REVIEW_REQUIRED = "REVIEW_REQUIRED"
|
|
|
|
| 41 |
COLOR = "color"
|
| 42 |
|
| 43 |
|
| 44 |
+
class EvidenceType(str, Enum):
|
| 45 |
+
EXIF = "exif"
|
| 46 |
+
WATERMARK = "watermark"
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class EvidenceStrength(str, Enum):
|
| 51 |
+
"""
|
| 52 |
+
Ordered by increasing certainty: WEAK < MODERATE < STRONG < CONCLUSIVE
|
| 53 |
+
"""
|
| 54 |
+
WEAK = "weak" # heuristic, non-binding
|
| 55 |
+
MODERATE = "moderate" # strong hint, not cryptographic
|
| 56 |
+
STRONG = "strong" # vendor watermark, strong signal
|
| 57 |
+
CONCLUSIVE = "conclusive" # cryptographic / signed proof
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class EvidenceDirection(str, Enum):
|
| 61 |
+
"""
|
| 62 |
+
What this evidence supports
|
| 63 |
+
"""
|
| 64 |
+
AI_GENERATED = "ai_generated"
|
| 65 |
+
AUTHENTIC = "authentic"
|
| 66 |
+
INDETERMINATE = "indeterminate"
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
class FinalDecision(str, Enum):
|
| 70 |
+
MOSTLY_AUTHENTIC = "mostly_authentic"
|
| 71 |
+
AUTHENTIC_BUT_REVIEW = "authentic_but_review"
|
| 72 |
+
SUSPICIOUS_AI_LIKELY = "suspicious_ai_likely"
|
| 73 |
+
CONFIRMED_AI_GENERATED = "confirmed_ai_generated"
|
| 74 |
+
|
| 75 |
|
| 76 |
# Signal thresholds
|
| 77 |
SIGNAL_THRESHOLDS = {SignalStatus.FLAGGED : 0.7,
|
|
|
|
| 345 |
)
|
| 346 |
|
| 347 |
|
| 348 |
+
@dataclass(frozen = True)
class SignalConfidenceParams:
    """
    Parameters for Tier-1 signal confidence calculation

    Raises:
    -------
        ValueError : If AGREEMENT_WEIGHT + RELIABILITY_WEIGHT + DISTANCE_WEIGHT does not sum to 1.0
    """
    # Agreement (variance-based confidence)
    VARIANCE_NORM                  : float = 0.10

    # Distance-from-threshold confidence
    DISTANCE_NORM                  : float = 0.30

    # Fallback when metric confidence is missing
    DEFAULT_RELIABILITY_CONFIDENCE : float = 0.60

    # Weighting of confidence components (must sum to 1.0)
    AGREEMENT_WEIGHT               : float = 0.40
    RELIABILITY_WEIGHT             : float = 0.30
    DISTANCE_WEIGHT                : float = 0.30

    def __post_init__(self) -> None:
        """
        Enforce the documented invariant that the three component weights sum to 1.0
        """
        weight_total = self.AGREEMENT_WEIGHT + self.RELIABILITY_WEIGHT + self.DISTANCE_WEIGHT

        # Tolerance absorbs binary floating-point rounding of decimal weights
        if (abs(weight_total - 1.0) > 1e-9):
            raise ValueError(f"Confidence component weights must sum to 1.0, got {weight_total}")
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
@dataclass(frozen = True)
|
| 369 |
+
class WatermarkAnalysisParams:
|
| 370 |
+
"""
|
| 371 |
+
Parameters for heuristic watermark detection
|
| 372 |
+
"""
|
| 373 |
+
# Confidence thresholds
|
| 374 |
+
STRONG_CONFIDENCE_THRESHOLD : float = 0.85
|
| 375 |
+
CONFIDENCE_CAP : float = 0.95
|
| 376 |
+
|
| 377 |
+
# Wavelet-domain thresholds
|
| 378 |
+
HF_ENERGY_RATIO_THRESHOLD : float = 0.18
|
| 379 |
+
KURTOSIS_THRESHOLD : float = 7.5
|
| 380 |
+
PERIODICITY_THRESHOLD : float = 0.8
|
| 381 |
+
|
| 382 |
+
HF_ENERGY_RATIO_NORM : float = 0.4
|
| 383 |
+
KURTOSIS_NORM_FACTOR : float = 15.0
|
| 384 |
+
PEAK_STD_MULTIPLIER : float = 3.0
|
| 385 |
+
|
| 386 |
+
# Frequency-domain thresholds
|
| 387 |
+
MIN_ANOMALOUS_BANDS : int = 2
|
| 388 |
+
SPECTRAL_SYMMETRY_THRESHOLD : float = 0.6
|
| 389 |
+
PEAK_RATIO_THRESHOLD : float = 0.05
|
| 390 |
+
|
| 391 |
+
# LSB steganography thresholds
|
| 392 |
+
LSB_ENTROPY_THRESHOLD : float = 0.72
|
| 393 |
+
CHI_SQUARE_THRESHOLD : float = 20.0
|
| 394 |
+
RUNS_SCORE_THRESHOLD : float = 0.6
|
| 395 |
+
LSB_ENTROPY_NORM_BASE : float = 0.5
|
| 396 |
+
LSB_ENTROPY_NORM_RANGE : float = 0.5
|
| 397 |
+
CHI_SQUARE_NORM_FACTOR : float = 50.0
|
| 398 |
+
|
| 399 |
+
|
| 400 |
+
@dataclass(frozen = True)
|
| 401 |
+
class ExifAnalysisParams:
|
| 402 |
+
"""
|
| 403 |
+
Parameters for EXIF metadata analysis
|
| 404 |
+
"""
|
| 405 |
+
# Confidence values
|
| 406 |
+
MISSING_EXIF_CONFIDENCE : float = 0.5
|
| 407 |
+
AI_FINGERPRINT_CONFIDENCE : float = 0.9
|
| 408 |
+
CAMERA_BASE_CONFIDENCE : float = 0.7
|
| 409 |
+
CAMERA_WITH_LENS_CONFIDENCE : float = 0.75
|
| 410 |
+
SUSPICIOUS_CAMERA_CONFIDENCE : float = 0.4
|
| 411 |
+
TIMESTAMP_INCONSISTENCY_CONFIDENCE : float = 0.4
|
| 412 |
+
MISSING_PHOTO_METADATA_CONFIDENCE : float = 0.5
|
| 413 |
+
SUSPICIOUS_TIMESTAMP_CONFIDENCE : float = 0.3
|
| 414 |
+
|
| 415 |
+
# Thresholds
|
| 416 |
+
TIMESTAMP_DELTA_THRESHOLD : float = 5.0 # seconds
|
| 417 |
+
MIN_VALID_YEAR : int = 1990 # before digital cameras
|
| 418 |
+
MAX_FUTURE_YEARS : int = 1 # how many years in future is valid
|
| 419 |
+
|
| 420 |
+
|
| 421 |
|
| 422 |
# Singleton instances for parameter classes
|
| 423 |
GRADIENT_FIELD_PCA_PARAMS = GradientFieldPCAParams()
|
|
|
|
| 425 |
NOISE_ANALYSIS_PARAMS = NoiseAnalysisParams()
|
| 426 |
TEXTURE_ANALYSIS_PARAMS = TextureAnalysisParams()
|
| 427 |
COLOR_ANALYSIS_PARAMS = ColorAnalysisParams()
|
| 428 |
+
SIGNAL_CONFIDENCE_PARAMS = SignalConfidenceParams()
|
| 429 |
+
|
| 430 |
+
|
| 431 |
+
# Singleton instances for evidence analysis classes
|
| 432 |
+
WATERMARK_ANALYSIS_PARAMS = WatermarkAnalysisParams()
|
| 433 |
+
EXIF_ANALYSIS_PARAMS = ExifAnalysisParams()
|
| 434 |
+
|
| 435 |
|
| 436 |
+
# Evidence Strength ordering
|
| 437 |
+
EVIDENCE_STRENGTH_ORDER = {EvidenceStrength.WEAK : 1,
|
| 438 |
+
EvidenceStrength.MODERATE : 2,
|
| 439 |
+
EvidenceStrength.STRONG : 3,
|
| 440 |
+
EvidenceStrength.CONCLUSIVE : 4,
|
| 441 |
+
}
|
| 442 |
|
| 443 |
+
MIN_EVIDENCE_CONFIDENCE = 0.6
|
config/schemas.py
CHANGED
|
@@ -6,10 +6,15 @@ from typing import Optional
|
|
| 6 |
from datetime import datetime
|
| 7 |
from pydantic import BaseModel
|
| 8 |
from config.constants import MetricType
|
|
|
|
| 9 |
from config.constants import SignalStatus
|
|
|
|
| 10 |
from config.constants import DetectionStatus
|
|
|
|
|
|
|
| 11 |
|
| 12 |
|
|
|
|
| 13 |
class MetricResult(BaseModel):
|
| 14 |
"""
|
| 15 |
Raw metric output for explainability and reporting
|
|
@@ -52,29 +57,63 @@ class DetectionSignal(BaseModel):
|
|
| 52 |
}
|
| 53 |
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
class AnalysisResult(BaseModel):
|
| 56 |
"""
|
| 57 |
Single image analysis result
|
| 58 |
"""
|
| 59 |
-
filename
|
| 60 |
-
overall_score
|
| 61 |
-
status
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
|
| 78 |
}
|
| 79 |
|
| 80 |
|
|
@@ -91,6 +130,7 @@ class BatchAnalysisResult(BaseModel):
|
|
| 91 |
timestamp : datetime = Field(default_factory = datetime.now)
|
| 92 |
|
| 93 |
|
|
|
|
| 94 |
class APIResponse(BaseModel):
|
| 95 |
"""
|
| 96 |
Standard API response wrapper
|
|
|
|
| 6 |
from datetime import datetime
|
| 7 |
from pydantic import BaseModel
|
| 8 |
from config.constants import MetricType
|
| 9 |
+
from config.constants import EvidenceType
|
| 10 |
from config.constants import SignalStatus
|
| 11 |
+
from config.constants import FinalDecision
|
| 12 |
from config.constants import DetectionStatus
|
| 13 |
+
from config.constants import EvidenceStrength
|
| 14 |
+
from config.constants import EvidenceDirection
|
| 15 |
|
| 16 |
|
| 17 |
+
# Metric-Level Structures
|
| 18 |
class MetricResult(BaseModel):
|
| 19 |
"""
|
| 20 |
Raw metric output for explainability and reporting
|
|
|
|
| 57 |
}
|
| 58 |
|
| 59 |
|
| 60 |
+
# Evidence-Level Structures
|
| 61 |
+
class EvidenceResult(BaseModel):
|
| 62 |
+
"""
|
| 63 |
+
Declarative evidence extracted from image metadata, watermarking, or cryptographic provenance systems
|
| 64 |
+
"""
|
| 65 |
+
source : EvidenceType = Field(..., description = "Evidence source type (exif, watermark, c2pa)")
|
| 66 |
+
finding : str = Field(..., description = "Human-readable description of the evidence")
|
| 67 |
+
direction : EvidenceDirection = Field(..., description = "What this evidence supports")
|
| 68 |
+
strength : EvidenceStrength = Field(..., description = "How strong or reliable this evidence is")
|
| 69 |
+
confidence : Optional[float] = Field(None, ge = 0.0, le = 1.0, description = "Confidence in the evidence extraction itself")
|
| 70 |
+
details : Dict = Field(default_factory = dict, description = "Raw extracted fields or technical metadata")
|
| 71 |
+
analyzer : str = Field(..., description = "Analyzer that produced this evidence (exif_analyzer, watermark_analyzer, etc.)")
|
| 72 |
+
timestamp : datetime = Field(default_factory = datetime.now)
|
| 73 |
+
model_config = {"json_schema_extra": {"example" : {"source" : "watermark",
|
| 74 |
+
"finding" : "Midjourney v6 watermark detected",
|
| 75 |
+
"direction" : "ai_generated",
|
| 76 |
+
"strength" : "strong",
|
| 77 |
+
"confidence" : 0.92,
|
| 78 |
+
"details" : {"watermark_type" : "DWT",
|
| 79 |
+
"vendor" : "Midjourney",
|
| 80 |
+
"version" : "v6"
|
| 81 |
+
},
|
| 82 |
+
"analyzer" : "watermark_analyzer"
|
| 83 |
+
}
|
| 84 |
+
}
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
# Analysis-Level Structures
|
| 89 |
class AnalysisResult(BaseModel):
    """
    Single image analysis result

    Carries the Tier-1 metric outcome (overall_score, status, signals, metric_results), any declarative
    evidence collected for the image, and the final decision fields that are left as None until a
    decision policy fills them in
    """
    filename             : str
    overall_score        : float                          = Field(..., ge = 0.0, le = 1.0)
    status               : DetectionStatus
    final_decision       : Optional[FinalDecision]        = Field(None, description = "Authoritative decision after evidence-first policy evaluation")
    decision_explanation : Optional[str]                  = Field(None, description = "Human-readable explanation of final decision")
    confidence           : int                            = Field(..., ge = 0, le = 100, description = "Confidence percentage")
    signals              : List[DetectionSignal]
    metric_results       : Dict[MetricType, MetricResult]
    evidence             : List[EvidenceResult]           = Field(default_factory = list, description = "Declarative evidence extracted before decision policy")
    processing_time      : float                          = Field(..., description = "Processing time in seconds")
    timestamp            : datetime                       = Field(default_factory = datetime.now)
    image_size           : tuple[int, int]                = Field(..., description = "Width x Height")

    # The example lists every required field (including metric_results) so that it validates against the model
    model_config = {"json_schema_extra" : {"example" : {"filename"        : "photo_001.jpg",
                                                        "overall_score"   : 0.73,
                                                        "status"          : "REVIEW_REQUIRED",
                                                        "confidence"      : 73,
                                                        "signals"         : [],
                                                        "metric_results"  : {},
                                                        "evidence"        : [],
                                                        "processing_time" : 2.34,
                                                        "image_size"      : [1920, 1080]
                                                       }
                                          }
                   }
|
| 118 |
|
| 119 |
|
|
|
|
| 130 |
timestamp : datetime = Field(default_factory = datetime.now)
|
| 131 |
|
| 132 |
|
| 133 |
+
# API Wrappers
|
| 134 |
class APIResponse(BaseModel):
|
| 135 |
"""
|
| 136 |
Standard API response wrapper
|
config/settings.py
CHANGED
|
@@ -46,6 +46,10 @@ class Settings(BaseSettings):
|
|
| 46 |
PROCESSING_TIMEOUT : int = 30
|
| 47 |
PARALLEL_PROCESSING : bool = True
|
| 48 |
MAX_WORKERS : int = 4
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
# Paths
|
| 51 |
BASE_DIR : Path = Path(__file__).parent.parent
|
|
|
|
| 46 |
PROCESSING_TIMEOUT : int = 30
|
| 47 |
PARALLEL_PROCESSING : bool = True
|
| 48 |
MAX_WORKERS : int = 4
|
| 49 |
+
METRIC_WORKERS : int = 4
|
| 50 |
+
EVIDENCE_WORKERS : int = 2
|
| 51 |
+
METRIC_TIMEOUT : float = 5.0
|
| 52 |
+
EVIDENCE_TIMEOUT : float = 5.0
|
| 53 |
|
| 54 |
# Paths
|
| 55 |
BASE_DIR : Path = Path(__file__).parent.parent
|
decision_builders/decision_policy.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dependencies
|
| 2 |
+
from typing import List
|
| 3 |
+
from utils.logger import get_logger
|
| 4 |
+
from config.schemas import AnalysisResult
|
| 5 |
+
from config.schemas import EvidenceResult
|
| 6 |
+
from config.constants import FinalDecision
|
| 7 |
+
from config.constants import DetectionStatus
|
| 8 |
+
from config.constants import EvidenceStrength
|
| 9 |
+
from config.constants import EvidenceDirection
|
| 10 |
+
from config.constants import MIN_EVIDENCE_CONFIDENCE
|
| 11 |
+
from config.constants import EVIDENCE_STRENGTH_ORDER
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# Setup Logging
|
| 15 |
+
logger = get_logger(__name__)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class DecisionPolicy:
    """
    Evidence-first decision policy

    Responsibilities:
    -----------------
    - Apply authoritative rules over Tier-1 metrics
    - Resolve conflicting evidence safely
    - Produce a final, explainable decision
    - Never perform probabilistic inference
    """
    def apply(self, analysis: AnalysisResult) -> AnalysisResult:
        """
        Apply final decision policy on analysis result

        Arguments:
        ----------
            analysis { AnalysisResult } : Result whose `evidence` and `status` are read; its `final_decision`
                                          and `decision_explanation` fields are set in place

        Returns:
        --------
            { AnalysisResult }          : The same object that was passed in, with the decision fields populated

        Any exception raised while resolving the decision is logged and converted into an
        AUTHENTIC_BUT_REVIEW decision, so this method does not propagate errors to the caller
        """
        try:
            evidence = analysis.evidence or []

            # No Tier-2 evidence at all: the metric status alone drives the decision
            if not evidence:
                analysis.final_decision       = self._decide_from_metrics(status = analysis.status)
                analysis.decision_explanation = ("No declarative evidence detected. Final decision is based on metric analysis.")

                return analysis

            logger.info(f"Applying decision policy on {len(evidence)} evidence items")

            decision, explanation         = self._decide_from_evidence(evidence        = evidence,
                                                                       fallback_status = analysis.status,
                                                                      )

            analysis.final_decision       = decision
            analysis.decision_explanation = explanation

            logger.info(f"Final decision resolved: {decision.value}")

            return analysis

        except Exception as e:
            # Decision policy must NEVER break pipeline
            logger.error(f"Decision policy failed: {e}")
            analysis.final_decision       = FinalDecision.AUTHENTIC_BUT_REVIEW
            analysis.decision_explanation = ("Decision policy encountered an internal error. Image requires manual review.")

            return analysis


    def _decide_from_evidence(self, evidence: List[EvidenceResult], fallback_status: DetectionStatus) -> tuple[FinalDecision, str]:
        """
        Resolve decision from Tier-2 evidence with explanation

        Arguments:
        ----------
            evidence        { list }            : Evidence items to evaluate

            fallback_status { DetectionStatus } : Tier-1 status used when no rule is decisive

        Returns:
        --------
            { tuple }                           : (final decision, human-readable explanation)

        Rules are evaluated in order and the first match wins:
            1. Conclusive AI evidence meeting the confidence floor
            2. Strong AI evidence meeting the confidence floor (downgraded to review on strong authentic conflict)
            3. Strong / conclusive authentic evidence
            4. AI and authentic evidence both present, or two or more indeterminate items
            5. Fallback to the metric-based status
        """
        # Storage for evidences
        ai_evidence         = list()
        authentic_evidence  = list()
        indeterminate_count = 0

        for item in evidence:
            if (item.direction == EvidenceDirection.AI_GENERATED):
                ai_evidence.append(item)

            elif (item.direction == EvidenceDirection.AUTHENTIC):
                authentic_evidence.append(item)

            else:
                if (item.direction == EvidenceDirection.INDETERMINATE):
                    indeterminate_count += 1

                logger.debug(f"Indeterminate evidence detected and excluded from decisive rules: {item.finding} ({item.analyzer})")

        # Apply the confidence floor BEFORE ranking: _strongest orders by strength first, so without this
        # filter a low-confidence CONCLUSIVE item would be selected, fail Rule 1, and also hide a
        # high-confidence STRONG item from Rule 2
        confident_ai        = [item for item in ai_evidence if ((item.confidence or 0.0) >= MIN_EVIDENCE_CONFIDENCE)]
        decisive_ai         = self._strongest(evidence = confident_ai)
        strongest_authentic = self._strongest(evidence = authentic_evidence)

        authentic_is_strong = bool(strongest_authentic and (strongest_authentic.strength in (EvidenceStrength.STRONG, EvidenceStrength.CONCLUSIVE)))

        # Rule 1: CONCLUSIVE AI
        if (decisive_ai and (decisive_ai.strength == EvidenceStrength.CONCLUSIVE)):
            return (FinalDecision.CONFIRMED_AI_GENERATED,
                    f"Conclusive evidence detected ({decisive_ai.finding}). This evidence provides cryptographic or authoritative proof of AI generation.",
                   )

        # Rule 2: Strong AI evidence
        if (decisive_ai and (decisive_ai.strength == EvidenceStrength.STRONG)):
            if authentic_is_strong:
                return (FinalDecision.AUTHENTIC_BUT_REVIEW,
                        "Strong evidence exists for both AI generation and authenticity. Conflicting high-authority signals require human review.",
                       )

            return (FinalDecision.SUSPICIOUS_AI_LIKELY,
                    f"Strong AI-related evidence detected ({decisive_ai.finding}). This evidence overrides metric-based assessment.")

        # Rule 3: Strong authentic evidence
        if authentic_is_strong:
            return (FinalDecision.MOSTLY_AUTHENTIC,
                    f"Strong authenticity evidence detected ({strongest_authentic.finding}). Image is assessed as mostly authentic."
                   )

        # Rule 4: Conflicting moderate evidence (any confidence counts here, as a conflict only triggers review)
        if (ai_evidence and authentic_evidence):
            return (FinalDecision.AUTHENTIC_BUT_REVIEW,
                    "Moderate evidence exists for both AI generation and authenticity. Evidence is inconclusive and requires manual review.")

        if (indeterminate_count >= 2):
            return (FinalDecision.AUTHENTIC_BUT_REVIEW,
                    "Multiple indeterminate evidence sources detected. Evidence is inconclusive and requires manual review."
                   )

        # Rule 5: Weak / indeterminate evidence
        decision = self._decide_from_metrics(status = fallback_status)

        return (decision,
                "No high-authority evidence detected. Final decision is derived from metric-based analysis.",
               )


    def _decide_from_metrics(self, status: DetectionStatus) -> FinalDecision:
        """
        Convert Tier-1 metric status into final decision

        REVIEW_REQUIRED maps to SUSPICIOUS_AI_LIKELY; every other status maps to MOSTLY_AUTHENTIC
        """
        if (status == DetectionStatus.REVIEW_REQUIRED):
            return FinalDecision.SUSPICIOUS_AI_LIKELY

        return FinalDecision.MOSTLY_AUTHENTIC


    def _strongest(self, evidence: List[EvidenceResult]) -> EvidenceResult | None:
        """
        Return strongest evidence by strength, then confidence

        Returns None for an empty list; a missing confidence is ranked as 0.0 and an unknown strength as 0
        """
        if not evidence:
            return None

        return max(evidence,
                   key = lambda item: (EVIDENCE_STRENGTH_ORDER.get(item.strength, 0), item.confidence or 0.0),
                  )
|
evidence_analyzers/evidence_aggregator.py
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dependencies
|
| 2 |
+
import time
|
| 3 |
+
from typing import List
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from utils.logger import get_logger
|
| 6 |
+
from config.settings import settings
|
| 7 |
+
from config.schemas import EvidenceResult
|
| 8 |
+
from concurrent.futures import TimeoutError
|
| 9 |
+
from concurrent.futures import as_completed
|
| 10 |
+
from config.constants import EvidenceStrength
|
| 11 |
+
from config.constants import EvidenceDirection
|
| 12 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 13 |
+
from config.constants import EVIDENCE_STRENGTH_ORDER
|
| 14 |
+
from evidence_analyzers.exif_analyzer import ExifAnalyzer
|
| 15 |
+
from evidence_analyzers.watermark_analyzer import WatermarkAnalyzer
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# Setup Logging
|
| 19 |
+
logger = get_logger(__name__)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class EvidenceAggregator:
    """
    Tier-2 Evidence Orchestrator

    Responsibilities:
    -----------------
    - Execute all evidence analyzers safely
    - Collect declarative evidence only (no inference)
    - Deduplicate overlapping findings
    - Rank evidence by authority & reliability
    - Remain forward-compatible with new evidence sources
    """
    def __init__(self):
        """
        Initialize all Tier-2 analyzers

        NOTE:
        -----
        Any new analyzer (C2PA, camera fingerprinting, sensor PRNU) must be added here explicitly
        """
        self.exif_analyzer      = ExifAnalyzer()
        self.watermark_analyzer = WatermarkAnalyzer()

        self._analyzers         = (self.exif_analyzer,
                                   self.watermark_analyzer,
                                  )

        logger.info("EvidenceAggregator initialized with analyzers: "
                    f"{[a.__class__.__name__ for a in self._analyzers]}")


    def analyze(self, image_path: Path) -> List[EvidenceResult]:
        """
        Run Tier-2 evidence extraction pipeline

        Every analyzer is submitted to a small thread pool. The whole batch shares one
        deadline of settings.EVIDENCE_TIMEOUT seconds: analyzers that have not finished
        by then are reported as timed out and their results are dropped, while evidence
        from the analyzers that did finish is still returned.

        Arguments:
        ----------
            image_path { Path } : Path to image file

        Returns:
        --------
            { list }            : Ordered, deduplicated evidence (empty list when nothing was found)
        """
        # Small, bounded executor for Tier-2 (I/O oriented); ThreadPoolExecutor rejects max_workers < 1
        max_workers        = max(1, min(len(self._analyzers), settings.EVIDENCE_WORKERS or 2))

        logger.info(f"Starting Tier-2 evidence analysis: {image_path}")

        evidence_collected = list()
        handled            = set()

        # Not used as a context manager on purpose: leaving a `with` block waits for every
        # worker thread, which would make the timeout below pointless for a hung analyzer
        executor           = ThreadPoolExecutor(max_workers = max_workers)

        try:
            futures = {executor.submit(analyzer.analyze, image_path = image_path): {"analyzer": analyzer, "start": time.time()} for analyzer in self._analyzers}

            try:
                # The deadline must sit on as_completed(): futures it yields are already
                # finished, so a timeout passed to future.result() could never trigger
                for future in as_completed(futures, timeout = settings.EVIDENCE_TIMEOUT):
                    handled.add(future)
                    self._collect_result(future             = future,
                                         meta               = futures[future],
                                         evidence_collected = evidence_collected,
                                        )

            except TimeoutError:
                for future, meta in futures.items():
                    if future in handled:
                        continue

                    if future.done():
                        # Finished in the gap between the deadline and this check: keep its evidence
                        self._collect_result(future             = future,
                                             meta               = meta,
                                             evidence_collected = evidence_collected,
                                            )
                        continue

                    # cancel() only prevents queued work from starting; a running thread cannot be stopped
                    future.cancel()
                    logger.warning(f"{meta['analyzer'].__class__.__name__} timed out")

        finally:
            # Do not block on analyzers that are still running past the deadline
            executor.shutdown(wait = False)

        if not evidence_collected:
            logger.info("No Tier-2 evidence detected")
            return []

        # Normalize, deduplicate & rank
        evidence = self._deduplicate(evidence = evidence_collected)
        evidence = self._rank_evidence(evidence = evidence)

        logger.info(f"Tier-2 evidence finalized: {len(evidence)} items")

        return evidence


    def _collect_result(self, future, meta: dict, evidence_collected: List[EvidenceResult]) -> None:
        """
        Move the outcome of one finished analyzer future into the shared evidence list

        Arguments:
        ----------
            future             { Future } : A completed future returned by the executor

            meta               { dict }   : {"analyzer": analyzer instance, "start": submission time}

            evidence_collected { list }   : Accumulator that receives the analyzer's evidence items

        A failing analyzer is logged and skipped so that one broken evidence source never
        aborts the whole Tier-2 pass.
        """
        name = meta["analyzer"].__class__.__name__

        try:
            results = future.result()
            elapsed = time.time() - meta["start"]

            logger.debug(f"{name} completed in {elapsed:.2f}s")

            if results:
                evidence_collected.extend(results)
                logger.debug(f"{name} returned {len(results)} evidence items")

            else:
                logger.debug(f"{name} returned no evidence")

        except Exception as e:
            logger.error(f"{name} failed: {e}")


    def _deduplicate(self, evidence: List[EvidenceResult]) -> List[EvidenceResult]:
        """
        Deduplicate evidence items

        Strategy:
        ---------
        - Same analyzer
        - Same semantic finding
        - Same direction

        Keeps the strongest / highest confidence instance; on a full tie the first item seen wins

        Arguments:
        ----------
            evidence { list } : Raw evidence items from all analyzers

        Returns:
        --------
            { list }          : One item per (analyzer, finding, direction) key, in first-seen key order
        """
        unique_map = dict()

        for item in evidence:
            key = (item.analyzer, item.finding, item.direction)

            if key not in unique_map:
                unique_map[key] = item
                continue

            existing               = unique_map[key]
            existing_strength_rank = self._strength_rank(strength = existing.strength)
            item_strength_rank     = self._strength_rank(strength = item.strength)

            # Prefer stronger evidence
            if (item_strength_rank > existing_strength_rank):
                unique_map[key] = item
                continue

            # Prefer higher confidence if strength equal (a missing confidence counts as 0.0)
            if (item_strength_rank == existing_strength_rank):
                if (item.confidence or 0.0) > (existing.confidence or 0.0):
                    unique_map[key] = item

        deduped = list(unique_map.values())

        logger.debug(f"Deduplicated evidence: {len(evidence)} → {len(deduped)}")

        return deduped


    def _rank_evidence(self, evidence: List[EvidenceResult]) -> List[EvidenceResult]:
        """
        Rank evidence by authority

        Ranking precedence:
        -------------------
        1. Direction  (AI > AUTHENTIC > INDETERMINATE)
        2. Strength   (CONCLUSIVE > STRONG > MODERATE > WEAK)
        3. Confidence (higher wins)

        Arguments:
        ----------
            evidence { list } : Deduplicated evidence items

        Returns:
        --------
            { list }          : New list sorted from most to least authoritative
        """
        def priority(e: EvidenceResult) -> tuple:
            # Tuples compare element by element, which yields the precedence listed above
            return (self._direction_rank(direction = e.direction),
                    self._strength_rank(strength = e.strength),
                    e.confidence or 0.0,
                   )

        ranked = sorted(evidence, key = priority, reverse = True)

        logger.debug("Evidence ranking completed")

        return ranked


    @staticmethod
    def _direction_rank(direction: EvidenceDirection) -> int:
        """
        Evidence direction priority (higher sorts first; unknown directions rank 0)
        """
        return {EvidenceDirection.AI_GENERATED  : 3,
                EvidenceDirection.AUTHENTIC     : 2,
                EvidenceDirection.INDETERMINATE : 1,
               }.get(direction, 0)


    @staticmethod
    def _strength_rank(strength: EvidenceStrength) -> int:
        """
        Evidence strength priority looked up in EVIDENCE_STRENGTH_ORDER (unknown strengths rank 0)
        """
        return EVIDENCE_STRENGTH_ORDER.get(strength, 0)
|
evidence_analyzers/exif_analyzer.py
ADDED
|
@@ -0,0 +1,375 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dependencies
|
| 2 |
+
from PIL import Image
|
| 3 |
+
from typing import List
|
| 4 |
+
from typing import Dict
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from PIL import ExifTags
|
| 7 |
+
from typing import Optional
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from utils.logger import get_logger
|
| 10 |
+
from config.constants import EvidenceType
|
| 11 |
+
from config.schemas import EvidenceResult
|
| 12 |
+
from config.constants import EvidenceStrength
|
| 13 |
+
from config.constants import EvidenceDirection
|
| 14 |
+
from config.constants import EXIF_ANALYSIS_PARAMS
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# Setup Logging
|
| 18 |
+
logger = get_logger(__name__)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class ExifAnalyzer:
    """
    EXIF analyzer produces declarative evidence only: No probabilistic inference

    Each check inspects the normalized EXIF dictionary and emits zero or more
    EvidenceResult items; combining them into a verdict is left to the caller.
    """
    # Lower-case substrings that identify AI generation tools in free-text EXIF fields
    AI_SOFTWARE_FINGERPRINTS = {"sdxl",
                                "dall-e",
                                "dall·e",
                                "openai",
                                "imagen",
                                "runway",
                                "comfyui",
                                "firefly",
                                "novelai",
                                "craiyon",
                                "leonardo",
                                "midjourney",
                                "adobe sensei",
                                "automatic1111",
                                "waifu diffusion",
                                "stable diffusion",
                                "bing image creator",
                               }

    CAMERA_FIELDS            = {"Make",
                                "Model",
                                "LensModel",
                               }

    TIME_FIELDS              = {"DateTime",
                                "DateTimeOriginal",
                                "DateTimeDigitized",
                               }

    # Free-text fields in which generation tools are known to leave their name
    AI_INDICATOR_FIELDS      = {"Artist",
                                "Software",
                                "XPComment",
                                "UserComment",
                                "ImageDescription",
                                "ProcessingSoftware",
                               }

    TIMESTAMP_FORMATS        = ["%Y:%m:%d %H:%M:%S",
                                "%Y-%m-%d %H:%M:%S",
                                "%Y:%m:%d %H:%M:%S.%f",
                               ]

    PHOTO_METADATA_FIELDS    = {"ISO",
                                "FNumber",
                                "FocalLength",
                                "ExposureTime",
                                "ISOSpeedRatings",
                               }

    # Lower-case substrings that make a camera Make / Model look like a placeholder
    SUSPICIOUS_PATTERNS      = {"unknown",
                                "none",
                                "camera",
                                "test",
                                "generic",
                                "placeholder",
                                "sample",
                               }

    # IFD0 tag that points at the Exif sub-IFD, where exposure settings, lens data and
    # the original / digitized timestamps are stored
    EXIF_SUB_IFD_TAG         = 0x8769


    def analyze(self, image_path: Path) -> List[EvidenceResult]:
        """
        Extract EXIF-based evidence from an image file

        Arguments:
        ----------
            image_path { Path } : Path to image file

        Returns:
        --------
            { list }            : Evidence items; a single INDETERMINATE item when the image has
                                  no EXIF data or when the file / metadata cannot be parsed
        """
        evidence = list()
        logger.debug(f"Starting EXIF analysis for {image_path}")

        try:
            # The context manager releases the file handle even when parsing raises
            with Image.open(fp = image_path, mode = "r") as image:
                exif_raw = image.getexif()

                # Normalize while the file is still open: reading the sub-IFD may touch the file
                exif     = self._normalize_exif(exif_raw = exif_raw) if exif_raw else None

            if exif is None:
                logger.info("No EXIF metadata found")
                evidence.append(self._missing_exif())
                return evidence

            logger.debug(f"Normalized EXIF fields: {list(exif.keys())}")

            evidence.extend(self._analyze_ai_indicators(exif = exif))
            evidence.extend(self._analyze_camera_presence(exif = exif))
            evidence.extend(self._analyze_timestamp_consistency(exif = exif))
            evidence.extend(self._analyze_suspicious_combinations(exif = exif))

        except Exception as e:
            # Best effort: a broken file must not abort the pipeline, so report it as evidence
            logger.exception("EXIF parsing failed")
            evidence.append(EvidenceResult(source     = EvidenceType.EXIF,
                                           finding    = "EXIF parsing failed",
                                           direction  = EvidenceDirection.INDETERMINATE,
                                           strength   = EvidenceStrength.WEAK,
                                           confidence = 0.0,
                                           details    = {"error": str(e)},
                                           analyzer   = "exif_analyzer",
                                          )
                           )

        logger.debug(f"EXIF analysis completed with {len(evidence)} evidence items")
        return evidence


    def _normalize_exif(self, exif_raw) -> Dict[str, str]:
        """
        Normalize EXIF tags to human-readable names

        The top-level mapping returned by Pillow's getexif() holds the IFD0 tags only, so
        the Exif sub-IFD is merged in when the object offers get_ifd(). Top-level values
        win on a tag collision. Tags unknown to ExifTags.TAGS keep their numeric id as key.

        Arguments:
        ----------
            exif_raw { mapping } : Pillow Exif object (any mapping of tag id -> value works)

        Returns:
        --------
            { dict }             : Tag name -> value rendered as string
        """
        raw_tags = dict(exif_raw.items())
        get_ifd  = getattr(exif_raw, "get_ifd", None)

        if callable(get_ifd):
            try:
                sub_ifd = get_ifd(self.EXIF_SUB_IFD_TAG) or {}

            except Exception as e:
                # A damaged sub-IFD should not discard the IFD0 tags already read
                logger.debug(f"Could not read Exif sub-IFD: {e}")
                sub_ifd = {}

            for tag, value in sub_ifd.items():
                raw_tags.setdefault(tag, value)

        normalized_exif = dict()

        for tag, value in raw_tags.items():
            tag_name                  = ExifTags.TAGS.get(tag, tag)
            normalized_exif[tag_name] = self._stringify_exif_value(tag_name = tag_name, value = value)

        return normalized_exif


    @staticmethod
    def _stringify_exif_value(tag_name, value) -> str:
        """
        Render one raw EXIF value as text

        Byte payloads are decoded leniently: Windows "XP*" tags are UTF-16LE, everything
        else is treated as UTF-8. NUL bytes (string terminators, charset-prefix padding,
        UTF-16 high bytes of ASCII text) are removed so substring matching works.
        """
        if not isinstance(value, bytes):
            return str(value)

        is_xp_tag = isinstance(tag_name, str) and tag_name.startswith("XP")
        encoding  = "utf-16-le" if is_xp_tag else "utf-8"

        return value.decode(encoding, errors = "ignore").replace("\x00", "").strip()


    def _missing_exif(self) -> EvidenceResult:
        """
        Missing EXIF is suspicious but not conclusive
        """
        missing_exif = EvidenceResult(source     = EvidenceType.EXIF,
                                      finding    = "No EXIF metadata present (common in AI images and processed web images)",
                                      direction  = EvidenceDirection.INDETERMINATE,
                                      strength   = EvidenceStrength.WEAK,
                                      confidence = EXIF_ANALYSIS_PARAMS.MISSING_EXIF_CONFIDENCE,
                                      details    = {"note": "Missing EXIF alone is not conclusive"},
                                      analyzer   = "exif_analyzer",
                                     )

        return missing_exif


    def _analyze_ai_indicators(self, exif: Dict[str, str]) -> List[EvidenceResult]:
        """
        Check multiple EXIF fields for AI tool indicators

        At most one evidence item is produced per field (the first fingerprint that matches).
        Fields and fingerprints are visited in sorted order so the output is reproducible.
        """
        evidence = list()

        for field_name in sorted(self.AI_INDICATOR_FIELDS):
            field_value = exif.get(field_name, "").lower()

            if not field_value:
                continue

            logger.debug(f"Checking {field_name}: {field_value}")

            for fingerprint in sorted(self.AI_SOFTWARE_FINGERPRINTS):
                if (fingerprint in field_value):
                    logger.warning(f"AI software fingerprint detected in {field_name}: {fingerprint}")
                    evidence.append(EvidenceResult(source     = EvidenceType.EXIF,
                                                   finding    = f"EXIF {field_name} tag matches known AI tool: '{field_value}'",
                                                   direction  = EvidenceDirection.AI_GENERATED,
                                                   strength   = EvidenceStrength.STRONG,
                                                   confidence = EXIF_ANALYSIS_PARAMS.AI_FINGERPRINT_CONFIDENCE,
                                                   details    = {"field"       : field_name,
                                                                 "value"       : field_value,
                                                                 "fingerprint" : fingerprint
                                                                },
                                                   analyzer   = "exif_analyzer",
                                                  )
                                   )
                    break

        return evidence


    def _analyze_camera_presence(self, exif: Dict[str, str]) -> List[EvidenceResult]:
        """
        Analyze camera metadata for authenticity indicators

        Requires both Make and Model. Placeholder-looking values yield an INDETERMINATE
        item; otherwise an AUTHENTIC item whose confidence is higher when a lens is named.
        """
        evidence = list()

        make     = exif.get("Make")
        model    = exif.get("Model")
        lens     = exif.get("LensModel")

        if (make and model):
            logger.debug(f"Camera metadata found: {make} {model}")

            confidence = EXIF_ANALYSIS_PARAMS.CAMERA_BASE_CONFIDENCE
            details    = {"make": make, "model": model}

            if lens:
                confidence      = EXIF_ANALYSIS_PARAMS.CAMERA_WITH_LENS_CONFIDENCE
                details["lens"] = lens
                logger.debug(f"Lens metadata found: {lens}")

            if self._is_suspicious_camera_data(make = make, model = model):
                logger.warning(f"Suspicious camera metadata: {make} {model}")
                evidence.append(EvidenceResult(source     = EvidenceType.EXIF,
                                               finding    = f"Suspicious camera metadata detected: {make} {model}",
                                               direction  = EvidenceDirection.INDETERMINATE,
                                               strength   = EvidenceStrength.WEAK,
                                               confidence = EXIF_ANALYSIS_PARAMS.SUSPICIOUS_CAMERA_CONFIDENCE,
                                               details    = details,
                                               analyzer   = "exif_analyzer",
                                              )
                               )
            else:
                evidence.append(EvidenceResult(source     = EvidenceType.EXIF,
                                               finding    = f"Camera metadata present: {make} {model}",
                                               direction  = EvidenceDirection.AUTHENTIC,
                                               strength   = EvidenceStrength.MODERATE,
                                               confidence = confidence,
                                               details    = details,
                                               analyzer   = "exif_analyzer",
                                              )
                               )
        else:
            logger.info("No camera metadata present")

        return evidence


    def _is_suspicious_camera_data(self, make: str, model: str) -> bool:
        """
        Check if camera data looks fake or suspicious

        True when any SUSPICIOUS_PATTERNS entry occurs (case-insensitively) in make or model
        """
        make_lower  = make.lower()
        model_lower = model.lower()

        for pattern in self.SUSPICIOUS_PATTERNS:
            if ((pattern in make_lower) or (pattern in model_lower)):
                return True

        return False


    def _analyze_timestamp_consistency(self, exif: Dict[str, str]) -> List[EvidenceResult]:
        """
        Check for timestamp inconsistencies

        Compares every parseable TIME_FIELDS value; when the spread between the earliest
        and latest exceeds TIMESTAMP_DELTA_THRESHOLD seconds one INDETERMINATE item is
        returned. Fewer than two parseable timestamps produce no evidence.
        """
        timestamps = dict()

        for field in sorted(self.TIME_FIELDS):
            if (field not in exif):
                continue

            parsed_time = self._parse_timestamp(timestamp_str = exif[field])

            if parsed_time:
                timestamps[field] = parsed_time

        if (len(timestamps) < 2):
            return []

        time_values   = list(timestamps.values())
        delta         = max(time_values) - min(time_values)
        delta_seconds = delta.total_seconds()

        logger.debug(f"Timestamp delta: {delta_seconds} seconds across {len(timestamps)} fields")

        if (delta_seconds > EXIF_ANALYSIS_PARAMS.TIMESTAMP_DELTA_THRESHOLD):
            logger.warning(f"Inconsistent EXIF timestamps detected: {delta_seconds}s delta")
            return [EvidenceResult(source     = EvidenceType.EXIF,
                                   finding    = f"Inconsistent EXIF timestamps ({delta_seconds:.1f}s difference)",
                                   direction  = EvidenceDirection.INDETERMINATE,
                                   strength   = EvidenceStrength.WEAK,
                                   confidence = EXIF_ANALYSIS_PARAMS.TIMESTAMP_INCONSISTENCY_CONFIDENCE,
                                   details    = {"delta_seconds" : delta_seconds,
                                                 "timestamps"    : {k: v.isoformat() for k, v in timestamps.items()},
                                                },
                                   analyzer   = "exif_analyzer",
                                  )
                   ]

        return []


    def _parse_timestamp(self, timestamp_str: str) -> Optional[datetime]:
        """
        Parse timestamp with multiple format attempts

        Returns the first successful parse against TIMESTAMP_FORMATS, or None when no format fits
        """
        for fmt in self.TIMESTAMP_FORMATS:
            try:
                return datetime.strptime(timestamp_str, fmt)

            except (ValueError, TypeError):
                continue

        logger.debug(f"Could not parse timestamp: {timestamp_str}")
        return None


    def _analyze_suspicious_combinations(self, exif: Dict[str, str]) -> List[EvidenceResult]:
        """
        Detect suspicious combinations of EXIF data

        Two independent checks:
        - a camera (Make + Model) is named but none of PHOTO_METADATA_FIELDS is present
        - a timestamp looks implausible; only the first offending field is reported
        """
        evidence           = list()

        has_camera         = exif.get("Make") and exif.get("Model")
        has_photo_metadata = any([exif.get(field) for field in self.PHOTO_METADATA_FIELDS])

        if (has_camera and not has_photo_metadata):
            logger.warning("Camera metadata present but missing photographic settings")
            evidence.append(EvidenceResult(source     = EvidenceType.EXIF,
                                           finding    = "Camera identified but photographic metadata missing (suspicious)",
                                           direction  = EvidenceDirection.INDETERMINATE,
                                           strength   = EvidenceStrength.WEAK,
                                           confidence = EXIF_ANALYSIS_PARAMS.MISSING_PHOTO_METADATA_CONFIDENCE,
                                           details    = {"has_camera"       : True,
                                                         "missing_settings" : sorted(self.PHOTO_METADATA_FIELDS),
                                                        },
                                           analyzer   = "exif_analyzer",
                                          )
                           )

        for field in sorted(self.TIME_FIELDS):
            if (field not in exif):
                continue

            timestamp = self._parse_timestamp(timestamp_str = exif[field])

            if (timestamp and self._is_suspicious_timestamp(dt = timestamp)):
                logger.warning(f"Suspicious timestamp detected: {timestamp}")
                evidence.append(EvidenceResult(source     = EvidenceType.EXIF,
                                               finding    = f"Suspicious timestamp pattern in {field}",
                                               direction  = EvidenceDirection.INDETERMINATE,
                                               strength   = EvidenceStrength.WEAK,
                                               confidence = EXIF_ANALYSIS_PARAMS.SUSPICIOUS_TIMESTAMP_CONFIDENCE,
                                               details    = {"field"     : field,
                                                             "timestamp" : timestamp.isoformat(),
                                                             "reason"    : "Suspiciously round time (midnight or all zeros)",
                                                            },
                                               analyzer   = "exif_analyzer",
                                              )
                               )
                break

        return evidence


    def _is_suspicious_timestamp(self, dt: datetime) -> bool:
        """
        Check if timestamp looks fake: too perfect/round

        True for exactly 00:00:00, for years before MIN_VALID_YEAR, and for years more
        than MAX_FUTURE_YEARS ahead of the current year
        """
        if ((dt.hour == 0) and (dt.minute == 0) and (dt.second == 0)):
            return True

        if (dt.year < EXIF_ANALYSIS_PARAMS.MIN_VALID_YEAR):
            return True

        if (dt.year > datetime.now().year + EXIF_ANALYSIS_PARAMS.MAX_FUTURE_YEARS):
            return True

        return False
|
evidence_analyzers/watermark_analyzer.py
ADDED
|
@@ -0,0 +1,463 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dependencies
|
| 2 |
+
import pywt
|
| 3 |
+
import cv2
|
| 4 |
+
import numpy as np
|
| 5 |
+
from typing import List
|
| 6 |
+
from typing import Tuple
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from scipy import fftpack
|
| 9 |
+
from scipy.stats import entropy
|
| 10 |
+
from utils.logger import get_logger
|
| 11 |
+
from scipy.signal import correlate2d
|
| 12 |
+
from config.schemas import EvidenceResult
|
| 13 |
+
from config.constants import EvidenceType
|
| 14 |
+
from config.constants import EvidenceStrength
|
| 15 |
+
from config.constants import EvidenceDirection
|
| 16 |
+
from utils.image_processor import ImageProcessor
|
| 17 |
+
from config.constants import WATERMARK_ANALYSIS_PARAMS
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# Setup Logging
|
| 21 |
+
logger = get_logger(__name__)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class WatermarkAnalyzer:
|
| 25 |
+
"""
|
| 26 |
+
Generic watermark detector using signal processing techniques:
|
| 27 |
+
- Detects invisible watermarks through frequency domain analysis
|
| 28 |
+
- wavelet decomposition, and statistical anomalies - vendor agnostic
|
| 29 |
+
"""
|
| 30 |
+
def __init__(self):
    """
    Create the ImageProcessor that analyze() uses to load image files
    """
    self.image_processor = ImageProcessor()
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def analyze(self, image_path: Path) -> List[EvidenceResult]:
    """
    Run every watermark detector on one image

    Arguments:
    ----------
        image_path { Path } : Path to image file

    Returns:
    --------
        { list }            : Findings of the wavelet, frequency and LSB detectors, in that order
                              (empty list when none of them reports anything)
    """
    logger.debug(f"Starting watermark analysis for {image_path}")

    pixels    = self.image_processor.load_image(image_path)

    # Detectors run in a fixed order; each one contributes zero or more findings
    detectors = (self._detect_wavelet_watermarks,
                 self._detect_frequency_watermarks,
                 self._detect_lsb_steganography,
                )

    findings  = list()

    for detector in detectors:
        findings.extend(detector(image = pixels))

    logger.debug(f"Watermark analysis completed with {len(findings)} findings")

    return findings if findings else []
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _detect_wavelet_watermarks(self, image: np.ndarray) -> List[EvidenceResult]:
    """
    Look for traces of invisible watermarking in the wavelet domain

    A single-level Haar decomposition splits the grayscale image into an approximation
    band and three detail bands. Three indicators are derived from the detail bands:
    - share of the total variance carried by the detail bands
    - average kurtosis of the detail coefficients
    - periodicity score of the detail bands

    A finding is reported when the variance share is anomalous together with either an
    anomalous kurtosis or an anomalous periodicity score.

    Arguments:
    ----------
        image { np.ndarray } : Image array; 3-dimensional input is converted with COLOR_RGB2GRAY

    Returns:
    --------
        { list }             : One evidence item when a pattern is detected, otherwise an empty
                               list (also on any internal error, which is logged)
    """
    logger.debug("Checking for wavelet-domain watermarks")

    params = WATERMARK_ANALYSIS_PARAMS

    try:
        # Work on a single intensity plane
        is_color = (len(image.shape) == 3)
        gray     = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) if is_color else image.copy()

        # Haar DWT: approximation band plus horizontal / vertical / diagonal detail bands
        cA, (cH, cV, cD) = pywt.dwt2(gray, 'haar')

        # Band energies measured as variance
        energy_approx    = np.var(cA)
        detail_energies  = [np.var(band) for band in (cH, cV, cD)]

        total_hf_energy  = sum(detail_energies)
        total_energy     = energy_approx + total_hf_energy

        # A perfectly flat image carries no signal to inspect
        if (total_energy == 0):
            return []

        # Indicator 1: share of energy in the detail bands
        hf_ratio               = total_hf_energy / total_energy
        anomalous_energy       = hf_ratio > params.HF_ENERGY_RATIO_THRESHOLD

        # Indicator 2: heavy-tailed detail coefficients
        band_kurtosis          = [self._calculate_kurtosis(data = band) for band in (cH, cV, cD)]
        avg_kurtosis           = sum(band_kurtosis) / 3
        anomalous_distribution = avg_kurtosis > params.KURTOSIS_THRESHOLD

        # Indicator 3: periodic structure (grid-based embedding)
        periodicity_score      = self._detect_periodicity(cH, cV, cD)

        # Nothing to report unless the combined rule fires
        if not ((anomalous_energy and anomalous_distribution) or ((periodicity_score > params.PERIODICITY_THRESHOLD) and anomalous_energy)):
            return []

        confidence = self._calculate_confidence([hf_ratio / params.HF_ENERGY_RATIO_NORM,
                                                 min(avg_kurtosis / params.KURTOSIS_NORM_FACTOR, 1.0),
                                                 periodicity_score
                                                ])

        # Only a high-confidence hit is allowed to point towards AI generation
        if (confidence >= params.STRONG_CONFIDENCE_THRESHOLD):
            direction = EvidenceDirection.AI_GENERATED
            strength  = EvidenceStrength.STRONG

        else:
            direction = EvidenceDirection.INDETERMINATE
            strength  = EvidenceStrength.MODERATE

        logger.warning(f"Heuristic watermark pattern detected in wavelet domain: (confidence: {confidence:.2f})")

        finding = EvidenceResult(source     = EvidenceType.WATERMARK,
                                 finding    = "Statistical patterns consistent with invisible watermarking or steganographic embedding detected",
                                 direction  = direction,
                                 strength   = strength,
                                 confidence = confidence,
                                 details    = {"method"               : "wavelet_analysis",
                                               "note"                 : "Heuristic detection; not cryptographic or vendor watermark verification",
                                               "high_frequency_ratio" : float(hf_ratio),
                                               "avg_kurtosis"         : float(avg_kurtosis),
                                               "periodicity_score"    : float(periodicity_score),
                                               "wavelet_type"         : "haar"
                                              },
                                 analyzer   = "watermark_analyzer",
                                )

        return [finding]

    except Exception as e:
        logger.error(f"Error in wavelet watermark detection: {e}")

    return []
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def _detect_frequency_watermarks(self, image: np.ndarray) -> List[EvidenceResult]:
    """
    Detect watermark-like patterns in the frequency domain using FFT analysis

    The image is reduced to a single plane, transformed with a centred 2D FFT, and the log-magnitude
    spectrum is scanned in three radial bands. A band is anomalous when the share of values above
    (mean + PEAK_STD_MULTIPLIER * std) exceeds PEAK_RATIO_THRESHOLD. A detection requires at least
    MIN_ANOMALOUS_BANDS anomalous bands AND a left/right spectral symmetry score above
    SPECTRAL_SYMMETRY_THRESHOLD.

    Arguments:
    ----------
        image { np.ndarray } : Image array; a 3-dimensional array is converted with cv2.COLOR_RGB2GRAY
                               (presumably RGB channel order - confirm against caller), anything else is used as-is

    Returns:
    --------
        { list }             : One-element list holding the EvidenceResult when a pattern is detected,
                               otherwise an empty list (also when any internal step raises)
    """
    logger.debug("Checking for frequency-domain watermarks")

    try:
        # Convert to grayscale
        if (len(image.shape) == 3):
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

        else:
            gray = image.copy()

        # 2D FFT, shifted so the zero-frequency term sits at the centre of the spectrum
        fft           = fftpack.fft2(gray)
        fft_shift     = fftpack.fftshift(fft)
        magnitude     = np.abs(fft_shift)

        # Log scale so weak components are not drowned out by the dominant low frequencies
        magnitude_log = np.log1p(magnitude)

        # Radial bands expressed as fractions of the largest centred radius
        band_anomalies  = list()
        frequency_bands = [(0.2, 0.4),  # Mid-low frequencies
                           (0.4, 0.6),  # Mid frequencies
                           (0.6, 0.8),  # Mid-high frequencies
                          ]

        for low, high in frequency_bands:
            mask        = self._create_radial_mask(magnitude_log.shape, low, high)
            band_values = magnitude_log[mask]

            # Very small spectra can produce an empty ring; nothing to measure there
            if (band_values.size == 0):
                continue

            # Statistical analysis of band
            band_mean  = np.mean(band_values)
            band_std   = np.std(band_values)

            # Detect anomalous peaks (watermark signatures)
            threshold  = band_mean + WATERMARK_ANALYSIS_PARAMS.PEAK_STD_MULTIPLIER * band_std
            peaks      = np.sum(band_values > threshold)

            peak_ratio = peaks / band_values.size

            # Share of outliers in this band is above the configured limit
            if (peak_ratio > WATERMARK_ANALYSIS_PARAMS.PEAK_RATIO_THRESHOLD):
                band_anomalies.append({'band'       : (low, high),
                                       'peak_ratio' : float(peak_ratio),
                                       'peak_count' : int(peaks)
                                      })

        # Check for symmetric patterns (common in structured watermarks)
        symmetry_score = self._check_spectral_symmetry(magnitude = magnitude_log)

        detected       = ((len(band_anomalies) >= WATERMARK_ANALYSIS_PARAMS.MIN_ANOMALOUS_BANDS) and
                          (symmetry_score > WATERMARK_ANALYSIS_PARAMS.SPECTRAL_SYMMETRY_THRESHOLD))

        if detected:
            # Fraction of inspected bands that were anomalous, averaged with the symmetry score
            confidence = self._calculate_confidence([min(len(band_anomalies) / len(frequency_bands), 1.0),
                                                     symmetry_score
                                                    ])

            # Only a strong heuristic hit is allowed to point towards AI generation
            is_strong  = (confidence >= WATERMARK_ANALYSIS_PARAMS.STRONG_CONFIDENCE_THRESHOLD)
            direction  = (EvidenceDirection.AI_GENERATED if is_strong else EvidenceDirection.INDETERMINATE)
            strength   = (EvidenceStrength.STRONG if is_strong else EvidenceStrength.MODERATE)

            logger.warning(f"Heuristic watermark pattern detected in Frequency-domain: (confidence: {confidence:.2f})")

            return [EvidenceResult(source     = EvidenceType.WATERMARK,
                                   finding    = "Statistical patterns consistent with invisible watermarking or steganographic embedding detected",
                                   direction  = direction,
                                   strength   = strength,
                                   confidence = confidence,
                                   details    = {"method"          : "frequency_analysis",
                                                 "note"            : "Heuristic detection; not cryptographic or vendor watermark verification",
                                                 "anomalous_bands" : len(band_anomalies),
                                                 "band_details"    : band_anomalies,
                                                 "symmetry_score"  : float(symmetry_score),
                                                },
                                   analyzer   = "watermark_analyzer",
                                  )
                   ]

    except Exception as e:
        logger.error(f"Error in frequency watermark detection: {e}")

    return []
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
def _detect_lsb_steganography(self, image: np.ndarray) -> List[EvidenceResult]:
    """
    Look for steganographic embedding in the least-significant-bit planes

    For every channel the LSB plane (and the bit-7 plane, kept for reference) is extracted and scored
    with Shannon entropy, a chi-square statistic and a runs score. The per-channel scores are averaged;
    a detection requires the averaged LSB entropy to exceed LSB_ENTROPY_THRESHOLD together with either
    the chi-square or the runs score exceeding its own threshold.

    Arguments:
    ----------
        image { np.ndarray } : Image array; a 3-dimensional array is split per channel, anything else is
                               treated as one channel (bitwise ops imply an integer dtype - presumably uint8, confirm)

    Returns:
    --------
        { list }             : One-element list holding the EvidenceResult when a pattern is detected,
                               otherwise an empty list (also when any internal step raises)
    """
    logger.debug("Checking for LSB steganography")

    try:
        # One entry per colour channel; a non-3D image is analysed as a single plane
        channels        = cv2.split(image) if (len(image.shape) == 3) else [image]
        channel_results = list()

        for idx, channel in enumerate(channels):
            lowest_bits  = channel & 1          # least significant bit
            highest_bits = (channel >> 7) & 1   # bit 7, recorded only for comparison

            channel_results.append({'channel'     : idx,
                                    'lsb_entropy' : float(self._shannon_entropy(lowest_bits)),
                                    'msb_entropy' : float(self._shannon_entropy(highest_bits)),
                                    'chi_square'  : float(self._chi_square_test(lowest_bits)),
                                    'runs_score'  : float(self._runs_test(lowest_bits))
                                   })

        def channel_mean(key):
            # Average one per-channel statistic over all analysed channels
            return np.mean([entry[key] for entry in channel_results])

        avg_lsb_entropy = channel_mean('lsb_entropy')
        avg_chi_square  = channel_mean('chi_square')
        avg_runs        = channel_mean('runs_score')

        # Entropy is the mandatory signal; chi-square or the runs score must corroborate it
        entropy_flag    = (avg_lsb_entropy > WATERMARK_ANALYSIS_PARAMS.LSB_ENTROPY_THRESHOLD)
        chi_flag        = (avg_chi_square > WATERMARK_ANALYSIS_PARAMS.CHI_SQUARE_THRESHOLD)
        runs_flag       = (avg_runs > WATERMARK_ANALYSIS_PARAMS.RUNS_SCORE_THRESHOLD)

        if not (entropy_flag and (chi_flag or runs_flag)):
            return []

        # Each input is normalised to at most 1.0 before averaging
        entropy_component = min((avg_lsb_entropy - WATERMARK_ANALYSIS_PARAMS.LSB_ENTROPY_NORM_BASE) / WATERMARK_ANALYSIS_PARAMS.LSB_ENTROPY_NORM_RANGE, 1.0)
        chi_component     = min(avg_chi_square / WATERMARK_ANALYSIS_PARAMS.CHI_SQUARE_NORM_FACTOR, 1.0)
        confidence        = self._calculate_confidence([entropy_component, chi_component, avg_runs])

        # Only a strong heuristic hit is allowed to point towards AI generation
        if (confidence >= WATERMARK_ANALYSIS_PARAMS.STRONG_CONFIDENCE_THRESHOLD):
            direction = EvidenceDirection.AI_GENERATED
            strength  = EvidenceStrength.STRONG

        else:
            direction = EvidenceDirection.INDETERMINATE
            strength  = EvidenceStrength.MODERATE

        logger.warning(f"Heuristic watermark pattern detected in LSB steganography-domain: (confidence: {confidence:.2f})")

        evidence = EvidenceResult(source     = EvidenceType.WATERMARK,
                                  finding    = "Statistical patterns consistent with invisible watermarking or steganographic embedding detected",
                                  direction  = direction,
                                  strength   = strength,
                                  confidence = confidence,
                                  details    = {"method"          : "lsb_analysis",
                                                "note"            : "Heuristic detection; not cryptographic or vendor watermark verification",
                                                "avg_lsb_entropy" : float(avg_lsb_entropy),
                                                "avg_chi_square"  : float(avg_chi_square),
                                                "avg_runs_score"  : float(avg_runs),
                                                "avg_msb_entropy" : float(np.mean([r["msb_entropy"] for r in channel_results])),
                                                "channel_results" : channel_results
                                               },
                                  analyzer   = "watermark_analyzer",
                                 )

        return [evidence]

    except Exception as e:
        logger.error(f"Error in LSB steganography detection: {e}")

    return []
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
def _calculate_kurtosis(self, data: np.ndarray) -> float:
|
| 329 |
+
"""
|
| 330 |
+
Calculate kurtosis: measure of distribution tailedness
|
| 331 |
+
"""
|
| 332 |
+
data_flat = data.flatten()
|
| 333 |
+
mean = np.mean(data_flat)
|
| 334 |
+
std = np.std(data_flat)
|
| 335 |
+
|
| 336 |
+
if (std == 0):
|
| 337 |
+
return 0.0
|
| 338 |
+
|
| 339 |
+
normalized = (data_flat - mean) / std
|
| 340 |
+
|
| 341 |
+
return float(np.mean(normalized ** 4))
|
| 342 |
+
|
| 343 |
+
|
| 344 |
+
def _detect_periodicity(self, *coeffs) -> float:
|
| 345 |
+
"""
|
| 346 |
+
Detect periodic patterns in coefficients: grid-based watermarks
|
| 347 |
+
"""
|
| 348 |
+
try:
|
| 349 |
+
scores = list()
|
| 350 |
+
|
| 351 |
+
for coeff in coeffs:
|
| 352 |
+
# Apply autocorrelation
|
| 353 |
+
autocorr = correlate2d(coeff, coeff, mode = 'same')
|
| 354 |
+
|
| 355 |
+
max_val = np.max(autocorr)
|
| 356 |
+
if (max_val == 0):
|
| 357 |
+
continue
|
| 358 |
+
|
| 359 |
+
autocorr = autocorr / max_val
|
| 360 |
+
|
| 361 |
+
# Look for secondary peaks (indicating periodicity)
|
| 362 |
+
center = tuple(s // 2 for s in autocorr.shape)
|
| 363 |
+
|
| 364 |
+
# Remove center peak
|
| 365 |
+
autocorr[center] = 0
|
| 366 |
+
|
| 367 |
+
max_secondary = np.max(autocorr)
|
| 368 |
+
scores.append(max_secondary)
|
| 369 |
+
|
| 370 |
+
return float(np.mean(scores))
|
| 371 |
+
|
| 372 |
+
except:
|
| 373 |
+
return 0.0
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
def _create_radial_mask(self, shape: Tuple[int, int], inner_ratio: float, outer_ratio: float) -> np.ndarray:
|
| 377 |
+
"""
|
| 378 |
+
Create radial mask for frequency analysis
|
| 379 |
+
"""
|
| 380 |
+
h, w = shape
|
| 381 |
+
center_y, center_x = h // 2, w // 2
|
| 382 |
+
max_radius = min(center_y, center_x)
|
| 383 |
+
|
| 384 |
+
y, x = np.ogrid[:h, :w]
|
| 385 |
+
distances = np.sqrt((y - center_y)**2 + (x - center_x)**2)
|
| 386 |
+
|
| 387 |
+
mask = (distances >= inner_ratio * max_radius) & (distances < outer_ratio * max_radius)
|
| 388 |
+
|
| 389 |
+
return mask
|
| 390 |
+
|
| 391 |
+
|
| 392 |
+
def _check_spectral_symmetry(self, magnitude: np.ndarray) -> float:
|
| 393 |
+
"""
|
| 394 |
+
Check for symmetric patterns in frequency spectrum
|
| 395 |
+
"""
|
| 396 |
+
try:
|
| 397 |
+
h, w = magnitude.shape
|
| 398 |
+
left_half = magnitude[:, :w//2]
|
| 399 |
+
right_half = np.fliplr(magnitude[:, w//2:])
|
| 400 |
+
|
| 401 |
+
# Ensure same size
|
| 402 |
+
min_width = min(left_half.shape[1], right_half.shape[1])
|
| 403 |
+
left_half = left_half[:, :min_width]
|
| 404 |
+
right_half = right_half[:, :min_width]
|
| 405 |
+
|
| 406 |
+
# Calculate correlation
|
| 407 |
+
correlation = np.corrcoef(left_half.flatten(), right_half.flatten())[0, 1]
|
| 408 |
+
|
| 409 |
+
return float(abs(correlation)) if not np.isnan(correlation) else 0.0
|
| 410 |
+
|
| 411 |
+
except:
|
| 412 |
+
return 0.0
|
| 413 |
+
|
| 414 |
+
|
| 415 |
+
def _shannon_entropy(self, data: np.ndarray) -> float:
|
| 416 |
+
"""
|
| 417 |
+
Calculate Shannon entropy
|
| 418 |
+
"""
|
| 419 |
+
values, counts = np.unique(data.flatten(), return_counts = True)
|
| 420 |
+
probabilities = counts / counts.sum()
|
| 421 |
+
|
| 422 |
+
return float(entropy(probabilities, base=2))
|
| 423 |
+
|
| 424 |
+
|
| 425 |
+
def _chi_square_test(self, data: np.ndarray) -> float:
|
| 426 |
+
"""
|
| 427 |
+
Chi-square test for uniformity
|
| 428 |
+
"""
|
| 429 |
+
values, counts = np.unique(data.flatten(), return_counts = True)
|
| 430 |
+
expected = len(data.flatten()) / len(values)
|
| 431 |
+
chi_square = np.sum((counts - expected) ** 2 / expected)
|
| 432 |
+
|
| 433 |
+
return float(chi_square)
|
| 434 |
+
|
| 435 |
+
|
| 436 |
+
def _runs_test(self, data: np.ndarray) -> float:
|
| 437 |
+
"""
|
| 438 |
+
Runs test for randomness: normalized score
|
| 439 |
+
"""
|
| 440 |
+
flat = data.flatten()
|
| 441 |
+
median = np.median(flat)
|
| 442 |
+
runs = np.sum(np.abs(np.diff((flat > median).astype(int))))
|
| 443 |
+
expected_runs = len(flat) / 2
|
| 444 |
+
|
| 445 |
+
if (expected_runs == 0):
|
| 446 |
+
return 0.0
|
| 447 |
+
|
| 448 |
+
return float(min(runs / expected_runs, 1.0))
|
| 449 |
+
|
| 450 |
+
|
| 451 |
+
def _calculate_confidence(self, scores: List[float]) -> float:
|
| 452 |
+
"""
|
| 453 |
+
Calculate overall confidence from multiple scores
|
| 454 |
+
"""
|
| 455 |
+
valid_scores = [score for score in scores if ((isinstance(score, (int, float))) and (not np.isnan(score)))]
|
| 456 |
+
|
| 457 |
+
if not valid_scores:
|
| 458 |
+
return 0.0
|
| 459 |
+
|
| 460 |
+
confidence = np.mean(valid_scores)
|
| 461 |
+
|
| 462 |
+
# Cap at 0.95
|
| 463 |
+
return float(min(max(confidence, 0.0), WATERMARK_ANALYSIS_PARAMS.CONFIDENCE_CAP))
|
features/batch_processor.py
CHANGED
|
@@ -1,20 +1,25 @@
|
|
| 1 |
# Dependencies
|
| 2 |
import time
|
|
|
|
| 3 |
from typing import List
|
| 4 |
from typing import Dict
|
| 5 |
from typing import Tuple
|
| 6 |
from pathlib import Path
|
| 7 |
from typing import Callable
|
|
|
|
| 8 |
from utils.logger import get_logger
|
| 9 |
from config.settings import settings
|
| 10 |
from config.schemas import AnalysisResult
|
|
|
|
| 11 |
from concurrent.futures import TimeoutError
|
| 12 |
from concurrent.futures import as_completed
|
| 13 |
from config.constants import DetectionStatus
|
| 14 |
from config.schemas import BatchAnalysisResult
|
| 15 |
-
from metrics.aggregator import MetricsAggregator
|
| 16 |
from concurrent.futures import ThreadPoolExecutor
|
|
|
|
| 17 |
from features.threshold_manager import ThresholdManager
|
|
|
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
# Setup Logging
|
|
@@ -37,18 +42,24 @@ class BatchProcessor:
|
|
| 37 |
Initialize Batch Processor
|
| 38 |
"""
|
| 39 |
# Instantiate threshold manager
|
| 40 |
-
self.threshold_manager
|
| 41 |
|
| 42 |
-
# Initialize
|
| 43 |
-
self.aggregator
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
# Fix number of workers
|
| 46 |
-
self.max_workers
|
| 47 |
|
| 48 |
logger.info(f"BatchProcessor initialized with max_workers={self.max_workers}, parallel={settings.PARALLEL_PROCESSING}")
|
| 49 |
|
| 50 |
|
| 51 |
-
def process_batch(self, image_files: List[Dict[str,
|
| 52 |
"""
|
| 53 |
Process multiple images with automatic parallel/sequential switching
|
| 54 |
|
|
@@ -231,17 +242,26 @@ class BatchProcessor:
|
|
| 231 |
{ AnalysisResult } : Analysis result or None on error
|
| 232 |
"""
|
| 233 |
try:
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
|
| 239 |
except Exception as e:
|
| 240 |
logger.error(f"Failed to process {filename}: {e}", exc_info = True)
|
| 241 |
return None
|
| 242 |
|
| 243 |
|
| 244 |
-
def _calculate_summary(self, results: List[AnalysisResult], total: int) -> Dict[str,
|
| 245 |
"""
|
| 246 |
Calculate summary statistics from results
|
| 247 |
|
|
@@ -256,28 +276,38 @@ class BatchProcessor:
|
|
| 256 |
{ dict } : Summary statistics
|
| 257 |
"""
|
| 258 |
# Calculate processing stats
|
| 259 |
-
|
| 260 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
|
| 262 |
-
processed = len(results)
|
| 263 |
-
failed = total - processed
|
| 264 |
-
success_rate = int((processed / total * 100) if (total > 0) else 0)
|
| 265 |
-
|
| 266 |
# Calculate average scores
|
| 267 |
-
avg_score
|
| 268 |
-
avg_confidence
|
| 269 |
-
avg_proc_time
|
| 270 |
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
|
| 282 |
def _create_empty_batch_result(self) -> BatchAnalysisResult:
|
| 283 |
"""
|
|
@@ -291,9 +321,11 @@ class BatchProcessor:
|
|
| 291 |
processed = 0,
|
| 292 |
failed = 0,
|
| 293 |
results = [],
|
| 294 |
-
summary = {
|
| 295 |
-
|
| 296 |
-
|
|
|
|
|
|
|
| 297 |
},
|
| 298 |
total_processing_time = 0.0,
|
| 299 |
)
|
|
|
|
| 1 |
# Dependencies
|
| 2 |
import time
|
| 3 |
+
from typing import Any
|
| 4 |
from typing import List
|
| 5 |
from typing import Dict
|
| 6 |
from typing import Tuple
|
| 7 |
from pathlib import Path
|
| 8 |
from typing import Callable
|
| 9 |
+
from collections import Counter
|
| 10 |
from utils.logger import get_logger
|
| 11 |
from config.settings import settings
|
| 12 |
from config.schemas import AnalysisResult
|
| 13 |
+
from config.constants import FinalDecision
|
| 14 |
from concurrent.futures import TimeoutError
|
| 15 |
from concurrent.futures import as_completed
|
| 16 |
from config.constants import DetectionStatus
|
| 17 |
from config.schemas import BatchAnalysisResult
|
|
|
|
| 18 |
from concurrent.futures import ThreadPoolExecutor
|
| 19 |
+
from metrics.signal_aggregator import SignalAggregator
|
| 20 |
from features.threshold_manager import ThresholdManager
|
| 21 |
+
from decision_builders.decision_policy import DecisionPolicy
|
| 22 |
+
from evidence_analyzers.evidence_aggregator import EvidenceAggregator
|
| 23 |
|
| 24 |
|
| 25 |
# Setup Logging
|
|
|
|
| 42 |
Initialize Batch Processor
|
| 43 |
"""
|
| 44 |
# Instantiate threshold manager
|
| 45 |
+
self.threshold_manager = threshold_manager
|
| 46 |
|
| 47 |
+
# Initialize signal aggregators
|
| 48 |
+
self.aggregator = SignalAggregator(threshold_manager = threshold_manager)
|
| 49 |
+
|
| 50 |
+
# Initialize evidence-based aggregator
|
| 51 |
+
self.evidence_aggregator = EvidenceAggregator()
|
| 52 |
+
|
| 53 |
+
# Initialize decision-policy engine
|
| 54 |
+
self.decision_policy = DecisionPolicy()
|
| 55 |
|
| 56 |
# Fix number of workers
|
| 57 |
+
self.max_workers = settings.MAX_WORKERS if settings.PARALLEL_PROCESSING else 1
|
| 58 |
|
| 59 |
logger.info(f"BatchProcessor initialized with max_workers={self.max_workers}, parallel={settings.PARALLEL_PROCESSING}")
|
| 60 |
|
| 61 |
|
| 62 |
+
def process_batch(self, image_files: List[Dict[str, Any]], on_progress: Callable[[int, int, str], None] | None = None) -> BatchAnalysisResult:
|
| 63 |
"""
|
| 64 |
Process multiple images with automatic parallel/sequential switching
|
| 65 |
|
|
|
|
| 242 |
{ AnalysisResult } : Analysis result or None on error
|
| 243 |
"""
|
| 244 |
try:
|
| 245 |
+
# Tier-1 Signal
|
| 246 |
+
analysis = self.aggregator.analyze_image(image_path = image_path,
|
| 247 |
+
filename = filename,
|
| 248 |
+
image_size = image_size,
|
| 249 |
+
)
|
| 250 |
+
|
| 251 |
+
# Tier-2 evidence
|
| 252 |
+
analysis.evidence = self.evidence_aggregator.analyze(image_path = image_path)
|
| 253 |
+
|
| 254 |
+
# Final decision
|
| 255 |
+
final_analysis_result = self.decision_policy.apply(analysis = analysis)
|
| 256 |
+
|
| 257 |
+
return final_analysis_result
|
| 258 |
|
| 259 |
except Exception as e:
|
| 260 |
logger.error(f"Failed to process {filename}: {e}", exc_info = True)
|
| 261 |
return None
|
| 262 |
|
| 263 |
|
| 264 |
+
def _calculate_summary(self, results: List[AnalysisResult], total: int) -> Dict[str, Any]:
|
| 265 |
"""
|
| 266 |
Calculate summary statistics from results
|
| 267 |
|
|
|
|
| 276 |
{ dict } : Summary statistics
|
| 277 |
"""
|
| 278 |
# Calculate processing stats
|
| 279 |
+
processed = len(results)
|
| 280 |
+
failed = total - processed
|
| 281 |
+
success_rate = int((processed / total * 100) if total > 0 else 0)
|
| 282 |
+
|
| 283 |
+
# Count final decisions safely
|
| 284 |
+
decision_counts = Counter(result.final_decision.value for result in results)
|
| 285 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
# Calculate average scores
|
| 287 |
+
avg_score = sum(r.overall_score for r in results) / processed if results else 0.0
|
| 288 |
+
avg_confidence = sum(r.confidence for r in results) / processed if results else 0
|
| 289 |
+
avg_proc_time = sum(r.processing_time for r in results) / processed if results else 0.0
|
| 290 |
|
| 291 |
+
# Final decision distribution
|
| 292 |
+
decision_distribution = {FinalDecision.CONFIRMED_AI_GENERATED.value : decision_counts.get(FinalDecision.CONFIRMED_AI_GENERATED.value, 0),
|
| 293 |
+
FinalDecision.SUSPICIOUS_AI_LIKELY.value : decision_counts.get(FinalDecision.SUSPICIOUS_AI_LIKELY.value, 0),
|
| 294 |
+
FinalDecision.AUTHENTIC_BUT_REVIEW.value : decision_counts.get(FinalDecision.AUTHENTIC_BUT_REVIEW.value, 0),
|
| 295 |
+
FinalDecision.MOSTLY_AUTHENTIC.value : decision_counts.get(FinalDecision.MOSTLY_AUTHENTIC.value, 0),
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
summary = {"processed" : processed,
|
| 299 |
+
"failed" : failed,
|
| 300 |
+
"success_rate" : success_rate,
|
| 301 |
+
"avg_score" : round(avg_score, 3),
|
| 302 |
+
"avg_confidence" : int(avg_confidence),
|
| 303 |
+
"avg_proc_time" : round(avg_proc_time, 2),
|
| 304 |
+
}
|
| 305 |
+
|
| 306 |
+
# Update summary dict with decision_distribution dict
|
| 307 |
+
summary.update(decision_distribution)
|
| 308 |
+
|
| 309 |
+
return summary
|
| 310 |
+
|
| 311 |
|
| 312 |
def _create_empty_batch_result(self) -> BatchAnalysisResult:
|
| 313 |
"""
|
|
|
|
| 321 |
processed = 0,
|
| 322 |
failed = 0,
|
| 323 |
results = [],
|
| 324 |
+
summary = {FinalDecision.CONFIRMED_AI_GENERATED.value : 0,
|
| 325 |
+
FinalDecision.SUSPICIOUS_AI_LIKELY.value : 0,
|
| 326 |
+
FinalDecision.AUTHENTIC_BUT_REVIEW.value : 0,
|
| 327 |
+
FinalDecision.MOSTLY_AUTHENTIC.value : 0,
|
| 328 |
+
"success_rate" : 0,
|
| 329 |
},
|
| 330 |
total_processing_time = 0.0,
|
| 331 |
)
|
features/detailed_result_maker.py
CHANGED
|
@@ -2,11 +2,16 @@
|
|
| 2 |
import pandas as pd
|
| 3 |
from typing import Dict
|
| 4 |
from typing import List
|
| 5 |
-
from typing import Optional
|
| 6 |
from utils.logger import get_logger
|
| 7 |
from config.constants import MetricType
|
|
|
|
|
|
|
| 8 |
from config.constants import SignalStatus
|
| 9 |
from config.schemas import AnalysisResult
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
from config.constants import SIGNAL_THRESHOLDS
|
| 11 |
|
| 12 |
|
|
@@ -16,40 +21,59 @@ logger = get_logger(__name__)
|
|
| 16 |
|
| 17 |
class DetailedResultMaker:
|
| 18 |
"""
|
| 19 |
-
Extract and format detailed analysis results for
|
| 20 |
|
| 21 |
Purpose:
|
| 22 |
--------
|
| 23 |
-
- Extracts
|
| 24 |
-
- Formats data
|
| 25 |
-
- Provides
|
| 26 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
Output Formats:
|
| 29 |
---------------
|
| 30 |
-
1. Structured dictionaries for
|
| 31 |
-
2. Pandas DataFrames for reports
|
| 32 |
-
3. Hierarchical JSON
|
| 33 |
"""
|
| 34 |
def __init__(self, signal_thresholds: dict | None = None):
|
| 35 |
"""
|
| 36 |
Initialize Detailed Result Maker
|
| 37 |
"""
|
| 38 |
-
self.metric_display_names
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
-
self.signal_thresholds
|
| 46 |
|
| 47 |
logger.debug("DetailedResultMaker initialized")
|
| 48 |
|
| 49 |
|
| 50 |
def extract_detailed_results(self, analysis_result: AnalysisResult) -> Dict:
|
| 51 |
"""
|
| 52 |
-
Extract
|
|
|
|
|
|
|
| 53 |
|
| 54 |
Arguments:
|
| 55 |
----------
|
|
@@ -57,24 +81,35 @@ class DetailedResultMaker:
|
|
| 57 |
|
| 58 |
Returns:
|
| 59 |
--------
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
"""
|
| 62 |
logger.debug(f"Extracting detailed results for: {analysis_result.filename}")
|
| 63 |
|
| 64 |
-
detailed = {"filename"
|
| 65 |
-
"
|
| 66 |
-
"
|
| 67 |
-
"
|
|
|
|
|
|
|
|
|
|
| 68 |
}
|
| 69 |
|
| 70 |
-
logger.debug(f"Extracted {len(detailed['metrics_detailed'])} metric details")
|
| 71 |
|
| 72 |
return detailed
|
| 73 |
|
| 74 |
|
| 75 |
def create_detailed_table(self, analysis_result: AnalysisResult) -> pd.DataFrame:
|
| 76 |
"""
|
| 77 |
-
Create detailed
|
|
|
|
|
|
|
| 78 |
|
| 79 |
Arguments:
|
| 80 |
----------
|
|
@@ -82,58 +117,204 @@ class DetailedResultMaker:
|
|
| 82 |
|
| 83 |
Returns:
|
| 84 |
--------
|
| 85 |
-
|
| 86 |
"""
|
| 87 |
rows = list()
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
for metric_type, metric_result in analysis_result.metric_results.items():
|
| 90 |
display_name = self.metric_display_names.get(metric_type, metric_type.value)
|
| 91 |
|
| 92 |
-
|
| 93 |
-
"
|
| 94 |
-
"
|
| 95 |
-
"
|
|
|
|
| 96 |
}
|
| 97 |
|
| 98 |
-
# Add key details
|
| 99 |
details = self._extract_key_details(metric_type = metric_type,
|
| 100 |
metric_result = metric_result,
|
| 101 |
)
|
|
|
|
| 102 |
|
| 103 |
-
|
| 104 |
-
rows.append(row)
|
| 105 |
|
| 106 |
-
|
| 107 |
-
dataframe = pd.DataFrame(data = rows)
|
| 108 |
|
| 109 |
-
logger.debug(f"Created detailed table with {len(dataframe)} rows
|
| 110 |
|
| 111 |
return dataframe
|
| 112 |
-
|
| 113 |
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
| 115 |
"""
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
analysis_result { AnalysisResult } : Complete analysis result
|
| 121 |
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
"""
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
"metric_breakdown" : self._create_metric_breakdown(analysis_result = analysis_result),
|
| 129 |
-
"forensic_details" : self._create_forensic_details(analysis_result = analysis_result),
|
| 130 |
-
"recommendations" : self._create_recommendations(analysis_result = analysis_result),
|
| 131 |
-
}
|
| 132 |
|
| 133 |
-
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
def _extract_overall_summary(self, analysis_result: AnalysisResult) -> Dict:
|
| 139 |
"""
|
|
@@ -162,34 +343,54 @@ class DetailedResultMaker:
|
|
| 162 |
"display_name" : self.metric_display_names.get(metric_type, metric_type.value),
|
| 163 |
"score" : round(metric_result.score, 3),
|
| 164 |
"confidence" : round(metric_result.confidence, 3) if metric_result.confidence is not None else None,
|
| 165 |
-
"status" : self._score_to_status(
|
| 166 |
"details" : metric_result.details or {},
|
| 167 |
-
"interpretation" : self._interpret_metric(metric_type
|
| 168 |
-
|
| 169 |
-
),
|
| 170 |
}
|
| 171 |
-
|
| 172 |
metrics_detailed.append(metric_detail)
|
| 173 |
|
| 174 |
# Sort by score (highest first)
|
| 175 |
-
metrics_detailed.sort(key
|
|
|
|
|
|
|
| 176 |
|
| 177 |
return metrics_detailed
|
| 178 |
|
| 179 |
|
| 180 |
def _extract_metadata(self, analysis_result: AnalysisResult) -> Dict:
|
| 181 |
"""
|
| 182 |
-
Extract processing metadata
|
| 183 |
"""
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
-
def _extract_key_details(self, metric_type: MetricType, metric_result) -> Dict:
|
| 193 |
"""
|
| 194 |
Extract key details specific to each metric type
|
| 195 |
"""
|
|
@@ -201,25 +402,26 @@ class DetailedResultMaker:
|
|
| 201 |
}
|
| 202 |
|
| 203 |
elif (metric_type == MetricType.FREQUENCY):
|
| 204 |
-
return {"HF_Ratio"
|
| 205 |
-
"HF_Anomaly"
|
| 206 |
-
"Spectrum_Bins"
|
| 207 |
}
|
| 208 |
|
| 209 |
elif (metric_type == MetricType.NOISE):
|
| 210 |
-
return {"Mean_Noise"
|
| 211 |
-
"CV"
|
| 212 |
-
"Patches_Valid"
|
| 213 |
}
|
| 214 |
|
| 215 |
elif (metric_type == MetricType.TEXTURE):
|
| 216 |
-
return {"Smooth_Ratio"
|
| 217 |
-
"Contrast_Mean"
|
| 218 |
-
"Patches_Used"
|
| 219 |
}
|
| 220 |
|
| 221 |
elif (metric_type == MetricType.COLOR):
|
| 222 |
sat_stats = details.get('saturation_stats', {})
|
|
|
|
| 223 |
return {"Mean_Saturation" : sat_stats.get('mean_saturation', 'N/A'),
|
| 224 |
"High_Sat_Ratio" : sat_stats.get('high_sat_ratio', 'N/A'),
|
| 225 |
}
|
|
@@ -227,42 +429,41 @@ class DetailedResultMaker:
|
|
| 227 |
return {}
|
| 228 |
|
| 229 |
|
| 230 |
-
def _interpret_metric(self, metric_type: MetricType, metric_result) -> str:
|
| 231 |
"""
|
| 232 |
Provide human-readable interpretation of metric result
|
| 233 |
"""
|
| 234 |
-
score = metric_result.score
|
| 235 |
details = metric_result.details or {}
|
| 236 |
|
| 237 |
if (metric_type == MetricType.GRADIENT):
|
| 238 |
eig_ratio = details.get('eigenvalue_ratio')
|
| 239 |
|
| 240 |
if eig_ratio:
|
| 241 |
-
return f"Eigenvalue ratio of {eig_ratio:.3f} ({'high' if eig_ratio > 0.85 else 'low'} alignment)"
|
| 242 |
|
| 243 |
return "Gradient structure analysis"
|
| 244 |
|
| 245 |
-
elif
|
| 246 |
hf_ratio = details.get('hf_ratio')
|
| 247 |
-
|
| 248 |
if hf_ratio:
|
| 249 |
-
return f"High-freq ratio: {hf_ratio:.3f} ({'elevated' if hf_ratio > 0.35 else 'low' if hf_ratio < 0.08 else 'normal'})"
|
| 250 |
|
| 251 |
return "Frequency spectrum analysis"
|
| 252 |
|
| 253 |
elif (metric_type == MetricType.NOISE):
|
| 254 |
mean_noise = details.get('mean_noise')
|
| 255 |
-
|
| 256 |
if mean_noise:
|
| 257 |
-
return f"Mean noise: {mean_noise:.2f} ({'low' if mean_noise < 1.5 else 'normal'})"
|
| 258 |
|
| 259 |
return "Noise pattern analysis"
|
| 260 |
|
| 261 |
elif (metric_type == MetricType.TEXTURE):
|
| 262 |
smooth_ratio = details.get('smooth_ratio')
|
| 263 |
-
|
| 264 |
if smooth_ratio is not None:
|
| 265 |
-
return f"Smooth regions: {smooth_ratio:.1%} ({'excessive' if smooth_ratio > 0.4 else 'normal'})"
|
| 266 |
|
| 267 |
return "Texture variation analysis"
|
| 268 |
|
|
@@ -271,161 +472,23 @@ class DetailedResultMaker:
|
|
| 271 |
mean_sat = sat_stats.get('mean_saturation')
|
| 272 |
|
| 273 |
if mean_sat:
|
| 274 |
-
return f"Mean saturation: {mean_sat:.2f} ({'high' if mean_sat > 0.65 else 'normal'})"
|
| 275 |
|
| 276 |
return "Color distribution analysis"
|
| 277 |
|
| 278 |
return "Analysis complete"
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
def _create_report_header(self, analysis_result: AnalysisResult) -> Dict:
|
| 282 |
-
"""
|
| 283 |
-
Create report header section
|
| 284 |
-
"""
|
| 285 |
-
return {"filename" : analysis_result.filename,
|
| 286 |
-
"analysis_date" : analysis_result.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
|
| 287 |
-
"image_size" : f"{analysis_result.image_size[0]} × {analysis_result.image_size[1]} pixels",
|
| 288 |
-
"processing_time" : f"{analysis_result.processing_time:.2f} seconds",
|
| 289 |
-
}
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
def _create_overall_assessment(self, analysis_result: AnalysisResult) -> Dict:
|
| 293 |
-
"""
|
| 294 |
-
Create overall assessment section
|
| 295 |
-
"""
|
| 296 |
-
return {"status" : analysis_result.status.value,
|
| 297 |
-
"score" : round(analysis_result.overall_score * 100, 1),
|
| 298 |
-
"confidence" : analysis_result.confidence,
|
| 299 |
-
"verdict" : "REVIEW REQUIRED" if analysis_result.status.value == "REVIEW_REQUIRED" else "LIKELY AUTHENTIC",
|
| 300 |
-
"risk_level" : self._calculate_risk_level(score = analysis_result.overall_score),
|
| 301 |
-
}
|
| 302 |
|
| 303 |
|
| 304 |
-
def
|
| 305 |
"""
|
| 306 |
-
|
| 307 |
-
"""
|
| 308 |
-
breakdown = list()
|
| 309 |
-
|
| 310 |
-
for signal in analysis_result.signals:
|
| 311 |
-
metric_result = analysis_result.metric_results.get(signal.metric_type)
|
| 312 |
-
|
| 313 |
-
item = {"metric" : signal.name,
|
| 314 |
-
"score" : f"{signal.score * 100:.1f}%",
|
| 315 |
-
"status" : signal.status.value.upper(),
|
| 316 |
-
"confidence" : f"{metric_result.confidence * 100:.1f}%" if metric_result.confidence else "N/A",
|
| 317 |
-
"explanation" : signal.explanation,
|
| 318 |
-
"key_findings" : self.extract_key_findings(metric_type = signal.metric_type,
|
| 319 |
-
metric_result = metric_result,
|
| 320 |
-
),
|
| 321 |
-
}
|
| 322 |
-
|
| 323 |
-
breakdown.append(item)
|
| 324 |
-
|
| 325 |
-
return breakdown
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
def _create_forensic_details(self, analysis_result: AnalysisResult) -> Dict:
|
| 329 |
-
"""
|
| 330 |
-
Create forensic details section
|
| 331 |
-
"""
|
| 332 |
-
forensic = dict()
|
| 333 |
-
|
| 334 |
-
for metric_type, metric_result in analysis_result.metric_results.items():
|
| 335 |
-
metric_name = self.metric_display_names.get(metric_type, metric_type.value)
|
| 336 |
-
forensic[metric_name] = metric_result.details or {"note": "No detailed forensics available"}
|
| 337 |
-
|
| 338 |
-
return forensic
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
def _create_recommendations(self, analysis_result: AnalysisResult) -> Dict:
|
| 342 |
-
"""
|
| 343 |
-
Create recommendations section
|
| 344 |
-
"""
|
| 345 |
-
score = analysis_result.overall_score
|
| 346 |
-
|
| 347 |
-
if (score >= 0.85):
|
| 348 |
-
return {"action" : "Immediate manual verification required",
|
| 349 |
-
"priority" : "HIGH",
|
| 350 |
-
"next_steps" : ["Forensic analysis", "Reverse image search", "Metadata inspection", "Expert review"],
|
| 351 |
-
"confidence" : "Very high likelihood of AI generation",
|
| 352 |
-
}
|
| 353 |
-
|
| 354 |
-
elif (score >= 0.70):
|
| 355 |
-
return {"action" : "Manual verification recommended",
|
| 356 |
-
"priority" : "MEDIUM",
|
| 357 |
-
"next_steps" : ["Visual inspection", "Compare with authentic samples", "Check source provenance"],
|
| 358 |
-
"confidence" : "High likelihood of AI generation",
|
| 359 |
-
}
|
| 360 |
-
|
| 361 |
-
elif (score >= 0.50):
|
| 362 |
-
return {"action" : "Optional review suggested",
|
| 363 |
-
"priority" : "LOW",
|
| 364 |
-
"next_steps" : ["May be edited photo", "Verify image source", "Check for inconsistencies"],
|
| 365 |
-
"confidence" : "Moderate indicators present",
|
| 366 |
-
}
|
| 367 |
-
|
| 368 |
-
else:
|
| 369 |
-
return {"action" : "No immediate action required",
|
| 370 |
-
"priority" : "NONE",
|
| 371 |
-
"next_steps" : ["Proceed with normal workflow"],
|
| 372 |
-
"confidence" : "Low likelihood of AI generation",
|
| 373 |
-
}
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
def _score_to_status(self, score: float) -> str:
    """
    Convert a score into a status label

    Arguments:
    ----------
        score { float } : Score compared against self.signal_thresholds

    Returns:
    --------
        { str } : "FLAGGED", "WARNING" or "PASSED"
    """
    # Checked from the strictest status down; each threshold is looked up only when it is actually needed
    for status, label in ((SignalStatus.FLAGGED, "FLAGGED"), (SignalStatus.WARNING, "WARNING")):
        if (score >= self.signal_thresholds[status]):
            return label

    return "PASSED"
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
def _calculate_avg_confidence(self, analysis_result: AnalysisResult) -> float:
|
| 391 |
-
"""
|
| 392 |
-
Calculate average confidence across all metrics
|
| 393 |
-
"""
|
| 394 |
-
confidences = [mr.confidence for mr in analysis_result.metric_results.values() if mr.confidence is not None]
|
| 395 |
-
|
| 396 |
-
return round(sum(confidences) / len(confidences), 3) if confidences else 0.0
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
def _calculate_risk_level(self, score: float) -> str:
|
| 400 |
-
"""
|
| 401 |
-
Calculate risk level from score
|
| 402 |
-
"""
|
| 403 |
-
if (score >= 0.85):
|
| 404 |
-
return "CRITICAL"
|
| 405 |
-
|
| 406 |
-
elif (score >= 0.70):
|
| 407 |
-
return "HIGH"
|
| 408 |
-
|
| 409 |
-
elif (score >= 0.50):
|
| 410 |
-
return "MEDIUM"
|
| 411 |
-
|
| 412 |
-
else:
|
| 413 |
-
return "LOW"
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
def extract_key_findings(self, metric_type: MetricType, metric_result) -> List[str]:
|
| 417 |
-
"""
|
| 418 |
-
Extract human-readable key forensic findings for a given metric used by:
|
| 419 |
-
- Detailed UI views
|
| 420 |
-
- CSV reports
|
| 421 |
-
- JSON reports
|
| 422 |
"""
|
| 423 |
findings = list()
|
| 424 |
details = metric_result.details or {}
|
| 425 |
|
| 426 |
if (metric_type == MetricType.GRADIENT):
|
| 427 |
eig_ratio = details.get('eigenvalue_ratio')
|
| 428 |
-
|
| 429 |
if eig_ratio:
|
| 430 |
findings.append(f"Eigenvalue ratio: {eig_ratio:.3f}")
|
| 431 |
|
|
@@ -436,17 +499,18 @@ class DetailedResultMaker:
|
|
| 436 |
|
| 437 |
elif (metric_type == MetricType.FREQUENCY):
|
| 438 |
hf_ratio = details.get('hf_ratio')
|
| 439 |
-
|
| 440 |
if hf_ratio:
|
| 441 |
findings.append(f"High-frequency ratio: {hf_ratio:.3f}")
|
| 442 |
|
| 443 |
roughness = details.get('roughness')
|
|
|
|
| 444 |
if roughness:
|
| 445 |
findings.append(f"Spectral roughness: {roughness:.3f}")
|
| 446 |
|
| 447 |
elif (metric_type == MetricType.NOISE):
|
| 448 |
mean_noise = details.get('mean_noise')
|
| 449 |
-
|
| 450 |
if mean_noise:
|
| 451 |
findings.append(f"Mean noise level: {mean_noise:.2f}")
|
| 452 |
|
|
@@ -457,7 +521,7 @@ class DetailedResultMaker:
|
|
| 457 |
|
| 458 |
elif (metric_type == MetricType.TEXTURE):
|
| 459 |
smooth_ratio = details.get('smooth_ratio')
|
| 460 |
-
|
| 461 |
if smooth_ratio:
|
| 462 |
findings.append(f"Smooth patches: {smooth_ratio:.1%}")
|
| 463 |
|
|
@@ -478,4 +542,27 @@ class DetailedResultMaker:
|
|
| 478 |
if high_sat:
|
| 479 |
findings.append(f"High saturation pixels: {high_sat:.1%}")
|
| 480 |
|
| 481 |
-
return findings if findings else ["Analysis complete"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
from typing import Dict
|
| 4 |
from typing import List
|
|
|
|
| 5 |
from utils.logger import get_logger
|
| 6 |
from config.constants import MetricType
|
| 7 |
+
from config.schemas import MetricResult
|
| 8 |
+
from config.constants import EvidenceType
|
| 9 |
from config.constants import SignalStatus
|
| 10 |
from config.schemas import AnalysisResult
|
| 11 |
+
from config.schemas import EvidenceResult
|
| 12 |
+
from config.constants import FinalDecision
|
| 13 |
+
from config.constants import EvidenceStrength
|
| 14 |
+
from config.constants import EvidenceDirection
|
| 15 |
from config.constants import SIGNAL_THRESHOLDS
|
| 16 |
|
| 17 |
|
|
|
|
| 21 |
|
| 22 |
class DetailedResultMaker:
|
| 23 |
"""
|
| 24 |
+
Extract and format detailed analysis results for reporting
|
| 25 |
|
| 26 |
Purpose:
|
| 27 |
--------
|
| 28 |
+
- Extracts ALL data from AnalysisResult (metrics + evidence + decision)
|
| 29 |
+
- Formats data into unified dictionaries/DataFrames
|
| 30 |
+
- Provides structured data for reporters (JSON/CSV/PDF)
|
| 31 |
+
- NO re-computation - pure data extraction and formatting
|
| 32 |
+
|
| 33 |
+
Data Sources:
|
| 34 |
+
-------------
|
| 35 |
+
1. Final Decision (from DecisionPolicy)
|
| 36 |
+
2. Evidence Results (from EvidenceAggregator)
|
| 37 |
+
3. Metric Results (from SignalAggregator)
|
| 38 |
+
4. Metadata (timestamps, processing info)
|
| 39 |
|
| 40 |
Output Formats:
|
| 41 |
---------------
|
| 42 |
+
1. Structured dictionaries for reporters
|
| 43 |
+
2. Pandas DataFrames for tabular reports
|
| 44 |
+
3. Hierarchical JSON-ready structures
|
| 45 |
"""
|
| 46 |
def __init__(self, signal_thresholds: dict | None = None):
    """
    Initialize Detailed Result Maker

    Arguments:
    ----------
        signal_thresholds { dict | None } : Thresholds indexed by SignalStatus; SIGNAL_THRESHOLDS is used when omitted or empty
    """
    # Human-readable names for the statistical metrics
    metric_names    = dict()
    metric_names[MetricType.GRADIENT]  = "Gradient-Field PCA"
    metric_names[MetricType.FREQUENCY] = "Frequency Domain (FFT)"
    metric_names[MetricType.NOISE]     = "Noise Pattern Analysis"
    metric_names[MetricType.TEXTURE]   = "Texture Statistics"
    metric_names[MetricType.COLOR]     = "Color Distribution"

    # Human-readable names for the evidence sources
    evidence_names  = dict()
    evidence_names[EvidenceType.EXIF]      = "EXIF Metadata"
    evidence_names[EvidenceType.WATERMARK] = "Watermark Detection"

    # Labels shown for each final decision
    decision_labels = dict()
    decision_labels[FinalDecision.CONFIRMED_AI_GENERATED] = "🔴 CONFIRMED AI GENERATED"
    decision_labels[FinalDecision.SUSPICIOUS_AI_LIKELY]   = "🟠 SUSPICIOUS - AI LIKELY"
    decision_labels[FinalDecision.AUTHENTIC_BUT_REVIEW]   = "🟡 AUTHENTIC BUT REVIEW"
    decision_labels[FinalDecision.MOSTLY_AUTHENTIC]       = "🟢 MOSTLY AUTHENTIC"

    self.metric_display_names   = metric_names
    self.evidence_display_names = evidence_names
    self.decision_labels        = decision_labels

    # A falsy argument (None or an empty mapping) selects the module-level defaults
    self.signal_thresholds      = signal_thresholds if signal_thresholds else SIGNAL_THRESHOLDS

    logger.debug("DetailedResultMaker initialized")
|
| 70 |
|
| 71 |
|
| 72 |
def extract_detailed_results(self, analysis_result: AnalysisResult) -> Dict:
    """
    Collect every detailed section of an AnalysisResult into one dictionary

    Arguments:
    ----------
        analysis_result { AnalysisResult } : Result to extract from

    Returns:
    --------
        { dict } : filename plus the sections
                   - final_decision
                   - evidence_summary
                   - evidence_detailed
                   - overall_summary
                   - metrics_detailed
                   - metadata
    """
    logger.debug(f"Extracting detailed results for: {analysis_result.filename}")

    # Section name -> extractor, listed in the order the sections appear in the output
    sections = (("final_decision",    self._extract_final_decision),
                ("evidence_summary",  self._extract_evidence_summary),
                ("evidence_detailed", self._extract_all_evidence),
                ("overall_summary",   self._extract_overall_summary),
                ("metrics_detailed",  self._extract_all_metrics),
                ("metadata",          self._extract_metadata),
               )

    detailed = {"filename" : analysis_result.filename}

    for section_name, extractor in sections:
        detailed[section_name] = extractor(analysis_result)

    logger.debug(f"Extracted {len(detailed['evidence_detailed'])} evidence items, {len(detailed['metrics_detailed'])} metric details")

    return detailed
|
| 106 |
|
| 107 |
|
| 108 |
def create_detailed_table(self, analysis_result: AnalysisResult) -> pd.DataFrame:
    """
    Create detailed table as DataFrame (for CSV export)

    Rows are emitted in hierarchical order: final decision, then evidence, then metrics

    Arguments:
    ----------
        analysis_result { AnalysisResult } : Result to tabulate

    Returns:
    --------
        { DataFrame } : One row per decision / evidence item / metric; metric rows additionally carry the
                        metric-specific columns returned by _extract_key_details
    """
    rows     = list()

    # Final Decision (if available)
    decision = analysis_result.final_decision

    if decision:
        rows.append({"Type"        : "FINAL DECISION",
                     "Name"        : self.decision_labels.get(decision, decision.value),
                     "Score"       : "N/A",
                     "Confidence"  : f"{analysis_result.confidence}%",
                     "Status"      : decision.value.upper(),
                     "Explanation" : analysis_result.decision_explanation or "See evidence and metrics below",
                    })

    # Evidence (if any)
    for evidence in (analysis_result.evidence or []):
        source       = evidence.source
        source_key   = source.value if hasattr(source, "value") else str(source)

        # Display names are keyed by EvidenceType members, so try the member itself before its string value
        display_name = self.evidence_display_names.get(source, self.evidence_display_names.get(source_key, source_key))

        confidence   = evidence.confidence
        has_value    = confidence is not None

        rows.append({"Type"        : "EVIDENCE",
                     "Name"        : f"{display_name} - {evidence.analyzer}",
                     "Score"       : f"{confidence:.2f}" if has_value else "N/A",
                     # round() rather than int(): 0.29 * 100 is 28.999..., which int() would report as 28%
                     "Confidence"  : f"{round(confidence * 100)}%" if has_value else "N/A",
                     "Status"      : self._evidence_to_status_label(evidence),
                     "Explanation" : evidence.finding,
                    })

    # Metrics
    for metric_type, metric_result in analysis_result.metric_results.items():
        metric_row = {"Type"       : "METRIC",
                      "Name"       : self.metric_display_names.get(metric_type, metric_type.value),
                      "Score"      : round(metric_result.score, 3),
                      "Confidence" : f"{round(metric_result.confidence * 100)}%" if metric_result.confidence is not None else "N/A",
                      "Status"     : self._score_to_status(metric_result.score),
                     }

        # Add key details
        metric_row.update(self._extract_key_details(metric_type   = metric_type,
                                                    metric_result = metric_result,
                                                   )
                         )

        rows.append(metric_row)

    dataframe = pd.DataFrame(data=rows)

    logger.debug(f"Created detailed table with {len(dataframe)} rows")

    return dataframe
|
|
|
|
| 175 |
|
| 176 |
+
|
| 177 |
+
def _extract_final_decision(self, analysis_result: AnalysisResult) -> Dict:
|
| 178 |
+
"""
|
| 179 |
+
Extract final decision information from DecisionPolicy
|
| 180 |
"""
|
| 181 |
+
if not analysis_result.final_decision:
|
| 182 |
+
return {"decision" : None,
|
| 183 |
+
"label" : "⚪ No Decision",
|
| 184 |
+
"explanation" : "Decision policy not applied",
|
| 185 |
+
"confidence" : 0,
|
| 186 |
+
"based_on" : "Unknown",
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
final_decision = {"decision" : analysis_result.final_decision.value,
|
| 190 |
+
"label" : self.decision_labels.get(analysis_result.final_decision, analysis_result.final_decision.value),
|
| 191 |
+
"explanation" : analysis_result.decision_explanation or "No explanation provided",
|
| 192 |
+
"confidence" : analysis_result.confidence,
|
| 193 |
+
"based_on" : self._determine_decision_basis(analysis_result),
|
| 194 |
+
}
|
| 195 |
+
|
| 196 |
+
return final_decision
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
def _determine_decision_basis(self, analysis_result: AnalysisResult) -> str:
    """
    Describe what the final decision was based on

    Arguments:
    ----------
        analysis_result { AnalysisResult } : Result whose evidence list is inspected

    Returns:
    --------
        { str } : "Statistical metrics only" without evidence, the sources of strong / conclusive
                  evidence when any exist, otherwise a note that evidence and metrics were combined
    """
    if not analysis_result.evidence:
        return "Statistical metrics only"

    decisive       = (EvidenceStrength.STRONG, EvidenceStrength.CONCLUSIVE)

    # Distinct sources that contributed strong or conclusive evidence
    evidence_types = {item.source.value if hasattr(item.source, "value") else str(item.source)
                      for item in analysis_result.evidence
                      if item.strength in decisive}

    if evidence_types:
        # Sorted because set iteration order differs between runs, which made the message unstable
        return f"Strong evidence (Tier 2): {', '.join(sorted(evidence_types))}"

    return "Combination of evidence and metrics (Tier 2 + Tier 1)"
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
def _extract_evidence_summary(self, analysis_result: AnalysisResult) -> Dict:
    """
    Extract a high-level summary of the evidence

    Arguments:
    ----------
        analysis_result { AnalysisResult } : Result whose evidence list is summarised

    Returns:
    --------
        { dict } : total_evidence, ai_evidence_count, auth_evidence_count and strongest_evidence
                   (None when there is no evidence)
    """
    items = analysis_result.evidence

    if not items:
        return {"total_evidence"      : 0,
                "ai_evidence_count"   : 0,
                "auth_evidence_count" : 0,
                "strongest_evidence"  : None,
               }

    def ranking(item):
        # Strength decides first; confidence (missing counts as 0.0) breaks ties
        return (self._strength_to_rank(item.strength), item.confidence or 0.0)

    ai_count   = sum(1 for item in items if (item.direction == EvidenceDirection.AI_GENERATED))
    auth_count = sum(1 for item in items if (item.direction == EvidenceDirection.AUTHENTIC))
    strongest  = max(items, key = ranking)

    strongest_summary = {"source"     : strongest.source.value,
                         "direction"  : strongest.direction.value,
                         "strength"   : strongest.strength.value,
                         "finding"    : strongest.finding,
                         "confidence" : strongest.confidence,
                        }

    return {"total_evidence"      : len(items),
            "ai_evidence_count"   : ai_count,
            "auth_evidence_count" : auth_count,
            "strongest_evidence"  : strongest_summary,
           }
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
def _extract_all_evidence(self, analysis_result: AnalysisResult) -> List[Dict]:
|
| 248 |
+
"""
|
| 249 |
+
Extract detailed information for all evidence items
|
| 250 |
"""
|
| 251 |
+
if not analysis_result.evidence:
|
| 252 |
+
return []
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
|
| 254 |
+
evidence_detailed = list()
|
| 255 |
|
| 256 |
+
for evidence in analysis_result.evidence:
|
| 257 |
+
timestamp = getattr(evidence, "timestamp", None)
|
| 258 |
+
|
| 259 |
+
evidence_detail = {"source" : evidence.source.value,
|
| 260 |
+
"display_name" : self.evidence_display_names.get(evidence.source.value if hasattr(evidence.source, "value") else str(evidence.source), str(evidence.source)),
|
| 261 |
+
"finding" : evidence.finding,
|
| 262 |
+
"direction" : evidence.direction.value,
|
| 263 |
+
"strength" : evidence.strength.value,
|
| 264 |
+
"confidence" : evidence.confidence,
|
| 265 |
+
"analyzer" : evidence.analyzer,
|
| 266 |
+
"details" : evidence.details,
|
| 267 |
+
"status_label" : self._evidence_to_status_label(evidence),
|
| 268 |
+
"timestamp" : timestamp.isoformat() if timestamp else None,
|
| 269 |
+
}
|
| 270 |
+
|
| 271 |
+
evidence_detailed.append(evidence_detail)
|
| 272 |
+
|
| 273 |
+
return evidence_detailed
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
def _evidence_to_status_label(self, evidence: EvidenceResult) -> str:
    """
    Convert an evidence item into a human-readable status label

    Arguments:
    ----------
        evidence { EvidenceResult } : Item whose direction and strength select the label

    Returns:
    --------
        { str } : Label combining a coloured marker with strength and direction
    """
    direction = evidence.direction

    if (direction == EvidenceDirection.AI_GENERATED):
        strength = evidence.strength

        if (strength == EvidenceStrength.CONCLUSIVE):
            return "🔴 CONCLUSIVE AI"

        if (strength == EvidenceStrength.STRONG):
            return "🔴 STRONG AI"

        if (strength == EvidenceStrength.MODERATE):
            return "🟠 MODERATE AI"

        return "🟡 WEAK AI"

    if (direction == EvidenceDirection.AUTHENTIC):
        strength = evidence.strength

        # Strong and conclusive authentic evidence share one label
        if (strength in (EvidenceStrength.STRONG, EvidenceStrength.CONCLUSIVE)):
            return "🟢 STRONG AUTHENTIC"

        if (strength == EvidenceStrength.MODERATE):
            return "🟢 MODERATE AUTHENTIC"

        return "🟡 WEAK AUTHENTIC"

    # Any other direction is reported as indeterminate; strength is not consulted
    return "⚪ INDETERMINATE"
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
def _strength_to_rank(self, strength: EvidenceStrength) -> int:
    """
    Convert an evidence strength into a numeric rank for sorting

    Arguments:
    ----------
        strength { EvidenceStrength } : Strength to rank

    Returns:
    --------
        { int } : 4 for CONCLUSIVE down to 1 for WEAK; 0 for anything not in the table
    """
    rank_by_strength = {EvidenceStrength.CONCLUSIVE : 4,
                        EvidenceStrength.STRONG     : 3,
                        EvidenceStrength.MODERATE   : 2,
                        EvidenceStrength.WEAK       : 1,
                       }

    return rank_by_strength.get(strength, 0)
|
| 317 |
+
|
| 318 |
|
| 319 |
def _extract_overall_summary(self, analysis_result: AnalysisResult) -> Dict:
|
| 320 |
"""
|
|
|
|
| 343 |
"display_name" : self.metric_display_names.get(metric_type, metric_type.value),
|
| 344 |
"score" : round(metric_result.score, 3),
|
| 345 |
"confidence" : round(metric_result.confidence, 3) if metric_result.confidence is not None else None,
|
| 346 |
+
"status" : self._score_to_status(metric_result.score),
|
| 347 |
"details" : metric_result.details or {},
|
| 348 |
+
"interpretation" : self._interpret_metric(metric_type, metric_result),
|
| 349 |
+
"key_findings" : self.extract_key_findings(metric_type, metric_result),
|
|
|
|
| 350 |
}
|
| 351 |
+
|
| 352 |
metrics_detailed.append(metric_detail)
|
| 353 |
|
| 354 |
# Sort by score (highest first)
|
| 355 |
+
metrics_detailed.sort(key = lambda x: x['score'],
|
| 356 |
+
reverse = True,
|
| 357 |
+
)
|
| 358 |
|
| 359 |
return metrics_detailed
|
| 360 |
|
| 361 |
|
| 362 |
def _extract_metadata(self, analysis_result: AnalysisResult) -> Dict:
    """
    Extract processing metadata and statistics

    Arguments:
    ----------
        analysis_result { AnalysisResult } : Result supplying metric_results, signals, evidence and final_decision

    Returns:
    --------
        { dict } : Metric counts by signal status, average confidence, evidence counts and decision info
    """
    def count_signals(status):
        # Number of signals currently in the given status
        return sum(1 for signal in analysis_result.signals if signal.status == status)

    metadata                    = dict()
    metadata["total_metrics"]   = len(analysis_result.metric_results)
    metadata["flagged_metrics"] = count_signals(SignalStatus.FLAGGED)
    metadata["warning_metrics"] = count_signals(SignalStatus.WARNING)
    metadata["passed_metrics"]  = count_signals(SignalStatus.PASSED)
    metadata["avg_confidence"]  = self._calculate_avg_confidence(analysis_result)

    # Evidence stats: missing or empty evidence is counted over an empty list, so every count is zero
    evidence                    = analysis_result.evidence or []
    decisive                    = (EvidenceStrength.STRONG, EvidenceStrength.CONCLUSIVE)

    metadata["total_evidence"]  = len(evidence)
    metadata["ai_evidence"]     = sum(1 for item in evidence if item.direction == EvidenceDirection.AI_GENERATED)
    metadata["auth_evidence"]   = sum(1 for item in evidence if item.direction == EvidenceDirection.AUTHENTIC)
    metadata["strong_evidence"] = sum(1 for item in evidence if item.strength in decisive)

    # Decision info
    decision                       = analysis_result.final_decision
    metadata["has_final_decision"] = decision is not None
    metadata["decision_value"]     = decision.value if decision else None

    return metadata
|
| 391 |
+
|
| 392 |
|
| 393 |
+
def _extract_key_details(self, metric_type: MetricType, metric_result: MetricResult) -> Dict:
|
| 394 |
"""
|
| 395 |
Extract key details specific to each metric type
|
| 396 |
"""
|
|
|
|
| 402 |
}
|
| 403 |
|
| 404 |
elif (metric_type == MetricType.FREQUENCY):
|
| 405 |
+
return {"HF_Ratio" : details.get('hf_ratio', 'N/A'),
|
| 406 |
+
"HF_Anomaly" : details.get('hf_anomaly', 'N/A'),
|
| 407 |
+
"Spectrum_Bins" : details.get('spectrum_bins', 'N/A'),
|
| 408 |
}
|
| 409 |
|
| 410 |
elif (metric_type == MetricType.NOISE):
|
| 411 |
+
return {"Mean_Noise" : details.get('mean_noise', 'N/A'),
|
| 412 |
+
"CV" : details.get('cv', 'N/A'),
|
| 413 |
+
"Patches_Valid" : details.get('patches_valid', 'N/A'),
|
| 414 |
}
|
| 415 |
|
| 416 |
elif (metric_type == MetricType.TEXTURE):
|
| 417 |
+
return {"Smooth_Ratio" : details.get('smooth_ratio', 'N/A'),
|
| 418 |
+
"Contrast_Mean" : details.get('contrast_mean', 'N/A'),
|
| 419 |
+
"Patches_Used" : details.get('patches_used', 'N/A'),
|
| 420 |
}
|
| 421 |
|
| 422 |
elif (metric_type == MetricType.COLOR):
|
| 423 |
sat_stats = details.get('saturation_stats', {})
|
| 424 |
+
|
| 425 |
return {"Mean_Saturation" : sat_stats.get('mean_saturation', 'N/A'),
|
| 426 |
"High_Sat_Ratio" : sat_stats.get('high_sat_ratio', 'N/A'),
|
| 427 |
}
|
|
|
|
| 429 |
return {}
|
| 430 |
|
| 431 |
|
| 432 |
+
def _interpret_metric(self, metric_type: MetricType, metric_result: MetricResult) -> str:
|
| 433 |
"""
|
| 434 |
Provide human-readable interpretation of metric result
|
| 435 |
"""
|
|
|
|
| 436 |
details = metric_result.details or {}
|
| 437 |
|
| 438 |
if (metric_type == MetricType.GRADIENT):
|
| 439 |
eig_ratio = details.get('eigenvalue_ratio')
|
| 440 |
|
| 441 |
if eig_ratio:
|
| 442 |
+
return f"Eigenvalue ratio of {eig_ratio:.3f} ({'high' if (eig_ratio > 0.85) else 'low'} alignment)"
|
| 443 |
|
| 444 |
return "Gradient structure analysis"
|
| 445 |
|
| 446 |
+
elif( metric_type == MetricType.FREQUENCY):
|
| 447 |
hf_ratio = details.get('hf_ratio')
|
| 448 |
+
|
| 449 |
if hf_ratio:
|
| 450 |
+
return f"High-freq ratio: {hf_ratio:.3f} ({'elevated' if (hf_ratio > 0.35) else 'low' if (hf_ratio < 0.08) else 'normal'})"
|
| 451 |
|
| 452 |
return "Frequency spectrum analysis"
|
| 453 |
|
| 454 |
elif (metric_type == MetricType.NOISE):
|
| 455 |
mean_noise = details.get('mean_noise')
|
| 456 |
+
|
| 457 |
if mean_noise:
|
| 458 |
+
return f"Mean noise: {mean_noise:.2f} ({'low' if (mean_noise < 1.5) else 'normal'})"
|
| 459 |
|
| 460 |
return "Noise pattern analysis"
|
| 461 |
|
| 462 |
elif (metric_type == MetricType.TEXTURE):
|
| 463 |
smooth_ratio = details.get('smooth_ratio')
|
| 464 |
+
|
| 465 |
if smooth_ratio is not None:
|
| 466 |
+
return f"Smooth regions: {smooth_ratio:.1%} ({'excessive' if (smooth_ratio > 0.4) else 'normal'})"
|
| 467 |
|
| 468 |
return "Texture variation analysis"
|
| 469 |
|
|
|
|
| 472 |
mean_sat = sat_stats.get('mean_saturation')
|
| 473 |
|
| 474 |
if mean_sat:
|
| 475 |
+
return f"Mean saturation: {mean_sat:.2f} ({'high' if (mean_sat > 0.65) else 'normal'})"
|
| 476 |
|
| 477 |
return "Color distribution analysis"
|
| 478 |
|
| 479 |
return "Analysis complete"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 480 |
|
| 481 |
|
| 482 |
+
def extract_key_findings(self, metric_type: MetricType, metric_result: MetricResult) -> List[str]:
|
| 483 |
"""
|
| 484 |
+
Extract human-readable key forensic findings for reporters
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
"""
|
| 486 |
findings = list()
|
| 487 |
details = metric_result.details or {}
|
| 488 |
|
| 489 |
if (metric_type == MetricType.GRADIENT):
|
| 490 |
eig_ratio = details.get('eigenvalue_ratio')
|
| 491 |
+
|
| 492 |
if eig_ratio:
|
| 493 |
findings.append(f"Eigenvalue ratio: {eig_ratio:.3f}")
|
| 494 |
|
|
|
|
| 499 |
|
| 500 |
elif (metric_type == MetricType.FREQUENCY):
|
| 501 |
hf_ratio = details.get('hf_ratio')
|
| 502 |
+
|
| 503 |
if hf_ratio:
|
| 504 |
findings.append(f"High-frequency ratio: {hf_ratio:.3f}")
|
| 505 |
|
| 506 |
roughness = details.get('roughness')
|
| 507 |
+
|
| 508 |
if roughness:
|
| 509 |
findings.append(f"Spectral roughness: {roughness:.3f}")
|
| 510 |
|
| 511 |
elif (metric_type == MetricType.NOISE):
|
| 512 |
mean_noise = details.get('mean_noise')
|
| 513 |
+
|
| 514 |
if mean_noise:
|
| 515 |
findings.append(f"Mean noise level: {mean_noise:.2f}")
|
| 516 |
|
|
|
|
| 521 |
|
| 522 |
elif (metric_type == MetricType.TEXTURE):
|
| 523 |
smooth_ratio = details.get('smooth_ratio')
|
| 524 |
+
|
| 525 |
if smooth_ratio:
|
| 526 |
findings.append(f"Smooth patches: {smooth_ratio:.1%}")
|
| 527 |
|
|
|
|
| 542 |
if high_sat:
|
| 543 |
findings.append(f"High saturation pixels: {high_sat:.1%}")
|
| 544 |
|
| 545 |
+
return findings if findings else ["Analysis complete"]
|
| 546 |
+
|
| 547 |
+
|
| 548 |
+
def _score_to_status(self, score: float) -> str:
    """
    Convert a score into a status label

    Arguments:
    ----------
        score { float } : Score compared against self.signal_thresholds

    Returns:
    --------
        { str } : "FLAGGED", "WARNING" or "PASSED"
    """
    thresholds = self.signal_thresholds

    # Guard clauses from the strictest status down; the WARNING threshold is read only if FLAGGED did not match
    if (score >= thresholds[SignalStatus.FLAGGED]):
        return "FLAGGED"

    if (score >= thresholds[SignalStatus.WARNING]):
        return "WARNING"

    return "PASSED"
|
| 560 |
+
|
| 561 |
+
|
| 562 |
+
def _calculate_avg_confidence(self, analysis_result: AnalysisResult) -> float:
|
| 563 |
+
"""
|
| 564 |
+
Calculate average confidence across all metrics
|
| 565 |
+
"""
|
| 566 |
+
confidences = [mr.confidence for mr in analysis_result.metric_results.values() if mr.confidence is not None]
|
| 567 |
+
|
| 568 |
+
return round(sum(confidences) / len(confidences), 3) if confidences else 0.0
|
metrics/color_analyzer.py
CHANGED
|
@@ -110,34 +110,38 @@ class ColorAnalyzer:
|
|
| 110 |
--------
|
| 111 |
{ np.ndarray } : HSV image (H in [0, 360], S and V in [0, 1])
|
| 112 |
"""
|
| 113 |
-
r, g, b
|
| 114 |
|
| 115 |
-
maxc
|
| 116 |
-
minc
|
| 117 |
-
delta
|
| 118 |
|
| 119 |
# Value
|
| 120 |
-
v
|
| 121 |
|
| 122 |
# Saturation
|
| 123 |
-
s
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
# Hue
|
| 126 |
-
h
|
| 127 |
|
| 128 |
# Red is max
|
| 129 |
-
mask
|
| 130 |
-
h[mask]
|
| 131 |
|
| 132 |
# Green is max
|
| 133 |
-
mask
|
| 134 |
-
h[mask]
|
| 135 |
|
| 136 |
# Blue is max
|
| 137 |
-
mask
|
| 138 |
-
h[mask]
|
| 139 |
|
| 140 |
-
hsv
|
|
|
|
| 141 |
|
| 142 |
return hsv
|
| 143 |
|
|
|
|
| 110 |
--------
|
| 111 |
{ np.ndarray } : HSV image (H in [0, 360], S and V in [0, 1])
|
| 112 |
"""
|
| 113 |
+
r, g, b = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
|
| 114 |
|
| 115 |
+
maxc = np.maximum(np.maximum(r, g), b)
|
| 116 |
+
minc = np.minimum(np.minimum(r, g), b)
|
| 117 |
+
delta = maxc - minc
|
| 118 |
|
| 119 |
# Value
|
| 120 |
+
v = maxc
|
| 121 |
|
| 122 |
# Saturation
|
| 123 |
+
s = np.zeros_like(maxc, dtype = np.float32)
|
| 124 |
+
|
| 125 |
+
nonzero_mask = maxc > 0
|
| 126 |
+
s[nonzero_mask] = delta[nonzero_mask] / maxc[nonzero_mask]
|
| 127 |
|
| 128 |
# Hue
|
| 129 |
+
h = np.zeros_like(maxc)
|
| 130 |
|
| 131 |
# Red is max
|
| 132 |
+
mask = (maxc == r) & (delta > 0)
|
| 133 |
+
h[mask] = 60.0 * (((g[mask] - b[mask]) / delta[mask]) % 6.0)
|
| 134 |
|
| 135 |
# Green is max
|
| 136 |
+
mask = (maxc == g) & (delta > 0)
|
| 137 |
+
h[mask] = 60.0 * (((b[mask] - r[mask]) / delta[mask]) + 2)
|
| 138 |
|
| 139 |
# Blue is max
|
| 140 |
+
mask = (maxc == b) & (delta > 0)
|
| 141 |
+
h[mask] = 60.0 * (((r[mask] - g[mask]) / delta[mask]) + 4)
|
| 142 |
|
| 143 |
+
hsv = np.stack([h, s, v], axis = 2)
|
| 144 |
+
hsv = np.nan_to_num(hsv, nan = 0.0, posinf = 0.0, neginf = 0.0)
|
| 145 |
|
| 146 |
return hsv
|
| 147 |
|
metrics/{aggregator.py → signal_aggregator.py}
RENAMED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
# Dependencies
|
|
|
|
| 2 |
import time
|
| 3 |
import numpy as np
|
| 4 |
from typing import List
|
|
@@ -11,18 +12,22 @@ from config.constants import MetricType
|
|
| 11 |
from config.constants import SignalStatus
|
| 12 |
from config.schemas import AnalysisResult
|
| 13 |
from config.schemas import DetectionSignal
|
|
|
|
| 14 |
from config.constants import DetectionStatus
|
| 15 |
from config.constants import SIGNAL_THRESHOLDS
|
| 16 |
from utils.image_processor import ImageProcessor
|
| 17 |
from config.constants import METRIC_EXPLANATIONS
|
| 18 |
from metrics.noise_analyzer import NoiseAnalyzer
|
| 19 |
from metrics.color_analyzer import ColorAnalyzer
|
|
|
|
| 20 |
from metrics.texture_analyzer import TextureAnalyzer
|
|
|
|
| 21 |
from features.threshold_manager import ThresholdManager
|
| 22 |
from config.constants import IMAGE_RESIZE_MAX_DIMENSION
|
| 23 |
from metrics.frequency_analyzer import FrequencyAnalyzer
|
| 24 |
from metrics.gradient_field_pca import GradientFieldPCADetector
|
| 25 |
|
|
|
|
| 26 |
# Suppress NumPy warning
|
| 27 |
np.seterr(divide = 'ignore',
|
| 28 |
invalid = 'ignore',
|
|
@@ -33,12 +38,12 @@ np.seterr(divide = 'ignore',
|
|
| 33 |
logger = get_logger(__name__)
|
| 34 |
|
| 35 |
|
| 36 |
-
class
|
| 37 |
"""
|
| 38 |
-
Main detector that orchestrates all detection
|
| 39 |
|
| 40 |
-
Combines multiple unsupervised
|
| 41 |
-
----------------------------------------
|
| 42 |
1. Gradient-Field PCA
|
| 43 |
2. Frequency Domain Analysis (FFT)
|
| 44 |
3. Noise Pattern Analysis
|
|
@@ -74,6 +79,11 @@ class MetricsAggregator:
|
|
| 74 |
# Get metric weights either from runtime UI or default to settings
|
| 75 |
self.weights = (self.threshold_manager.get_metric_weights() if self.threshold_manager else settings.get_metric_weights())
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
logger.info(f"Metric weights: {self.weights}")
|
| 78 |
|
| 79 |
|
|
@@ -117,6 +127,11 @@ class MetricsAggregator:
|
|
| 117 |
|
| 118 |
# Determine status
|
| 119 |
status = self._determine_status(overall_score = overall_score)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
# Calculate processing time
|
| 122 |
processing_time = time.time() - start_time
|
|
@@ -125,7 +140,7 @@ class MetricsAggregator:
|
|
| 125 |
result = AnalysisResult(filename = filename,
|
| 126 |
overall_score = overall_score,
|
| 127 |
status = status,
|
| 128 |
-
confidence =
|
| 129 |
signals = signals,
|
| 130 |
metric_results = metric_results,
|
| 131 |
processing_time = processing_time,
|
|
@@ -154,18 +169,26 @@ class MetricsAggregator:
|
|
| 154 |
{ dict } : Dictionary mapping MetricType to MetricResult
|
| 155 |
"""
|
| 156 |
metric_results = dict()
|
|
|
|
| 157 |
|
| 158 |
-
#
|
| 159 |
for metric_type, (detector_name, detector) in self.detector_registry.items():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
try:
|
| 161 |
-
result =
|
| 162 |
result.metric_type = metric_type
|
| 163 |
metric_results[metric_type] = result
|
| 164 |
|
| 165 |
logger.debug(f"{detector_name} | {metric_type.value} | score={result.score:.3f} | confidence={result.confidence:.3f}")
|
| 166 |
-
|
| 167 |
except Exception as e:
|
| 168 |
-
logger.error(f"{
|
| 169 |
|
| 170 |
# Same Failure Score by all metrics with same confidence
|
| 171 |
metric_results[metric_type] = MetricResult(metric_type = metric_type,
|
|
@@ -285,4 +308,39 @@ class MetricsAggregator:
|
|
| 285 |
return DetectionStatus.REVIEW_REQUIRED
|
| 286 |
|
| 287 |
else:
|
| 288 |
-
return DetectionStatus.LIKELY_AUTHENTIC
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# Dependencies
|
| 2 |
+
import os
|
| 3 |
import time
|
| 4 |
import numpy as np
|
| 5 |
from typing import List
|
|
|
|
| 12 |
from config.constants import SignalStatus
|
| 13 |
from config.schemas import AnalysisResult
|
| 14 |
from config.schemas import DetectionSignal
|
| 15 |
+
from concurrent.futures import as_completed
|
| 16 |
from config.constants import DetectionStatus
|
| 17 |
from config.constants import SIGNAL_THRESHOLDS
|
| 18 |
from utils.image_processor import ImageProcessor
|
| 19 |
from config.constants import METRIC_EXPLANATIONS
|
| 20 |
from metrics.noise_analyzer import NoiseAnalyzer
|
| 21 |
from metrics.color_analyzer import ColorAnalyzer
|
| 22 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 23 |
from metrics.texture_analyzer import TextureAnalyzer
|
| 24 |
+
from config.constants import SIGNAL_CONFIDENCE_PARAMS
|
| 25 |
from features.threshold_manager import ThresholdManager
|
| 26 |
from config.constants import IMAGE_RESIZE_MAX_DIMENSION
|
| 27 |
from metrics.frequency_analyzer import FrequencyAnalyzer
|
| 28 |
from metrics.gradient_field_pca import GradientFieldPCADetector
|
| 29 |
|
| 30 |
+
|
| 31 |
# Suppress NumPy warning
|
| 32 |
np.seterr(divide = 'ignore',
|
| 33 |
invalid = 'ignore',
|
|
|
|
| 38 |
logger = get_logger(__name__)
|
| 39 |
|
| 40 |
|
| 41 |
+
class SignalAggregator:
|
| 42 |
"""
|
| 43 |
+
Main detector that orchestrates all detection signals
|
| 44 |
|
| 45 |
+
Combines multiple unsupervised metric signals:
|
| 46 |
+
----------------------------------------------
|
| 47 |
1. Gradient-Field PCA
|
| 48 |
2. Frequency Domain Analysis (FFT)
|
| 49 |
3. Noise Pattern Analysis
|
|
|
|
| 79 |
# Get metric weights either from runtime UI or default to settings
|
| 80 |
self.weights = (self.threshold_manager.get_metric_weights() if self.threshold_manager else settings.get_metric_weights())
|
| 81 |
|
| 82 |
+
# Initialize shared ThreadPoolExecutor (CPU-safe)
|
| 83 |
+
max_workers = min(settings.METRIC_WORKERS or len(self.detector_registry), os.cpu_count() or 4)
|
| 84 |
+
|
| 85 |
+
self.executor = ThreadPoolExecutor(max_workers = max_workers)
|
| 86 |
+
|
| 87 |
logger.info(f"Metric weights: {self.weights}")
|
| 88 |
|
| 89 |
|
|
|
|
| 127 |
|
| 128 |
# Determine status
|
| 129 |
status = self._determine_status(overall_score = overall_score)
|
| 130 |
+
|
| 131 |
+
# Calculate confidence
|
| 132 |
+
confidence = self._calculate_confidence(metric_results = metric_results,
|
| 133 |
+
overall_score = overall_score,
|
| 134 |
+
)
|
| 135 |
|
| 136 |
# Calculate processing time
|
| 137 |
processing_time = time.time() - start_time
|
|
|
|
| 140 |
result = AnalysisResult(filename = filename,
|
| 141 |
overall_score = overall_score,
|
| 142 |
status = status,
|
| 143 |
+
confidence = confidence,
|
| 144 |
signals = signals,
|
| 145 |
metric_results = metric_results,
|
| 146 |
processing_time = processing_time,
|
|
|
|
| 169 |
{ dict } : Dictionary mapping MetricType to MetricResult
|
| 170 |
"""
|
| 171 |
metric_results = dict()
|
| 172 |
+
futures = dict()
|
| 173 |
|
| 174 |
+
# Submit all detectors
|
| 175 |
for metric_type, (detector_name, detector) in self.detector_registry.items():
|
| 176 |
+
|
| 177 |
+
futures[self.executor.submit(detector.detect, image = image)] = (metric_type, detector_name)
|
| 178 |
+
|
| 179 |
+
# Collect results as they complete
|
| 180 |
+
for future in as_completed(futures):
|
| 181 |
+
metric_type, detector_name = futures[future]
|
| 182 |
+
|
| 183 |
try:
|
| 184 |
+
result = future.result(timeout = settings.METRIC_TIMEOUT)
|
| 185 |
result.metric_type = metric_type
|
| 186 |
metric_results[metric_type] = result
|
| 187 |
|
| 188 |
logger.debug(f"{detector_name} | {metric_type.value} | score={result.score:.3f} | confidence={result.confidence:.3f}")
|
| 189 |
+
|
| 190 |
except Exception as e:
|
| 191 |
+
logger.error(f"{detector_name} failed: {e}")
|
| 192 |
|
| 193 |
# Same Failure Score by all metrics with same confidence
|
| 194 |
metric_results[metric_type] = MetricResult(metric_type = metric_type,
|
|
|
|
| 308 |
return DetectionStatus.REVIEW_REQUIRED
|
| 309 |
|
| 310 |
else:
|
| 311 |
+
return DetectionStatus.LIKELY_AUTHENTIC
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
def _calculate_confidence(self, metric_results: dict[MetricType, MetricResult], overall_score: float) -> int:
|
| 315 |
+
"""
|
| 316 |
+
Tier-1 confidence calculator based on:
|
| 317 |
+
- metric agreement
|
| 318 |
+
- metric reliability
|
| 319 |
+
- decision boundary distance
|
| 320 |
+
"""
|
| 321 |
+
scores = [result.score for result in metric_results.values()]
|
| 322 |
+
score_variance = np.var(scores)
|
| 323 |
+
|
| 324 |
+
# If all metrics failed, confidence must be low
|
| 325 |
+
if all(isinstance(result.details, dict) and "error" in result.details for result in metric_results.values()):
|
| 326 |
+
return int(SIGNAL_CONFIDENCE_PARAMS.MIN_CONFIDENCE * 100)
|
| 327 |
+
|
| 328 |
+
# Agreement confidence
|
| 329 |
+
agreement_confidence = 1.0 - min(score_variance / SIGNAL_CONFIDENCE_PARAMS.VARIANCE_NORM, 1.0)
|
| 330 |
+
|
| 331 |
+
# Reliability confidence
|
| 332 |
+
confidences = [result.confidence for result in metric_results.values() if result.confidence is not None]
|
| 333 |
+
reliability_confidence = float(np.mean(confidences)) if confidences else SIGNAL_CONFIDENCE_PARAMS.DEFAULT_RELIABILITY_CONFIDENCE
|
| 334 |
+
|
| 335 |
+
# Distance confidence
|
| 336 |
+
review_threshold = (self.threshold_manager.get_review_threshold() if self.threshold_manager else settings.REVIEW_THRESHOLD)
|
| 337 |
+
distance_confidence = min(abs(overall_score - review_threshold) / SIGNAL_CONFIDENCE_PARAMS.DISTANCE_NORM, 1.0)
|
| 338 |
+
|
| 339 |
+
logger.debug(f"Confidence breakdown | agreement={agreement_confidence:.2f}, reliability={reliability_confidence:.2f}, distance={distance_confidence:.2f}")
|
| 340 |
+
|
| 341 |
+
confidence = (SIGNAL_CONFIDENCE_PARAMS.AGREEMENT_WEIGHT * agreement_confidence +
|
| 342 |
+
SIGNAL_CONFIDENCE_PARAMS.RELIABILITY_WEIGHT * reliability_confidence +
|
| 343 |
+
SIGNAL_CONFIDENCE_PARAMS.DISTANCE_WEIGHT * distance_confidence
|
| 344 |
+
)
|
| 345 |
+
|
| 346 |
+
return int(np.clip(confidence, 0.0, 1.0) * 100)
|
notebooks/.ipynb_checkpoints/Unified_Dataset_Builder-checkpoint.ipynb
CHANGED
|
@@ -84,7 +84,7 @@
|
|
| 84 |
"# ===============================\n",
|
| 85 |
"# Directory Configuration\n",
|
| 86 |
"# ===============================\n",
|
| 87 |
-
"BASE_DIR = Path(\"tests/dataset\")\n",
|
| 88 |
"AI_DIR = BASE_DIR / \"ai\"\n",
|
| 89 |
"REAL_DIR = BASE_DIR / \"real\"\n",
|
| 90 |
"RAW_DIR = BASE_DIR / \"raw_downloads\"\n",
|
|
@@ -166,16 +166,13 @@
|
|
| 166 |
" }]\n",
|
| 167 |
" \n",
|
| 168 |
"\n",
|
| 169 |
-
"REAL_DATASETS = [{\"name\" : \"
|
| 170 |
-
" \"hf_id\" : \"
|
| 171 |
-
" \"
|
| 172 |
-
" \"coco_task\": \"instances\"\n",
|
| 173 |
-
" },\n",
|
| 174 |
" \"split\" : \"train\",\n",
|
| 175 |
" \"image_key\" : \"image\",\n",
|
| 176 |
" \"label\" : \"real\",\n",
|
| 177 |
" \"family\" : \"photographic\",\n",
|
| 178 |
-
" \"streaming\" : False\n",
|
| 179 |
" }]\n",
|
| 180 |
"\n",
|
| 181 |
"# Kaggle datasets (public, non-scraped)\n",
|
|
@@ -355,7 +352,7 @@
|
|
| 355 |
},
|
| 356 |
{
|
| 357 |
"cell_type": "code",
|
| 358 |
-
"execution_count":
|
| 359 |
"id": "dd8ef771-f39f-4d9d-8eaf-626ecc211141",
|
| 360 |
"metadata": {},
|
| 361 |
"outputs": [
|
|
@@ -371,7 +368,74 @@
|
|
| 371 |
"name": "stderr",
|
| 372 |
"output_type": "stream",
|
| 373 |
"text": [
|
| 374 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 375 |
]
|
| 376 |
}
|
| 377 |
],
|
|
|
|
| 84 |
"# ===============================\n",
|
| 85 |
"# Directory Configuration\n",
|
| 86 |
"# ===============================\n",
|
| 87 |
+
"BASE_DIR = Path(\"../tests/dataset\")\n",
|
| 88 |
"AI_DIR = BASE_DIR / \"ai\"\n",
|
| 89 |
"REAL_DIR = BASE_DIR / \"real\"\n",
|
| 90 |
"RAW_DIR = BASE_DIR / \"raw_downloads\"\n",
|
|
|
|
| 166 |
" }]\n",
|
| 167 |
" \n",
|
| 168 |
"\n",
|
| 169 |
+
"REAL_DATASETS = [{\"name\" : \"imagenette\",\n",
|
| 170 |
+
" \"hf_id\" : \"frgfm/imagenette\",\n",
|
| 171 |
+
" \"config\" : \"320px\",\n",
|
|
|
|
|
|
|
| 172 |
" \"split\" : \"train\",\n",
|
| 173 |
" \"image_key\" : \"image\",\n",
|
| 174 |
" \"label\" : \"real\",\n",
|
| 175 |
" \"family\" : \"photographic\",\n",
|
|
|
|
| 176 |
" }]\n",
|
| 177 |
"\n",
|
| 178 |
"# Kaggle datasets (public, non-scraped)\n",
|
|
|
|
| 352 |
},
|
| 353 |
{
|
| 354 |
"cell_type": "code",
|
| 355 |
+
"execution_count": 7,
|
| 356 |
"id": "dd8ef771-f39f-4d9d-8eaf-626ecc211141",
|
| 357 |
"metadata": {},
|
| 358 |
"outputs": [
|
|
|
|
| 368 |
"name": "stderr",
|
| 369 |
"output_type": "stream",
|
| 370 |
"text": [
|
| 371 |
+
"100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [05:31<00:00, 3.01it/s]\n"
|
| 372 |
+
]
|
| 373 |
+
},
|
| 374 |
+
{
|
| 375 |
+
"name": "stdout",
|
| 376 |
+
"output_type": "stream",
|
| 377 |
+
"text": [
|
| 378 |
+
"\n",
|
| 379 |
+
"▶ Loading HF dataset: imagenette\n"
|
| 380 |
+
]
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"data": {
|
| 384 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 385 |
+
"model_id": "6e436d2fc4374bff9d76dc2534b752b8",
|
| 386 |
+
"version_major": 2,
|
| 387 |
+
"version_minor": 0
|
| 388 |
+
},
|
| 389 |
+
"text/plain": [
|
| 390 |
+
"Downloading builder script: 0.00B [00:00, ?B/s]"
|
| 391 |
+
]
|
| 392 |
+
},
|
| 393 |
+
"metadata": {},
|
| 394 |
+
"output_type": "display_data"
|
| 395 |
+
},
|
| 396 |
+
{
|
| 397 |
+
"data": {
|
| 398 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 399 |
+
"model_id": "791d872b01a44cbb908ddbad43f20a42",
|
| 400 |
+
"version_major": 2,
|
| 401 |
+
"version_minor": 0
|
| 402 |
+
},
|
| 403 |
+
"text/plain": [
|
| 404 |
+
"Downloading metadata: 0.00B [00:00, ?B/s]"
|
| 405 |
+
]
|
| 406 |
+
},
|
| 407 |
+
"metadata": {},
|
| 408 |
+
"output_type": "display_data"
|
| 409 |
+
},
|
| 410 |
+
{
|
| 411 |
+
"data": {
|
| 412 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 413 |
+
"model_id": "36b6ebafe8e4436e9cc3a4bf38a36bda",
|
| 414 |
+
"version_major": 2,
|
| 415 |
+
"version_minor": 0
|
| 416 |
+
},
|
| 417 |
+
"text/plain": [
|
| 418 |
+
"Downloading readme: 0.00B [00:00, ?B/s]"
|
| 419 |
+
]
|
| 420 |
+
},
|
| 421 |
+
"metadata": {},
|
| 422 |
+
"output_type": "display_data"
|
| 423 |
+
},
|
| 424 |
+
{
|
| 425 |
+
"ename": "ValueError",
|
| 426 |
+
"evalue": "Config name is missing.\nPlease pick one among the available configs: ['full_size', '320px', '160px']\nExample of usage:\n\t`load_dataset('imagenette', 'full_size')`",
|
| 427 |
+
"output_type": "error",
|
| 428 |
+
"traceback": [
|
| 429 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
| 430 |
+
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
|
| 431 |
+
"Cell \u001b[0;32mIn[7], line 46\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;66;03m# ===============================\u001b[39;00m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;66;03m# Entry Point\u001b[39;00m\n\u001b[1;32m 44\u001b[0m \u001b[38;5;66;03m# ===============================\u001b[39;00m\n\u001b[1;32m 45\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__main__\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m---> 46\u001b[0m \u001b[43mmain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
|
| 432 |
+
"Cell \u001b[0;32mIn[7], line 13\u001b[0m, in \u001b[0;36mmain\u001b[0;34m()\u001b[0m\n\u001b[1;32m 10\u001b[0m process_hf_dataset(ds, AI_DIR, writer)\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ds \u001b[38;5;129;01min\u001b[39;00m REAL_DATASETS:\n\u001b[0;32m---> 13\u001b[0m \u001b[43mprocess_hf_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mds\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mREAL_DIR\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwriter\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# Kaggle datasets\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ds \u001b[38;5;129;01min\u001b[39;00m KAGGLE_DATASETS:\n",
|
| 433 |
+
"Cell \u001b[0;32mIn[4], line 4\u001b[0m, in \u001b[0;36mprocess_hf_dataset\u001b[0;34m(ds_cfg, root_dir, writer)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mprocess_hf_dataset\u001b[39m(ds_cfg, root_dir, writer):\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m▶ Loading HF dataset: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mds_cfg[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 4\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mload_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mds_cfg\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhf_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mds_cfg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhf_kwargs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mds_cfg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mconfig\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43msplit\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m 
\u001b[49m\u001b[43mds_cfg\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msplit\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mstreaming\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mds_cfg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstreaming\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 11\u001b[0m out_dir \u001b[38;5;241m=\u001b[39m root_dir \u001b[38;5;241m/\u001b[39m ds_cfg[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 12\u001b[0m out_dir\u001b[38;5;241m.\u001b[39mmkdir(parents\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
|
| 434 |
+
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/load.py:2129\u001b[0m, in \u001b[0;36mload_dataset\u001b[0;34m(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, token, use_auth_token, task, streaming, num_proc, storage_options, **config_kwargs)\u001b[0m\n\u001b[1;32m 2124\u001b[0m verification_mode \u001b[38;5;241m=\u001b[39m VerificationMode(\n\u001b[1;32m 2125\u001b[0m (verification_mode \u001b[38;5;129;01mor\u001b[39;00m VerificationMode\u001b[38;5;241m.\u001b[39mBASIC_CHECKS) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m save_infos \u001b[38;5;28;01melse\u001b[39;00m VerificationMode\u001b[38;5;241m.\u001b[39mALL_CHECKS\n\u001b[1;32m 2126\u001b[0m )\n\u001b[1;32m 2128\u001b[0m \u001b[38;5;66;03m# Create a dataset builder\u001b[39;00m\n\u001b[0;32m-> 2129\u001b[0m builder_instance \u001b[38;5;241m=\u001b[39m \u001b[43mload_dataset_builder\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2130\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2131\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2132\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2133\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2134\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2135\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mfeatures\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfeatures\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2136\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2137\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2138\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2139\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2140\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2141\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mconfig_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2142\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2144\u001b[0m \u001b[38;5;66;03m# Return iterable dataset in case of streaming\u001b[39;00m\n\u001b[1;32m 2145\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m streaming:\n",
|
| 435 |
+
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/load.py:1852\u001b[0m, in \u001b[0;36mload_dataset_builder\u001b[0;34m(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, token, use_auth_token, storage_options, **config_kwargs)\u001b[0m\n\u001b[1;32m 1850\u001b[0m builder_cls \u001b[38;5;241m=\u001b[39m get_dataset_builder_class(dataset_module, dataset_name\u001b[38;5;241m=\u001b[39mdataset_name)\n\u001b[1;32m 1851\u001b[0m \u001b[38;5;66;03m# Instantiate the dataset builder\u001b[39;00m\n\u001b[0;32m-> 1852\u001b[0m builder_instance: DatasetBuilder \u001b[38;5;241m=\u001b[39m \u001b[43mbuilder_cls\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1853\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1854\u001b[0m \u001b[43m \u001b[49m\u001b[43mdataset_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdataset_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1855\u001b[0m \u001b[43m \u001b[49m\u001b[43mconfig_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1856\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1857\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1858\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mhash\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mhash\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1859\u001b[0m \u001b[43m \u001b[49m\u001b[43minfo\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minfo\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1860\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mfeatures\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfeatures\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1861\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1862\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1863\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mbuilder_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1864\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mconfig_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1865\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1867\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m builder_instance\n",
|
| 436 |
+
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/builder.py:373\u001b[0m, in \u001b[0;36mDatasetBuilder.__init__\u001b[0;34m(self, cache_dir, dataset_name, config_name, hash, base_path, info, features, token, use_auth_token, repo_id, data_files, data_dir, storage_options, writer_batch_size, name, **config_kwargs)\u001b[0m\n\u001b[1;32m 371\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data_dir \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 372\u001b[0m config_kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata_dir\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m data_dir\n\u001b[0;32m--> 373\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig_id \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_create_builder_config\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 374\u001b[0m \u001b[43m \u001b[49m\u001b[43mconfig_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 375\u001b[0m \u001b[43m \u001b[49m\u001b[43mcustom_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfeatures\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 376\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mconfig_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 377\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 379\u001b[0m \u001b[38;5;66;03m# prepare info: DatasetInfo are a standardized dataclass across all datasets\u001b[39;00m\n\u001b[1;32m 380\u001b[0m \u001b[38;5;66;03m# Prefill datasetinfo\u001b[39;00m\n\u001b[1;32m 381\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m info \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 382\u001b[0m \u001b[38;5;66;03m# TODO FOR 
PACKAGED MODULES IT IMPORTS DATA FROM src/packaged_modules which doesn't make sense\u001b[39;00m\n",
|
| 437 |
+
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/builder.py:525\u001b[0m, in \u001b[0;36mDatasetBuilder._create_builder_config\u001b[0;34m(self, config_name, custom_features, **config_kwargs)\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mBUILDER_CONFIGS) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 524\u001b[0m example_of_usage \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mload_dataset(\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdataset_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mBUILDER_CONFIGS[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 525\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 526\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mConfig name is missing.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 527\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mPlease pick one among the available configs: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlist\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuilder_configs\u001b[38;5;241m.\u001b[39mkeys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 528\u001b[0m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mExample of 
usage:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;124m`\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexample_of_usage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m`\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 529\u001b[0m )\n\u001b[1;32m 530\u001b[0m builder_config \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mBUILDER_CONFIGS[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 531\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\n\u001b[1;32m 532\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo config specified, defaulting to the single config: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdataset_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbuilder_config\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 533\u001b[0m )\n",
|
| 438 |
+
"\u001b[0;31mValueError\u001b[0m: Config name is missing.\nPlease pick one among the available configs: ['full_size', '320px', '160px']\nExample of usage:\n\t`load_dataset('imagenette', 'full_size')`"
|
| 439 |
]
|
| 440 |
}
|
| 441 |
],
|
notebooks/Unified_Dataset_Builder.ipynb
CHANGED
|
@@ -84,7 +84,7 @@
|
|
| 84 |
"# ===============================\n",
|
| 85 |
"# Directory Configuration\n",
|
| 86 |
"# ===============================\n",
|
| 87 |
-
"BASE_DIR = Path(\"tests/dataset\")\n",
|
| 88 |
"AI_DIR = BASE_DIR / \"ai\"\n",
|
| 89 |
"REAL_DIR = BASE_DIR / \"real\"\n",
|
| 90 |
"RAW_DIR = BASE_DIR / \"raw_downloads\"\n",
|
|
@@ -150,7 +150,7 @@
|
|
| 150 |
},
|
| 151 |
{
|
| 152 |
"cell_type": "code",
|
| 153 |
-
"execution_count":
|
| 154 |
"id": "74106705-e2d6-411c-8193-8e02f5ee0fdc",
|
| 155 |
"metadata": {},
|
| 156 |
"outputs": [],
|
|
@@ -166,16 +166,13 @@
|
|
| 166 |
" }]\n",
|
| 167 |
" \n",
|
| 168 |
"\n",
|
| 169 |
-
"REAL_DATASETS = [{\"name\" : \"
|
| 170 |
-
" \"hf_id\" : \"
|
| 171 |
-
" \"
|
| 172 |
-
" \"coco_task\": \"instances\"\n",
|
| 173 |
-
" },\n",
|
| 174 |
" \"split\" : \"train\",\n",
|
| 175 |
" \"image_key\" : \"image\",\n",
|
| 176 |
" \"label\" : \"real\",\n",
|
| 177 |
" \"family\" : \"photographic\",\n",
|
| 178 |
-
" \"streaming\" : False\n",
|
| 179 |
" }]\n",
|
| 180 |
"\n",
|
| 181 |
"# Kaggle datasets (public, non-scraped)\n",
|
|
@@ -204,7 +201,7 @@
|
|
| 204 |
},
|
| 205 |
{
|
| 206 |
"cell_type": "code",
|
| 207 |
-
"execution_count":
|
| 208 |
"id": "a9ea5276-65bb-49f5-a656-c00ceeb1f4d3",
|
| 209 |
"metadata": {},
|
| 210 |
"outputs": [],
|
|
@@ -269,7 +266,7 @@
|
|
| 269 |
},
|
| 270 |
{
|
| 271 |
"cell_type": "code",
|
| 272 |
-
"execution_count":
|
| 273 |
"id": "c6eca5e6-0469-4af6-8af8-afe3036cb0a8",
|
| 274 |
"metadata": {},
|
| 275 |
"outputs": [],
|
|
@@ -305,7 +302,7 @@
|
|
| 305 |
},
|
| 306 |
{
|
| 307 |
"cell_type": "code",
|
| 308 |
-
"execution_count":
|
| 309 |
"id": "b648832e-5025-4851-af21-382051167a04",
|
| 310 |
"metadata": {},
|
| 311 |
"outputs": [],
|
|
@@ -355,7 +352,7 @@
|
|
| 355 |
},
|
| 356 |
{
|
| 357 |
"cell_type": "code",
|
| 358 |
-
"execution_count":
|
| 359 |
"id": "dd8ef771-f39f-4d9d-8eaf-626ecc211141",
|
| 360 |
"metadata": {},
|
| 361 |
"outputs": [
|
|
@@ -371,7 +368,7 @@
|
|
| 371 |
"name": "stderr",
|
| 372 |
"output_type": "stream",
|
| 373 |
"text": [
|
| 374 |
-
"100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [05:
|
| 375 |
]
|
| 376 |
},
|
| 377 |
{
|
|
@@ -379,13 +376,34 @@
|
|
| 379 |
"output_type": "stream",
|
| 380 |
"text": [
|
| 381 |
"\n",
|
| 382 |
-
"▶ Loading HF dataset:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
]
|
| 384 |
},
|
| 385 |
{
|
| 386 |
"data": {
|
| 387 |
"application/vnd.jupyter.widget-view+json": {
|
| 388 |
-
"model_id": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
"version_major": 2,
|
| 390 |
"version_minor": 0
|
| 391 |
},
|
|
@@ -399,51 +417,101 @@
|
|
| 399 |
{
|
| 400 |
"data": {
|
| 401 |
"application/vnd.jupyter.widget-view+json": {
|
| 402 |
-
"model_id": "
|
| 403 |
"version_major": 2,
|
| 404 |
"version_minor": 0
|
| 405 |
},
|
| 406 |
"text/plain": [
|
| 407 |
-
"Downloading data:
|
| 408 |
]
|
| 409 |
},
|
| 410 |
"metadata": {},
|
| 411 |
"output_type": "display_data"
|
| 412 |
},
|
| 413 |
{
|
| 414 |
-
"
|
| 415 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
"output_type": "error",
|
| 417 |
"traceback": [
|
| 418 |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
| 419 |
-
"\u001b[0;
|
| 420 |
-
"Cell \u001b[0;32mIn[
|
| 421 |
-
"Cell \u001b[0;32mIn[
|
| 422 |
-
"Cell \u001b[0;32mIn[
|
| 423 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/
|
| 424 |
-
"
|
| 425 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/builder.py:1717\u001b[0m, in \u001b[0;36mGeneratorBasedBuilder._download_and_prepare\u001b[0;34m(self, dl_manager, verification_mode, **prepare_splits_kwargs)\u001b[0m\n\u001b[1;32m 1716\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_download_and_prepare\u001b[39m(\u001b[38;5;28mself\u001b[39m, dl_manager, verification_mode, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mprepare_splits_kwargs):\n\u001b[0;32m-> 1717\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_download_and_prepare\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1718\u001b[0m \u001b[43m \u001b[49m\u001b[43mdl_manager\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1719\u001b[0m \u001b[43m \u001b[49m\u001b[43mverification_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1720\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_duplicate_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverification_mode\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mVerificationMode\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mBASIC_CHECKS\u001b[49m\n\u001b[1;32m 1721\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mverification_mode\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mVerificationMode\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mALL_CHECKS\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1722\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mprepare_splits_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1723\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
|
| 426 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/builder.py:1027\u001b[0m, in \u001b[0;36mDatasetBuilder._download_and_prepare\u001b[0;34m(self, dl_manager, verification_mode, **prepare_split_kwargs)\u001b[0m\n\u001b[1;32m 1025\u001b[0m split_dict \u001b[38;5;241m=\u001b[39m SplitDict(dataset_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdataset_name)\n\u001b[1;32m 1026\u001b[0m split_generators_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_split_generators_kwargs(prepare_split_kwargs)\n\u001b[0;32m-> 1027\u001b[0m split_generators \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_split_generators\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdl_manager\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43msplit_generators_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1029\u001b[0m \u001b[38;5;66;03m# Checksums verification\u001b[39;00m\n\u001b[1;32m 1030\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m verification_mode \u001b[38;5;241m==\u001b[39m VerificationMode\u001b[38;5;241m.\u001b[39mALL_CHECKS \u001b[38;5;129;01mand\u001b[39;00m dl_manager\u001b[38;5;241m.\u001b[39mrecord_checksums:\n",
|
| 427 |
-
"File \u001b[0;32m~/.cache/huggingface/modules/datasets_modules/datasets/shunk031--MSCOCO/9a9d3cb1e5e1927e03f5448bc4e3dd95d17101d142ba4b94d6973770757f535f/MSCOCO.py:977\u001b[0m, in \u001b[0;36mMsCocoDataset._split_generators\u001b[0;34m(self, dl_manager)\u001b[0m\n\u001b[1;32m 976\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_split_generators\u001b[39m(\u001b[38;5;28mself\u001b[39m, dl_manager: ds\u001b[38;5;241m.\u001b[39mDownloadManager):\n\u001b[0;32m--> 977\u001b[0m file_paths \u001b[38;5;241m=\u001b[39m \u001b[43mdl_manager\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_and_extract\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_URLS\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43myear\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 979\u001b[0m imgs \u001b[38;5;241m=\u001b[39m file_paths[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mimages\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 980\u001b[0m anns \u001b[38;5;241m=\u001b[39m file_paths[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mannotations\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n",
|
| 428 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/download/download_manager.py:565\u001b[0m, in \u001b[0;36mDownloadManager.download_and_extract\u001b[0;34m(self, url_or_urls)\u001b[0m\n\u001b[1;32m 549\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mdownload_and_extract\u001b[39m(\u001b[38;5;28mself\u001b[39m, url_or_urls):\n\u001b[1;32m 550\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Download and extract given `url_or_urls`.\u001b[39;00m\n\u001b[1;32m 551\u001b[0m \n\u001b[1;32m 552\u001b[0m \u001b[38;5;124;03m Is roughly equivalent to:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 563\u001b[0m \u001b[38;5;124;03m extracted_path(s): `str`, extracted paths of given URL(s).\u001b[39;00m\n\u001b[1;32m 564\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 565\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mextract(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl_or_urls\u001b[49m\u001b[43m)\u001b[49m)\n",
|
| 429 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/download/download_manager.py:428\u001b[0m, in \u001b[0;36mDownloadManager.download\u001b[0;34m(self, url_or_urls)\u001b[0m\n\u001b[1;32m 425\u001b[0m download_func \u001b[38;5;241m=\u001b[39m partial(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_download, download_config\u001b[38;5;241m=\u001b[39mdownload_config)\n\u001b[1;32m 427\u001b[0m start_time \u001b[38;5;241m=\u001b[39m datetime\u001b[38;5;241m.\u001b[39mnow()\n\u001b[0;32m--> 428\u001b[0m downloaded_path_or_paths \u001b[38;5;241m=\u001b[39m \u001b[43mmap_nested\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 429\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_func\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 430\u001b[0m \u001b[43m \u001b[49m\u001b[43murl_or_urls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 431\u001b[0m \u001b[43m \u001b[49m\u001b[43mmap_tuple\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 432\u001b[0m \u001b[43m \u001b[49m\u001b[43mnum_proc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnum_proc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 433\u001b[0m \u001b[43m \u001b[49m\u001b[43mdisable_tqdm\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mis_progress_bar_enabled\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 434\u001b[0m \u001b[43m \u001b[49m\u001b[43mdesc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mDownloading data files\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 435\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 436\u001b[0m duration \u001b[38;5;241m=\u001b[39m datetime\u001b[38;5;241m.\u001b[39mnow() \u001b[38;5;241m-\u001b[39m 
start_time\n\u001b[1;32m 437\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloading took \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mduration\u001b[38;5;241m.\u001b[39mtotal_seconds()\u001b[38;5;250m \u001b[39m\u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;241m60\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m min\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
| 430 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/py_utils.py:464\u001b[0m, in \u001b[0;36mmap_nested\u001b[0;34m(function, data_struct, dict_only, map_list, map_tuple, map_numpy, num_proc, parallel_min_length, types, disable_tqdm, desc)\u001b[0m\n\u001b[1;32m 462\u001b[0m num_proc \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 463\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m num_proc \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m num_proc \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(iterable) \u001b[38;5;241m<\u001b[39m parallel_min_length:\n\u001b[0;32m--> 464\u001b[0m mapped \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 465\u001b[0m _single_map_nested((function, obj, types, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 466\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m obj \u001b[38;5;129;01min\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mtqdm(iterable, disable\u001b[38;5;241m=\u001b[39mdisable_tqdm, desc\u001b[38;5;241m=\u001b[39mdesc)\n\u001b[1;32m 467\u001b[0m ]\n\u001b[1;32m 468\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 469\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m warnings\u001b[38;5;241m.\u001b[39mcatch_warnings():\n",
|
| 431 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/py_utils.py:465\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 462\u001b[0m num_proc \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 463\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m num_proc \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m num_proc \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(iterable) \u001b[38;5;241m<\u001b[39m parallel_min_length:\n\u001b[1;32m 464\u001b[0m mapped \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m--> 465\u001b[0m \u001b[43m_single_map_nested\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtypes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 466\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m obj \u001b[38;5;129;01min\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mtqdm(iterable, disable\u001b[38;5;241m=\u001b[39mdisable_tqdm, desc\u001b[38;5;241m=\u001b[39mdesc)\n\u001b[1;32m 467\u001b[0m ]\n\u001b[1;32m 468\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 469\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m warnings\u001b[38;5;241m.\u001b[39mcatch_warnings():\n",
|
| 432 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/py_utils.py:382\u001b[0m, in \u001b[0;36m_single_map_nested\u001b[0;34m(args)\u001b[0m\n\u001b[1;32m 380\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mtqdm(pbar_iterable, disable\u001b[38;5;241m=\u001b[39mdisable_tqdm, position\u001b[38;5;241m=\u001b[39mrank, unit\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mobj\u001b[39m\u001b[38;5;124m\"\u001b[39m, desc\u001b[38;5;241m=\u001b[39mpbar_desc) \u001b[38;5;28;01mas\u001b[39;00m pbar:\n\u001b[1;32m 381\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_struct, \u001b[38;5;28mdict\u001b[39m):\n\u001b[0;32m--> 382\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m {k: _single_map_nested((function, v, types, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m)) \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m pbar}\n\u001b[1;32m 383\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 384\u001b[0m mapped \u001b[38;5;241m=\u001b[39m [_single_map_nested((function, v, types, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m)) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m pbar]\n",
|
| 433 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/py_utils.py:382\u001b[0m, in \u001b[0;36m<dictcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 380\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mtqdm(pbar_iterable, disable\u001b[38;5;241m=\u001b[39mdisable_tqdm, position\u001b[38;5;241m=\u001b[39mrank, unit\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mobj\u001b[39m\u001b[38;5;124m\"\u001b[39m, desc\u001b[38;5;241m=\u001b[39mpbar_desc) \u001b[38;5;28;01mas\u001b[39;00m pbar:\n\u001b[1;32m 381\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_struct, \u001b[38;5;28mdict\u001b[39m):\n\u001b[0;32m--> 382\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m {k: \u001b[43m_single_map_nested\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtypes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m pbar}\n\u001b[1;32m 383\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 384\u001b[0m mapped \u001b[38;5;241m=\u001b[39m [_single_map_nested((function, v, types, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m)) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m pbar]\n",
|
| 434 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/py_utils.py:367\u001b[0m, in \u001b[0;36m_single_map_nested\u001b[0;34m(args)\u001b[0m\n\u001b[1;32m 365\u001b[0m \u001b[38;5;66;03m# Singleton first to spare some computation\u001b[39;00m\n\u001b[1;32m 366\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_struct, \u001b[38;5;28mdict\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_struct, types):\n\u001b[0;32m--> 367\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunction\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata_struct\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 369\u001b[0m \u001b[38;5;66;03m# Reduce logging to keep things readable in multiprocessing with tqdm\u001b[39;00m\n\u001b[1;32m 370\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m rank \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mget_verbosity() \u001b[38;5;241m<\u001b[39m logging\u001b[38;5;241m.\u001b[39mWARNING:\n",
|
| 435 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/download/download_manager.py:454\u001b[0m, in \u001b[0;36mDownloadManager._download\u001b[0;34m(self, url_or_filename, download_config)\u001b[0m\n\u001b[1;32m 451\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_relative_path(url_or_filename):\n\u001b[1;32m 452\u001b[0m \u001b[38;5;66;03m# append the relative path to the base_path\u001b[39;00m\n\u001b[1;32m 453\u001b[0m url_or_filename \u001b[38;5;241m=\u001b[39m url_or_path_join(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_base_path, url_or_filename)\n\u001b[0;32m--> 454\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcached_path\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl_or_filename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m)\u001b[49m\n",
|
| 436 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/file_utils.py:182\u001b[0m, in \u001b[0;36mcached_path\u001b[0;34m(url_or_filename, download_config, **download_kwargs)\u001b[0m\n\u001b[1;32m 178\u001b[0m url_or_filename \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(url_or_filename)\n\u001b[1;32m 180\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_remote_url(url_or_filename):\n\u001b[1;32m 181\u001b[0m \u001b[38;5;66;03m# URL, so get it from the cache (downloading if necessary)\u001b[39;00m\n\u001b[0;32m--> 182\u001b[0m output_path \u001b[38;5;241m=\u001b[39m \u001b[43mget_from_cache\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 183\u001b[0m \u001b[43m \u001b[49m\u001b[43murl_or_filename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 184\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 185\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 186\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 187\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 188\u001b[0m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 189\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 190\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_etag\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muse_etag\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 191\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_retries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 192\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 193\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_url_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mignore_url_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 194\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 195\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_desc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_desc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 196\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(url_or_filename):\n\u001b[1;32m 198\u001b[0m \u001b[38;5;66;03m# File, and it exists.\u001b[39;00m\n\u001b[1;32m 199\u001b[0m output_path \u001b[38;5;241m=\u001b[39m url_or_filename\n",
|
| 437 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/file_utils.py:644\u001b[0m, in \u001b[0;36mget_from_cache\u001b[0;34m(url, cache_dir, force_download, proxies, etag_timeout, resume_download, user_agent, local_files_only, use_etag, max_retries, token, use_auth_token, ignore_url_params, storage_options, download_desc)\u001b[0m\n\u001b[1;32m 642\u001b[0m fsspec_get(url, temp_file, storage_options\u001b[38;5;241m=\u001b[39mstorage_options, desc\u001b[38;5;241m=\u001b[39mdownload_desc)\n\u001b[1;32m 643\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 644\u001b[0m \u001b[43mhttp_get\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 645\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 646\u001b[0m \u001b[43m \u001b[49m\u001b[43mtemp_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 647\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 648\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 649\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 650\u001b[0m \u001b[43m \u001b[49m\u001b[43mcookies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcookies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 651\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_retries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 652\u001b[0m \u001b[43m \u001b[49m\u001b[43mdesc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_desc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 653\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 655\u001b[0m 
logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstoring \u001b[39m\u001b[38;5;132;01m{\u001b[39;00murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m in cache at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcache_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 656\u001b[0m shutil\u001b[38;5;241m.\u001b[39mmove(temp_file\u001b[38;5;241m.\u001b[39mname, cache_path)\n",
|
| 438 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/file_utils.py:419\u001b[0m, in \u001b[0;36mhttp_get\u001b[0;34m(url, temp_file, proxies, resume_size, headers, cookies, timeout, max_retries, desc)\u001b[0m\n\u001b[1;32m 410\u001b[0m total \u001b[38;5;241m=\u001b[39m resume_size \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mint\u001b[39m(content_length) \u001b[38;5;28;01mif\u001b[39;00m content_length \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 411\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mtqdm(\n\u001b[1;32m 412\u001b[0m unit\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mB\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 413\u001b[0m unit_scale\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 417\u001b[0m disable\u001b[38;5;241m=\u001b[39m\u001b[38;5;129;01mnot\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mis_progress_bar_enabled(),\n\u001b[1;32m 418\u001b[0m ) \u001b[38;5;28;01mas\u001b[39;00m progress:\n\u001b[0;32m--> 419\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m chunk \u001b[38;5;129;01min\u001b[39;00m response\u001b[38;5;241m.\u001b[39miter_content(chunk_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1024\u001b[39m):\n\u001b[1;32m 420\u001b[0m progress\u001b[38;5;241m.\u001b[39mupdate(\u001b[38;5;28mlen\u001b[39m(chunk))\n\u001b[1;32m 421\u001b[0m temp_file\u001b[38;5;241m.\u001b[39mwrite(chunk)\n",
|
| 439 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/requests/models.py:816\u001b[0m, in \u001b[0;36mResponse.iter_content.<locals>.generate\u001b[0;34m()\u001b[0m\n\u001b[1;32m 814\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 815\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 816\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw\u001b[38;5;241m.\u001b[39mstream(chunk_size, decode_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 817\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ProtocolError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 818\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ChunkedEncodingError(e)\n",
|
| 440 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/urllib3/response.py:1091\u001b[0m, in \u001b[0;36mHTTPResponse.stream\u001b[0;34m(self, amt, decode_content)\u001b[0m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1090\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_fp_closed(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m-> 1091\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mamt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data:\n\u001b[1;32m 1094\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m data\n",
|
| 441 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/urllib3/response.py:980\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt, decode_content, cache_content)\u001b[0m\n\u001b[1;32m 977\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m amt:\n\u001b[1;32m 978\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer\u001b[38;5;241m.\u001b[39mget(amt)\n\u001b[0;32m--> 980\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raw_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 982\u001b[0m flush_decoder \u001b[38;5;241m=\u001b[39m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m (amt \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m data)\n\u001b[1;32m 984\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m data \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
|
| 442 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/urllib3/response.py:904\u001b[0m, in \u001b[0;36mHTTPResponse._raw_read\u001b[0;34m(self, amt, read1)\u001b[0m\n\u001b[1;32m 901\u001b[0m fp_closed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclosed\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 903\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_error_catcher():\n\u001b[0;32m--> 904\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mread1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mread1\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m fp_closed \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 905\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m amt \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m data:\n\u001b[1;32m 906\u001b[0m \u001b[38;5;66;03m# Platform-specific: Buggy versions of Python.\u001b[39;00m\n\u001b[1;32m 907\u001b[0m \u001b[38;5;66;03m# Close the connection when no data is returned\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 912\u001b[0m \u001b[38;5;66;03m# not properly close the connection in all cases. 
There is\u001b[39;00m\n\u001b[1;32m 913\u001b[0m \u001b[38;5;66;03m# no harm in redundantly calling close.\u001b[39;00m\n\u001b[1;32m 914\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mclose()\n",
|
| 443 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/urllib3/response.py:887\u001b[0m, in \u001b[0;36mHTTPResponse._fp_read\u001b[0;34m(self, amt, read1)\u001b[0m\n\u001b[1;32m 884\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread1(amt) \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread1()\n\u001b[1;32m 885\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 886\u001b[0m \u001b[38;5;66;03m# StringIO doesn't like amt=None\u001b[39;00m\n\u001b[0;32m--> 887\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread()\n",
|
| 444 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/http/client.py:466\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt)\u001b[0m\n\u001b[1;32m 463\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m amt \u001b[38;5;241m>\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength:\n\u001b[1;32m 464\u001b[0m \u001b[38;5;66;03m# clip the read to the \"end of response\"\u001b[39;00m\n\u001b[1;32m 465\u001b[0m amt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength\n\u001b[0;32m--> 466\u001b[0m s \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 467\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m s \u001b[38;5;129;01mand\u001b[39;00m amt:\n\u001b[1;32m 468\u001b[0m \u001b[38;5;66;03m# Ideally, we would raise IncompleteRead if the content-length\u001b[39;00m\n\u001b[1;32m 469\u001b[0m \u001b[38;5;66;03m# wasn't satisfied, but it might break compatibility.\u001b[39;00m\n\u001b[1;32m 470\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_close_conn()\n",
|
| 445 |
-
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/socket.py:717\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 715\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 716\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 717\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 718\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m 719\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
|
| 446 |
-
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
| 447 |
]
|
| 448 |
}
|
| 449 |
],
|
|
|
|
| 84 |
"# ===============================\n",
|
| 85 |
"# Directory Configuration\n",
|
| 86 |
"# ===============================\n",
|
| 87 |
+
"BASE_DIR = Path(\"../tests/dataset\")\n",
|
| 88 |
"AI_DIR = BASE_DIR / \"ai\"\n",
|
| 89 |
"REAL_DIR = BASE_DIR / \"real\"\n",
|
| 90 |
"RAW_DIR = BASE_DIR / \"raw_downloads\"\n",
|
|
|
|
| 150 |
},
|
| 151 |
{
|
| 152 |
"cell_type": "code",
|
| 153 |
+
"execution_count": 8,
|
| 154 |
"id": "74106705-e2d6-411c-8193-8e02f5ee0fdc",
|
| 155 |
"metadata": {},
|
| 156 |
"outputs": [],
|
|
|
|
| 166 |
" }]\n",
|
| 167 |
" \n",
|
| 168 |
"\n",
|
| 169 |
+
"REAL_DATASETS = [{\"name\" : \"imagenette\",\n",
|
| 170 |
+
" \"hf_id\" : \"frgfm/imagenette\",\n",
|
| 171 |
+
" \"config\" : \"320px\",\n",
|
|
|
|
|
|
|
| 172 |
" \"split\" : \"train\",\n",
|
| 173 |
" \"image_key\" : \"image\",\n",
|
| 174 |
" \"label\" : \"real\",\n",
|
| 175 |
" \"family\" : \"photographic\",\n",
|
|
|
|
| 176 |
" }]\n",
|
| 177 |
"\n",
|
| 178 |
"# Kaggle datasets (public, non-scraped)\n",
|
|
|
|
| 201 |
},
|
| 202 |
{
|
| 203 |
"cell_type": "code",
|
| 204 |
+
"execution_count": 9,
|
| 205 |
"id": "a9ea5276-65bb-49f5-a656-c00ceeb1f4d3",
|
| 206 |
"metadata": {},
|
| 207 |
"outputs": [],
|
|
|
|
| 266 |
},
|
| 267 |
{
|
| 268 |
"cell_type": "code",
|
| 269 |
+
"execution_count": 10,
|
| 270 |
"id": "c6eca5e6-0469-4af6-8af8-afe3036cb0a8",
|
| 271 |
"metadata": {},
|
| 272 |
"outputs": [],
|
|
|
|
| 302 |
},
|
| 303 |
{
|
| 304 |
"cell_type": "code",
|
| 305 |
+
"execution_count": 11,
|
| 306 |
"id": "b648832e-5025-4851-af21-382051167a04",
|
| 307 |
"metadata": {},
|
| 308 |
"outputs": [],
|
|
|
|
| 352 |
},
|
| 353 |
{
|
| 354 |
"cell_type": "code",
|
| 355 |
+
"execution_count": 12,
|
| 356 |
"id": "dd8ef771-f39f-4d9d-8eaf-626ecc211141",
|
| 357 |
"metadata": {},
|
| 358 |
"outputs": [
|
|
|
|
| 368 |
"name": "stderr",
|
| 369 |
"output_type": "stream",
|
| 370 |
"text": [
|
| 371 |
+
"100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [05:30<00:00, 3.02it/s]\n"
|
| 372 |
]
|
| 373 |
},
|
| 374 |
{
|
|
|
|
| 376 |
"output_type": "stream",
|
| 377 |
"text": [
|
| 378 |
"\n",
|
| 379 |
+
"▶ Loading HF dataset: imagenette\n"
|
| 380 |
+
]
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"name": "stderr",
|
| 384 |
+
"output_type": "stream",
|
| 385 |
+
"text": [
|
| 386 |
+
"Using the latest cached version of the module from /Users/itobuz/.cache/huggingface/modules/datasets_modules/datasets/frgfm--imagenette/38929285b8abcae5c1305418e9d8fea5dd6b189bbbd22caba5f5537c7fa0f01f (last modified on Mon Dec 22 15:06:36 2025) since it couldn't be found locally at frgfm/imagenette., or remotely on the Hugging Face Hub.\n"
|
| 387 |
]
|
| 388 |
},
|
| 389 |
{
|
| 390 |
"data": {
|
| 391 |
"application/vnd.jupyter.widget-view+json": {
|
| 392 |
+
"model_id": "ce74ba00790b49fab546616010a4952d",
|
| 393 |
+
"version_major": 2,
|
| 394 |
+
"version_minor": 0
|
| 395 |
+
},
|
| 396 |
+
"text/plain": [
|
| 397 |
+
"Downloading data: 0%| | 0.00/342M [00:00<?, ?B/s]"
|
| 398 |
+
]
|
| 399 |
+
},
|
| 400 |
+
"metadata": {},
|
| 401 |
+
"output_type": "display_data"
|
| 402 |
+
},
|
| 403 |
+
{
|
| 404 |
+
"data": {
|
| 405 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 406 |
+
"model_id": "dcadae012cfa492f8e94ea1662cb8102",
|
| 407 |
"version_major": 2,
|
| 408 |
"version_minor": 0
|
| 409 |
},
|
|
|
|
| 417 |
{
|
| 418 |
"data": {
|
| 419 |
"application/vnd.jupyter.widget-view+json": {
|
| 420 |
+
"model_id": "3a0d3ab402554b7489a01f883e4d6572",
|
| 421 |
"version_major": 2,
|
| 422 |
"version_minor": 0
|
| 423 |
},
|
| 424 |
"text/plain": [
|
| 425 |
+
"Downloading data: 0.00B [00:00, ?B/s]"
|
| 426 |
]
|
| 427 |
},
|
| 428 |
"metadata": {},
|
| 429 |
"output_type": "display_data"
|
| 430 |
},
|
| 431 |
{
|
| 432 |
+
"data": {
|
| 433 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 434 |
+
"model_id": "9e65e45076104352951a4a71bd8d6da7",
|
| 435 |
+
"version_major": 2,
|
| 436 |
+
"version_minor": 0
|
| 437 |
+
},
|
| 438 |
+
"text/plain": [
|
| 439 |
+
"Downloading data: 0.00B [00:00, ?B/s]"
|
| 440 |
+
]
|
| 441 |
+
},
|
| 442 |
+
"metadata": {},
|
| 443 |
+
"output_type": "display_data"
|
| 444 |
+
},
|
| 445 |
+
{
|
| 446 |
+
"data": {
|
| 447 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 448 |
+
"model_id": "d714e865ab2a43dcb43a33dcf0df2be7",
|
| 449 |
+
"version_major": 2,
|
| 450 |
+
"version_minor": 0
|
| 451 |
+
},
|
| 452 |
+
"text/plain": [
|
| 453 |
+
"Generating train split: 0%| | 0/9469 [00:00<?, ? examples/s]"
|
| 454 |
+
]
|
| 455 |
+
},
|
| 456 |
+
"metadata": {},
|
| 457 |
+
"output_type": "display_data"
|
| 458 |
+
},
|
| 459 |
+
{
|
| 460 |
+
"data": {
|
| 461 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 462 |
+
"model_id": "49d1d87ad10548eebf9ba2709dbda441",
|
| 463 |
+
"version_major": 2,
|
| 464 |
+
"version_minor": 0
|
| 465 |
+
},
|
| 466 |
+
"text/plain": [
|
| 467 |
+
"Generating validation split: 0%| | 0/3925 [00:00<?, ? examples/s]"
|
| 468 |
+
]
|
| 469 |
+
},
|
| 470 |
+
"metadata": {},
|
| 471 |
+
"output_type": "display_data"
|
| 472 |
+
},
|
| 473 |
+
{
|
| 474 |
+
"name": "stderr",
|
| 475 |
+
"output_type": "stream",
|
| 476 |
+
"text": [
|
| 477 |
+
" 11%|██████████████▋ | 1000/9469 [02:30<21:13, 6.65it/s]\n"
|
| 478 |
+
]
|
| 479 |
+
},
|
| 480 |
+
{
|
| 481 |
+
"name": "stdout",
|
| 482 |
+
"output_type": "stream",
|
| 483 |
+
"text": [
|
| 484 |
+
"⬇ Downloading Kaggle dataset: tristanzhang32/ai-generated-images-vs-real-images\n"
|
| 485 |
+
]
|
| 486 |
+
},
|
| 487 |
+
{
|
| 488 |
+
"name": "stderr",
|
| 489 |
+
"output_type": "stream",
|
| 490 |
+
"text": [
|
| 491 |
+
"Traceback (most recent call last):\n",
|
| 492 |
+
" File \"/Users/itobuz/.conda/envs/mvp_env/bin/kaggle\", line 7, in <module>\n",
|
| 493 |
+
" sys.exit(main())\n",
|
| 494 |
+
" File \"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/kaggle/cli.py\", line 68, in main\n",
|
| 495 |
+
" out = args.func(**command_args)\n",
|
| 496 |
+
" File \"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/kaggle/api/kaggle_api_extended.py\", line 1741, in dataset_download_cli\n",
|
| 497 |
+
" with self.build_kaggle_client() as kaggle:\n",
|
| 498 |
+
" File \"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/kaggle/api/kaggle_api_extended.py\", line 688, in build_kaggle_client\n",
|
| 499 |
+
" username=self.config_values['username'],\n",
|
| 500 |
+
"KeyError: 'username'\n"
|
| 501 |
+
]
|
| 502 |
+
},
|
| 503 |
+
{
|
| 504 |
+
"ename": "CalledProcessError",
|
| 505 |
+
"evalue": "Command '['kaggle', 'datasets', 'download', 'tristanzhang32/ai-generated-images-vs-real-images', '-p', '../tests/dataset/raw_downloads/ai_vs_real', '--unzip']' returned non-zero exit status 1.",
|
| 506 |
"output_type": "error",
|
| 507 |
"traceback": [
|
| 508 |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
| 509 |
+
"\u001b[0;31mCalledProcessError\u001b[0m Traceback (most recent call last)",
|
| 510 |
+
"Cell \u001b[0;32mIn[12], line 46\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;66;03m# ===============================\u001b[39;00m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;66;03m# Entry Point\u001b[39;00m\n\u001b[1;32m 44\u001b[0m \u001b[38;5;66;03m# ===============================\u001b[39;00m\n\u001b[1;32m 45\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__main__\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m---> 46\u001b[0m \u001b[43mmain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
|
| 511 |
+
"Cell \u001b[0;32mIn[12], line 18\u001b[0m, in \u001b[0;36mmain\u001b[0;34m()\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ds \u001b[38;5;129;01min\u001b[39;00m KAGGLE_DATASETS:\n\u001b[1;32m 17\u001b[0m raw_path \u001b[38;5;241m=\u001b[39m RAW_DIR \u001b[38;5;241m/\u001b[39m ds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m---> 18\u001b[0m \u001b[43mdownload_kaggle_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mds\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mkaggle_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mraw_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[38;5;66;03m# AI images\u001b[39;00m\n\u001b[1;32m 21\u001b[0m ingest_image_folder(src_dir \u001b[38;5;241m=\u001b[39m raw_path \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mai\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 22\u001b[0m out_dir \u001b[38;5;241m=\u001b[39m AI_DIR \u001b[38;5;241m/\u001b[39m ds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m 23\u001b[0m writer \u001b[38;5;241m=\u001b[39m writer,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 26\u001b[0m source \u001b[38;5;241m=\u001b[39m ds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m 27\u001b[0m )\n",
|
| 512 |
+
"Cell \u001b[0;32mIn[10], line 10\u001b[0m, in \u001b[0;36mdownload_kaggle_dataset\u001b[0;34m(kaggle_id, out_dir)\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m⬇ Downloading Kaggle dataset: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkaggle_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 10\u001b[0m \u001b[43msubprocess\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mkaggle\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdatasets\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdownload\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[43mkaggle_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m-p\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mout_dir\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m--unzip\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 16\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
|
| 513 |
+
"File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/subprocess.py:526\u001b[0m, in \u001b[0;36mrun\u001b[0;34m(input, capture_output, timeout, check, *popenargs, **kwargs)\u001b[0m\n\u001b[1;32m 524\u001b[0m retcode \u001b[38;5;241m=\u001b[39m process\u001b[38;5;241m.\u001b[39mpoll()\n\u001b[1;32m 525\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check \u001b[38;5;129;01mand\u001b[39;00m retcode:\n\u001b[0;32m--> 526\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CalledProcessError(retcode, process\u001b[38;5;241m.\u001b[39margs,\n\u001b[1;32m 527\u001b[0m output\u001b[38;5;241m=\u001b[39mstdout, stderr\u001b[38;5;241m=\u001b[39mstderr)\n\u001b[1;32m 528\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m CompletedProcess(process\u001b[38;5;241m.\u001b[39margs, retcode, stdout, stderr)\n",
|
| 514 |
+
"\u001b[0;31mCalledProcessError\u001b[0m: Command '['kaggle', 'datasets', 'download', 'tristanzhang32/ai-generated-images-vs-real-images', '-p', '../tests/dataset/raw_downloads/ai_vs_real', '--unzip']' returned non-zero exit status 1."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 515 |
]
|
| 516 |
}
|
| 517 |
],
|
reporter/csv_reporter.py
CHANGED
|
@@ -7,8 +7,8 @@ from utils.logger import get_logger
|
|
| 7 |
from config.settings import settings
|
| 8 |
from config.constants import MetricType
|
| 9 |
from config.schemas import AnalysisResult
|
|
|
|
| 10 |
from utils.helpers import generate_unique_id
|
| 11 |
-
from config.constants import DetectionStatus
|
| 12 |
from config.schemas import BatchAnalysisResult
|
| 13 |
from features.detailed_result_maker import DetailedResultMaker
|
| 14 |
|
|
@@ -19,444 +19,248 @@ logger = get_logger(__name__)
|
|
| 19 |
|
| 20 |
class CSVReporter:
|
| 21 |
"""
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
---------
|
| 26 |
-
-
|
| 27 |
-
-
|
| 28 |
-
-
|
| 29 |
-
-
|
| 30 |
-
- UTF-8 encoding with BOM for international compatibility
|
| 31 |
"""
|
| 32 |
def __init__(self):
|
| 33 |
"""
|
| 34 |
Initialize CSV Reporter
|
| 35 |
"""
|
| 36 |
self.detailed_maker = DetailedResultMaker()
|
|
|
|
| 37 |
logger.debug("CSVReporter initialized")
|
| 38 |
-
|
| 39 |
|
| 40 |
def export_batch_summary(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
|
| 41 |
"""
|
| 42 |
-
Export batch
|
| 43 |
-
|
| 44 |
-
Arguments:
|
| 45 |
-
----------
|
| 46 |
-
batch_result { BatchAnalysisResult } : Complete batch analysis result
|
| 47 |
-
|
| 48 |
-
output_dir { Path } : Output directory (defaults to settings.REPORTS_DIR)
|
| 49 |
-
|
| 50 |
-
Returns:
|
| 51 |
-
--------
|
| 52 |
-
{ Path } : Path to generated CSV file
|
| 53 |
"""
|
| 54 |
output_dir = output_dir or settings.REPORTS_DIR
|
| 55 |
report_id = generate_unique_id()
|
| 56 |
filename = f"batch_summary_{report_id}.csv"
|
| 57 |
output_path = output_dir / filename
|
| 58 |
-
|
| 59 |
logger.info(f"Generating batch summary CSV: {filename}")
|
| 60 |
-
|
| 61 |
try:
|
| 62 |
with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
|
| 63 |
writer = csv.writer(f)
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
report_type = "Batch Analysis Summary",
|
| 68 |
timestamp = batch_result.timestamp,
|
| 69 |
)
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
# Main Results Table
|
| 77 |
self._write_batch_results_table(writer = writer,
|
| 78 |
batch_result = batch_result,
|
| 79 |
)
|
| 80 |
-
|
| 81 |
-
# Footer
|
| 82 |
self._write_footer(writer = writer)
|
| 83 |
-
|
| 84 |
logger.info(f"Batch summary CSV generated: {output_path}")
|
| 85 |
return output_path
|
| 86 |
-
|
| 87 |
except Exception as e:
|
| 88 |
logger.error(f"Failed to generate batch summary CSV: {e}")
|
| 89 |
raise
|
| 90 |
-
|
| 91 |
|
| 92 |
def export_batch_detailed(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
|
| 93 |
"""
|
| 94 |
-
Export detailed batch
|
| 95 |
-
|
| 96 |
-
Arguments:
|
| 97 |
-
----------
|
| 98 |
-
batch_result { BatchAnalysisResult } : Complete batch analysis result
|
| 99 |
-
|
| 100 |
-
output_dir { Path } : Output directory (defaults to settings.REPORTS_DIR)
|
| 101 |
-
|
| 102 |
-
Returns:
|
| 103 |
-
--------
|
| 104 |
-
{ Path } : Path to generated CSV file
|
| 105 |
"""
|
| 106 |
output_dir = output_dir or settings.REPORTS_DIR
|
| 107 |
report_id = generate_unique_id()
|
| 108 |
filename = f"batch_detailed_{report_id}.csv"
|
| 109 |
output_path = output_dir / filename
|
| 110 |
-
|
| 111 |
logger.info(f"Generating detailed batch CSV: {filename}")
|
| 112 |
-
|
| 113 |
try:
|
| 114 |
with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
|
| 115 |
writer = csv.writer(f)
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
self._write_report_header(writer = writer,
|
| 119 |
report_type = "Detailed Batch Analysis",
|
| 120 |
timestamp = batch_result.timestamp,
|
| 121 |
)
|
| 122 |
-
|
| 123 |
-
# Process each image with full details
|
| 124 |
for idx, result in enumerate(batch_result.results, 1):
|
| 125 |
-
self._write_detailed_image_section(writer
|
| 126 |
-
result
|
| 127 |
-
image_number
|
| 128 |
-
total_images
|
| 129 |
)
|
| 130 |
-
|
| 131 |
-
# Add separator between images
|
| 132 |
if (idx < batch_result.processed):
|
| 133 |
writer.writerow([])
|
| 134 |
writer.writerow(['=' * 100])
|
| 135 |
writer.writerow([])
|
| 136 |
-
|
| 137 |
-
# Footer
|
| 138 |
self._write_footer(writer = writer)
|
| 139 |
-
|
| 140 |
logger.info(f"Detailed batch CSV generated: {output_path}")
|
| 141 |
-
return output_path
|
| 142 |
|
|
|
|
|
|
|
| 143 |
except Exception as e:
|
| 144 |
logger.error(f"Failed to generate detailed batch CSV: {e}")
|
| 145 |
raise
|
| 146 |
-
|
| 147 |
|
| 148 |
def export_single_detailed(self, result: AnalysisResult, output_dir: Optional[Path] = None) -> Path:
|
| 149 |
"""
|
| 150 |
-
Export single image detailed
|
| 151 |
-
|
| 152 |
-
Arguments:
|
| 153 |
-
----------
|
| 154 |
-
result { AnalysisResult } : Single image analysis result
|
| 155 |
-
|
| 156 |
-
output_dir { Path } : Output directory (defaults to settings.REPORTS_DIR)
|
| 157 |
-
|
| 158 |
-
Returns:
|
| 159 |
-
--------
|
| 160 |
-
{ Path } : Path to generated CSV file
|
| 161 |
"""
|
| 162 |
output_dir = output_dir or settings.REPORTS_DIR
|
| 163 |
report_id = generate_unique_id()
|
| 164 |
filename = f"single_analysis_{report_id}.csv"
|
| 165 |
output_path = output_dir / filename
|
| 166 |
-
|
| 167 |
logger.info(f"Generating single image CSV: {filename}")
|
| 168 |
-
|
| 169 |
try:
|
| 170 |
with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
|
| 171 |
writer = csv.writer(f)
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
self._write_report_header(writer = writer,
|
| 175 |
report_type = "Single Image Analysis",
|
| 176 |
timestamp = result.timestamp,
|
| 177 |
)
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
self._write_detailed_image_section(writer = writer,
|
| 181 |
result = result,
|
| 182 |
image_number = 1,
|
| 183 |
total_images = 1,
|
| 184 |
)
|
| 185 |
-
|
| 186 |
-
# Footer
|
| 187 |
self._write_footer(writer = writer)
|
| 188 |
-
|
| 189 |
logger.info(f"Single image CSV generated: {output_path}")
|
| 190 |
-
return output_path
|
| 191 |
|
|
|
|
|
|
|
| 192 |
except Exception as e:
|
| 193 |
logger.error(f"Failed to generate single image CSV: {e}")
|
| 194 |
raise
|
| 195 |
-
|
| 196 |
|
| 197 |
-
def export_metrics_comparison(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
|
| 198 |
-
"""
|
| 199 |
-
Export metrics comparison table across all images
|
| 200 |
-
|
| 201 |
-
Arguments:
|
| 202 |
-
----------
|
| 203 |
-
batch_result { BatchAnalysisResult } : Complete batch analysis result
|
| 204 |
-
|
| 205 |
-
output_dir { Path } : Output directory (defaults to settings.REPORTS_DIR)
|
| 206 |
-
|
| 207 |
-
Returns:
|
| 208 |
-
--------
|
| 209 |
-
{ Path } : Path to generated CSV file
|
| 210 |
-
"""
|
| 211 |
-
output_dir = output_dir or settings.REPORTS_DIR
|
| 212 |
-
report_id = generate_unique_id()
|
| 213 |
-
filename = f"metrics_comparison_{report_id}.csv"
|
| 214 |
-
output_path = output_dir / filename
|
| 215 |
-
|
| 216 |
-
logger.info(f"Generating metrics comparison CSV: {filename}")
|
| 217 |
-
|
| 218 |
-
try:
|
| 219 |
-
with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
|
| 220 |
-
writer = csv.writer(f)
|
| 221 |
-
|
| 222 |
-
# Report Header
|
| 223 |
-
self._write_report_header(writer = writer,
|
| 224 |
-
report_type = "Metrics Comparison",
|
| 225 |
-
timestamp = batch_result.timestamp,
|
| 226 |
-
)
|
| 227 |
-
|
| 228 |
-
# Comparison Table Header
|
| 229 |
-
writer.writerow(['Metrics Comparison Across All Images'])
|
| 230 |
-
writer.writerow([])
|
| 231 |
-
|
| 232 |
-
header = ['Filename',
|
| 233 |
-
'Overall Score',
|
| 234 |
-
'Analysis Status',
|
| 235 |
-
'Gradient Analysis Score',
|
| 236 |
-
'Gradient Analysis Confidence',
|
| 237 |
-
'Frequency Analysis Score',
|
| 238 |
-
'Frequency Analysis Confidence',
|
| 239 |
-
'Noise Analysis Score',
|
| 240 |
-
'Noise Analysis Confidence',
|
| 241 |
-
'Texture Analysis Score',
|
| 242 |
-
'Texture Analysis Confidence',
|
| 243 |
-
'Color Analysis Score',
|
| 244 |
-
'Color Analysis Confidence',
|
| 245 |
-
'Processing Time',
|
| 246 |
-
]
|
| 247 |
-
|
| 248 |
-
writer.writerow(header)
|
| 249 |
-
|
| 250 |
-
# Data rows
|
| 251 |
-
for result in batch_result.results:
|
| 252 |
-
row = [result.filename,
|
| 253 |
-
f"{result.overall_score:.3f}",
|
| 254 |
-
result.status.value,
|
| 255 |
-
]
|
| 256 |
-
|
| 257 |
-
# Add each metric's score and confidence
|
| 258 |
-
for metric_type in [MetricType.GRADIENT, MetricType.FREQUENCY, MetricType.NOISE, MetricType.TEXTURE, MetricType.COLOR]:
|
| 259 |
-
metric_result = result.metric_results.get(metric_type)
|
| 260 |
-
|
| 261 |
-
if metric_result:
|
| 262 |
-
row.append(f"{metric_result.score:.3f}")
|
| 263 |
-
row.append(f"{metric_result.confidence:.3f}" if metric_result.confidence is not None else "N/A")
|
| 264 |
-
|
| 265 |
-
else:
|
| 266 |
-
row.extend(["N/A", "N/A"])
|
| 267 |
-
|
| 268 |
-
row.append(f"{result.processing_time:.2f}s")
|
| 269 |
-
writer.writerow(row)
|
| 270 |
-
|
| 271 |
-
# Footer
|
| 272 |
-
writer.writerow([])
|
| 273 |
-
self._write_footer(writer = writer)
|
| 274 |
-
|
| 275 |
-
logger.info(f"Metrics comparison CSV generated: {output_path}")
|
| 276 |
-
return output_path
|
| 277 |
-
|
| 278 |
-
except Exception as e:
|
| 279 |
-
logger.error(f"Failed to generate metrics comparison CSV: {e}")
|
| 280 |
-
raise
|
| 281 |
-
|
| 282 |
|
| 283 |
def _write_report_header(self, writer, report_type: str, timestamp: datetime) -> None:
|
| 284 |
-
"""
|
| 285 |
-
Write CSV report header
|
| 286 |
-
"""
|
| 287 |
writer.writerow(['=' * 100])
|
| 288 |
writer.writerow([f'AI Image Screener - {report_type}'])
|
| 289 |
writer.writerow([f'Generated: {timestamp.strftime("%Y-%m-%d %H:%M:%S")}'])
|
| 290 |
writer.writerow([f'Version: {settings.VERSION}'])
|
| 291 |
writer.writerow(['=' * 100])
|
| 292 |
writer.writerow([])
|
| 293 |
-
|
| 294 |
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
"""
|
| 299 |
-
writer.writerow(['BATCH STATISTICS'])
|
| 300 |
writer.writerow([])
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
for row in
|
| 317 |
writer.writerow(row)
|
| 318 |
-
|
| 319 |
writer.writerow([])
|
| 320 |
writer.writerow(['=' * 100])
|
| 321 |
writer.writerow([])
|
| 322 |
-
|
| 323 |
|
| 324 |
def _write_batch_results_table(self, writer, batch_result: BatchAnalysisResult) -> None:
|
| 325 |
-
"""
|
| 326 |
-
Write batch results main table
|
| 327 |
-
"""
|
| 328 |
writer.writerow(['ANALYSIS RESULTS'])
|
| 329 |
writer.writerow([])
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
'
|
| 334 |
-
'
|
| 335 |
-
'
|
| 336 |
-
'
|
| 337 |
-
'Top Warning Signals',
|
| 338 |
-
'Recommendation',
|
| 339 |
-
'Processing Time (s)',
|
| 340 |
]
|
| 341 |
|
| 342 |
writer.writerow(header)
|
| 343 |
-
|
| 344 |
-
# Data rows
|
| 345 |
for result in batch_result.results:
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
|
| 354 |
-
else:
|
| 355 |
-
recommendation = "No further action needed"
|
| 356 |
-
|
| 357 |
-
row = [result.filename,
|
| 358 |
-
f"{result.image_size[0]}×{result.image_size[1]}",
|
| 359 |
-
result.status.value,
|
| 360 |
-
f"{result.overall_score:.3f}",
|
| 361 |
-
f"{result.confidence}%",
|
| 362 |
-
signals_str,
|
| 363 |
-
recommendation,
|
| 364 |
-
f"{result.processing_time:.2f}",
|
| 365 |
-
]
|
| 366 |
-
|
| 367 |
-
writer.writerow(row)
|
| 368 |
-
|
| 369 |
writer.writerow([])
|
| 370 |
-
|
| 371 |
|
| 372 |
def _write_detailed_image_section(self, writer, result: AnalysisResult, image_number: int, total_images: int) -> None:
|
| 373 |
-
"""
|
| 374 |
-
Write detailed section for single image
|
| 375 |
-
"""
|
| 376 |
writer.writerow([f'IMAGE {image_number} OF {total_images}'])
|
| 377 |
writer.writerow([])
|
| 378 |
-
|
| 379 |
-
#
|
| 380 |
-
writer.writerow(['
|
| 381 |
-
writer.writerow(['
|
| 382 |
-
writer.writerow(['Status', result.status.value])
|
| 383 |
-
writer.writerow(['Overall Score', f"{result.overall_score:.3f}"])
|
| 384 |
writer.writerow(['Confidence', f"{result.confidence}%"])
|
| 385 |
-
writer.writerow(['
|
| 386 |
-
writer.writerow(['Processing Time', f"{result.processing_time:.2f}s"])
|
| 387 |
-
writer.writerow(['Timestamp', result.timestamp.isoformat()])
|
| 388 |
-
writer.writerow([])
|
| 389 |
-
|
| 390 |
-
# Detection Signals
|
| 391 |
-
writer.writerow(['DETECTION SIGNALS'])
|
| 392 |
writer.writerow([])
|
| 393 |
-
|
| 394 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 395 |
for signal in result.signals:
|
| 396 |
-
metric_result
|
| 397 |
-
|
| 398 |
-
|
| 399 |
writer.writerow([signal.name,
|
| 400 |
f"{signal.score:.3f}",
|
| 401 |
-
signal.status.value
|
| 402 |
-
|
| 403 |
-
signal.explanation.replace("\n", " "),
|
| 404 |
])
|
| 405 |
-
|
| 406 |
-
writer.writerow([])
|
| 407 |
-
|
| 408 |
-
# Detailed Forensics
|
| 409 |
-
writer.writerow(['FORENSIC DETAILS'])
|
| 410 |
-
writer.writerow([])
|
| 411 |
|
| 412 |
-
for metric_type in MetricType:
|
| 413 |
-
metric_result = result.metric_results.get(metric_type)
|
| 414 |
-
|
| 415 |
-
if not metric_result:
|
| 416 |
-
continue
|
| 417 |
-
|
| 418 |
-
metric_name = self.detailed_maker.metric_display_names.get(metric_type, metric_type.value)
|
| 419 |
-
|
| 420 |
-
writer.writerow([f'--- {metric_name} ---'])
|
| 421 |
-
writer.writerow(['Score', f"{metric_result.score:.3f}"])
|
| 422 |
-
writer.writerow(['Confidence', f"{metric_result.confidence:.3f}" if metric_result.confidence is not None else "N/A"])
|
| 423 |
-
|
| 424 |
-
# Write details
|
| 425 |
-
if metric_result.details:
|
| 426 |
-
for key, value in metric_result.details.items():
|
| 427 |
-
if isinstance(value, dict):
|
| 428 |
-
writer.writerow([f" {key}:", ""])
|
| 429 |
-
for sub_key, sub_value in value.items():
|
| 430 |
-
writer.writerow([f" {sub_key}", str(sub_value)])
|
| 431 |
-
|
| 432 |
-
else:
|
| 433 |
-
writer.writerow([f" {key}", str(value)])
|
| 434 |
-
|
| 435 |
-
writer.writerow([])
|
| 436 |
-
|
| 437 |
-
# Recommendation
|
| 438 |
-
writer.writerow(['RECOMMENDATION'])
|
| 439 |
-
writer.writerow([])
|
| 440 |
-
|
| 441 |
-
if (result.status == DetectionStatus.REVIEW_REQUIRED):
|
| 442 |
-
writer.writerow(['Action', 'Manual verification recommended'])
|
| 443 |
-
writer.writerow(['Priority', 'HIGH' if (result.overall_score >= 0.85) else 'MEDIUM'])
|
| 444 |
-
writer.writerow(['Next Steps', 'Forensic analysis, reverse image search, metadata inspection'])
|
| 445 |
-
|
| 446 |
-
else:
|
| 447 |
-
writer.writerow(['Action', 'No immediate action needed'])
|
| 448 |
-
writer.writerow(['Priority', 'LOW'])
|
| 449 |
-
writer.writerow(['Next Steps', 'Proceed with normal workflow'])
|
| 450 |
-
|
| 451 |
writer.writerow([])
|
| 452 |
-
|
| 453 |
|
| 454 |
def _write_footer(self, writer) -> None:
|
| 455 |
-
"""
|
| 456 |
-
Write CSV report footer
|
| 457 |
-
"""
|
| 458 |
writer.writerow(['=' * 100])
|
| 459 |
writer.writerow(['Report generated by AI Image Screener'])
|
| 460 |
-
writer.writerow(['
|
| 461 |
-
writer.writerow(['
|
| 462 |
writer.writerow(['=' * 100])
|
|
|
|
| 7 |
from config.settings import settings
|
| 8 |
from config.constants import MetricType
|
| 9 |
from config.schemas import AnalysisResult
|
| 10 |
+
from config.constants import FinalDecision
|
| 11 |
from utils.helpers import generate_unique_id
|
|
|
|
| 12 |
from config.schemas import BatchAnalysisResult
|
| 13 |
from features.detailed_result_maker import DetailedResultMaker
|
| 14 |
|
|
|
|
| 19 |
|
| 20 |
class CSVReporter:
    """
    CSV report generator

    Guarantees:
    -----------
    - FinalDecision is authoritative
    - Metrics are informational only
    - Evidence-first reporting
    - Audit-safe CSV structure
    """
    def __init__(self):
        """
        Initialize CSV Reporter
        """
        self.detailed_maker = DetailedResultMaker()

        logger.debug("CSVReporter initialized")


    def export_batch_summary(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
        """
        Export batch decision summary as CSV

        Arguments:
        ----------
            batch_result { BatchAnalysisResult } : Batch result supplying the timestamp, counters, summary and per-image results

            output_dir   { Path }                : Output directory (defaults to settings.REPORTS_DIR); created if missing

        Returns:
        --------
            { Path } : Path to the generated CSV file
        """
        output_path = self._prepare_output_path(output_dir = output_dir,
                                                prefix     = "batch_summary",
                                               )

        logger.info(f"Generating batch summary CSV: {output_path.name}")

        try:
            # utf-8-sig writes a BOM; newline = '' leaves row termination to the csv module
            with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
                writer = csv.writer(f)

                self._write_report_header(writer,
                                          report_type = "Batch Decision Summary",
                                          timestamp   = batch_result.timestamp,
                                         )

                self._write_batch_decision_statistics(writer       = writer,
                                                      batch_result = batch_result,
                                                     )

                self._write_batch_results_table(writer       = writer,
                                                batch_result = batch_result,
                                               )

                self._write_footer(writer = writer)

            logger.info(f"Batch summary CSV generated: {output_path}")
            return output_path

        except Exception as e:
            logger.error(f"Failed to generate batch summary CSV: {e}")
            raise


    def export_batch_detailed(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
        """
        Export detailed batch forensic CSV (one section per image)

        Arguments:
        ----------
            batch_result { BatchAnalysisResult } : Batch result whose `results` are written one section each

            output_dir   { Path }                : Output directory (defaults to settings.REPORTS_DIR); created if missing

        Returns:
        --------
            { Path } : Path to the generated CSV file
        """
        output_path = self._prepare_output_path(output_dir = output_dir,
                                                prefix     = "batch_detailed",
                                               )

        logger.info(f"Generating detailed batch CSV: {output_path.name}")

        try:
            with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
                writer = csv.writer(f)

                self._write_report_header(writer,
                                          report_type = "Detailed Batch Analysis",
                                          timestamp   = batch_result.timestamp,
                                         )

                for idx, result in enumerate(batch_result.results, 1):
                    self._write_detailed_image_section(writer,
                                                       result       = result,
                                                       image_number = idx,
                                                       total_images = batch_result.processed,
                                                      )

                    # Separator rule between images, but not after the last one
                    if (idx < batch_result.processed):
                        writer.writerow([])
                        writer.writerow(['=' * 100])
                        writer.writerow([])

                self._write_footer(writer = writer)

            logger.info(f"Detailed batch CSV generated: {output_path}")

            return output_path

        except Exception as e:
            logger.error(f"Failed to generate detailed batch CSV: {e}")
            raise


    def export_single_detailed(self, result: AnalysisResult, output_dir: Optional[Path] = None) -> Path:
        """
        Export single image detailed CSV

        Arguments:
        ----------
            result     { AnalysisResult } : Single image analysis result

            output_dir { Path }           : Output directory (defaults to settings.REPORTS_DIR); created if missing

        Returns:
        --------
            { Path } : Path to the generated CSV file
        """
        output_path = self._prepare_output_path(output_dir = output_dir,
                                                prefix     = "single_analysis",
                                               )

        logger.info(f"Generating single image CSV: {output_path.name}")

        try:
            with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
                writer = csv.writer(f)

                self._write_report_header(writer,
                                          report_type = "Single Image Analysis",
                                          timestamp   = result.timestamp,
                                         )

                self._write_detailed_image_section(writer,
                                                   result       = result,
                                                   image_number = 1,
                                                   total_images = 1,
                                                  )

                self._write_footer(writer = writer)

            logger.info(f"Single image CSV generated: {output_path}")

            return output_path

        except Exception as e:
            logger.error(f"Failed to generate single image CSV: {e}")
            raise


    def _prepare_output_path(self, output_dir: Optional[Path], prefix: str) -> Path:
        """
        Resolve the target directory, make sure it exists and build a unique CSV path

        Arguments:
        ----------
            output_dir { Path } : Requested directory, or None to fall back to settings.REPORTS_DIR

            prefix     { str }  : Filename prefix placed before the unique report id

        Returns:
        --------
            { Path } : `<directory>/<prefix>_<unique id>.csv`
        """
        target_dir = Path(output_dir or settings.REPORTS_DIR)

        # open() does not create parent directories, so create them up front
        target_dir.mkdir(parents = True, exist_ok = True)

        return target_dir / f"{prefix}_{generate_unique_id()}.csv"


    @staticmethod
    def _decision_label(result: AnalysisResult) -> str:
        """
        Return the final decision value of a result, or 'N/A' when no decision is set
        """
        decision = result.final_decision

        return decision.value if decision is not None else 'N/A'


    def _write_report_header(self, writer, report_type: str, timestamp: datetime) -> None:
        """
        Write the banner rows: rule, title, generation time, version, rule, blank row

        Arguments:
        ----------
            writer      : Object exposing `writerow` (a csv.writer in the export methods)

            report_type { str }      : Title suffix shown after 'AI Image Screener - '

            timestamp   { datetime } : Time rendered as YYYY-MM-DD HH:MM:SS
        """
        writer.writerow(['=' * 100])
        writer.writerow([f'AI Image Screener - {report_type}'])
        writer.writerow([f'Generated: {timestamp.strftime("%Y-%m-%d %H:%M:%S")}'])
        writer.writerow([f'Version: {settings.VERSION}'])
        writer.writerow(['=' * 100])
        writer.writerow([])


    def _write_batch_decision_statistics(self, writer, batch_result: BatchAnalysisResult) -> None:
        """
        Write batch counters, one count row per FinalDecision member and the total processing time

        Counts and the success rate are read from `batch_result.summary`; missing keys are written as 0
        """
        writer.writerow(['BATCH DECISION STATISTICS'])
        writer.writerow([])

        summary = batch_result.summary or {}

        rows    = [['Total Images', batch_result.total_images],
                   ['Processed', batch_result.processed],
                   ['Failed', batch_result.failed],
                   ['Success Rate', f"{summary.get('success_rate', 0)}%"],
                   ['', ''],
                  ]

        # The summary is looked up by each decision's value
        rows.extend([decision.value, summary.get(decision.value, 0)] for decision in FinalDecision)

        rows.append(['Total Processing Time', f"{batch_result.total_processing_time:.2f}s"])

        writer.writerows(rows)

        writer.writerow([])
        writer.writerow(['=' * 100])
        writer.writerow([])


    def _write_batch_results_table(self, writer, batch_result: BatchAnalysisResult) -> None:
        """
        Write the per-image results table (one row per entry of `batch_result.results`)

        A missing final decision is written as 'N/A'; newlines in the explanation are
        replaced by spaces so every image stays on a single physical line
        """
        writer.writerow(['ANALYSIS RESULTS'])
        writer.writerow([])

        header = ['Filename',
                  'Final Decision',
                  'Decision Confidence (%)',
                  'Overall Score (informational)',
                  'Decision Explanation',
                  'Processing Time (s)',
                 ]

        writer.writerow(header)

        for result in batch_result.results:
            writer.writerow([result.filename,
                             self._decision_label(result),
                             f"{result.confidence}%",
                             f"{result.overall_score:.3f}",
                             (result.decision_explanation or '').replace("\n", " "),
                             f"{result.processing_time:.2f}",
                            ])

        writer.writerow([])


    def _write_detailed_image_section(self, writer, result: AnalysisResult, image_number: int, total_images: int) -> None:
        """
        Write one image section: final decision, evidence summary (if any) and metric signals

        Arguments:
        ----------
            writer       : Object exposing `writerow`

            result       { AnalysisResult } : Result providing decision, evidence, signals and metric_results

            image_number { int }            : Position shown in the 'IMAGE x OF y' title row

            total_images { int }            : Total shown in the 'IMAGE x OF y' title row
        """
        writer.writerow([f'IMAGE {image_number} OF {total_images}'])
        writer.writerow([])

        # Decision Summary
        writer.writerow(['FINAL DECISION'])
        writer.writerow(['Decision', self._decision_label(result)])
        writer.writerow(['Confidence', f"{result.confidence}%"])
        writer.writerow(['Explanation', result.decision_explanation or ''])
        writer.writerow([])

        # Evidence Summary (section omitted entirely when there is no evidence)
        if result.evidence:
            writer.writerow(['EVIDENCE SUMMARY'])
            writer.writerow(['Source', 'Direction', 'Strength', 'Confidence', 'Finding'])

            for e in result.evidence:
                writer.writerow([e.source.value,
                                 e.direction.value,
                                 e.strength.value,
                                 f"{e.confidence:.3f}" if e.confidence is not None else 'N/A',
                                 (e.finding or '').replace("\n", " "),
                                ])

            writer.writerow([])

        # Metric Signals (Informational)
        writer.writerow(['METRIC SIGNALS (INFORMATIONAL)'])
        writer.writerow(['Metric', 'Score', 'Status', 'Confidence'])

        for signal in result.signals:
            # A signal may have no matching metric result; its confidence is then 'N/A'
            metric_result = result.metric_results.get(signal.metric_type)

            writer.writerow([signal.name,
                             f"{signal.score:.3f}",
                             signal.status.value,
                             f"{metric_result.confidence:.3f}" if (metric_result and metric_result.confidence is not None) else 'N/A',
                            ])

        writer.writerow([])


    def _write_footer(self, writer) -> None:
        """
        Write the closing rows: rule, attribution, two disclaimer lines, rule
        """
        writer.writerow(['=' * 100])
        writer.writerow(['Report generated by AI Image Screener'])
        writer.writerow(['DISCLAIMER: Statistical signals are non-decisional'])
        writer.writerow(['Final decisions are policy-based and auditable'])
        writer.writerow(['=' * 100])
|
reporter/json_reporter.py
CHANGED
|
@@ -8,6 +8,7 @@ from datetime import datetime
|
|
| 8 |
from utils.logger import get_logger
|
| 9 |
from config.settings import settings
|
| 10 |
from config.schemas import AnalysisResult
|
|
|
|
| 11 |
from utils.helpers import generate_unique_id
|
| 12 |
from config.schemas import BatchAnalysisResult
|
| 13 |
from features.detailed_result_maker import DetailedResultMaker
|
|
@@ -19,39 +20,27 @@ logger = get_logger(__name__)
|
|
| 19 |
|
| 20 |
class JSONReporter:
|
| 21 |
"""
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
---------
|
| 26 |
-
-
|
| 27 |
-
-
|
| 28 |
-
-
|
| 29 |
-
-
|
| 30 |
-
- Nested structure for complex data
|
| 31 |
"""
|
| 32 |
def __init__(self):
|
| 33 |
"""
|
| 34 |
Initialize JSON Reporter
|
| 35 |
"""
|
| 36 |
self.detailed_maker = DetailedResultMaker()
|
|
|
|
| 37 |
logger.debug("JSONReporter initialized")
|
| 38 |
|
| 39 |
|
| 40 |
def export_batch(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None, include_detailed: bool = True) -> Path:
|
| 41 |
"""
|
| 42 |
Export batch analysis as JSON
|
| 43 |
-
|
| 44 |
-
Arguments:
|
| 45 |
-
----------
|
| 46 |
-
batch_result { BatchAnalysisResult } : Complete batch analysis result
|
| 47 |
-
|
| 48 |
-
output_dir { Path } : Output directory (defaults to settings.REPORTS_DIR)
|
| 49 |
-
|
| 50 |
-
include_detailed { bool } : Include detailed forensic data
|
| 51 |
-
|
| 52 |
-
Returns:
|
| 53 |
-
--------
|
| 54 |
-
{ Path } : Path to generated JSON file
|
| 55 |
"""
|
| 56 |
output_dir = output_dir or settings.REPORTS_DIR
|
| 57 |
report_id = generate_unique_id()
|
|
@@ -59,47 +48,33 @@ class JSONReporter:
|
|
| 59 |
output_path = output_dir / filename
|
| 60 |
|
| 61 |
output_dir.mkdir(parents = True, exist_ok = True)
|
| 62 |
-
|
| 63 |
logger.info(f"Generating batch JSON: {filename}")
|
| 64 |
-
|
| 65 |
try:
|
| 66 |
-
# Build JSON structure
|
| 67 |
data = self._build_batch_json(batch_result = batch_result,
|
| 68 |
include_detailed = include_detailed,
|
| 69 |
)
|
| 70 |
-
|
| 71 |
-
# Write to file
|
| 72 |
with open(output_path, 'w', encoding = 'utf-8') as f:
|
| 73 |
-
json.dump(obj = data,
|
| 74 |
-
fp = f,
|
| 75 |
-
indent = 4,
|
| 76 |
-
ensure_ascii = False,
|
| 77 |
default = str,
|
| 78 |
)
|
| 79 |
-
|
| 80 |
logger.info(f"Batch JSON generated: {output_path}")
|
| 81 |
return output_path
|
| 82 |
-
|
| 83 |
except Exception as e:
|
| 84 |
logger.error(f"Failed to generate batch JSON: {e}")
|
| 85 |
raise
|
| 86 |
|
| 87 |
|
| 88 |
-
def export_single(self, result: AnalysisResult, output_dir: Optional[Path] = None, include_detailed: bool = True
|
|
|
|
| 89 |
"""
|
| 90 |
Export single image analysis as JSON
|
| 91 |
-
|
| 92 |
-
Arguments:
|
| 93 |
-
----------
|
| 94 |
-
result { AnalysisResult } : Single image analysis result
|
| 95 |
-
|
| 96 |
-
output_dir { Path } : Output directory (defaults to settings.REPORTS_DIR)
|
| 97 |
-
|
| 98 |
-
include_detailed { bool } : Include detailed forensic data
|
| 99 |
-
|
| 100 |
-
Returns:
|
| 101 |
-
--------
|
| 102 |
-
{ Path } : Path to generated JSON file
|
| 103 |
"""
|
| 104 |
output_dir = output_dir or settings.REPORTS_DIR
|
| 105 |
report_id = generate_unique_id()
|
|
@@ -107,27 +82,24 @@ class JSONReporter:
|
|
| 107 |
output_path = output_dir / filename
|
| 108 |
|
| 109 |
output_dir.mkdir(parents = True, exist_ok = True)
|
| 110 |
-
|
| 111 |
logger.info(f"Generating single image JSON: {filename}")
|
| 112 |
-
|
| 113 |
try:
|
| 114 |
-
# Build JSON structure
|
| 115 |
data = self._build_single_json(result = result,
|
| 116 |
include_detailed = include_detailed,
|
| 117 |
)
|
| 118 |
-
|
| 119 |
-
# Write to file
|
| 120 |
with open(output_path, 'w', encoding = 'utf-8') as f:
|
| 121 |
-
json.dump(obj = data,
|
| 122 |
-
fp = f,
|
| 123 |
-
indent = 4,
|
| 124 |
-
ensure_ascii = False,
|
| 125 |
default = str,
|
| 126 |
)
|
| 127 |
-
|
| 128 |
logger.info(f"Single image JSON generated: {output_path}")
|
| 129 |
return output_path
|
| 130 |
-
|
| 131 |
except Exception as e:
|
| 132 |
logger.error(f"Failed to generate single image JSON: {e}")
|
| 133 |
raise
|
|
@@ -135,15 +107,7 @@ class JSONReporter:
|
|
| 135 |
|
| 136 |
def export_api_response(self, result: AnalysisResult) -> Dict:
|
| 137 |
"""
|
| 138 |
-
Generate API-friendly JSON response
|
| 139 |
-
|
| 140 |
-
Arguments:
|
| 141 |
-
----------
|
| 142 |
-
result { AnalysisResult } : Analysis result
|
| 143 |
-
|
| 144 |
-
Returns:
|
| 145 |
-
--------
|
| 146 |
-
{ dict } : API response dictionary
|
| 147 |
"""
|
| 148 |
return {"success" : True,
|
| 149 |
"timestamp" : datetime.now().isoformat(),
|
|
@@ -158,36 +122,23 @@ class JSONReporter:
|
|
| 158 |
"""
|
| 159 |
Build complete batch JSON structure
|
| 160 |
"""
|
| 161 |
-
|
| 162 |
timestamp = batch_result.timestamp,
|
| 163 |
),
|
| 164 |
-
"batch_summary" : self._build_batch_summary(batch_result
|
| 165 |
-
"results" : [],
|
| 166 |
}
|
| 167 |
-
|
| 168 |
-
# Add each image result
|
| 169 |
-
for result in batch_result.results:
|
| 170 |
-
image_data = self._build_image_data(result = result,
|
| 171 |
-
include_detailed = include_detailed,
|
| 172 |
-
)
|
| 173 |
-
data["results"].append(image_data)
|
| 174 |
-
|
| 175 |
-
return data
|
| 176 |
|
| 177 |
|
| 178 |
def _build_single_json(self, result: AnalysisResult, include_detailed: bool) -> Dict:
|
| 179 |
"""
|
| 180 |
Build single image JSON structure
|
| 181 |
"""
|
| 182 |
-
|
| 183 |
timestamp = result.timestamp,
|
| 184 |
),
|
| 185 |
-
"analysis" : self._build_image_data(result
|
| 186 |
-
include_detailed = include_detailed,
|
| 187 |
-
),
|
| 188 |
}
|
| 189 |
-
|
| 190 |
-
return data
|
| 191 |
|
| 192 |
|
| 193 |
def _build_metadata(self, report_type: str, timestamp: datetime) -> Dict:
|
|
@@ -204,17 +155,20 @@ class JSONReporter:
|
|
| 204 |
|
| 205 |
def _build_batch_summary(self, batch_result: BatchAnalysisResult) -> Dict:
|
| 206 |
"""
|
| 207 |
-
Build batch summary
|
| 208 |
"""
|
|
|
|
|
|
|
| 209 |
return {"total_images" : batch_result.total_images,
|
| 210 |
"processed" : batch_result.processed,
|
| 211 |
"failed" : batch_result.failed,
|
| 212 |
-
"success_rate" :
|
| 213 |
-
"
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
|
|
|
| 218 |
},
|
| 219 |
"total_processing_time" : round(batch_result.total_processing_time, 2),
|
| 220 |
}
|
|
@@ -222,128 +176,100 @@ class JSONReporter:
|
|
| 222 |
|
| 223 |
def _build_image_data(self, result: AnalysisResult, include_detailed: bool) -> Dict:
|
| 224 |
"""
|
| 225 |
-
Build complete image data structure
|
| 226 |
"""
|
| 227 |
-
image_data = {"filename"
|
| 228 |
-
"
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
|
|
|
|
|
|
| 240 |
}
|
| 241 |
-
|
| 242 |
-
# Add detailed forensics if requested
|
| 243 |
if include_detailed:
|
| 244 |
-
image_data["forensics"] = self._build_forensics_data(result
|
| 245 |
-
image_data["recommendations"] = self._build_recommendations(result
|
| 246 |
-
|
| 247 |
return image_data
|
| 248 |
|
| 249 |
|
| 250 |
def _build_signals_data(self, result: AnalysisResult) -> List[Dict]:
|
| 251 |
"""
|
| 252 |
-
Build
|
| 253 |
"""
|
| 254 |
signals = list()
|
| 255 |
-
|
| 256 |
for signal in result.signals:
|
| 257 |
metric_result = result.metric_results.get(signal.metric_type)
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
signals.append(signal_data)
|
| 268 |
-
|
| 269 |
return signals
|
| 270 |
|
| 271 |
|
| 272 |
def _build_forensics_data(self, result: AnalysisResult) -> Dict:
|
| 273 |
"""
|
| 274 |
-
Build
|
| 275 |
"""
|
| 276 |
forensics = dict()
|
| 277 |
-
|
| 278 |
for metric_type, metric_result in result.metric_results.items():
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
forensics[metric_type.value] = {"display_name" : metric_name,
|
| 282 |
"score" : round(metric_result.score, 3),
|
| 283 |
-
"confidence" : round(metric_result.confidence, 3) if
|
| 284 |
"details" : metric_result.details or {},
|
| 285 |
-
"key_findings" : self.detailed_maker.extract_key_findings(metric_type
|
| 286 |
-
metric_result = metric_result,
|
| 287 |
-
),
|
| 288 |
}
|
| 289 |
-
|
| 290 |
return forensics
|
| 291 |
|
| 292 |
|
| 293 |
def _build_recommendations(self, result: AnalysisResult) -> Dict:
|
| 294 |
"""
|
| 295 |
-
Build recommendations
|
| 296 |
"""
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
if (
|
| 300 |
-
return {"action"
|
| 301 |
-
"priority"
|
| 302 |
-
"
|
| 303 |
-
"next_steps" : ["Forensic analysis", "Reverse image search", "Metadata inspection"],
|
| 304 |
-
"confidence" : "Very high likelihood of AI generation",
|
| 305 |
}
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
return {"action"
|
| 309 |
-
"priority"
|
| 310 |
-
"
|
| 311 |
-
"next_steps" : ["Visual inspection", "Compare with authentic samples"],
|
| 312 |
-
"confidence" : "High likelihood of AI generation",
|
| 313 |
}
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
return {"action"
|
| 317 |
-
"priority"
|
| 318 |
-
"
|
| 319 |
-
"next_steps" : ["Verify image source", "Check for inconsistencies"],
|
| 320 |
-
"confidence" : "Moderate indicators present",
|
| 321 |
}
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
return {"action"
|
| 325 |
-
"priority"
|
| 326 |
-
"
|
| 327 |
-
"next_steps" : ["Proceed with normal workflow"],
|
| 328 |
-
"confidence" : "Low likelihood of AI generation",
|
| 329 |
}
|
| 330 |
-
|
| 331 |
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
if (score >= 0.85):
|
| 337 |
-
return "Very high suspicion"
|
| 338 |
-
|
| 339 |
-
elif (score >= 0.70):
|
| 340 |
-
return "High suspicion"
|
| 341 |
-
|
| 342 |
-
elif (score >= 0.50):
|
| 343 |
-
return "Moderate suspicion"
|
| 344 |
-
|
| 345 |
-
elif (score >= 0.30):
|
| 346 |
-
return "Low suspicion"
|
| 347 |
-
|
| 348 |
-
else:
|
| 349 |
-
return "Very low suspicion"
|
|
|
|
| 8 |
from utils.logger import get_logger
|
| 9 |
from config.settings import settings
|
| 10 |
from config.schemas import AnalysisResult
|
| 11 |
+
from config.constants import FinalDecision
|
| 12 |
from utils.helpers import generate_unique_id
|
| 13 |
from config.schemas import BatchAnalysisResult
|
| 14 |
from features.detailed_result_maker import DetailedResultMaker
|
|
|
|
| 20 |
|
| 21 |
class JSONReporter:
|
| 22 |
"""
|
| 23 |
+
JSON report generator
|
| 24 |
+
|
| 25 |
+
Guarantees:
|
| 26 |
+
-----------
|
| 27 |
+
- FinalDecision is authoritative
|
| 28 |
+
- Metrics are informational only
|
| 29 |
+
- Evidence-first interpretation
|
| 30 |
+
- Audit-safe output
|
|
|
|
| 31 |
"""
|
| 32 |
def __init__(self):
|
| 33 |
"""
|
| 34 |
Initialize JSON Reporter
|
| 35 |
"""
|
| 36 |
self.detailed_maker = DetailedResultMaker()
|
| 37 |
+
|
| 38 |
logger.debug("JSONReporter initialized")
|
| 39 |
|
| 40 |
|
| 41 |
def export_batch(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None, include_detailed: bool = True) -> Path:
|
| 42 |
"""
|
| 43 |
Export batch analysis as JSON
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
"""
|
| 45 |
output_dir = output_dir or settings.REPORTS_DIR
|
| 46 |
report_id = generate_unique_id()
|
|
|
|
| 48 |
output_path = output_dir / filename
|
| 49 |
|
| 50 |
output_dir.mkdir(parents = True, exist_ok = True)
|
|
|
|
| 51 |
logger.info(f"Generating batch JSON: {filename}")
|
| 52 |
+
|
| 53 |
try:
|
|
|
|
| 54 |
data = self._build_batch_json(batch_result = batch_result,
|
| 55 |
include_detailed = include_detailed,
|
| 56 |
)
|
| 57 |
+
|
|
|
|
| 58 |
with open(output_path, 'w', encoding = 'utf-8') as f:
|
| 59 |
+
json.dump(obj = data,
|
| 60 |
+
fp = f,
|
| 61 |
+
indent = 4,
|
| 62 |
+
ensure_ascii = False,
|
| 63 |
default = str,
|
| 64 |
)
|
| 65 |
+
|
| 66 |
logger.info(f"Batch JSON generated: {output_path}")
|
| 67 |
return output_path
|
| 68 |
+
|
| 69 |
except Exception as e:
|
| 70 |
logger.error(f"Failed to generate batch JSON: {e}")
|
| 71 |
raise
|
| 72 |
|
| 73 |
|
| 74 |
+
def export_single(self, result: AnalysisResult, output_dir: Optional[Path] = None, include_detailed: bool = True,
|
| 75 |
+
) -> Path:
|
| 76 |
"""
|
| 77 |
Export single image analysis as JSON
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
"""
|
| 79 |
output_dir = output_dir or settings.REPORTS_DIR
|
| 80 |
report_id = generate_unique_id()
|
|
|
|
| 82 |
output_path = output_dir / filename
|
| 83 |
|
| 84 |
output_dir.mkdir(parents = True, exist_ok = True)
|
|
|
|
| 85 |
logger.info(f"Generating single image JSON: {filename}")
|
| 86 |
+
|
| 87 |
try:
|
|
|
|
| 88 |
data = self._build_single_json(result = result,
|
| 89 |
include_detailed = include_detailed,
|
| 90 |
)
|
| 91 |
+
|
|
|
|
| 92 |
with open(output_path, 'w', encoding = 'utf-8') as f:
|
| 93 |
+
json.dump(obj = data,
|
| 94 |
+
fp = f,
|
| 95 |
+
indent = 4,
|
| 96 |
+
ensure_ascii = False,
|
| 97 |
default = str,
|
| 98 |
)
|
| 99 |
+
|
| 100 |
logger.info(f"Single image JSON generated: {output_path}")
|
| 101 |
return output_path
|
| 102 |
+
|
| 103 |
except Exception as e:
|
| 104 |
logger.error(f"Failed to generate single image JSON: {e}")
|
| 105 |
raise
|
|
|
|
| 107 |
|
| 108 |
def export_api_response(self, result: AnalysisResult) -> Dict:
|
| 109 |
"""
|
| 110 |
+
Generate API-friendly JSON response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
"""
|
| 112 |
return {"success" : True,
|
| 113 |
"timestamp" : datetime.now().isoformat(),
|
|
|
|
| 122 |
"""
|
| 123 |
Build complete batch JSON structure
|
| 124 |
"""
|
| 125 |
+
return {"report_metadata" : self._build_metadata(report_type = "Batch Analysis",
|
| 126 |
timestamp = batch_result.timestamp,
|
| 127 |
),
|
| 128 |
+
"batch_summary" : self._build_batch_summary(batch_result),
|
| 129 |
+
"results" : [self._build_image_data(result, include_detailed) for result in batch_result.results],
|
| 130 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
|
| 133 |
def _build_single_json(self, result: AnalysisResult, include_detailed: bool) -> Dict:
|
| 134 |
"""
|
| 135 |
Build single image JSON structure
|
| 136 |
"""
|
| 137 |
+
return {"report_metadata" : self._build_metadata(report_type = "Single Image Analysis",
|
| 138 |
timestamp = result.timestamp,
|
| 139 |
),
|
| 140 |
+
"analysis" : self._build_image_data(result, include_detailed),
|
|
|
|
|
|
|
| 141 |
}
|
|
|
|
|
|
|
| 142 |
|
| 143 |
|
| 144 |
def _build_metadata(self, report_type: str, timestamp: datetime) -> Dict:
|
|
|
|
| 155 |
|
| 156 |
def _build_batch_summary(self, batch_result: BatchAnalysisResult) -> Dict:
    """
    Build the decision-aware batch summary

    Counters come straight from `batch_result`; the success rate and the per-decision
    counts are read from `batch_result.summary`, with missing keys reported as 0.

    Returns:
    --------
        { dict } : Counters, success rate, decision distribution and rounded total processing time
    """
    summary            = batch_result.summary or {}

    # Decisions reported in the distribution, in output order
    reported_decisions = (FinalDecision.CONFIRMED_AI_GENERATED,
                          FinalDecision.SUSPICIOUS_AI_LIKELY,
                          FinalDecision.AUTHENTIC_BUT_REVIEW,
                          FinalDecision.MOSTLY_AUTHENTIC,
                         )

    report             = {"total_images" : batch_result.total_images,
                          "processed"    : batch_result.processed,
                          "failed"       : batch_result.failed,
                          "success_rate" : summary.get("success_rate", 0),
                         }

    distribution       = dict()

    for decision in reported_decisions:
        distribution[decision.value] = summary.get(decision.value, 0)

    report["decision_distribution"] = distribution
    report["total_processing_time"] = round(batch_result.total_processing_time, 2)

    return report
|
|
|
|
| 176 |
|
| 177 |
def _build_image_data(self, result: AnalysisResult, include_detailed: bool) -> Dict:
|
| 178 |
"""
|
| 179 |
+
Build complete image data structure (decision-first)
|
| 180 |
"""
|
| 181 |
+
image_data = {"filename" : result.filename,
|
| 182 |
+
"decision" : {"value" : result.final_decision.value if result.final_decision else None,
|
| 183 |
+
"confidence" : result.confidence,
|
| 184 |
+
"explanation" : result.decision_explanation,
|
| 185 |
+
},
|
| 186 |
+
"overall" : {"score" : round(result.overall_score, 3),
|
| 187 |
+
"note" : "Statistical score (non-authoritative)",
|
| 188 |
+
},
|
| 189 |
+
"image_info" : {"size" : {"width" : result.image_size[0],
|
| 190 |
+
"height" : result.image_size[1],
|
| 191 |
+
},
|
| 192 |
+
"processing_time" : round(result.processing_time, 2),
|
| 193 |
+
"timestamp" : result.timestamp.isoformat(),
|
| 194 |
+
},
|
| 195 |
+
"signals" : self._build_signals_data(result),
|
| 196 |
}
|
| 197 |
+
|
|
|
|
| 198 |
if include_detailed:
|
| 199 |
+
image_data["forensics"] = self._build_forensics_data(result)
|
| 200 |
+
image_data["recommendations"] = self._build_recommendations(result)
|
| 201 |
+
|
| 202 |
return image_data
|
| 203 |
|
| 204 |
|
| 205 |
def _build_signals_data(self, result: AnalysisResult) -> List[Dict]:
|
| 206 |
"""
|
| 207 |
+
Build Tier-1 signal data (informational)
|
| 208 |
"""
|
| 209 |
signals = list()
|
| 210 |
+
|
| 211 |
for signal in result.signals:
|
| 212 |
metric_result = result.metric_results.get(signal.metric_type)
|
| 213 |
+
|
| 214 |
+
signals.append({"metric_name" : signal.name,
|
| 215 |
+
"metric_type" : signal.metric_type.value,
|
| 216 |
+
"score" : round(signal.score, 3),
|
| 217 |
+
"status" : signal.status.value,
|
| 218 |
+
"confidence" : round(metric_result.confidence, 3) if (metric_result and metric_result.confidence is not None) else None,
|
| 219 |
+
"explanation" : signal.explanation,
|
| 220 |
+
})
|
| 221 |
+
|
|
|
|
|
|
|
| 222 |
return signals
|
| 223 |
|
| 224 |
|
| 225 |
def _build_forensics_data(self, result: AnalysisResult) -> Dict:
|
| 226 |
"""
|
| 227 |
+
Build forensic metric details
|
| 228 |
"""
|
| 229 |
forensics = dict()
|
| 230 |
+
|
| 231 |
for metric_type, metric_result in result.metric_results.items():
|
| 232 |
+
forensics[metric_type.value] = {"display_name" : self.detailed_maker.metric_display_names.get(metric_type, metric_type.value),
|
|
|
|
|
|
|
| 233 |
"score" : round(metric_result.score, 3),
|
| 234 |
+
"confidence" : round(metric_result.confidence, 3) if metric_result.confidence is not None else None,
|
| 235 |
"details" : metric_result.details or {},
|
| 236 |
+
"key_findings" : self.detailed_maker.extract_key_findings(metric_type, metric_result),
|
|
|
|
|
|
|
| 237 |
}
|
| 238 |
+
|
| 239 |
return forensics
|
| 240 |
|
| 241 |
|
| 242 |
def _build_recommendations(self, result: AnalysisResult) -> Dict:
    """
    Build recommendations from the final decision only (never from the score)

    Returns:
    --------
        { dict } : "action", "priority" and "next_steps" for the decision, or a
                   'Decision unavailable' entry when the decision matches none of the known values
    """
    # Rebuilt on every call so callers always receive fresh dict / list objects
    playbook = {FinalDecision.CONFIRMED_AI_GENERATED : {"action"     : "Block or flag image immediately",
                                                         "priority"   : "CRITICAL",
                                                         "next_steps" : ["Audit source", "Apply AI-content policy"],
                                                        },
                FinalDecision.SUSPICIOUS_AI_LIKELY   : {"action"     : "Manual review required",
                                                         "priority"   : "HIGH",
                                                         "next_steps" : ["Human inspection", "Cross-check metadata"],
                                                        },
                FinalDecision.AUTHENTIC_BUT_REVIEW   : {"action"     : "Optional human review",
                                                         "priority"   : "MEDIUM",
                                                         "next_steps" : ["Spot-check authenticity"],
                                                        },
                FinalDecision.MOSTLY_AUTHENTIC       : {"action"     : "No action required",
                                                         "priority"   : "LOW",
                                                         "next_steps" : ["Proceed normally"],
                                                        },
               }

    fallback = {"action"     : "Decision unavailable",
                "priority"   : "UNKNOWN",
                "next_steps" : [],
               }

    return playbook.get(result.final_decision, fallback)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
reporter/pdf_reporter.py
CHANGED
|
@@ -1,24 +1,28 @@
|
|
| 1 |
# Dependencies
|
|
|
|
|
|
|
| 2 |
from pathlib import Path
|
| 3 |
-
from typing import Optional
|
| 4 |
from datetime import datetime
|
|
|
|
| 5 |
from utils.logger import get_logger
|
| 6 |
from config.settings import settings
|
| 7 |
-
from reportlab.platypus import Table
|
| 8 |
-
from reportlab.lib import
|
| 9 |
-
from reportlab.
|
| 10 |
-
from reportlab.
|
|
|
|
| 11 |
from reportlab.platypus import TableStyle
|
| 12 |
from config.schemas import AnalysisResult
|
|
|
|
|
|
|
| 13 |
from utils.helpers import generate_unique_id
|
| 14 |
-
from config.constants import
|
| 15 |
from config.schemas import BatchAnalysisResult
|
| 16 |
-
from reportlab.lib.styles import ParagraphStyle
|
| 17 |
from reportlab.platypus import SimpleDocTemplate
|
|
|
|
| 18 |
from features.detailed_result_maker import DetailedResultMaker
|
| 19 |
-
from reportlab.lib.units import inch
|
| 20 |
-
from reportlab.pdfgen import canvas
|
| 21 |
-
import textwrap
|
| 22 |
|
| 23 |
|
| 24 |
# Setup Logging
|
|
@@ -27,817 +31,296 @@ logger = get_logger(__name__)
|
|
| 27 |
|
| 28 |
class PDFReporter:
|
| 29 |
"""
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
---------
|
| 34 |
-
-
|
| 35 |
-
-
|
| 36 |
-
-
|
| 37 |
-
-
|
| 38 |
-
- Professional formatting and layout
|
| 39 |
-
- Statistical summaries and insights
|
| 40 |
"""
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
COLOR_NEUTRAL = colors.HexColor('#424242') # Dark Grey
|
| 49 |
-
COLOR_HEADER_BG = colors.HexColor('#1565C0') # Blue
|
| 50 |
-
COLOR_SUBHEADER_BG = colors.HexColor('#1976D2') # Lighter Blue
|
| 51 |
-
COLOR_ALT_ROW = colors.HexColor('#F5F5F5') # Light Grey
|
| 52 |
-
COLOR_LIGHT_BLUE = colors.HexColor('#E3F2FD') # Very Light Blue
|
| 53 |
-
COLOR_LIGHT_GREEN = colors.HexColor('#E8F5E9') # Very Light Green
|
| 54 |
-
COLOR_LIGHT_ORANGE = colors.HexColor('#FFF3E0') # Very Light Orange
|
| 55 |
-
COLOR_LIGHT_RED = colors.HexColor('#FFEBEE') # Very Light Red
|
| 56 |
-
|
| 57 |
def __init__(self):
|
| 58 |
self.detailed_maker = DetailedResultMaker()
|
| 59 |
-
self.styles
|
| 60 |
-
logger.debug("
|
|
|
|
| 61 |
|
| 62 |
def export_single(self, result: AnalysisResult, output_dir: Optional[Path] = None) -> Path:
|
| 63 |
-
"""
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
|
|
|
| 69 |
output_path = output_dir / filename
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
-
|
| 72 |
|
| 73 |
-
|
| 74 |
-
str(output_path),
|
| 75 |
-
pagesize=LETTER,
|
| 76 |
-
rightMargin=30,
|
| 77 |
-
leftMargin=30,
|
| 78 |
-
topMargin=20,
|
| 79 |
-
bottomMargin=35
|
| 80 |
-
)
|
| 81 |
|
| 82 |
-
story = []
|
| 83 |
-
self._add_professional_header(story, "AI Image Analysis Report")
|
| 84 |
-
self._add_executive_summary_single(story, result)
|
| 85 |
story.append(PageBreak())
|
| 86 |
-
self._add_detailed_metrics_analysis(story, result)
|
| 87 |
-
story.append(PageBreak())
|
| 88 |
-
self._add_forensic_breakdown(story, result)
|
| 89 |
-
self._add_recommendations(story, result)
|
| 90 |
-
self._add_professional_footer(story)
|
| 91 |
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
| 94 |
return output_path
|
| 95 |
|
|
|
|
| 96 |
def export_batch(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
|
| 97 |
-
"""
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
| 103 |
output_path = output_dir / filename
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
)
|
| 116 |
-
|
| 117 |
-
story
|
| 118 |
-
self.
|
| 119 |
-
self._add_batch_executive_summary(story, batch_result)
|
| 120 |
-
story.append(PageBreak())
|
| 121 |
-
self._add_batch_overview_table(story, batch_result.results)
|
| 122 |
-
story.append(PageBreak())
|
| 123 |
-
self._add_batch_metrics_analysis(story, batch_result.results)
|
| 124 |
story.append(PageBreak())
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
return output_path
|
| 132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
def _build_styles(self):
|
| 134 |
-
"""Build comprehensive style definitions"""
|
| 135 |
styles = getSampleStyleSheet()
|
| 136 |
-
|
| 137 |
styles.add(ParagraphStyle(
|
| 138 |
-
name='
|
| 139 |
fontSize=18,
|
|
|
|
| 140 |
textColor=self.COLOR_PRIMARY,
|
| 141 |
-
alignment=TA_CENTER,
|
| 142 |
-
spaceAfter=4,
|
| 143 |
-
spaceBefore=2,
|
| 144 |
-
fontName='Helvetica-Bold'
|
| 145 |
-
))
|
| 146 |
-
|
| 147 |
-
styles.add(ParagraphStyle(
|
| 148 |
-
name='ReportSubtitle',
|
| 149 |
-
fontSize=10,
|
| 150 |
-
textColor=self.COLOR_NEUTRAL,
|
| 151 |
-
alignment=TA_CENTER,
|
| 152 |
-
spaceAfter=6,
|
| 153 |
-
fontName='Helvetica'
|
| 154 |
-
))
|
| 155 |
-
|
| 156 |
-
styles.add(ParagraphStyle(
|
| 157 |
-
name='SectionTitle',
|
| 158 |
-
fontSize=13,
|
| 159 |
-
textColor=self.COLOR_PRIMARY,
|
| 160 |
-
spaceBefore=10,
|
| 161 |
-
spaceAfter=6,
|
| 162 |
fontName='Helvetica-Bold'
|
| 163 |
))
|
| 164 |
-
|
| 165 |
-
styles.add(ParagraphStyle(
|
| 166 |
-
name='SectionHeader',
|
| 167 |
-
fontSize=11,
|
| 168 |
-
textColor=self.COLOR_PRIMARY,
|
| 169 |
-
spaceBefore=8,
|
| 170 |
-
spaceAfter=5,
|
| 171 |
-
fontName='Helvetica-Bold'
|
| 172 |
-
))
|
| 173 |
-
|
| 174 |
-
styles.add(ParagraphStyle(
|
| 175 |
-
name='SubHeader',
|
| 176 |
-
fontSize=9.5,
|
| 177 |
-
textColor=self.COLOR_PRIMARY,
|
| 178 |
-
spaceBefore=5,
|
| 179 |
-
spaceAfter=3,
|
| 180 |
-
fontName='Helvetica-Bold'
|
| 181 |
-
))
|
| 182 |
-
|
| 183 |
styles.add(ParagraphStyle(
|
| 184 |
-
name='
|
| 185 |
-
fontSize=
|
| 186 |
-
|
| 187 |
-
alignment=TA_JUSTIFY,
|
| 188 |
spaceAfter=6
|
| 189 |
))
|
| 190 |
-
|
| 191 |
styles.add(ParagraphStyle(
|
| 192 |
-
name='
|
| 193 |
fontSize=8,
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
styles.add(ParagraphStyle(
|
| 198 |
-
name='TableCellSmall',
|
| 199 |
-
fontSize=7.5,
|
| 200 |
-
leading=9
|
| 201 |
))
|
| 202 |
-
|
| 203 |
styles.add(ParagraphStyle(
|
| 204 |
-
name='
|
| 205 |
-
fontSize=
|
| 206 |
-
textColor=colors.white,
|
| 207 |
fontName='Helvetica-Bold',
|
| 208 |
-
|
| 209 |
-
|
| 210 |
))
|
| 211 |
-
|
| 212 |
styles.add(ParagraphStyle(
|
| 213 |
-
name='
|
| 214 |
-
fontSize=
|
| 215 |
-
|
| 216 |
-
alignment=TA_CENTER,
|
| 217 |
-
spaceAfter=2
|
| 218 |
))
|
| 219 |
-
|
| 220 |
styles.add(ParagraphStyle(
|
| 221 |
-
name='
|
| 222 |
-
fontSize=
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
spaceAfter=8
|
| 226 |
))
|
| 227 |
-
|
| 228 |
-
return styles
|
| 229 |
|
| 230 |
-
|
| 231 |
-
"""Add professional watermark"""
|
| 232 |
-
canvas.saveState()
|
| 233 |
-
canvas.setFont('Helvetica-Bold', 70)
|
| 234 |
-
canvas.setFillColorRGB(0.85, 0.85, 0.85, alpha=0.15)
|
| 235 |
-
canvas.rotate(45)
|
| 236 |
-
canvas.drawString(2.5*inch, -0.5*inch, "AI IMAGE SCREENER")
|
| 237 |
-
canvas.restoreState()
|
| 238 |
-
|
| 239 |
-
def _add_professional_header(self, story, title: str):
|
| 240 |
-
"""Professional header with branding"""
|
| 241 |
-
story.append(Paragraph("🔍 AI IMAGE SCREENER", self.styles['ReportTitle']))
|
| 242 |
-
story.append(Spacer(1, 3))
|
| 243 |
-
|
| 244 |
-
timestamp_text = f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} | Version: {settings.VERSION}"
|
| 245 |
-
story.append(Paragraph(timestamp_text, self.styles['Timestamp']))
|
| 246 |
-
|
| 247 |
-
story.append(Paragraph(title, self.styles['SectionTitle']))
|
| 248 |
-
story.append(Spacer(1, 10))
|
| 249 |
-
|
| 250 |
-
def _add_executive_summary_single(self, story, result: AnalysisResult):
|
| 251 |
-
"""Executive summary for single image"""
|
| 252 |
-
story.append(Paragraph("Executive Summary", self.styles['SectionTitle']))
|
| 253 |
-
story.append(Spacer(1, 5))
|
| 254 |
-
|
| 255 |
-
# Key findings box
|
| 256 |
-
status_color = self.COLOR_DANGER if result.status == DetectionStatus.REVIEW_REQUIRED else self.COLOR_SUCCESS
|
| 257 |
-
status_bg = self.COLOR_LIGHT_RED if result.status == DetectionStatus.REVIEW_REQUIRED else self.COLOR_LIGHT_GREEN
|
| 258 |
-
status_text = "⚠️ REVIEW REQUIRED" if result.status == DetectionStatus.REVIEW_REQUIRED else "✅ LIKELY AUTHENTIC"
|
| 259 |
-
|
| 260 |
-
key_findings = [
|
| 261 |
-
[Paragraph("<b>Overall Assessment</b>", self.styles['TableHeader'])],
|
| 262 |
-
[Paragraph(f"<font size=12 color='{status_color.hexval()}'><b>{status_text}</b></font>", self.styles['CustomBodyText'])],
|
| 263 |
-
[Paragraph(f"<b>Confidence:</b> {result.confidence}%", self.styles['CustomBodyText'])],
|
| 264 |
-
[Paragraph(f"<b>Overall Score:</b> {result.overall_score:.4f}", self.styles['CustomBodyText'])]
|
| 265 |
-
]
|
| 266 |
-
|
| 267 |
-
findings_table = Table(key_findings, colWidths=[530])
|
| 268 |
-
findings_table.setStyle(TableStyle([
|
| 269 |
-
('BACKGROUND', (0, 0), (-1, 0), self.COLOR_INFO),
|
| 270 |
-
('BACKGROUND', (0, 1), (-1, -1), status_bg),
|
| 271 |
-
('ALIGN', (0, 0), (-1, -1), 'CENTER'),
|
| 272 |
-
('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
|
| 273 |
-
('LEFTPADDING', (0, 0), (-1, -1), 12),
|
| 274 |
-
('RIGHTPADDING', (0, 0), (-1, -1), 12),
|
| 275 |
-
('TOPPADDING', (0, 0), (-1, -1), 8),
|
| 276 |
-
('BOTTOMPADDING', (0, 0), (-1, -1), 8),
|
| 277 |
-
('BOX', (0, 0), (-1, -1), 1.5, self.COLOR_PRIMARY)
|
| 278 |
-
]))
|
| 279 |
-
story.append(findings_table)
|
| 280 |
-
story.append(Spacer(1, 12))
|
| 281 |
-
|
| 282 |
-
# Image information
|
| 283 |
-
story.append(Paragraph("Image Information", self.styles['SectionHeader']))
|
| 284 |
-
|
| 285 |
-
info_data = [
|
| 286 |
-
[Paragraph("<b>Property</b>", self.styles['TableHeader']),
|
| 287 |
-
Paragraph("<b>Value</b>", self.styles['TableHeader'])],
|
| 288 |
-
[Paragraph("Filename", self.styles['TableCell']),
|
| 289 |
-
Paragraph(result.filename, self.styles['TableCell'])],
|
| 290 |
-
[Paragraph("Dimensions", self.styles['TableCell']),
|
| 291 |
-
Paragraph(f"{result.image_size[0]} × {result.image_size[1]} pixels", self.styles['TableCell'])],
|
| 292 |
-
[Paragraph("Aspect Ratio", self.styles['TableCell']),
|
| 293 |
-
Paragraph(f"{result.image_size[0]/result.image_size[1]:.2f}:1", self.styles['TableCell'])],
|
| 294 |
-
[Paragraph("Processing Time", self.styles['TableCell']),
|
| 295 |
-
Paragraph(f"{result.processing_time:.3f} seconds", self.styles['TableCell'])],
|
| 296 |
-
[Paragraph("Analysis Date", self.styles['TableCell']),
|
| 297 |
-
Paragraph(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), self.styles['TableCell'])]
|
| 298 |
-
]
|
| 299 |
-
|
| 300 |
-
info_table = Table(info_data, colWidths=[180, 350])
|
| 301 |
-
info_table.setStyle(self._get_standard_table_style(len(info_data)))
|
| 302 |
-
story.append(info_table)
|
| 303 |
-
story.append(Spacer(1, 12))
|
| 304 |
-
|
| 305 |
-
# Detection signals summary
|
| 306 |
-
story.append(Paragraph("Detection Signals Summary", self.styles['SectionHeader']))
|
| 307 |
-
|
| 308 |
-
flagged = sum(1 for s in result.signals if s.status.value == 'flagged')
|
| 309 |
-
warning = sum(1 for s in result.signals if s.status.value == 'warning')
|
| 310 |
-
passed = sum(1 for s in result.signals if s.status.value == 'passed')
|
| 311 |
-
|
| 312 |
-
signals_data = [
|
| 313 |
-
[Paragraph("<b>Status</b>", self.styles['TableHeader']),
|
| 314 |
-
Paragraph("<b>Count</b>", self.styles['TableHeader']),
|
| 315 |
-
Paragraph("<b>Percentage</b>", self.styles['TableHeader'])],
|
| 316 |
-
[Paragraph("🔴 Flagged", self.styles['TableCell']),
|
| 317 |
-
Paragraph(f"<font color='red'><b>{flagged}</b></font>", self.styles['TableCell']),
|
| 318 |
-
Paragraph(f"{flagged/len(result.signals)*100:.1f}%", self.styles['TableCell'])],
|
| 319 |
-
[Paragraph("🟡 Warning", self.styles['TableCell']),
|
| 320 |
-
Paragraph(f"<font color='orange'><b>{warning}</b></font>", self.styles['TableCell']),
|
| 321 |
-
Paragraph(f"{warning/len(result.signals)*100:.1f}%", self.styles['TableCell'])],
|
| 322 |
-
[Paragraph("🟢 Passed", self.styles['TableCell']),
|
| 323 |
-
Paragraph(f"<font color='green'><b>{passed}</b></font>", self.styles['TableCell']),
|
| 324 |
-
Paragraph(f"{passed/len(result.signals)*100:.1f}%", self.styles['TableCell'])]
|
| 325 |
-
]
|
| 326 |
-
|
| 327 |
-
signals_table = Table(signals_data, colWidths=[200, 165, 165])
|
| 328 |
-
signals_table.setStyle(self._get_standard_table_style(len(signals_data)))
|
| 329 |
-
story.append(signals_table)
|
| 330 |
-
|
| 331 |
-
def _add_detailed_metrics_analysis(self, story, result: AnalysisResult):
|
| 332 |
-
"""Comprehensive metrics analysis"""
|
| 333 |
-
story.append(Paragraph("Detailed Metrics Analysis", self.styles['SectionTitle']))
|
| 334 |
-
story.append(Spacer(1, 8))
|
| 335 |
-
|
| 336 |
-
# All detection signals with full details
|
| 337 |
-
story.append(Paragraph("Detection Signals Breakdown", self.styles['SectionHeader']))
|
| 338 |
-
|
| 339 |
-
signal_data = [
|
| 340 |
-
[Paragraph("<b>Metric</b>", self.styles['TableHeader']),
|
| 341 |
-
Paragraph("<b>Score</b>", self.styles['TableHeader']),
|
| 342 |
-
Paragraph("<b>Status</b>", self.styles['TableHeader']),
|
| 343 |
-
Paragraph("<b>Explanation</b>", self.styles['TableHeader'])]
|
| 344 |
-
]
|
| 345 |
-
|
| 346 |
-
for signal in result.signals:
|
| 347 |
-
status_badge = self._get_status_badge_html(signal.status.value)
|
| 348 |
-
|
| 349 |
-
# Wrap long explanations
|
| 350 |
-
explanation = signal.explanation
|
| 351 |
-
if len(explanation) > 120:
|
| 352 |
-
explanation = explanation[:120] + "..."
|
| 353 |
-
|
| 354 |
-
signal_data.append([
|
| 355 |
-
Paragraph(f"<b>{signal.name}</b>", self.styles['TableCell']),
|
| 356 |
-
Paragraph(f"{signal.score:.4f}", self.styles['TableCell']),
|
| 357 |
-
Paragraph(status_badge, self.styles['TableCell']),
|
| 358 |
-
Paragraph(explanation, self.styles['TableCellSmall'])
|
| 359 |
-
])
|
| 360 |
-
|
| 361 |
-
signal_table = Table(signal_data, colWidths=[120, 60, 80, 270])
|
| 362 |
-
signal_table.setStyle(self._get_signal_table_style(len(signal_data)))
|
| 363 |
-
story.append(signal_table)
|
| 364 |
-
|
| 365 |
-
def _add_forensic_breakdown(self, story, result: AnalysisResult):
|
| 366 |
-
"""Detailed forensic analysis breakdown"""
|
| 367 |
-
story.append(Paragraph("Forensic Analysis Breakdown", self.styles['SectionTitle']))
|
| 368 |
-
story.append(Spacer(1, 8))
|
| 369 |
-
|
| 370 |
-
for metric_type, metric_result in result.metric_results.items():
|
| 371 |
-
metric_name = self.detailed_maker.metric_display_names.get(metric_type, metric_type.value)
|
| 372 |
-
details = metric_result.details or {}
|
| 373 |
-
|
| 374 |
-
# Skip if error
|
| 375 |
-
if 'error' in details:
|
| 376 |
-
continue
|
| 377 |
-
|
| 378 |
-
story.append(Paragraph(metric_name, self.styles['SectionHeader']))
|
| 379 |
-
|
| 380 |
-
# Metric overview
|
| 381 |
-
overview_data = [
|
| 382 |
-
[Paragraph("<b>Property</b>", self.styles['TableHeader']),
|
| 383 |
-
Paragraph("<b>Value</b>", self.styles['TableHeader'])],
|
| 384 |
-
[Paragraph("Score", self.styles['TableCell']),
|
| 385 |
-
Paragraph(f"<b>{metric_result.score:.4f}</b>", self.styles['TableCell'])],
|
| 386 |
-
[Paragraph("Confidence", self.styles['TableCell']),
|
| 387 |
-
Paragraph(f"{metric_result.confidence:.4f}" if metric_result.confidence else "N/A", self.styles['TableCell'])],
|
| 388 |
-
[Paragraph("Status", self.styles['TableCell']),
|
| 389 |
-
Paragraph(self._get_metric_status_html(metric_result.score), self.styles['TableCell'])]
|
| 390 |
-
]
|
| 391 |
-
|
| 392 |
-
overview_table = Table(overview_data, colWidths=[130, 400])
|
| 393 |
-
overview_table.setStyle(self._get_standard_table_style(len(overview_data)))
|
| 394 |
-
story.append(overview_table)
|
| 395 |
-
story.append(Spacer(1, 5))
|
| 396 |
-
|
| 397 |
-
# Detailed parameters
|
| 398 |
-
if details and len(details) > 0:
|
| 399 |
-
story.append(Paragraph("Detailed Parameters:", self.styles['SubHeader']))
|
| 400 |
-
|
| 401 |
-
param_data = [[Paragraph("<b>Parameter</b>", self.styles['TableHeader']),
|
| 402 |
-
Paragraph("<b>Value</b>", self.styles['TableHeader'])]]
|
| 403 |
-
|
| 404 |
-
for key, value in details.items():
|
| 405 |
-
if key in ['error', 'reason']:
|
| 406 |
-
continue
|
| 407 |
-
|
| 408 |
-
if isinstance(value, dict):
|
| 409 |
-
for sub_key, sub_value in value.items():
|
| 410 |
-
if sub_key not in ['reason', 'error']:
|
| 411 |
-
formatted_value = self._format_value(sub_value)
|
| 412 |
-
param_data.append([
|
| 413 |
-
Paragraph(f" └─ {sub_key}", self.styles['TableCellSmall']),
|
| 414 |
-
Paragraph(formatted_value, self.styles['TableCellSmall'])
|
| 415 |
-
])
|
| 416 |
-
else:
|
| 417 |
-
formatted_value = self._format_value(value)
|
| 418 |
-
param_data.append([
|
| 419 |
-
Paragraph(key, self.styles['TableCell']),
|
| 420 |
-
Paragraph(formatted_value, self.styles['TableCell'])
|
| 421 |
-
])
|
| 422 |
-
|
| 423 |
-
param_table = Table(param_data, colWidths=[200, 330])
|
| 424 |
-
param_table.setStyle(TableStyle([
|
| 425 |
-
('BACKGROUND', (0, 0), (-1, 0), self.COLOR_SUBHEADER_BG),
|
| 426 |
-
('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
|
| 427 |
-
('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
|
| 428 |
-
('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, self.COLOR_ALT_ROW]),
|
| 429 |
-
('VALIGN', (0, 0), (-1, -1), 'TOP'),
|
| 430 |
-
('LEFTPADDING', (0, 0), (-1, -1), 8),
|
| 431 |
-
('RIGHTPADDING', (0, 0), (-1, -1), 8),
|
| 432 |
-
('TOPPADDING', (0, 0), (-1, -1), 4),
|
| 433 |
-
('BOTTOMPADDING', (0, 0), (-1, -1), 4)
|
| 434 |
-
]))
|
| 435 |
-
story.append(param_table)
|
| 436 |
-
|
| 437 |
-
story.append(Spacer(1, 8))
|
| 438 |
-
|
| 439 |
-
def _add_recommendations(self, story, result: AnalysisResult):
|
| 440 |
-
"""Add actionable recommendations"""
|
| 441 |
-
story.append(Paragraph("Recommendations & Next Steps", self.styles['SectionTitle']))
|
| 442 |
-
story.append(Spacer(1, 8))
|
| 443 |
-
|
| 444 |
-
if result.status == DetectionStatus.REVIEW_REQUIRED:
|
| 445 |
-
rec_text = """
|
| 446 |
-
<b>⚠️ MANUAL REVIEW REQUIRED</b><br/>
|
| 447 |
-
This image has been flagged for manual review based on multiple detection signals.
|
| 448 |
-
Recommended actions:<br/>
|
| 449 |
-
• Conduct visual inspection by trained personnel<br/>
|
| 450 |
-
• Cross-reference with source verification<br/>
|
| 451 |
-
• Consider additional forensic analysis if high stakes<br/>
|
| 452 |
-
• Document findings for audit trail
|
| 453 |
-
"""
|
| 454 |
-
rec_color = self.COLOR_LIGHT_RED
|
| 455 |
-
border_color = self.COLOR_DANGER
|
| 456 |
-
else:
|
| 457 |
-
rec_text = """
|
| 458 |
-
<b>✅ NO IMMEDIATE ACTION REQUIRED</b><br/>
|
| 459 |
-
This image appears to be authentic based on current analysis. However:<br/>
|
| 460 |
-
• Continue monitoring for evolving AI techniques<br/>
|
| 461 |
-
• Consider periodic re-screening for critical assets<br/>
|
| 462 |
-
• Maintain chain of custody documentation<br/>
|
| 463 |
-
• Stay updated on latest detection methodologies
|
| 464 |
-
"""
|
| 465 |
-
rec_color = self.COLOR_LIGHT_GREEN
|
| 466 |
-
border_color = self.COLOR_SUCCESS
|
| 467 |
-
|
| 468 |
-
rec_table = Table([[Paragraph(rec_text, self.styles['CustomBodyText'])]], colWidths=[530])
|
| 469 |
-
rec_table.setStyle(TableStyle([
|
| 470 |
-
('BACKGROUND', (0, 0), (-1, -1), rec_color),
|
| 471 |
-
('BOX', (0, 0), (-1, -1), 2, border_color),
|
| 472 |
-
('LEFTPADDING', (0, 0), (-1, -1), 15),
|
| 473 |
-
('RIGHTPADDING', (0, 0), (-1, -1), 15),
|
| 474 |
-
('TOPPADDING', (0, 0), (-1, -1), 12),
|
| 475 |
-
('BOTTOMPADDING', (0, 0), (-1, -1), 12)
|
| 476 |
-
]))
|
| 477 |
-
story.append(rec_table)
|
| 478 |
-
|
| 479 |
-
def _add_batch_executive_summary(self, story, batch_result: BatchAnalysisResult):
|
| 480 |
-
"""Executive summary for batch analysis"""
|
| 481 |
-
story.append(Paragraph("Executive Summary", self.styles['SectionTitle']))
|
| 482 |
-
story.append(Spacer(1, 8))
|
| 483 |
-
|
| 484 |
-
# Key metrics
|
| 485 |
-
summary_data = [
|
| 486 |
-
[Paragraph("<b>Metric</b>", self.styles['TableHeader']),
|
| 487 |
-
Paragraph("<b>Value</b>", self.styles['TableHeader']),
|
| 488 |
-
Paragraph("<b>Details</b>", self.styles['TableHeader'])],
|
| 489 |
-
[Paragraph("Total Images", self.styles['TableCell']),
|
| 490 |
-
Paragraph(f"<b>{batch_result.total_images}</b>", self.styles['TableCell']),
|
| 491 |
-
Paragraph("Images submitted for analysis", self.styles['TableCellSmall'])],
|
| 492 |
-
[Paragraph("Successfully Processed", self.styles['TableCell']),
|
| 493 |
-
Paragraph(f"<font color='green'><b>{batch_result.processed}</b></font>", self.styles['TableCell']),
|
| 494 |
-
Paragraph(f"{batch_result.summary.get('success_rate', 0)}% success rate", self.styles['TableCellSmall'])],
|
| 495 |
-
[Paragraph("Failed", self.styles['TableCell']),
|
| 496 |
-
Paragraph(f"<font color='red'><b>{batch_result.failed}</b></font>", self.styles['TableCell']),
|
| 497 |
-
Paragraph("Processing errors encountered", self.styles['TableCellSmall'])],
|
| 498 |
-
[Paragraph("Likely Authentic", self.styles['TableCell']),
|
| 499 |
-
Paragraph(f"<font color='green'><b>{batch_result.summary.get('likely_authentic', 0)}</b></font>", self.styles['TableCell']),
|
| 500 |
-
Paragraph("Images passing authenticity checks", self.styles['TableCellSmall'])],
|
| 501 |
-
[Paragraph("Review Required", self.styles['TableCell']),
|
| 502 |
-
Paragraph(f"<font color='red'><b>{batch_result.summary.get('review_required', 0)}</b></font>", self.styles['TableCell']),
|
| 503 |
-
Paragraph("Images flagged for manual review", self.styles['TableCellSmall'])],
|
| 504 |
-
[Paragraph("Average Score", self.styles['TableCell']),
|
| 505 |
-
Paragraph(f"<b>{batch_result.summary.get('avg_score', 0):.4f}</b>", self.styles['TableCell']),
|
| 506 |
-
Paragraph("Mean authenticity score across batch", self.styles['TableCellSmall'])],
|
| 507 |
-
[Paragraph("Average Processing Time", self.styles['TableCell']),
|
| 508 |
-
Paragraph(f"<b>{batch_result.summary.get('avg_proc_time', 0):.3f}s</b>", self.styles['TableCell']),
|
| 509 |
-
Paragraph("Per-image processing duration", self.styles['TableCellSmall'])],
|
| 510 |
-
]
|
| 511 |
-
|
| 512 |
-
summary_table = Table(summary_data, colWidths=[150, 130, 250])
|
| 513 |
-
summary_table.setStyle(self._get_standard_table_style(len(summary_data)))
|
| 514 |
-
story.append(summary_table)
|
| 515 |
-
|
| 516 |
-
def _add_batch_overview_table(self, story, results: List[AnalysisResult]):
|
| 517 |
-
"""Comprehensive batch overview"""
|
| 518 |
-
story.append(Paragraph("Batch Overview Matrix", self.styles['SectionTitle']))
|
| 519 |
-
story.append(Spacer(1, 8))
|
| 520 |
-
|
| 521 |
-
header = [
|
| 522 |
-
Paragraph("<b>#</b>", self.styles['TableHeader']),
|
| 523 |
-
Paragraph("<b>Filename</b>", self.styles['TableHeader']),
|
| 524 |
-
Paragraph("<b>Image Size</b>", self.styles['TableHeader']),
|
| 525 |
-
Paragraph("<b>Score</b>", self.styles['TableHeader']),
|
| 526 |
-
Paragraph("<b>Status</b>", self.styles['TableHeader']),
|
| 527 |
-
Paragraph("<b>Top Signal</b>", self.styles['TableHeader']),
|
| 528 |
-
Paragraph("<b>Time(s)</b>", self.styles['TableHeader'])
|
| 529 |
-
]
|
| 530 |
-
|
| 531 |
-
data = [header]
|
| 532 |
-
|
| 533 |
-
for idx, result in enumerate(results, 1):
|
| 534 |
-
top_signal = max(result.signals, key=lambda s: s.score)
|
| 535 |
-
status_badge = self._get_status_badge_short(result.status.value)
|
| 536 |
-
|
| 537 |
-
data.append([
|
| 538 |
-
Paragraph(str(idx), self.styles['TableCell']),
|
| 539 |
-
Paragraph(result.filename, self.styles['TableCellSmall']),
|
| 540 |
-
Paragraph(f"{result.image_size[0]}×{result.image_size[1]}", self.styles['TableCellSmall']),
|
| 541 |
-
Paragraph(f"<b>{result.overall_score:.3f}</b>", self.styles['TableCell']),
|
| 542 |
-
Paragraph(status_badge, self.styles['TableCellSmall']),
|
| 543 |
-
Paragraph(f"{top_signal.name}: {top_signal.score:.2f}", self.styles['TableCellSmall']),
|
| 544 |
-
Paragraph(f"{result.processing_time:.2f}", self.styles['TableCell'])
|
| 545 |
-
])
|
| 546 |
-
|
| 547 |
-
table = Table(data, colWidths=[25, 155, 65, 50, 70, 120, 45])
|
| 548 |
-
table.setStyle(self._get_pivot_table_style(len(data)))
|
| 549 |
-
story.append(table)
|
| 550 |
|
| 551 |
-
def _add_batch_metrics_analysis(self, story, results: List[AnalysisResult]):
|
| 552 |
-
"""Detailed metrics analysis for batch"""
|
| 553 |
-
story.append(Paragraph("Metric-wise Analysis", self.styles['SectionTitle']))
|
| 554 |
-
story.append(Spacer(1, 8))
|
| 555 |
-
|
| 556 |
-
metric_configs = {
|
| 557 |
-
'gradient': {
|
| 558 |
-
'name': 'Gradient-Field PCA Analysis',
|
| 559 |
-
'keys': ['eigenvalue_ratio', 'gradient_vectors_sampled'],
|
| 560 |
-
'labels': ['Eigenvalue\nRatio', 'Vectors\nSampled']
|
| 561 |
-
},
|
| 562 |
-
'frequency': {
|
| 563 |
-
'name': 'Frequency Domain Analysis (FFT)',
|
| 564 |
-
'keys': ['hf_ratio', 'roughness', 'spectral_deviation'],
|
| 565 |
-
'labels': ['HF Ratio', 'Roughness', 'Spec.\nDeviation']
|
| 566 |
-
},
|
| 567 |
-
'noise': {
|
| 568 |
-
'name': 'Noise Pattern Analysis',
|
| 569 |
-
'keys': ['mean_noise', 'cv', 'patches_valid'],
|
| 570 |
-
'labels': ['Mean Noise', 'CV', 'Patches\nValid']
|
| 571 |
-
},
|
| 572 |
-
'texture': {
|
| 573 |
-
'name': 'Texture Statistical Analysis',
|
| 574 |
-
'keys': ['smooth_ratio', 'contrast_mean', 'entropy_mean'],
|
| 575 |
-
'labels': ['Smooth\nRatio', 'Mean\nContrast', 'Mean\nEntropy']
|
| 576 |
-
},
|
| 577 |
-
'color': {
|
| 578 |
-
'name': 'Color Distribution Analysis',
|
| 579 |
-
'keys': ['saturation_stats.mean_saturation', 'saturation_stats.high_sat_ratio'],
|
| 580 |
-
'labels': ['Mean\nSaturation', 'High Saturation\nRatio']
|
| 581 |
-
}
|
| 582 |
-
}
|
| 583 |
-
|
| 584 |
-
for metric_key, config in metric_configs.items():
|
| 585 |
-
story.append(Paragraph(config['name'], self.styles['SectionHeader']))
|
| 586 |
-
|
| 587 |
-
# Build header
|
| 588 |
-
header = [
|
| 589 |
-
Paragraph("<b>#</b>", self.styles['TableHeader']),
|
| 590 |
-
Paragraph("<b>Filename</b>", self.styles['TableHeader']),
|
| 591 |
-
Paragraph("<b>Score</b>", self.styles['TableHeader']),
|
| 592 |
-
Paragraph("<b>Confidence</b>", self.styles['TableHeader'])
|
| 593 |
-
]
|
| 594 |
-
|
| 595 |
-
for label in config['labels']:
|
| 596 |
-
header.append(Paragraph(f"<b>{label}</b>", self.styles['TableHeader']))
|
| 597 |
-
|
| 598 |
-
data = [header]
|
| 599 |
-
|
| 600 |
-
for idx, result in enumerate(results, 1):
|
| 601 |
-
metric_result = result.metric_results.get(metric_key)
|
| 602 |
-
if not metric_result:
|
| 603 |
-
continue
|
| 604 |
-
|
| 605 |
-
details = metric_result.details or {}
|
| 606 |
-
|
| 607 |
-
row = [
|
| 608 |
-
Paragraph(str(idx), self.styles['TableCellSmall']),
|
| 609 |
-
Paragraph(result.filename, self.styles['TableCellSmall']),
|
| 610 |
-
Paragraph(f"<b>{metric_result.score:.3f}</b>", self.styles['TableCellSmall']),
|
| 611 |
-
Paragraph(f"{metric_result.confidence:.2f}" if metric_result.confidence else "N/A",
|
| 612 |
-
self.styles['TableCellSmall'])
|
| 613 |
-
]
|
| 614 |
-
|
| 615 |
-
# Extract values
|
| 616 |
-
for key in config['keys']:
|
| 617 |
-
value = self._extract_nested_value(details, key)
|
| 618 |
-
formatted_value = self._format_value(value, decimal_places=3)
|
| 619 |
-
row.append(Paragraph(formatted_value, self.styles['TableCellSmall']))
|
| 620 |
-
|
| 621 |
-
data.append(row)
|
| 622 |
-
|
| 623 |
-
# Dynamic column widths
|
| 624 |
-
num_detail_cols = len(config['labels'])
|
| 625 |
-
detail_col_width = (530 - 25 - 140 - 45 - 35) // num_detail_cols
|
| 626 |
-
col_widths = [25, 140, 45, 35] + [detail_col_width] * num_detail_cols
|
| 627 |
-
|
| 628 |
-
table = Table(data, colWidths=col_widths)
|
| 629 |
-
table.setStyle(self._get_pivot_table_style(len(data)))
|
| 630 |
-
story.append(table)
|
| 631 |
-
story.append(Spacer(1, 10))
|
| 632 |
-
|
| 633 |
-
def _add_individual_results_summary(self, story, results: List[AnalysisResult]):
|
| 634 |
-
"""Individual image summaries in batch"""
|
| 635 |
-
story.append(Paragraph("Individual Image Summaries", self.styles['SectionTitle']))
|
| 636 |
-
story.append(Spacer(1, 8))
|
| 637 |
-
|
| 638 |
-
for idx, result in enumerate(results, 1):
|
| 639 |
-
if idx > 1:
|
| 640 |
-
story.append(Spacer(1, 12))
|
| 641 |
-
|
| 642 |
-
story.append(Paragraph(f"Image {idx}: {result.filename}", self.styles['SectionHeader']))
|
| 643 |
-
|
| 644 |
-
# Quick stats
|
| 645 |
-
quick_data = [
|
| 646 |
-
[Paragraph("<b>Property</b>", self.styles['TableHeader']),
|
| 647 |
-
Paragraph("<b>Value</b>", self.styles['TableHeader'])],
|
| 648 |
-
[Paragraph("Score", self.styles['TableCell']),
|
| 649 |
-
Paragraph(f"<b>{result.overall_score:.4f}</b>", self.styles['TableCell'])],
|
| 650 |
-
[Paragraph("Status", self.styles['TableCell']),
|
| 651 |
-
Paragraph(self._get_status_badge_html(result.status.value), self.styles['TableCell'])],
|
| 652 |
-
[Paragraph("Confidence", self.styles['TableCell']),
|
| 653 |
-
Paragraph(f"{result.confidence}%", self.styles['TableCell'])],
|
| 654 |
-
[Paragraph("Dimensions", self.styles['TableCell']),
|
| 655 |
-
Paragraph(f"{result.image_size[0]} × {result.image_size[1]}", self.styles['TableCell'])],
|
| 656 |
-
]
|
| 657 |
-
|
| 658 |
-
quick_table = Table(quick_data, colWidths=[120, 410])
|
| 659 |
-
quick_table.setStyle(self._get_standard_table_style(len(quick_data)))
|
| 660 |
-
story.append(quick_table)
|
| 661 |
-
story.append(Spacer(1, 5))
|
| 662 |
-
|
| 663 |
-
# Top 3 signals
|
| 664 |
-
story.append(Paragraph("Top Detection Signals:", self.styles['SubHeader']))
|
| 665 |
-
|
| 666 |
-
top_signals = sorted(result.signals, key=lambda s: s.score, reverse=True)[:3]
|
| 667 |
-
signal_data = [[
|
| 668 |
-
Paragraph("<b>Signal</b>", self.styles['TableHeader']),
|
| 669 |
-
Paragraph("<b>Score</b>", self.styles['TableHeader']),
|
| 670 |
-
Paragraph("<b>Status</b>", self.styles['TableHeader'])
|
| 671 |
-
]]
|
| 672 |
-
|
| 673 |
-
for signal in top_signals:
|
| 674 |
-
signal_data.append([
|
| 675 |
-
Paragraph(signal.name, self.styles['TableCellSmall']),
|
| 676 |
-
Paragraph(f"{signal.score:.3f}", self.styles['TableCellSmall']),
|
| 677 |
-
Paragraph(self._get_status_badge_html(signal.status.value), self.styles['TableCellSmall'])
|
| 678 |
-
])
|
| 679 |
-
|
| 680 |
-
signal_table = Table(signal_data, colWidths=[200, 165, 165])
|
| 681 |
-
signal_table.setStyle(self._get_standard_table_style(len(signal_data)))
|
| 682 |
-
story.append(signal_table)
|
| 683 |
-
|
| 684 |
-
def _add_batch_recommendations(self, story, batch_result: BatchAnalysisResult):
|
| 685 |
-
"""Batch-level recommendations"""
|
| 686 |
-
story.append(Paragraph("Batch Analysis Recommendations", self.styles['SectionTitle']))
|
| 687 |
-
story.append(Spacer(1, 8))
|
| 688 |
-
|
| 689 |
-
review_count = batch_result.summary.get('review_required', 0)
|
| 690 |
-
total = batch_result.total_images
|
| 691 |
-
|
| 692 |
-
if review_count > 0:
|
| 693 |
-
rec_text = f"""
|
| 694 |
-
<b>⚠️ ACTION REQUIRED</b><br/>
|
| 695 |
-
{review_count} out of {total} images require manual review ({review_count/total*100:.1f}%).<br/>
|
| 696 |
-
<br/>
|
| 697 |
-
<b>Recommended Actions:</b><br/>
|
| 698 |
-
• Prioritize high-risk images for immediate review<br/>
|
| 699 |
-
• Assign qualified personnel for verification<br/>
|
| 700 |
-
• Document review findings and decisions<br/>
|
| 701 |
-
• Consider additional forensic analysis for flagged images<br/>
|
| 702 |
-
• Update screening protocols based on findings
|
| 703 |
-
"""
|
| 704 |
-
rec_color = self.COLOR_LIGHT_ORANGE
|
| 705 |
-
border_color = self.COLOR_WARNING
|
| 706 |
-
else:
|
| 707 |
-
rec_text = f"""
|
| 708 |
-
<b>✅ BATCH PASSED SCREENING</b><br/>
|
| 709 |
-
All {total} images appear to be authentic based on current analysis.<br/>
|
| 710 |
-
<br/>
|
| 711 |
-
<b>Recommended Actions:</b><br/>
|
| 712 |
-
• Archive results for audit trail<br/>
|
| 713 |
-
• Maintain periodic re-screening schedule<br/>
|
| 714 |
-
• Monitor for evolving AI generation techniques<br/>
|
| 715 |
-
• Update detection models regularly<br/>
|
| 716 |
-
• Document chain of custody
|
| 717 |
-
"""
|
| 718 |
-
rec_color = self.COLOR_LIGHT_GREEN
|
| 719 |
-
border_color = self.COLOR_SUCCESS
|
| 720 |
-
|
| 721 |
-
rec_table = Table([[Paragraph(rec_text, self.styles['CustomBodyText'])]], colWidths=[530])
|
| 722 |
-
rec_table.setStyle(TableStyle([
|
| 723 |
-
('BACKGROUND', (0, 0), (-1, -1), rec_color),
|
| 724 |
-
('BOX', (0, 0), (-1, -1), 2, border_color),
|
| 725 |
-
('LEFTPADDING', (0, 0), (-1, -1), 15),
|
| 726 |
-
('RIGHTPADDING', (0, 0), (-1, -1), 15),
|
| 727 |
-
('TOPPADDING', (0, 0), (-1, -1), 12),
|
| 728 |
-
('BOTTOMPADDING', (0, 0), (-1, -1), 12)
|
| 729 |
-
]))
|
| 730 |
-
story.append(rec_table)
|
| 731 |
-
|
| 732 |
-
def _add_professional_footer(self, story):
|
| 733 |
-
"""Professional footer with disclaimers"""
|
| 734 |
-
story.append(Spacer(1, 15))
|
| 735 |
-
|
| 736 |
-
disclaimer_lines = [
|
| 737 |
-
"⚠️ <b>DISCLAIMER</b>: This report provides probabilistic screening results based on current AI detection methodologies, not definitive verdicts.",
|
| 738 |
-
"Results should be manually verified for critical applications. False positive rate: ~10-20%. Accuracy may vary with image quality and AI generation techniques.",
|
| 739 |
-
"This analysis should be used as one component of a comprehensive verification process, not as the sole basis for decision-making.",
|
| 740 |
-
"© 2025 AI Image Screener | Confidential Report | For Authorized Use Only"
|
| 741 |
-
]
|
| 742 |
-
|
| 743 |
-
for line in disclaimer_lines:
|
| 744 |
-
story.append(Paragraph(line, self.styles['Footer']))
|
| 745 |
-
story.append(Spacer(1, 2))
|
| 746 |
-
|
| 747 |
-
# Helper methods
|
| 748 |
-
|
| 749 |
-
def _get_standard_table_style(self, num_rows):
|
| 750 |
-
"""Standard table styling"""
|
| 751 |
-
return TableStyle([
|
| 752 |
-
('BACKGROUND', (0, 0), (-1, 0), self.COLOR_HEADER_BG),
|
| 753 |
-
('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
|
| 754 |
-
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
|
| 755 |
-
('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
|
| 756 |
-
('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, self.COLOR_ALT_ROW]),
|
| 757 |
-
('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
|
| 758 |
-
('LEFTPADDING', (0, 0), (-1, -1), 8),
|
| 759 |
-
('RIGHTPADDING', (0, 0), (-1, -1), 8),
|
| 760 |
-
('TOPPADDING', (0, 0), (-1, -1), 5),
|
| 761 |
-
('BOTTOMPADDING', (0, 0), (-1, -1), 5)
|
| 762 |
-
])
|
| 763 |
|
| 764 |
-
def
|
| 765 |
-
"""Signal table styling with color coding"""
|
| 766 |
return TableStyle([
|
| 767 |
-
('BACKGROUND', (0, 0), (-1, 0), self.
|
| 768 |
('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
|
| 769 |
-
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
|
| 770 |
('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
|
| 771 |
('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, self.COLOR_ALT_ROW]),
|
| 772 |
('VALIGN', (0, 0), (-1, -1), 'TOP'),
|
| 773 |
('LEFTPADDING', (0, 0), (-1, -1), 6),
|
| 774 |
('RIGHTPADDING', (0, 0), (-1, -1), 6),
|
| 775 |
-
('TOPPADDING', (0, 0), (-1, -1), 5),
|
| 776 |
-
('BOTTOMPADDING', (0, 0), (-1, -1), 5)
|
| 777 |
])
|
| 778 |
|
| 779 |
-
def _get_pivot_table_style(self, num_rows):
    """
    Build the pivot-table style: same header/grid/zebra scheme as the other
    tables, but with a centred first column and tighter (4pt) cell padding.

    `num_rows` is accepted for signature compatibility; it is not used here.
    """
    header_row   = ((0, 0), (-1, 0))
    body_rows    = ((0, 1), (-1, -1))
    all_cells    = ((0, 0), (-1, -1))
    first_column = ((0, 0), (0, -1))

    commands = [
        ('BACKGROUND', *header_row, self.COLOR_HEADER_BG),
        ('TEXTCOLOR', *header_row, colors.white),
        ('FONTNAME', *header_row, 'Helvetica-Bold'),
        ('GRID', *all_cells, 0.5, colors.grey),
        ('ROWBACKGROUNDS', *body_rows, [colors.white, self.COLOR_ALT_ROW]),
        ('VALIGN', *all_cells, 'MIDDLE'),
        ('ALIGN', *first_column, 'CENTER'),
    ]

    # Uniform 4pt padding on every side of every cell
    commands.extend((side, *all_cells, 4) for side in ('LEFTPADDING', 'RIGHTPADDING', 'TOPPADDING', 'BOTTOMPADDING'))

    return TableStyle(commands)
|
| 794 |
|
| 795 |
-
def
|
| 796 |
-
|
| 797 |
-
|
| 798 |
-
|
| 799 |
-
|
| 800 |
-
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
|
| 805 |
-
"""Short status badge"""
|
| 806 |
-
if status == "REVIEW_REQUIRED":
|
| 807 |
-
return "<font color='#B71C1C'><b>⚠️ REVIEW REQUIRED</b></font>"
|
| 808 |
-
else:
|
| 809 |
-
return "<font color='#1B5E20'><b>✓ LIKELY AUTHENTIC</b></font>"
|
| 810 |
-
|
| 811 |
-
def _get_metric_status_html(self, score: float) -> str:
|
| 812 |
-
"""Metric status based on score"""
|
| 813 |
-
if score > 0.7:
|
| 814 |
-
return "<font color='#B71C1C'><b>High Risk</b></font>"
|
| 815 |
-
elif score > 0.5:
|
| 816 |
-
return "<font color='#E65100'><b>Moderate Risk</b></font>"
|
| 817 |
-
else:
|
| 818 |
-
return "<font color='#1B5E20'><b>Low Risk</b></font>"
|
| 819 |
-
|
| 820 |
-
def _format_value(self, value: Any, decimal_places: int = 4) -> str:
|
| 821 |
-
"""Format value for display"""
|
| 822 |
-
if value is None or (isinstance(value, dict) and 'reason' in value):
|
| 823 |
-
return "N/A"
|
| 824 |
-
elif isinstance(value, float):
|
| 825 |
-
return f"{value:.{decimal_places}f}"
|
| 826 |
-
elif isinstance(value, (int, str, bool)):
|
| 827 |
-
return str(value)
|
| 828 |
-
else:
|
| 829 |
-
return "N/A"
|
| 830 |
-
|
| 831 |
-
def _extract_nested_value(self, details: dict, key: str) -> Any:
|
| 832 |
-
"""Extract nested dictionary values"""
|
| 833 |
-
if '.' in key:
|
| 834 |
-
parts = key.split('.')
|
| 835 |
-
value = details
|
| 836 |
-
for part in parts:
|
| 837 |
-
if isinstance(value, dict):
|
| 838 |
-
value = value.get(part, None)
|
| 839 |
-
else:
|
| 840 |
-
return None
|
| 841 |
-
return value
|
| 842 |
-
else:
|
| 843 |
-
return details.get(key, None)
|
|
|
|
| 1 |
# Dependencies
|
| 2 |
+
from typing import Any
|
| 3 |
+
from typing import List
|
| 4 |
from pathlib import Path
|
| 5 |
+
from typing import Optional
|
| 6 |
from datetime import datetime
|
| 7 |
+
from reportlab.lib import colors
|
| 8 |
from utils.logger import get_logger
|
| 9 |
from config.settings import settings
|
| 10 |
+
from reportlab.platypus import Table
|
| 11 |
+
from reportlab.lib.units import inch
|
| 12 |
+
from reportlab.platypus import Spacer
|
| 13 |
+
from reportlab.platypus import Paragraph
|
| 14 |
+
from reportlab.platypus import PageBreak
|
| 15 |
from reportlab.platypus import TableStyle
|
| 16 |
from config.schemas import AnalysisResult
|
| 17 |
+
from config.constants import FinalDecision
|
| 18 |
+
from reportlab.lib.pagesizes import LETTER
|
| 19 |
from utils.helpers import generate_unique_id
|
| 20 |
+
from config.constants import EvidenceStrength
|
| 21 |
from config.schemas import BatchAnalysisResult
|
| 22 |
+
from reportlab.lib.styles import ParagraphStyle
|
| 23 |
from reportlab.platypus import SimpleDocTemplate
|
| 24 |
+
from reportlab.lib.styles import getSampleStyleSheet
|
| 25 |
from features.detailed_result_maker import DetailedResultMaker
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
|
| 28 |
# Setup Logging
|
|
|
|
| 31 |
|
| 32 |
class PDFReporter:
    """
    PDF report generator for single-image and batch analysis results.

    Guarantees:
    -----------
    - FinalDecision is authoritative
    - Evidence-first explanations
    - Metrics are informational only
    - Audit-safe and regulator-ready
    """
    COLOR_PRIMARY = colors.HexColor('#0D47A1')
    COLOR_SUCCESS = colors.HexColor('#1B5E20')
    COLOR_WARNING = colors.HexColor('#E65100')
    COLOR_DANGER  = colors.HexColor('#B71C1C')
    COLOR_HEADER  = colors.HexColor('#1565C0')
    COLOR_ALT_ROW = colors.HexColor('#F5F5F5')

    def __init__(self):
        self.detailed_maker = DetailedResultMaker()
        self.styles         = self._build_styles()
        logger.debug("PDFReporter initialized")


    def export_single(self, result: AnalysisResult, output_dir: Optional[Path] = None) -> Path:
        """
        Export single image PDF report

        Layout: header and executive summary, then the evidence table and the
        metric table each on their own page, followed by the disclaimer.

        Arguments:
        ----------
            result     : analysis result to render
            output_dir : target directory; defaults to settings.REPORTS_DIR

        Returns:
        --------
            Path of the written PDF file
        """
        output_path, doc = self._new_document(output_dir = output_dir,
                                              prefix     = "ai_screener_report",
                                              log_label  = "single image",
                                             )

        story = list()

        self._add_header(story, "AI Image Analysis Report")
        self._add_single_executive_summary(story, result)
        story.append(PageBreak())

        self._add_evidence_section(story, result)
        story.append(PageBreak())

        self._add_metrics_section(story, result)
        self._add_footer(story)

        doc.build(story)

        return output_path


    def export_batch(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
        """
        Export batch PDF report

        Layout: header and batch summary on the first page, then one block per
        image (executive summary, evidence, metrics) separated by page breaks,
        followed by the disclaimer.

        Arguments:
        ----------
            batch_result : batch result to render
            output_dir   : target directory; defaults to settings.REPORTS_DIR

        Returns:
        --------
            Path of the written PDF file
        """
        output_path, doc = self._new_document(output_dir = output_dir,
                                              prefix     = "ai_screener_batch",
                                              log_label  = "batch",
                                             )

        story = list()

        self._add_header(story, "Batch Image Analysis Report")
        self._add_batch_summary(story, batch_result)
        story.append(PageBreak())

        total = len(batch_result.results)

        for idx, result in enumerate(batch_result.results, 1):
            self._add_single_executive_summary(story, result, index = idx)
            self._add_evidence_section(story, result)
            self._add_metrics_section(story, result)

            # No trailing page break after the last image: the footer follows directly
            if (idx < total):
                story.append(PageBreak())

        self._add_footer(story)
        doc.build(story)

        return output_path


    def _new_document(self, output_dir: Optional[Path], prefix: str, log_label: str):
        """
        Create the output directory, pick a unique file name and return
        (output_path, SimpleDocTemplate) with the shared page setup
        """
        # Path() also accepts a plain string, so str callers do not break on .mkdir
        target_dir  = Path(output_dir or settings.REPORTS_DIR)
        target_dir.mkdir(parents = True, exist_ok = True)

        filename    = f"{prefix}_{generate_unique_id()}.pdf"
        output_path = target_dir / filename

        logger.info(f"Generating {log_label} PDF: {filename}")

        doc = SimpleDocTemplate(str(output_path),
                                pagesize     = LETTER,
                                rightMargin  = 30,
                                leftMargin   = 30,
                                topMargin    = 20,
                                bottomMargin = 35,
                               )

        return output_path, doc


    def _text_cell(self, text) -> Paragraph:
        """
        Wrap free text in a Paragraph so it word-wraps inside a table cell.

        Plain strings in a ReportLab Table are drawn on one line and overflow
        the column. The text is XML-escaped because Paragraph parses markup.
        """
        from xml.sax.saxutils import escape

        return Paragraph(escape(str(text)), self.styles['Body'])


    def _add_header(self, story, title: str):
        """
        Append the product banner, the report title and the generation timestamp / version line
        """
        generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        story.append(Paragraph("AI IMAGE SCREENER", self.styles['Title']))
        story.append(Paragraph(title, self.styles['Subtitle']))
        story.append(Paragraph(f"Generated: {generated_at} | Version: {settings.VERSION}", self.styles['Meta']))
        story.append(Spacer(1, 12))


    def _add_single_executive_summary(self, story, result: AnalysisResult, index: Optional[int] = None):
        """
        Append the decision / confidence / explanation table for one image.

        `index` (when given) is appended to the section title for batch reports.
        """
        title = "Executive Summary"

        # Explicit None check: a caller-supplied index of 0 must still be shown
        if index is not None:
            title += f" — Image {index}"

        story.append(Paragraph(title, self.styles['Section']))

        decision = result.final_decision.value if result.final_decision else "UNDECIDED"
        color    = self._decision_color(result.final_decision)

        table = Table([["Final Decision", decision],
                       ["Confidence", f"{result.confidence}%"],
                       ["Explanation", self._text_cell(result.decision_explanation or "—")],
                      ],
                      colWidths = [140, 390]
                     )

        table.setStyle(TableStyle([('BACKGROUND', (0, 0), (-1, 0), self.COLOR_HEADER),
                                   ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
                                   ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
                                   # Remaining rows are tinted by the decision category
                                   ('BACKGROUND', (0, 1), (-1, -1), color),
                                   ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                                   ('LEFTPADDING', (0, 0), (-1, -1), 8),
                                   ('RIGHTPADDING', (0, 0), (-1, -1), 8),
                                  ])
                      )

        story.append(table)
        story.append(Spacer(1, 10))


    def _add_evidence_section(self, story, result: AnalysisResult):
        """
        Append the evidence table, or a short note when the result carries no evidence
        """
        story.append(Paragraph("Evidence Assessment", self.styles['Section']))

        if not result.evidence:
            story.append(Paragraph("No declarative evidence detected. Decision derived from Tier-1 metrics.", self.styles['Body']))
            return

        rows = [["Source", "Direction", "Strength", "Confidence", "Finding"]]

        for e in result.evidence:
            rows.append([e.source.value,
                         e.direction.value,
                         e.strength.value,
                         # 0.0 is a real confidence value; only a missing one is "N/A"
                         f"{e.confidence:.2f}" if e.confidence is not None else "N/A",
                         self._text_cell(e.finding),
                        ])

        table = Table(rows, colWidths = [70, 80, 80, 70, 230])
        table.setStyle(self._standard_table_style())

        story.append(table)
        story.append(Spacer(1, 10))


    def _add_metrics_section(self, story, result: AnalysisResult):
        """
        Append the informational table with one row per metric result
        """
        story.append(Paragraph("Metric Signals (Informational)", self.styles['Section']))

        rows = [["Metric", "Score", "Confidence", "Notes"]]

        for mt, mr in result.metric_results.items():
            notes = ", ".join(self.detailed_maker.extract_key_findings(mt, mr))

            rows.append([self.detailed_maker.metric_display_names.get(mt, mt.value),
                         f"{mr.score:.3f}",
                         # 0.0 is a real confidence value; only a missing one is "N/A"
                         f"{mr.confidence:.3f}" if mr.confidence is not None else "N/A",
                         self._text_cell(notes),
                        ])

        table = Table(rows, colWidths = [180, 70, 80, 210])
        table.setStyle(self._standard_table_style())

        story.append(table)
        story.append(Spacer(1, 10))


    def _add_batch_summary(self, story, batch_result: BatchAnalysisResult):
        """
        Append the batch totals followed by one count row per FinalDecision value
        """
        story.append(Paragraph("Batch Decision Summary", self.styles['Section']))

        rows = [["Total Images", batch_result.total_images],
                ["Processed", batch_result.processed],
                ["Failed", batch_result.failed],
                ["Success Rate", f"{batch_result.summary.get('success_rate', 0)}%"],
               ]

        # Counts are read from the summary under the decision's value; missing ones show 0
        rows.extend([decision.value, batch_result.summary.get(decision.value, 0)] for decision in FinalDecision)

        table = Table(rows, colWidths = [220, 310])
        table.setStyle(self._standard_table_style())

        story.append(table)
        story.append(Spacer(1, 10))


    def _add_footer(self, story):
        """
        Append the closing disclaimer
        """
        story.append(Spacer(1, 15))
        story.append(Paragraph("DISCLAIMER: Metric scores are non-decisional. "
                               "Final decisions are evidence- and policy-based.",
                               self.styles['Footer']
                              ))


    # ------------------------------------------------------------------
    # STYLES & HELPERS
    # ------------------------------------------------------------------

    def _build_styles(self):
        """
        Return the sample stylesheet extended with the report's paragraph styles
        (Title, Subtitle, Meta, Section, Body, Footer)
        """
        styles = getSampleStyleSheet()

        # 'Title' already exists in the sample stylesheet and StyleSheet1.add()
        # raises KeyError on a duplicate name, so restyle the existing entry
        title_style           = styles['Title']
        title_style.fontSize  = 18
        title_style.alignment = 1
        title_style.textColor = self.COLOR_PRIMARY
        title_style.fontName  = 'Helvetica-Bold'

        styles.add(ParagraphStyle(name       = 'Subtitle',
                                  fontSize   = 12,
                                  alignment  = 1,
                                  spaceAfter = 6,
                                 ))

        styles.add(ParagraphStyle(name       = 'Meta',
                                  fontSize   = 8,
                                  alignment  = 1,
                                  spaceAfter = 10,
                                  textColor  = colors.grey,
                                 ))

        styles.add(ParagraphStyle(name        = 'Section',
                                  fontSize    = 13,
                                  fontName    = 'Helvetica-Bold',
                                  spaceBefore = 10,
                                  spaceAfter  = 6,
                                 ))

        styles.add(ParagraphStyle(name       = 'Body',
                                  fontSize   = 9,
                                  spaceAfter = 6,
                                 ))

        styles.add(ParagraphStyle(name      = 'Footer',
                                  fontSize  = 7,
                                  alignment = 1,
                                  textColor = colors.grey,
                                 ))

        return styles


    def _standard_table_style(self):
        """
        Shared table style: coloured header row, grey grid, alternating body rows, top-aligned cells
        """
        return TableStyle([('BACKGROUND', (0, 0), (-1, 0), self.COLOR_HEADER),
                           ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
                           ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
                           ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, self.COLOR_ALT_ROW]),
                           ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                           ('LEFTPADDING', (0, 0), (-1, -1), 6),
                           ('RIGHTPADDING', (0, 0), (-1, -1), 6),
                          ])


    def _decision_color(self, decision: Optional[FinalDecision]):
        """
        Return the light background tint for a final decision; white for None or an unmapped value
        """
        tints = {FinalDecision.CONFIRMED_AI_GENERATED : '#FFEBEE',
                 FinalDecision.SUSPICIOUS_AI_LIKELY   : '#FFF3E0',
                 FinalDecision.AUTHENTIC_BUT_REVIEW   : '#E3F2FD',
                 FinalDecision.MOSTLY_AUTHENTIC       : '#E8F5E9',
                }

        hex_code = tints.get(decision)

        return colors.HexColor(hex_code) if hex_code else colors.white
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ui/index.html
CHANGED
|
@@ -657,6 +657,12 @@
|
|
| 657 |
border: 1px solid rgba(214, 158, 46, 0.3);
|
| 658 |
}
|
| 659 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 660 |
.score-indicator {
|
| 661 |
display: flex;
|
| 662 |
align-items: center;
|
|
@@ -1039,13 +1045,29 @@
|
|
| 1039 |
<h3>Multi-Signal Detection</h3>
|
| 1040 |
<p>Five independent statistical detectors with weighted ensemble aggregation</p>
|
| 1041 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1042 |
|
| 1043 |
<div class="feature-card">
|
| 1044 |
<div class="feature-icon">
|
| 1045 |
<i class="fas fa-file-export"></i>
|
| 1046 |
</div>
|
| 1047 |
<h3>Comprehensive Reports</h3>
|
| 1048 |
-
<p>Export results in CSV
|
| 1049 |
</div>
|
| 1050 |
|
| 1051 |
<div class="feature-card">
|
|
@@ -1215,6 +1237,72 @@
|
|
| 1215 |
</div>
|
| 1216 |
</div>
|
| 1217 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1218 |
</div>
|
| 1219 |
</div>
|
| 1220 |
|
|
@@ -1305,9 +1393,6 @@
|
|
| 1305 |
<button class="action-button secondary-action" id="exportCsvBtn">
|
| 1306 |
<i class="fas fa-file-csv"></i> CSV
|
| 1307 |
</button>
|
| 1308 |
-
<button class="action-button secondary-action" id="exportPdfBtn">
|
| 1309 |
-
<i class="fas fa-file-pdf"></i> PDF
|
| 1310 |
-
</button>
|
| 1311 |
<button class="action-button secondary-action" id="exportJsonBtn">
|
| 1312 |
<i class="fas fa-file-code"></i> JSON
|
| 1313 |
</button>
|
|
@@ -1385,8 +1470,7 @@
|
|
| 1385 |
const HEALTH_ENDPOINT = '/health';
|
| 1386 |
const BATCH_PROGRESS_ENDPOINT = '/batch';
|
| 1387 |
const CSV_REPORT_ENDPOINT = '/report/csv';
|
| 1388 |
-
|
| 1389 |
-
|
| 1390 |
// Global state
|
| 1391 |
let files = [];
|
| 1392 |
let fileDataUrls = {};
|
|
@@ -1419,7 +1503,6 @@
|
|
| 1419 |
const resultsTableBody = document.getElementById('resultsTableBody');
|
| 1420 |
const noResultsRow = document.getElementById('noResultsRow');
|
| 1421 |
const exportCsvBtn = document.getElementById('exportCsvBtn');
|
| 1422 |
-
const exportPdfBtn = document.getElementById('exportPdfBtn');
|
| 1423 |
const exportJsonBtn = document.getElementById('exportJsonBtn');
|
| 1424 |
const toggleDetailedAnalysis = document.getElementById('toggleDetailedAnalysis');
|
| 1425 |
const detailedAnalysisIcon = document.getElementById('detailedAnalysisIcon');
|
|
@@ -1498,7 +1581,6 @@
|
|
| 1498 |
|
| 1499 |
// Export
|
| 1500 |
exportCsvBtn.addEventListener('click', exportCsv);
|
| 1501 |
-
exportPdfBtn.addEventListener('click', exportPdf);
|
| 1502 |
exportJsonBtn.addEventListener('click', exportJson);
|
| 1503 |
|
| 1504 |
// Detailed analysis toggle
|
|
@@ -1832,6 +1914,46 @@
|
|
| 1832 |
}, 1000);
|
| 1833 |
}
|
| 1834 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1835 |
function displayResults() {
|
| 1836 |
if (!batchResults) {
|
| 1837 |
console.error('No results to display:', batchResults);
|
|
@@ -1855,7 +1977,8 @@
|
|
| 1855 |
|
| 1856 |
const filename = resultData.filename || 'Unknown';
|
| 1857 |
const overallScore = resultData.overall_score || 0;
|
| 1858 |
-
const
|
|
|
|
| 1859 |
const confidence = resultData.confidence || 0;
|
| 1860 |
const imageSize = resultData.image_size || [0, 0];
|
| 1861 |
const signals = resultData.signals || [];
|
|
@@ -1863,21 +1986,13 @@
|
|
| 1863 |
|
| 1864 |
const scorePercent = Math.round(overallScore * 100);
|
| 1865 |
let scoreClass = 'score-low';
|
| 1866 |
-
|
| 1867 |
-
if (scorePercent >=
|
| 1868 |
-
|
| 1869 |
-
scoreWidth = '90%';
|
| 1870 |
-
} else if (scorePercent >= 50) {
|
| 1871 |
-
scoreClass = 'score-medium';
|
| 1872 |
-
scoreWidth = '60%';
|
| 1873 |
-
}
|
| 1874 |
|
| 1875 |
const flaggedCount = signals.filter(s => s.status === 'flagged').length;
|
| 1876 |
const warningCount = signals.filter(s => s.status === 'warning').length;
|
| 1877 |
|
| 1878 |
-
// Format status for display (remove underscores)
|
| 1879 |
-
const displayStatus = status.replace(/_/g, ' ');
|
| 1880 |
-
|
| 1881 |
// Get thumbnail
|
| 1882 |
const thumbnailSrc = fileDataUrls[filename] || 'data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" width="40" height="40" viewBox="0 0 40 40"><rect width="40" height="40" fill="%23f0f0f0"/></svg>';
|
| 1883 |
|
|
@@ -1894,8 +2009,8 @@
|
|
| 1894 |
</div>
|
| 1895 |
</td>
|
| 1896 |
<td>
|
| 1897 |
-
<span class="status-badge ${
|
| 1898 |
-
${
|
| 1899 |
</span>
|
| 1900 |
</td>
|
| 1901 |
<td>
|
|
@@ -1945,153 +2060,190 @@
|
|
| 1945 |
}
|
| 1946 |
|
| 1947 |
function updateSummary(batchResult) {
|
| 1948 |
-
|
| 1949 |
-
|
| 1950 |
-
|
| 1951 |
-
|
| 1952 |
-
let likelyAuthentic = 0;
|
| 1953 |
-
let reviewRequired = 0;
|
| 1954 |
-
|
| 1955 |
-
if (batchResult.results) {
|
| 1956 |
-
batchResult.results.forEach(result => {
|
| 1957 |
-
const resultData = result;
|
| 1958 |
-
const status = resultData.status || 'LIKELY_AUTHENTIC';
|
| 1959 |
-
if (status === 'LIKELY_AUTHENTIC') {
|
| 1960 |
-
likelyAuthentic++;
|
| 1961 |
-
} else if (status === 'REVIEW_REQUIRED') {
|
| 1962 |
-
reviewRequired++;
|
| 1963 |
-
}
|
| 1964 |
-
});
|
| 1965 |
}
|
| 1966 |
-
|
| 1967 |
-
|
| 1968 |
-
|
| 1969 |
-
|
| 1970 |
-
|
| 1971 |
-
|
| 1972 |
-
|
| 1973 |
-
|
| 1974 |
-
|
| 1975 |
-
|
| 1976 |
-
|
| 1977 |
-
|
| 1978 |
-
|
| 1979 |
-
|
| 1980 |
-
|
| 1981 |
-
|
| 1982 |
-
|
| 1983 |
-
|
| 1984 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1985 |
}
|
| 1986 |
-
|
| 1987 |
function showDetailedAnalysis(index) {
|
| 1988 |
if (!batchResults || !batchResults.results || !batchResults.results[index]) return;
|
| 1989 |
-
|
| 1990 |
selectedImageIndex = index;
|
| 1991 |
-
const result
|
| 1992 |
-
|
| 1993 |
-
|
| 1994 |
-
const
|
| 1995 |
-
const
|
| 1996 |
-
const
|
| 1997 |
-
const confidence =
|
| 1998 |
-
const imageSize
|
| 1999 |
-
const processingTime
|
| 2000 |
-
const signals
|
| 2001 |
-
|
| 2002 |
-
|
| 2003 |
-
const
|
| 2004 |
-
|
| 2005 |
-
/
|
| 2006 |
detailedAnalysisContent.classList.add('show');
|
| 2007 |
detailedAnalysisIcon.classList.remove('fa-chevron-down');
|
| 2008 |
detailedAnalysisIcon.classList.add('fa-chevron-up');
|
| 2009 |
-
|
| 2010 |
-
|
| 2011 |
behavior: 'smooth',
|
| 2012 |
block: 'start'
|
| 2013 |
});
|
| 2014 |
-
|
| 2015 |
-
/
|
| 2016 |
let signalsHtml = '';
|
| 2017 |
-
|
|
|
|
| 2018 |
signals.forEach(signal => {
|
| 2019 |
let statusClass = 'signal-passed';
|
| 2020 |
if (signal.status === 'warning') statusClass = 'signal-warning';
|
| 2021 |
if (signal.status === 'flagged') statusClass = 'signal-flagged';
|
| 2022 |
-
|
| 2023 |
const signalScore = Math.round((signal.score || 0) * 100);
|
| 2024 |
-
|
| 2025 |
signalsHtml += `
|
| 2026 |
<div class="signal-card">
|
| 2027 |
<div class="signal-header">
|
| 2028 |
<strong>${signal.name || 'Unknown Metric'}</strong>
|
| 2029 |
<span class="signal-badge ${statusClass}">${signal.status}</span>
|
| 2030 |
</div>
|
| 2031 |
-
<p style="font-size:
|
| 2032 |
${signal.explanation || 'No explanation available.'}
|
| 2033 |
</p>
|
| 2034 |
-
<div style="
|
| 2035 |
-
|
| 2036 |
-
Score: ${signalScore}%
|
| 2037 |
-
</div>
|
| 2038 |
</div>
|
| 2039 |
</div>
|
| 2040 |
`;
|
| 2041 |
});
|
| 2042 |
} else {
|
| 2043 |
-
signalsHtml =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2044 |
}
|
| 2045 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2046 |
detailedAnalysisContent.innerHTML = `
|
| 2047 |
-
<div style="margin-bottom:
|
| 2048 |
-
<div style="display:
|
| 2049 |
-
<img src="${fileDataUrls[filename] || 'data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" width="60" height="60"
|
| 2050 |
-
|
| 2051 |
-
|
| 2052 |
<div>
|
| 2053 |
-
<h4 style="margin-bottom:
|
| 2054 |
-
<div style="font-size:
|
| 2055 |
${imageSize[0]} × ${imageSize[1]} • ${processingTime.toFixed(2)}s
|
| 2056 |
</div>
|
| 2057 |
</div>
|
| 2058 |
</div>
|
| 2059 |
-
|
| 2060 |
-
<div style="display:
|
| 2061 |
-
<div style="text-align:
|
| 2062 |
-
<div style="font-size:
|
| 2063 |
-
<div style="font-size:
|
| 2064 |
</div>
|
| 2065 |
-
<div style="text-align:
|
| 2066 |
-
<div style="font-size:
|
| 2067 |
-
<div style="font-size:
|
| 2068 |
</div>
|
| 2069 |
-
<div style="text-align:
|
| 2070 |
-
<div style="font-size:
|
| 2071 |
-
<div style="font-size:
|
| 2072 |
</div>
|
| 2073 |
</div>
|
| 2074 |
</div>
|
| 2075 |
-
|
| 2076 |
-
<h4 style="margin-bottom:
|
| 2077 |
<div class="signal-grid">
|
| 2078 |
${signalsHtml}
|
| 2079 |
</div>
|
| 2080 |
-
|
| 2081 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2082 |
<div class="signal-header">
|
| 2083 |
-
<strong>
|
| 2084 |
-
</div>
|
| 2085 |
-
<p style="margin-bottom: 0.5rem;">
|
| 2086 |
-
${displayStatus.includes('REVIEW') ? 'Manual verification recommended' : 'No immediate action required'}
|
| 2087 |
-
</p>
|
| 2088 |
-
<div style="font-size: 0.875rem; color: var(--text-light);">
|
| 2089 |
-
Confidence: ${confidence}% likelihood of ${displayStatus.includes('REVIEW') ? 'AI generation' : 'authenticity'}
|
| 2090 |
</div>
|
|
|
|
| 2091 |
</div>
|
| 2092 |
`;
|
| 2093 |
}
|
| 2094 |
-
|
| 2095 |
// Export functions
|
| 2096 |
async function exportCsv() {
|
| 2097 |
if (!currentBatchId) {
|
|
@@ -2129,42 +2281,6 @@
|
|
| 2129 |
}
|
| 2130 |
}
|
| 2131 |
|
| 2132 |
-
// Download the PDF report for the current batch and offer it to the browser as a file.
// Shows a toast for each outcome and always clears the loading overlay.
async function exportPdf() {
    if (!currentBatchId) {
        showToast('No analysis results to export.', 'warning');
        return;
    }

    showLoading(true);
    try {
        // Using GET request since backend now accepts both GET and POST
        const response = await fetch(`${PDF_REPORT_ENDPOINT}/${currentBatchId}`);

        if (!response.ok) {
            showToast('Failed to generate PDF report.', 'error');
            return;
        }

        // Get the blob data
        const blob = await response.blob();
        const objectUrl = URL.createObjectURL(blob);

        // Create a temporary download link and trigger it
        const downloadLink = document.createElement('a');
        downloadLink.href = objectUrl;
        downloadLink.download = `ai_screener_report_${currentBatchId}.pdf`;

        document.body.appendChild(downloadLink);
        downloadLink.click();
        document.body.removeChild(downloadLink);

        // Release the blob once the click has been dispatched; without this the
        // PDF stays referenced for the lifetime of the page. Deferred so the
        // browser can start the download first.
        setTimeout(() => URL.revokeObjectURL(objectUrl), 0);

        showToast('PDF report downloaded successfully.', 'success');
    } catch (error) {
        console.error('PDF export failed:', error);
        showToast('PDF export failed. Please try again.', 'error');
    } finally {
        showLoading(false);
    }
}
|
| 2167 |
-
|
| 2168 |
async function exportJson() {
|
| 2169 |
if (!batchResults) {
|
| 2170 |
showToast('No analysis results to export.', 'warning');
|
|
@@ -2192,7 +2308,7 @@
|
|
| 2192 |
showLoading(false);
|
| 2193 |
}
|
| 2194 |
}
|
| 2195 |
-
|
| 2196 |
// Reset functions
|
| 2197 |
function resetUI() {
|
| 2198 |
analyzeBtn.disabled = false;
|
|
|
|
| 657 |
border: 1px solid rgba(214, 158, 46, 0.3);
|
| 658 |
}
|
| 659 |
|
| 660 |
+
.status-danger {
|
| 661 |
+
background-color: rgba(229, 62, 62, 0.1);
|
| 662 |
+
color: var(--danger);
|
| 663 |
+
border: 1px solid rgba(229, 62, 62, 0.3);
|
| 664 |
+
}
|
| 665 |
+
|
| 666 |
.score-indicator {
|
| 667 |
display: flex;
|
| 668 |
align-items: center;
|
|
|
|
| 1045 |
<h3>Multi-Signal Detection</h3>
|
| 1046 |
<p>Five independent statistical detectors with weighted ensemble aggregation</p>
|
| 1047 |
</div>
|
| 1048 |
+
|
| 1049 |
+
<div class="feature-card">
|
| 1050 |
+
<div class="feature-icon">
|
| 1051 |
+
<i class="fas fa-puzzle-piece"></i>
|
| 1052 |
+
</div>
|
| 1053 |
+
<h3>Evidence Analysis</h3>
|
| 1054 |
+
<p>Aggregates detection signals and metadata into structured evidence, resolving conflicts and supporting explainable, non-scoring decisions</p>
|
| 1055 |
+
</div>
|
| 1056 |
+
|
| 1057 |
+
<div class="feature-card">
|
| 1058 |
+
<div class="feature-icon">
|
| 1059 |
+
<i class="fas fa-balance-scale"></i>
|
| 1060 |
+
</div>
|
| 1061 |
+
<h3>Decision Policy</h3>
|
| 1062 |
+
<p>Applies deterministic rules over metrics and evidence to produce a review-aware final verdict, prioritizing safety and auditability</p>
|
| 1063 |
+
</div>
|
| 1064 |
|
| 1065 |
<div class="feature-card">
|
| 1066 |
<div class="feature-icon">
|
| 1067 |
<i class="fas fa-file-export"></i>
|
| 1068 |
</div>
|
| 1069 |
<h3>Comprehensive Reports</h3>
|
| 1070 |
+
<p>Export results in CSV and JSON formats for integration and documentation</p>
|
| 1071 |
</div>
|
| 1072 |
|
| 1073 |
<div class="feature-card">
|
|
|
|
| 1237 |
</div>
|
| 1238 |
</div>
|
| 1239 |
</div>
|
| 1240 |
+
|
| 1241 |
+
<div class="metric-card">
|
| 1242 |
+
<div class="metric-header">
|
| 1243 |
+
<div class="metric-icon" style="background: linear-gradient(135deg, #2b6cb0 0%, #63b3ed 100%);">
|
| 1244 |
+
<i class="fas fa-camera-retro"></i>
|
| 1245 |
+
</div>
|
| 1246 |
+
<div>
|
| 1247 |
+
<div class="metric-title">EXIF Analyzer</div>
|
| 1248 |
+
</div>
|
| 1249 |
+
<span class="metric-weight">Auxiliary Metric</span>
|
| 1250 |
+
</div>
|
| 1251 |
+
|
| 1252 |
+
<p class="metric-description">
|
| 1253 |
+
Analyzes image metadata for presence, completeness, and plausibility.
|
| 1254 |
+
Real camera images usually contain coherent EXIF data, while AI-generated
|
| 1255 |
+
images often lack metadata or contain inconsistent fields.
|
| 1256 |
+
</p>
|
| 1257 |
+
|
| 1258 |
+
<div class="metric-details">
|
| 1259 |
+
<div class="detail-item">
|
| 1260 |
+
<span class="detail-label">Detection Method</span>
|
| 1261 |
+
<span class="detail-value">Metadata consistency analysis</span>
|
| 1262 |
+
</div>
|
| 1263 |
+
<div class="detail-item">
|
| 1264 |
+
<span class="detail-label">Signals</span>
|
| 1265 |
+
<span class="detail-value">Missing, stripped, or malformed EXIF</span>
|
| 1266 |
+
</div>
|
| 1267 |
+
<div class="detail-item">
|
| 1268 |
+
<span class="detail-label">Sensitivity</span>
|
| 1269 |
+
<span class="detail-value">Medium</span>
|
| 1270 |
+
</div>
|
| 1271 |
+
</div>
|
| 1272 |
+
</div>
|
| 1273 |
+
|
| 1274 |
+
<div class="metric-card">
|
| 1275 |
+
<div class="metric-header">
|
| 1276 |
+
<div class="metric-icon" style="background: linear-gradient(135deg, #6b46c1 0%, #b794f4 100%);">
|
| 1277 |
+
<i class="fas fa-fingerprint"></i>
|
| 1278 |
+
</div>
|
| 1279 |
+
<div>
|
| 1280 |
+
<div class="metric-title">Watermark Analyzer</div>
|
| 1281 |
+
</div>
|
| 1282 |
+
<span class="metric-weight">Auxiliary Metric</span>
|
| 1283 |
+
</div>
|
| 1284 |
+
|
| 1285 |
+
<p class="metric-description">
|
| 1286 |
+
Detects known and statistical watermark patterns embedded by generative
|
| 1287 |
+
models. Includes checks for frequency-domain artifacts and spatial
|
| 1288 |
+
regularities associated with AI watermarking techniques.
|
| 1289 |
+
</p>
|
| 1290 |
+
|
| 1291 |
+
<div class="metric-details">
|
| 1292 |
+
<div class="detail-item">
|
| 1293 |
+
<span class="detail-label">Detection Method</span>
|
| 1294 |
+
<span class="detail-value">Pattern & frequency-domain analysis</span>
|
| 1295 |
+
</div>
|
| 1296 |
+
<div class="detail-item">
|
| 1297 |
+
<span class="detail-label">Signals</span>
|
| 1298 |
+
<span class="detail-value">Model-specific watermark artifacts</span>
|
| 1299 |
+
</div>
|
| 1300 |
+
<div class="detail-item">
|
| 1301 |
+
<span class="detail-label">Sensitivity</span>
|
| 1302 |
+
<span class="detail-value">Low–Medium (model-dependent)</span>
|
| 1303 |
+
</div>
|
| 1304 |
+
</div>
|
| 1305 |
+
</div>
|
| 1306 |
</div>
|
| 1307 |
</div>
|
| 1308 |
|
|
|
|
| 1393 |
<button class="action-button secondary-action" id="exportCsvBtn">
|
| 1394 |
<i class="fas fa-file-csv"></i> CSV
|
| 1395 |
</button>
|
|
|
|
|
|
|
|
|
|
| 1396 |
<button class="action-button secondary-action" id="exportJsonBtn">
|
| 1397 |
<i class="fas fa-file-code"></i> JSON
|
| 1398 |
</button>
|
|
|
|
| 1470 |
const HEALTH_ENDPOINT = '/health';
|
| 1471 |
const BATCH_PROGRESS_ENDPOINT = '/batch';
|
| 1472 |
const CSV_REPORT_ENDPOINT = '/report/csv';
|
| 1473 |
+
|
|
|
|
| 1474 |
// Global state
|
| 1475 |
let files = [];
|
| 1476 |
let fileDataUrls = {};
|
|
|
|
| 1503 |
const resultsTableBody = document.getElementById('resultsTableBody');
|
| 1504 |
const noResultsRow = document.getElementById('noResultsRow');
|
| 1505 |
const exportCsvBtn = document.getElementById('exportCsvBtn');
|
|
|
|
| 1506 |
const exportJsonBtn = document.getElementById('exportJsonBtn');
|
| 1507 |
const toggleDetailedAnalysis = document.getElementById('toggleDetailedAnalysis');
|
| 1508 |
const detailedAnalysisIcon = document.getElementById('detailedAnalysisIcon');
|
|
|
|
| 1581 |
|
| 1582 |
// Export
|
| 1583 |
exportCsvBtn.addEventListener('click', exportCsv);
|
|
|
|
| 1584 |
exportJsonBtn.addEventListener('click', exportJson);
|
| 1585 |
|
| 1586 |
// Detailed analysis toggle
|
|
|
|
| 1914 |
}, 1000);
|
| 1915 |
}
|
| 1916 |
|
| 1917 |
+
/**
 * Translate a backend decision code into the metadata the UI needs to show it.
 *
 * @param {string} decision - Value of `final_decision` on a result, e.g.
 *     'MOSTLY_AUTHENTIC' or 'CONFIRMED_AI_GENERATED'.
 * @returns {{label: string, badgeClass: string, recommendation: string}}
 *     A fresh object per call (callers may mutate it safely). Unrecognised
 *     codes are logged and fall back to the review badge; a missing code is
 *     labelled 'Unknown' so the UI never prints the literal text "undefined".
 */
function decisionMeta(decision) {
    const KNOWN_DECISIONS = {
        MOSTLY_AUTHENTIC: {
            label: 'Mostly Authentic',
            badgeClass: 'status-authentic',
            recommendation: 'No immediate action required'
        },
        AUTHENTIC_BUT_REVIEW: {
            label: 'Authentic (Review Suggested)',
            badgeClass: 'status-review',
            recommendation: 'Optional human review'
        },
        SUSPICIOUS_AI_LIKELY: {
            label: 'Suspicious (AI Likely)',
            badgeClass: 'status-review',
            recommendation: 'Manual verification recommended'
        },
        CONFIRMED_AI_GENERATED: {
            label: 'Confirmed AI Generated',
            badgeClass: 'status-danger',
            recommendation: 'Block or audit required'
        }
    };

    // Own-property check so inherited keys such as 'constructor' or
    // 'toString' are never mistaken for a known decision.
    const isKnown = typeof decision === 'string' &&
        Object.prototype.hasOwnProperty.call(KNOWN_DECISIONS, decision);

    if (isKnown) {
        return Object.assign({}, KNOWN_DECISIONS[decision]);
    }

    console.warn('Unknown decision:', decision);

    // A result without a decision must still render something readable.
    const hasText = decision != null && decision !== '';
    return {
        label: hasText ? String(decision) : 'Unknown',
        badgeClass: 'status-review',
        recommendation: 'Manual review required'
    };
}
|
| 1956 |
+
|
| 1957 |
function displayResults() {
|
| 1958 |
if (!batchResults) {
|
| 1959 |
console.error('No results to display:', batchResults);
|
|
|
|
| 1977 |
|
| 1978 |
const filename = resultData.filename || 'Unknown';
|
| 1979 |
const overallScore = resultData.overall_score || 0;
|
| 1980 |
+
const decision = resultData.final_decision;
|
| 1981 |
+
const meta = decisionMeta(decision);
|
| 1982 |
const confidence = resultData.confidence || 0;
|
| 1983 |
const imageSize = resultData.image_size || [0, 0];
|
| 1984 |
const signals = resultData.signals || [];
|
|
|
|
| 1986 |
|
| 1987 |
const scorePercent = Math.round(overallScore * 100);
|
| 1988 |
let scoreClass = 'score-low';
|
| 1989 |
+
if (scorePercent >= 70) scoreClass = 'score-high';
|
| 1990 |
+
else if (scorePercent >= 50) scoreClass = 'score-medium';
|
| 1991 |
+
const scoreWidth = `${Math.min(scorePercent, 100)}%`;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1992 |
|
| 1993 |
const flaggedCount = signals.filter(s => s.status === 'flagged').length;
|
| 1994 |
const warningCount = signals.filter(s => s.status === 'warning').length;
|
| 1995 |
|
|
|
|
|
|
|
|
|
|
| 1996 |
// Get thumbnail
|
| 1997 |
const thumbnailSrc = fileDataUrls[filename] || 'data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" width="40" height="40" viewBox="0 0 40 40"><rect width="40" height="40" fill="%23f0f0f0"/></svg>';
|
| 1998 |
|
|
|
|
| 2009 |
</div>
|
| 2010 |
</td>
|
| 2011 |
<td>
|
| 2012 |
+
<span class="status-badge ${meta.badgeClass}">
|
| 2013 |
+
${meta.label}
|
| 2014 |
</span>
|
| 2015 |
</td>
|
| 2016 |
<td>
|
|
|
|
| 2060 |
}
|
| 2061 |
|
| 2062 |
/**
 * Render one summary card per verdict present in the batch.
 *
 * Clears the summary area when `batchResult` is missing or carries no
 * `results` array. Otherwise tallies each result's `final_decision` and
 * writes a card (count + label from `decisionMeta`) for every verdict in the
 * fixed display order that occurred at least once.
 *
 * @param {{results: Array<{final_decision?: string}>}} batchResult
 */
function updateSummary(batchResult) {
    const hasResults = Boolean(batchResult) && Array.isArray(batchResult.results);
    if (!hasResults) {
        resultsSummary.innerHTML = '';
        return;
    }

    // Tally verdicts using the exact codes the backend returned;
    // results without a verdict are ignored.
    const tally = batchResult.results.reduce((acc, entry) => {
        const verdict = entry.final_decision;
        if (verdict) {
            acc[verdict] = (acc[verdict] || 0) + 1;
        }
        return acc;
    }, {});

    // Fixed order, most severe verdict first, so the cards never jump around.
    const displayOrder = [
        'CONFIRMED_AI_GENERATED',
        'SUSPICIOUS_AI_LIKELY',
        'AUTHENTIC_BUT_REVIEW',
        'MOSTLY_AUTHENTIC'
    ];

    const cards = [];
    for (const verdict of displayOrder) {
        if (!tally[verdict]) {
            continue;
        }
        const meta = decisionMeta(verdict);
        cards.push(`
            <div class="summary-card">
                <div class="summary-value">${tally[verdict]}</div>
                <div class="summary-label">${meta.label}</div>
            </div>
        `);
    }

    resultsSummary.innerHTML = cards.join('');
}
|
| 2099 |
+
|
| 2100 |
/**
 * Expand the detailed-analysis panel and render the full breakdown for one
 * result of the current batch: header (thumbnail, name, size, timing),
 * score / verdict / confidence tiles, detection signals, evidence items and
 * the decision explanation.
 *
 * Every value that originates from the upload or the backend response is
 * HTML-escaped before it is written through `innerHTML`, because file names
 * and analyzer text are not trusted markup.
 *
 * @param {number} index - Position of the result in `batchResults.results`.
 *     The call is a no-op when there is no result at that position.
 */
function showDetailedAnalysis(index) {
    if (!batchResults || !batchResults.results || !batchResults.results[index]) return;

    // Escape a value for use in HTML text or a quoted attribute.
    const esc = (value) => String(value == null ? '' : value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    selectedImageIndex = index;
    const result = batchResults.results[index];

    const filename = result.filename || 'Unknown';
    const overallScore = result.overall_score || 0;
    const meta = decisionMeta(result.final_decision);
    const confidence = result.confidence != null ? Math.round(result.confidence) : 0;
    const imageSize = result.image_size || [0, 0];
    // Coerce so a string or missing timing cannot break `.toFixed` below.
    const processingTime = Number(result.processing_time) || 0;
    const signals = result.signals || [];
    const evidence = result.evidence || [];

    const scorePercent = Math.round(overallScore * 100);

    // The placeholder SVG is URI-encoded so its quotes and angle brackets
    // cannot terminate the surrounding src="..." attribute.
    const placeholderSvg = '<svg xmlns="http://www.w3.org/2000/svg" width="60" height="60">' +
        '<rect width="60" height="60" fill="#f0f0f0"/></svg>';
    const thumbnailSrc = fileDataUrls[filename] ||
        `data:image/svg+xml,${encodeURIComponent(placeholderSvg)}`;

    /* ---------- Expand panel ---------- */
    detailedAnalysisContent.classList.add('show');
    detailedAnalysisIcon.classList.remove('fa-chevron-down');
    detailedAnalysisIcon.classList.add('fa-chevron-up');

    detailedAnalysisContent.scrollIntoView({
        behavior: 'smooth',
        block: 'start'
    });

    /* ---------- Signals (Tier-1 Metrics) ---------- */
    let signalsHtml = '';

    if (signals.length > 0) {
        signals.forEach(signal => {
            let statusClass = 'signal-passed';
            if (signal.status === 'warning') statusClass = 'signal-warning';
            if (signal.status === 'flagged') statusClass = 'signal-flagged';

            const signalScore = Math.round((signal.score || 0) * 100);

            signalsHtml += `
                <div class="signal-card">
                    <div class="signal-header">
                        <strong>${esc(signal.name || 'Unknown Metric')}</strong>
                        <span class="signal-badge ${statusClass}">${esc(signal.status)}</span>
                    </div>
                    <p style="font-size:0.875rem;color:var(--text-light);margin-bottom:0.5rem;">
                        ${esc(signal.explanation || 'No explanation available.')}
                    </p>
                    <div style="font-size:0.75rem;color:var(--text-light);">
                        Score: ${signalScore}%
                    </div>
                </div>
            `;
        });
    } else {
        signalsHtml = `
            <p class="text-center" style="color:var(--text-light);">
                No detection signals available.
            </p>
        `;
    }

    /* ---------- Evidence (Tier-2 Declarative Evidence) ---------- */
    let evidenceHtml = '';

    if (evidence.length > 0) {
        evidence.forEach(ev => {
            let badgeClass = 'signal-passed';
            if (ev.direction === 'AI_GENERATED') badgeClass = 'signal-flagged';
            if (ev.direction === 'INDETERMINATE') badgeClass = 'signal-warning';

            // `source` may be absent; never call a string method on undefined.
            const sourceLabel = String(ev.source || 'unknown').toUpperCase();
            const confidenceNote = ev.confidence != null
                ? ` • Confidence: ${Math.round(ev.confidence * 100)}%`
                : '';

            evidenceHtml += `
                <div class="signal-card" style="background:#f1f5f9;">
                    <div class="signal-header">
                        <strong>${esc(sourceLabel)}</strong>
                        <span class="signal-badge ${badgeClass}">
                            ${esc(ev.strength)}
                        </span>
                    </div>
                    <p style="font-size:0.875rem;margin-bottom:0.5rem;">
                        ${esc(ev.finding)}
                    </p>
                    <div style="font-size:0.75rem;color:#64748b;">
                        Analyzer: ${esc(ev.analyzer || 'unknown')}
                        ${confidenceNote}
                    </div>
                </div>
            `;
        });
    } else {
        evidenceHtml = `
            <p class="text-center" style="color:var(--text-light);">
                No declarative evidence detected.
            </p>
        `;
    }

    /* ---------- Render ---------- */
    detailedAnalysisContent.innerHTML = `
        <div style="margin-bottom:1.5rem;">
            <div style="display:flex;align-items:center;gap:1rem;margin-bottom:1rem;">
                <img src="${esc(thumbnailSrc)}"
                     alt="${esc(filename)}"
                     style="width:60px;height:60px;object-fit:cover;border-radius:0.5rem;border:1px solid var(--border);">
                <div>
                    <h4 style="margin-bottom:0.25rem;">${esc(filename)}</h4>
                    <div style="font-size:0.875rem;color:var(--text-light);">
                        ${esc(imageSize[0])} × ${esc(imageSize[1])} • ${processingTime.toFixed(2)}s
                    </div>
                </div>
            </div>

            <div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(150px,1fr));gap:1rem;">
                <div style="text-align:center;padding:1rem;background:#f8fafc;border-radius:0.5rem;">
                    <div style="font-size:1.5rem;font-weight:700;">${scorePercent}%</div>
                    <div style="font-size:0.875rem;color:var(--text-light);">Score</div>
                </div>
                <div style="text-align:center;padding:1rem;background:#f8fafc;border-radius:0.5rem;">
                    <div style="font-size:1.25rem;font-weight:700;">${esc(meta.label)}</div>
                    <div style="font-size:0.875rem;color:var(--text-light);">Verdict</div>
                </div>
                <div style="text-align:center;padding:1rem;background:#f8fafc;border-radius:0.5rem;">
                    <div style="font-size:1.5rem;font-weight:700;">${confidence}%</div>
                    <div style="font-size:0.875rem;color:var(--text-light);">Confidence</div>
                </div>
            </div>
        </div>

        <h4 style="margin-bottom:0.75rem;">Detection Signals</h4>
        <div class="signal-grid">
            ${signalsHtml}
        </div>

        <h4 style="margin:1.5rem 0 0.75rem;">Evidence</h4>
        <div class="signal-grid">
            ${evidenceHtml}
        </div>

        <div class="signal-card" style="margin-top:1.5rem;">
            <div class="signal-header">
                <strong>Decision Explanation</strong>
            </div>
            <p>${esc(result.decision_explanation || meta.recommendation)}</p>
        </div>
    `;
}
|
| 2246 |
+
|
| 2247 |
// Export functions
|
| 2248 |
async function exportCsv() {
|
| 2249 |
if (!currentBatchId) {
|
|
|
|
| 2281 |
}
|
| 2282 |
}
|
| 2283 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2284 |
async function exportJson() {
|
| 2285 |
if (!batchResults) {
|
| 2286 |
showToast('No analysis results to export.', 'warning');
|
|
|
|
| 2308 |
showLoading(false);
|
| 2309 |
}
|
| 2310 |
}
|
| 2311 |
+
|
| 2312 |
// Reset functions
|
| 2313 |
function resetUI() {
|
| 2314 |
analyzeBtn.disabled = false;
|