Spaces:

satyaki-mitra
/

ImageForensics-AI

Sleeping

App Files Files Community

satyaki-mitra commited on Dec 19, 2025

Commit

9ff8ef6

1 Parent(s): fed5d64

Initial commit: ImageForensics-AI statistical image screening system

Browse files

Files changed (39) hide show

.env.example +60 -0
Dockerfile +53 -0
README.md +464 -0
README_HUGGINGFACE.md +128 -0
all.txt +0 -0
app.py +347 -0
config/__init__.py +0 -0
config/constants.py +325 -0
config/schemas.py +112 -0
config/settings.py +107 -0
docs/API_DOCUMENTATION.md +712 -0
docs/ARCHITECTURE.md +527 -0
docs/Description.md +298 -0
docs/TECHNICAL_DOCUMENTATION.md +885 -0
features/__init__.py +0 -0
features/batch_processor.py +299 -0
features/detailed_result_maker.py +481 -0
features/threshold_manager.py +277 -0
metrics/__init__.py +0 -0
metrics/aggregator.py +288 -0
metrics/color_analyzer.py +352 -0
metrics/frequency_analyzer.py +260 -0
metrics/gradient_field_pca.py +236 -0
metrics/noise_analyzer.py +335 -0
metrics/texture_analyzer.py +308 -0
notebooks/.ipynb_checkpoints/Unified_Dataset_Builder-checkpoint.ipynb +725 -0
notebooks/Unified_Dataset_Builder.ipynb +797 -0
reporter/__init__.py +0 -0
reporter/csv_reporter.py +462 -0
reporter/json_reporter.py +349 -0
reporter/pdf_reporter.py +843 -0
requirements.txt +72 -0
setup.sh +138 -0
ui/index.html +2248 -0
utils/__init__.py +19 -0
utils/helpers.py +108 -0
utils/image_processor.py +163 -0
utils/logger.py +85 -0
utils/validators.py +108 -0

.env.example ADDED Viewed

	@@ -0,0 +1,60 @@

+# =========================================
+# AI Image Screener - Environment Configuration
+# Copy this file to .env and adjust values
+# =========================================
+# Application
+APP_NAME="AI Image Screener"
+VERSION="1.0.0"
+DEBUG=False
+LOG_LEVEL="INFO"
+# Server Configuration
+HOST="0.0.0.0"
+PORT=7860
+WORKERS=1
+# File Processing
+MAX_FILE_SIZE_MB=10
+MAX_BATCH_SIZE=50
+ALLOWED_EXTENSIONS=".jpg,.jpeg,.png,.webp"
+# Detection Thresholds
+REVIEW_THRESHOLD=0.65
+# Metric Weights (must sum to 1.0)
+GRADIENT_WEIGHT=0.30
+FREQUENCY_WEIGHT=0.25
+NOISE_WEIGHT=0.20
+TEXTURE_WEIGHT=0.15
+COLOR_WEIGHT=0.10
+# Processing Configuration
+ENABLE_CACHING=True
+PROCESSING_TIMEOUT=30
+PARALLEL_PROCESSING=False
+MAX_WORKERS=1
+# Paths (relative to project root)
+BASE_DIR="."
+UPLOAD_DIR="data/uploads"
+REPORTS_DIR="data/reports"
+CACHE_DIR="data/cache"
+LOGS_DIR="logs"
+# =========================================
+# Hugging Face Spaces Specific
+# =========================================
+# These are automatically set by HF Spaces
+# HF_SPACE_ID=""
+# HF_SPACE_HOST=""
+# =========================================
+# Production Recommendations
+# =========================================
+# - Set DEBUG=False
+# - Set LOG_LEVEL="WARNING" or "ERROR"
+# - Adjust WORKERS based on available CPU cores
+# - Enable PARALLEL_PROCESSING if CPU cores > 2
+# - Set appropriate MAX_FILE_SIZE_MB for your use case
+# =========================================

Dockerfile ADDED Viewed

	@@ -0,0 +1,53 @@

+# ===============================================================
+# ImageScreenAI - Dockerfile : Optimized for Hugging Face Spaces
+# ===============================================================
+FROM python:3.11-slim
+# Set working directory
+WORKDIR /app
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    DEBIAN_FRONTEND=noninteractive
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libgl1 \
+    libglib2.0-0 \
+    libsm6 \
+    libxext6 \
+    libxrender1 \
+    libgomp1 \
+    libmagic1 \
+    gcc \
+    g++ \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first (layer caching optimization)
+COPY requirements.txt .
+# Install Python dependencies
+RUN pip install --upgrade pip setuptools wheel && \
+    pip install -r requirements.txt
+# Copy application code
+COPY . .
+# Create necessary directories
+RUN mkdir -p data/uploads data/reports data/cache logs && \
+    chmod -R 755 data logs
+# Expose port (Hugging Face Spaces uses port 7860 by default)
+EXPOSE 7860
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD python -c "import requests; requests.get('http://localhost:7860/health')" || exit 1
+# Run the application
+# Note: Hugging Face Spaces expects the app to listen on 0.0.0.0:7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]

README.md ADDED Viewed

	@@ -0,0 +1,464 @@

+# AI Image Screener
+[![Python Version](https://img.shields.io/badge/python-3.11%2B-blue.svg)](https://www.python.org/downloads/)
+[![FastAPI](https://img.shields.io/badge/FastAPI-0.104%2B-009688.svg)](https://fastapi.tiangolo.com/)
+[![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
+[![Code Style](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+> **A transparent, unsupervised first-pass screening system for identifying images requiring human review in production workflows**
+---
+## 🎯 Overview
+**AI Image Screener** is not a "perfect AI detector." It is a **pragmatic screening tool** designed to reduce manual review workload by flagging potentially AI-generated images based on statistical and physical anomalies.
+### What This Is
+✅ A workflow efficiency tool
+✅ A transparent, explainable detector
+✅ A model-agnostic screening system
+✅ A first-pass filter, not a verdict engine
+### What This Is Not
+❌ A definitive "real vs fake" classifier
+❌ A black-box deep learning detector
+❌ A system claiming near-perfect accuracy on 2025 AI models
+---
+## 🚀 Key Features
+- **Multi-Metric Ensemble**: 5 independent statistical detectors analyzing different AI generation failure modes
+- **Binary UX**: Only two outcomes - `LIKELY_AUTHENTIC` or `REVIEW_REQUIRED` (no ambiguous "maybe")
+- **Full Explainability**: Per-metric scores, confidence levels, and human-readable explanations
+- **Batch Processing**: Parallel analysis of up to 50 images with progress tracking
+- **Multiple Export Formats**: CSV, JSON, and PDF reports for integration into existing workflows
+- **No External Dependencies**: No ML models, no cloud APIs - fully self-contained
+- **Production Ready**: FastAPI backend, comprehensive error handling, configurable thresholds
+---
+## 📊 Detection Approach
+### The Core Philosophy
+Instead of answering *"Is this image AI or real?"*, we answer:
+> **"Does this image require human review?"**
+This reframes the problem from classification to prioritization - far more valuable in real-world workflows.
+---
+## 🔬 Metrics Choice & Rationale
+### Why These Five Metrics?
+Each metric targets a **different failure mode** of AI image generation models (diffusion models, GANs, etc.):
+#### 1. **Gradient-Field PCA** (`metrics/gradient_field_pca.py`)
+- **Weight**: 30%
+- **Target**: Lighting inconsistencies in diffusion models
+- **Rationale**: Real photos have gradients aligned with physical light sources. Diffusion models perform patch-based denoising, creating low-dimensional gradient structures inconsistent with physics.
+- **Method**: Sobel gradients → PCA → eigenvalue ratio analysis
+- **Threshold**: Eigenvalue ratio < 0.85 indicates suspicious structure
+- **Research Basis**: [Gragnaniello et al. 2021](https://arxiv.org/abs/2104.02726) - "Perceptual Quality Assessment of Synthetic Images"
+#### 2. **Frequency Analysis (FFT)** (`metrics/frequency_analyzer.py`)
+- **Weight**: 25%
+- **Target**: Unnatural spectral energy distributions
+- **Rationale**: Camera optics and sensors produce characteristic frequency falloffs. AI models can create spectral peaks/gaps not found in nature.
+- **Method**: 2D FFT → radial spectrum → high-frequency ratio + roughness + power-law deviation
+- **Thresholds**: HF ratio outside [0.08, 0.35] indicates anomalies
+- **Research Basis**: [Dzanic et al. 2020](https://arxiv.org/abs/2003.08685) - "Fourier Spectrum Discrepancies in Deep Network Generated Images"
+#### 3. **Noise Pattern Analysis** (`metrics/noise_analyzer.py`)
+- **Weight**: 20%
+- **Target**: Missing or artificial sensor noise
+- **Rationale**: Real cameras produce Poisson shot noise + Gaussian read noise with characteristic variance. AI models often produce overly uniform images or synthetic noise.
+- **Method**: Patch-based Laplacian filtering → MAD estimation → CV + IQR analysis
+- **Thresholds**: CV < 0.15 (too uniform) or > 1.2 (too variable) flags images
+- **Research Basis**: [Kirchner & Johnson 2019](https://ieeexplore.ieee.org/document/8625351) - "SPN-CNN: Boosting Sensor Pattern Noise for Image Manipulation Detection"
+#### 4. **Texture Statistics** (`metrics/texture_analyzer.py`)
+- **Weight**: 15%
+- **Target**: Overly smooth or repetitive regions
+- **Rationale**: Natural scenes have organic texture variation. GANs can produce suspiciously smooth regions or repetitive patterns.
+- **Method**: Patch-based entropy, contrast, edge density → distribution analysis
+- **Thresholds**: >40% smooth patches (smoothness > 0.5) indicates anomalies
+- **Research Basis**: [Nataraj et al. 2019](https://arxiv.org/abs/1912.11035) - "Detecting GAN Generated Fake Images using Co-occurrence Matrices"
+#### 5. **Color Distribution** (`metrics/color_analyzer.py`)
+- **Weight**: 10%
+- **Target**: Impossible or highly unlikely color patterns
+- **Rationale**: Physical light sources create constrained color relationships. AI can generate oversaturated or unnaturally clustered hues.
+- **Method**: RGB→HSV conversion → saturation analysis + histogram roughness + hue concentration
+- **Thresholds**: Mean saturation > 0.65 or top-3 hue bins > 60% flags images
+- **Research Basis**: [Marra et al. 2019](https://arxiv.org/abs/1902.11153) - "Do GANs Leave Specific Traces?"
+---
+## ⚖️ Ensemble Approach
+### Weighted Aggregation Strategy
+```python
+final_score = (
+    0.30 × gradient_score +
+    0.25 × frequency_score +
+    0.20 × noise_score +
+    0.15 × texture_score +
+    0.10 × color_score
+)
+```
+### Pros ✅
+1. **Robustness**: No single metric failure breaks the system
+2. **Diversity**: Each metric captures orthogonal information
+3. **Tunability**: Weights can be adjusted based on use case
+4. **Explainability**: Per-metric scores preserved for transparency
+5. **Fail-Safe**: Neutral scores (0.5) for metric failures prevent cascading errors
+### Cons ❌
+1. **Hyperparameter Sensitivity**: Weights are manually tuned, not learned
+2. **Assumption of Independence**: Metrics may correlate in practice (e.g., frequency ↔ noise)
+3. **Fixed Weights**: No adaptive weighting based on image characteristics
+4. **Threshold Brittleness**: Single threshold (0.65) for binary decision may not fit all contexts
+5. **No Adversarial Robustness**: Trivial post-processing can fool statistical detectors
+### Why Not Machine Learning?
+- **Transparency**: Statistical methods are auditable; neural networks are black boxes
+- **Generalization**: ML models overfit to training generators; unsupervised methods generalize better
+- **Deployment**: No GPU required, no model versioning issues
+- **Trust**: Users understand "gradient inconsistency" better than "neuron activation patterns"
+---
+## 🏗️ Architecture
+### High-Level Flow
+```
+Image Upload → Validation → Parallel Metric Execution → Aggregation → Threshold Decision → Report Export
+```
+### Component Structure
+```
+ai_image_screener/
+├── app.py                          # FastAPI application entry point
+├── config/
+│   ├── settings.py                 # Environment variables, weights, thresholds
+│   ├── constants.py                # Enums, metric parameters, explanations
+│   └── schemas.py                  # Pydantic models for type safety
+├── metrics/
+│   ├── gradient_field_pca.py       # Gradient structure analysis
+│   ├── frequency_analyzer.py       # FFT-based spectral analysis
+│   ├── noise_analyzer.py           # Sensor noise pattern detection
+│   ├── texture_analyzer.py         # Statistical texture features
+│   ├── color_analyzer.py           # Color distribution anomalies
+│   └── aggregator.py               # Ensemble combination logic
+├── features/
+│   ├── batch_processor.py          # Parallel/sequential batch handling
+│   ├── threshold_manager.py        # Runtime threshold configuration
+│   └── detailed_result_maker.py    # Explainability extraction
+├── reporter/
+│   ├── csv_reporter.py             # CSV export for workflows
+│   ├── json_reporter.py            # JSON API responses
+│   └── pdf_reporter.py             # Professional reports
+├── utils/
+│   ├── logger.py                   # Structured logging
+│   ├── image_processor.py          # Image loading, resizing, conversion
+│   ├── validators.py               # File validation
+│   └── helpers.py                  # Utility functions
+└── ui/
+    └── index.html                  # Single-page web interface
+```
+**Detailed Architecture**: See [`docs/Architecture.md`](docs/Architecture.md)
+---
+## 📈 Performance Expectations
+### Detection Rates (Honest Estimates)
+| Image Source | Expected Detection Rate |
+|-------------|------------------------|
+| Consumer AI tools (2022-2023) | 80–90% |
+| Stable Diffusion 1.x / 2.x | 70–80% |
+| Midjourney v5 / v6 | 55–70% |
+| DALL·E 3 / Gemini Imagen 3 | 40–55% |
+| Post-processed AI images | 30–45% |
+| **False positives on real photos** | **~10–20%** |
+### Why These Rates?
+1. **Modern Models Are Good**: 2024-2025 generators produce physically plausible images
+2. **Post-Processing Erases Traces**: JPEG compression, filters, and resizing remove statistical artifacts
+3. **Real Photos Vary Widely**: Macro, long-exposure, and HDR photos trigger false positives
+4. **Adversarial Evasion**: Adding noise or slight edits defeats statistical detectors
+### Processing Performance
+- **Single image**: 2–4 seconds
+- **Batch (10 images)**: 15–25 seconds (parallel)
+- **Memory**: 50–150 MB per image
+- **Max concurrent workers**: 4 (configurable)
+---
+## 📦 Installation
+### Prerequisites
+- Python 3.11+
+- pip
+### Setup
+```bash
+# Clone repository
+git clone https://github.com/satyakimitra/ai-image-screener.git
+cd ai-image-screener
+# Create virtual environment
+python -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+# Install dependencies
+pip install -r requirements.txt
+# Create required directories
+mkdir -p data/{uploads,reports,cache} logs
+# Run server
+python app.py
+```
+Server will start at `http://localhost:8005`
+---
+## 🚀 Quick Start
+### Web Interface
+1. Open `http://localhost:8005` in browser
+2. Upload images (single or batch)
+3. View results with per-metric breakdowns
+4. Export reports (CSV/PDF)
+### API Usage
+```bash
+# Single image analysis
+curl -X POST http://localhost:8005/analyze/image \
+  -F "file=@example.jpg"
+# Batch analysis
+curl -X POST http://localhost:8005/analyze/batch \
+  -F "files=@img1.jpg" \
+  -F "files=@img2.png" \
+  -F "files=@img3.webp"
+# Download CSV report
+curl -X GET http://localhost:8005/report/csv/{batch_id} -o report.csv
+```
+**Full API Documentation**: See [`docs/API.md`](docs/API.md)
+---
+## 📖 Documentation
+| Document | Description |
+|----------|-------------|
+| [`docs/Architecture.md`](docs/Architecture.md) | System architecture, data flow diagrams, component details |
+| [`docs/API.md`](docs/API.md) | Complete API reference with examples |
+| [`docs/CaseStudy_Analysis.md`](docs/CaseStudy_Analysis.md) | Statistical analysis, formulas, mathematical foundations |
+---
+## 🔬 Scientific References
+### Core Detection Techniques
+1. **Gragnaniello, D., Cozzolino, D., Marra, F., Poggi, G., & Verdoliva, L.** (2021). "Are GAN Generated Images Easy to Detect? A Critical Analysis of the State-of-the-Art." *IEEE International Conference on Multimedia and Expo*. [Paper](https://arxiv.org/abs/2104.02726)
+2. **Dzanic, T., Shah, K., & Witherden, F.** (2020). "Fourier Spectrum Discrepancies in Deep Network Generated Images." *NeurIPS 2020*. [Paper](https://arxiv.org/abs/2003.08685)
+3. **Kirchner, M., & Johnson, M. K.** (2019). "SPN-CNN: Boosting Sensor Pattern Noise for Image Manipulation Detection." *IEEE International Workshop on Information Forensics and Security*. [Paper](https://ieeexplore.ieee.org/document/8625351)
+4. **Nataraj, L., Mohammed, T. M., Manjunath, B. S., Chandrasekaran, S., Flenner, A., Bappy, J. H., & Roy-Chowdhury, A. K.** (2019). "Detecting GAN Generated Fake Images using Co-occurrence Matrices." *Electronic Imaging*. [Paper](https://arxiv.org/abs/1912.11035)
+5. **Marra, F., Gragnaniello, D., Cozzolino, D., & Verdoliva, L.** (2019). "Detection of GAN-Generated Fake Images over Social Networks." *IEEE Conference on Multimedia Information Processing and Retrieval*. [Paper](https://arxiv.org/abs/1902.11153)
+### Diffusion Model Artifacts
+6. **Corvi, R., Cozzolino, D., Poggi, G., Nagano, K., & Verdoliva, L.** (2023). "Intriguing Properties of Synthetic Images: from Generative Adversarial Networks to Diffusion Models." *arXiv preprint*. [Paper](https://arxiv.org/abs/2304.06408)
+7. **Sha, Z., Li, Z., Yu, N., & Zhang, Y.** (2023). "DE-FAKE: Detection and Attribution of Fake Images Generated by Text-to-Image Diffusion Models." *ACM CCS 2023*. [Paper](https://arxiv.org/abs/2310.16617)
+### Ensemble Methods
+8. **Wang, S. Y., Wang, O., Zhang, R., Owens, A., & Efros, A. A.** (2020). "CNN-Generated Images Are Surprisingly Easy to Spot... for Now." *CVPR 2020*. [Paper](https://arxiv.org/abs/1912.11035)
+---
+## ⚠️ Ethical Considerations
+### Honest Positioning
+This system:
+- ✅ Never claims "real" or "fake" with certainty
+- ✅ Provides probabilistic screening only
+- ✅ Encourages human verification for all flagged images
+- ✅ Documents methodology transparently
+- ✅ Acknowledges false positive rates upfront
+### Appropriate Use Cases
+**Suitable for:**
+- Content moderation pre-screening (reduces human workload)
+- Journalism workflows (identifies images needing verification)
+- Stock photo platforms (flags for manual review)
+- Legal discovery (prioritizes suspicious documents)
+**Not suitable for:**
+- Law enforcement as sole evidence
+- Automated content rejection without human review
+- High-stakes decisions (e.g., criminal prosecution)
+### Known Limitations
+1. **False Positives**: 10-20% of real photos flagged (especially HDR, macro, long-exposure)
+2. **Evolving Generators**: Detection rates decline as AI models improve
+3. **Post-Processing Evasion**: Simple filters can defeat statistical detectors
+4. **No Adversarial Robustness**: Not designed to resist intentional evasion
+---
+## 🛠️ Configuration
+### Environment Variables
+Create `.env` file:
+```env
+# Server
+HOST=localhost
+PORT=8005
+WORKERS=4
+DEBUG=False
+# Detection
+REVIEW_THRESHOLD=0.65
+# Metric Weights (must sum to 1.0)
+GRADIENT_WEIGHT=0.30
+FREQUENCY_WEIGHT=0.25
+NOISE_WEIGHT=0.20
+TEXTURE_WEIGHT=0.15
+COLOR_WEIGHT=0.10
+# Processing
+MAX_FILE_SIZE_MB=10
+MAX_BATCH_SIZE=50
+PROCESSING_TIMEOUT=30
+PARALLEL_PROCESSING=True
+MAX_WORKERS=4
+```
+### Sensitivity Modes
+Adjust `REVIEW_THRESHOLD` in `config/settings.py`:
+- **Conservative** (0.75): Fewer false positives, may miss some AI images
+- **Balanced** (0.65): Recommended default
+- **Aggressive** (0.55): Catch more AI images, more false positives
+---
+## 🧪 Testing
+```bash
+# Run all tests
+pytest tests/
+# With coverage
+pytest --cov=. --cov-report=html tests/
+# Single test file
+pytest tests/test_metrics.py -v
+```
+---
+## 🤝 Contributing
+Contributions welcome! Please:
+1. Fork the repository
+2. Create a feature branch (`git checkout -b feature/amazing-feature`)
+3. Commit changes (`git commit -m 'Add amazing feature'`)
+4. Push to branch (`git push origin feature/amazing-feature`)
+5. Open a Pull Request
+**Code Style**: Black formatter, 100 character line limit
+---
+## 📄 License
+This project is licensed under the MIT License - see [LICENSE](LICENSE) file for details.
+---
+## 👤 Author
+**Satyaki Mitra**
+Data Scientist | AI-ML Practitioner
+- LinkedIn: [linkedin.com/in/satyaki-mitra](https://linkedin.com/in/satyaki-mitra)
+- GitHub: [@satyakimitra](https://github.com/satyakimitra)
+- Email: satyaki.mitra@example.com
+---
+## 🙏 Acknowledgments
+- Research papers cited above for theoretical foundations
+- FastAPI team for excellent web framework
+- OpenCV and SciPy communities for image processing tools
+- Users providing feedback on detection accuracy
+---
+## 📞 Support
+- **Issues**: [GitHub Issues](https://github.com/satyaki-mitra/ai-image-screener/issues)
+- **Documentation**: [`docs/`](docs/)
+- **Email**: support@aiimagescreener.com
+---
+## 🔮 Roadmap
+- [ ] Add watermark detection module
+- [ ] Integrate reverse image search API
+- [ ] ML-based detector as optional metric
+- [ ] Persistent result storage (PostgreSQL)
+- [ ] Webhook callbacks for async processing
+- [ ] Docker containerization
+- [ ] Kubernetes deployment manifests
+---
+<p align="center">
+  <i>Built with transparency and honesty in mind.</i><br>
+  <i>Screening, not certainty. Efficiency, not perfection.</i>
+</p>

README_HUGGINGFACE.md ADDED Viewed

	@@ -0,0 +1,128 @@

+---
+title: AI Image Screener
+emoji: 🔍
+colorFrom: blue
+colorTo: purple
+sdk: docker
+app_port: 7860
+pinned: false
+license: mit
+tags:
+  - ai-detection
+  - image-forensics
+  - computer-vision
+  - content-moderation
+  - screening-tool
+---
+# AI Image Screener 🔍
+**A transparent, unsupervised first-pass screening system for identifying images requiring human review**
+## Overview
+AI Image Screener is a multi-metric ensemble system that analyzes images using five independent statistical detectors to identify potential AI-generated content. Unlike black-box classifiers, this system provides full explainability with per-metric breakdowns and human-readable explanations.
+**Important**: This is a **screening tool, not a verdict engine**. It flags images for human review rather than making definitive "real vs fake" classifications.
+## How It Works
+The system analyzes five distinct image characteristics:
+1. **Gradient-Field PCA (30%)**: Detects lighting inconsistencies typical of diffusion models
+2. **Frequency Analysis (25%)**: Identifies unnatural spectral energy distributions via FFT
+3. **Noise Pattern Analysis (20%)**: Detects missing or artificial sensor noise
+4. **Texture Statistics (15%)**: Identifies overly smooth or repetitive regions
+5. **Color Distribution (10%)**: Flags unnatural saturation and color patterns
+Each metric produces a score (0.0-1.0), which are combined using weighted ensemble aggregation.
+## Expected Performance
+**Detection Rates (Honest Estimates):**
+- Consumer AI tools (2022-2023): 80-90%
+- Stable Diffusion 1.x/2.x: 70-80%
+- Midjourney v5/v6: 55-70%
+- DALL-E 3 / Gemini Imagen 3: 40-55%
+- Post-processed AI images: 30-45%
+**False Positive Rate**: ~10-20% on authentic photos (especially HDR, macro, long-exposure)
+## Usage
+### Web Interface
+1. Click "Use this Space" above
+2. Upload single or multiple images (max 50 per batch)
+3. View results with detailed metric breakdowns
+4. Export reports in CSV or PDF format
+### API Access
+```bash
+# Single image analysis
+curl -X POST https://huggingface.co/spaces/YOUR_USERNAME/ai-image-screener/api/analyze/image \
+  -F "file=@image.jpg"
+# Batch analysis
+curl -X POST https://huggingface.co/spaces/YOUR_USERNAME/ai-image-screener/api/analyze/batch \
+  -F "files=@img1.jpg" \
+  -F "files=@img2.png"
+```
+See full API documentation at `/docs` endpoint.
+## Limitations
+⚠️ **This system has known limitations:**
+- **Not adversarially robust**: Simple post-processing can defeat detection
+- **Declining effectiveness**: Detection rates decrease as AI models improve
+- **False positives**: 10-20% of real photos may be flagged (HDR, macro, heavily edited)
+- **No semantic understanding**: Cannot detect deepfakes, inpainting, or prompt-guided generation
+## Appropriate Use Cases
+✅ **Suitable for:**
+- Content moderation pre-screening (reduces human workload)
+- Journalism workflows (identifies images needing verification)
+- Stock photo platforms (flags for manual review)
+- Legal discovery (prioritizes suspicious documents)
+❌ **Not suitable for:**
+- Law enforcement as sole evidence
+- Automated content rejection without human review
+- High-stakes decisions (criminal prosecution, copyright disputes)
+## Technical Details
+- **Framework**: FastAPI (Python 3.11+)
+- **Processing Time**: 2-4 seconds per image
+- **Dependencies**: OpenCV, NumPy, SciPy, ReportLab
+- **No ML Models**: Purely statistical detection (no GPU required)
+## Credits
+**Author**: Satyaki Mitra (Data Scientist, AI-ML Practitioner)
+**Research Foundations**:
+- Gragnaniello et al. (2021) - Gradient analysis for GAN detection
+- Dzanic et al. (2020) - Fourier spectrum discrepancies
+- Kirchner & Johnson (2019) - Sensor pattern noise analysis
+- Nataraj et al. (2019) - Co-occurrence matrix detection
+- Marra et al. (2019) - GAN-specific artifacts
+## License
+MIT License - See [LICENSE](LICENSE) for details
+## Links
+- 📖 [Full Documentation](https://github.com/satyakimitra/ai-image-screener)
+- 🏗️ [Architecture Details](https://github.com/satyakimitra/ai-image-screener/blob/main/docs/Architecture.md)
+- 📊 [Case Study Analysis](https://github.com/satyakimitra/ai-image-screener/blob/main/docs/CaseStudy_Analysis.md)
+- 🔬 [API Reference](https://github.com/satyakimitra/ai-image-screener/blob/main/docs/API.md)
+---
+**Disclaimer**: Results are indicative and should be verified manually for critical applications. This system provides screening assistance, not definitive judgments.

all.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

app.py ADDED Viewed

	@@ -0,0 +1,347 @@

+# Dependencies
+import uuid
+import shutil
+import signal
+import uvicorn
+import traceback
+from typing import List
+from typing import Dict
+from pathlib import Path
+from fastapi import File
+from typing import Optional
+from fastapi import Request
+from fastapi import FastAPI
+from fastapi import UploadFile
+from fastapi import HTTPException
+from utils.logger import get_logger
+from config.settings import settings
+from fastapi.responses import Response
+from config.schemas import APIResponse
+from config.schemas import AnalysisResult
+from fastapi.responses import HTMLResponse
+from fastapi.responses import JSONResponse
+from utils.validators import ImageValidator
+from fastapi.staticfiles import StaticFiles
+from utils.helpers import generate_unique_id
+from reporter.csv_reporter import CSVReporter
+from reporter.pdf_reporter import PDFReporter
+from config.schemas import BatchAnalysisResult
+from reporter.json_reporter import JSONReporter
+from utils.image_processor import ImageProcessor
+from fastapi.middleware.cors import CORSMiddleware
+from features.batch_processor import BatchProcessor
+from features.threshold_manager import ThresholdManager
+# Logging
+logger = get_logger(__name__)
+# FastAPI App Definition
+app = FastAPI(title       = "AI Image Screener",
+              version     = settings.VERSION,
+              description = "First-pass AI image screening tool for bulk workflows",
+             )
+# Serve static assets (if any later)
+app.mount("/ui", StaticFiles(directory = "ui"), name = "ui")
+# CORS (UI + API)
+app.add_middleware(CORSMiddleware,
+                   allow_origins     = ["*"],
+                   allow_credentials = True,
+                   allow_methods     = ["*"],
+                   allow_headers     = ["*"],
+                  )
+# Runtime State
+SESSION_STORE: Dict[str, Dict] = {}
+# Component Initialization
+image_validator   = ImageValidator()
+image_processor   = ImageProcessor()
+threshold_manager = ThresholdManager()
+threshold_manager = threshold_manager
+batch_processor   = BatchProcessor(threshold_manager = threshold_manager)
+json_reporter     = JSONReporter()
+csv_reporter      = CSVReporter()
+pdf_reporter      = PDFReporter()
+UPLOAD_DIR        = settings.UPLOAD_DIR
+CACHE_DIR         = settings.CACHE_DIR
+REPORTS_DIR       = settings.REPORTS_DIR
+for d in [UPLOAD_DIR, CACHE_DIR, REPORTS_DIR]:
+    d.mkdir(parents  = True,
+            exist_ok = True,
+           )
+# Utility: Progress Callback
+def _progress_callback(batch_id: str):
+    def callback(image_idx: int, total: int, filename: str):
+        session = SESSION_STORE.get(batch_id)
+        if (not session or (session.get("status") != "processing")):
+            return
+        session["progress"] = {"current"  : image_idx,
+                               "total"    : total,
+                               "filename" : filename,
+                              }
+    return callback
+# Utility: Housekeeping
+def cleanup_temp_files():
+    try:
+        for folder in [UPLOAD_DIR, CACHE_DIR]:
+            for item in folder.iterdir():
+                if item.is_file():
+                    item.unlink(missing_ok = True)
+        logger.info("Temporary files cleaned")
+    except Exception as e:
+        logger.warning(f"Cleanup failed: {e}")
+def shutdown_handler(*_):
+    logger.warning("Shutdown signal received — cleaning up")
+    cleanup_temp_files()
+signal.signal(signal.SIGINT, shutdown_handler)
+signal.signal(signal.SIGTERM, shutdown_handler)
+# Error Handling
+@app.exception_handler(Exception)
+async def global_exception_handler(request: Request, exc: Exception):
+    logger.error(f"Unhandled error: {exc}")
+    logger.debug(traceback.format_exc())
+    return JSONResponse(status_code = 500,
+                        content     = APIResponse(success = False,
+                                                  message = "Internal server error",
+                                                 ).model_dump()
+                       )
+# Home
+@app.get("/", response_class = HTMLResponse)
+def serve_frontend():
+    index_path = Path("ui/index.html")
+    if not index_path.exists():
+        raise HTTPException(status_code = 404,
+                            detail      = "UI not found",
+                           )
+    return index_path.read_text(encoding = "utf-8")
+# Health Check
+@app.get("/health")
+def health():
+    return {"status"  : "ok",
+            "version" : settings.VERSION,
+           }
+# Single Image Analysis
+@app.post("/analyze/image")
+async def analyze_single_image(file: UploadFile = File(...)):
+    image_id   = generate_unique_id()
+    image_path = UPLOAD_DIR / f"{image_id}_{file.filename}"
+    image_validator.validate_image(file_path = image_path,
+                                   filename  = file.filename,
+                                   file_size = file.size,
+                                  )
+    try:
+        with open(image_path, "wb") as f:
+            shutil.copyfileobj(file.file, f)
+        image                  = image_processor.load_image(image_path)
+        # image is a NumPy array → shape = (H, W, C) or (H, W)
+        height, width          = image.shape[:2]
+        result: AnalysisResult = batch_processor.process_single(image      = image_path,
+                                                                filename   = file.filename,
+                                                                image_size = (width, height),
+                                                               )
+        return APIResponse(success = True,
+                           message = "Image analysis completed",
+                           data    = result.model_dump(),
+                          )
+    finally:
+        image_path.unlink(missing_ok = True)
+# Batch Image Analysis
+@app.post("/analyze/batch")
+async def analyze_batch(files: List[UploadFile] = File(...)):
+    if not files:
+        raise HTTPException(status_code = 400,
+                            detail      = "No files provided",
+                           )
+    batch_id                = str(uuid.uuid4())
+    SESSION_STORE[batch_id] = {"status"   : "processing",
+                               "progress" : {"current" : 0,
+                                             "total"   : len(files),
+                                            },
+                              }
+    image_entries           = list()
+    try:
+        for file in files:
+            uid           = generate_unique_id()
+            path          = UPLOAD_DIR / f"{uid}_{file.filename}"
+            with open(path, "wb") as f:
+                shutil.copyfileobj(file.file, f)
+            image         = image_processor.load_image(path)
+            height, width = image.shape[:2]
+            image_validator.validate_image(file_path = path,
+                                           filename  = file.filename,
+                                           file_size = file.size,
+                                          )
+            image_entries.append({"path"     : path,
+                                  "filename" : file.filename,
+                                  "size"     : (width, height),
+                                })
+        batch_result: BatchAnalysisResult = batch_processor.process_batch(image_files = image_entries,
+                                                                          on_progress = _progress_callback(batch_id),
+                                                                         )
+        SESSION_STORE[batch_id]           = {"status"   : "completed",
+                                             "progress" : SESSION_STORE[batch_id]["progress"],
+                                             "result"   : batch_result,
+                                            }
+        return APIResponse(success = True,
+                           message = "Batch analysis completed",
+                           data    = {"batch_id" : batch_id,
+                                      "result"   : batch_result.model_dump(),
+                                     },
+                          )
+    except KeyboardInterrupt:
+        SESSION_STORE[batch_id] = {"status"   : "interrupted",
+                                   "progress" : SESSION_STORE[batch_id]["progress"],
+                                  }
+        raise HTTPException(status_code = 499,
+                            detail      = "Processing interrupted",
+                           )
+    except Exception as e:
+        logger.error(f"Batch {batch_id} failed: {e}", exc_info = True)
+        SESSION_STORE[batch_id] = {"status" : "failed",
+                                   "error"  : str(e),
+                                  }
+        raise HTTPException(status_code = 500,
+                            detail      = "Batch processing failed",
+                           )
+    finally:
+        for item in image_entries:
+            Path(item["path"]).unlink(missing_ok = True)
+# Batch Progress
+@app.get("/batch/{batch_id}/progress")
+def batch_progress(batch_id: str):
+    session = SESSION_STORE.get(batch_id)
+    if not session:
+        raise HTTPException(status_code = 404,
+                            detail      = "Batch not found",
+                           )
+    return session
+# Report Downloads
+@app.api_route("/report/csv/{batch_id}", methods = ["GET", "POST"])
+def export_csv(batch_id: str):
+    session = SESSION_STORE.get(batch_id)
+    if (not session or ("result" not in session)):
+        raise HTTPException(status_code = 404,
+                            detail      = "Batch result not found",
+                           )
+    path = csv_reporter.export_batch_detailed(session["result"])
+    # Read the file and send it as a download
+    with open(path, "rb") as f:
+        content = f.read()
+    # Clean up the file after sending
+    path.unlink(missing_ok = True)
+    return Response(content    = content,
+                    media_type = "text/csv",
+                    headers    = {"Content-Disposition" : f"attachment; filename=ai_screener_report_{batch_id}.csv",
+                                  "Content-Type"        : "text/csv"
+                                 }
+                   )
+@app.api_route("/report/pdf/{batch_id}", methods = ["GET", "POST"])
+def export_pdf(batch_id: str):
+    session = SESSION_STORE.get(batch_id)
+    if (not session or ("result" not in session)):
+        raise HTTPException(status_code = 404,
+                            detail      = "Batch result not found",
+                           )
+    path = pdf_reporter.export_batch(session["result"])
+    # Read the file and send it as a download
+    with open(path, "rb") as f:
+        content = f.read()
+    # Clean up the file after sending
+    path.unlink(missing_ok = True)
+    return Response(content    = content,
+                    media_type = "application/pdf",
+                    headers    = {"Content-Disposition" : f"attachment; filename=ai_screener_report_{batch_id}.pdf",
+                                  "Content-Type"        : "application/pdf"
+                                 }
+                   )
+# ==================== MAIN ====================
+if __name__ == "__main__":
+    # Explicit startup log (forces log file creation)
+    logger.info("Starting AI Image Screener API Server")
+    uvicorn.run("app:app",
+                host      = settings.HOST,
+                port      = settings.PORT,
+                reload    = settings.DEBUG,
+                log_level = settings.LOG_LEVEL.lower(),
+                workers   = 1 if settings.DEBUG else settings.WORKERS,
+               )

config/__init__.py ADDED Viewed

File without changes

config/constants.py ADDED Viewed

	@@ -0,0 +1,325 @@

+# Dependencies
+from enum import Enum
+from dataclasses import dataclass
+class DetectionStatus(str, Enum):
+    """
+    Overall detection status
+    """
+    LIKELY_AUTHENTIC = "LIKELY_AUTHENTIC"
+    REVIEW_REQUIRED  = "REVIEW_REQUIRED"
+class SignalStatus(str, Enum):
+    """
+    Individual signal status
+    """
+    PASSED  = "passed"
+    WARNING = "warning"
+    FLAGGED = "flagged"
+class FileFormat(str, Enum):
+    """
+    Supported file formats
+    """
+    JPG  = ".jpg"
+    JPEG = ".jpeg"
+    PNG  = ".png"
+    WEBP = ".webp"
+class MetricType(str, Enum):
+    """
+    Detection metric types
+    """
+    GRADIENT  = "gradient"
+    FREQUENCY = "frequency"
+    NOISE     = "noise"
+    TEXTURE   = "texture"
+    COLOR     = "color"
+# Signal thresholds
+SIGNAL_THRESHOLDS          = {SignalStatus.FLAGGED : 0.7,
+                              SignalStatus.WARNING : 0.4,
+                              SignalStatus.PASSED  : 0.0,
+                             }
+# Metric explanations
+METRIC_EXPLANATIONS        = {MetricType.GRADIENT  : {'high'     : "Detected irregular gradient patterns typical of diffusion models. Natural photos show consistent lighting gradients shaped by physics.",
+                                                      'moderate' : "Some gradient inconsistencies detected. May indicate AI generation or heavy editing.",
+                                                      'normal'   : "Gradient patterns are consistent with natural lighting and camera optics."
+                                                     },
+                              MetricType.FREQUENCY : {'high'     : "Unusual frequency distribution detected. AI-generated images often show unnatural spectral patterns.",
+                                                      'moderate' : "Frequency patterns show some irregularities. Requires further review.",
+                                                      'normal'   : "Frequency distribution matches expected patterns for authentic photographs."
+                                                     },
+                              MetricType.NOISE     : {'high'     : "Noise pattern is unnaturally uniform. Real camera sensors produce characteristic noise patterns.",
+                                                      'moderate' : "Noise distribution shows some anomalies. May indicate synthetic generation.",
+                                                      'normal'   : "Noise characteristics are consistent with genuine camera sensor behavior."
+                                                     },
+                              MetricType.TEXTURE   : {'high'     : "Detected suspiciously smooth regions. Natural photos have organic texture variation.",
+                                                      'moderate' : "Some texture regions appear overly uniform. Further analysis recommended.",
+                                                      'normal'   : "Texture variation is within expected ranges for authentic photographs."
+                                                     },
+                              MetricType.COLOR     : {'high'     : "Color distribution shows impossible or highly unlikely patterns.",
+                                                      'moderate' : "Some color histogram irregularities detected.",
+                                                      'normal'   : "Color distribution is within normal ranges for real photographs."
+                                                     }
+                             }
+# Basic Image Processing Constants
+MIN_IMAGE_DIMENSION        = 64
+MAX_IMAGE_DIMENSION        = 8192
+LUMINANCE_WEIGHTS          = (0.2126, 0.7152, 0.0722)  # ITU-R BT.709
+IMAGE_RESIZE_MAX_DIMENSION = 1024
+# Gradient-Field PCA Detection Parameters
+@dataclass(frozen = True)
+class GradientFieldPCAParams:
+    """
+    Parameters for Gradient-Field PCA detection
+    """
+    # Random Seed For Reproducibility
+    RANDOM_SEED                : int   = 1234
+    # NEUTRAL_SCORE
+    NEUTRAL_SCORE              : float = 0.5
+    # PCA Configuration
+    SAMPLE_SIZE                : int   = 10000  # Max gradient samples for PCA
+    # Thresholds
+    MAGNITUDE_THRESHOLD        : float = 1e-6   # Minimum gradient magnitude to consider
+    MIN_SAMPLES                : int   = 10     # Minimum samples required for PCA
+    VARIANCE_THRESHOLD         : float = 1e-10  # Minimum total variance
+    EIGENVALUE_RATIO_THRESHOLD : float = 0.85   # Real photos typically > 0.85
+# Frequency Analysis Parameters
+@dataclass(frozen = True)
+class FrequencyAnalysisParams:
+    """
+    Parameters for FFT-based frequency analysis
+    """
+    # NEUTRAL_SCORE
+    NEUTRAL_SCORE       : float = 0.5
+    # FFT Configuration
+    BINS                : int   = 64
+    HIGH_FREQ_THRESHOLD : float = 0.6     # Radial position where high-freq starts
+    # Analysis Thresholds
+    MIN_SPECTRUM_SAMPLES : int   = 10
+    HF_RATIO_UPPER       : float = 0.35   # High-frequency ratio upper bound
+    HF_RATIO_LOWER       : float = 0.08   # High-frequency ratio lower bound
+    # Scaling Factors
+    HF_UPPER_SCALE       : float = 3.0
+    HF_LOWER_SCALE       : float = 5.0
+    ROUGHNESS_SCALE      : float = 10.0
+    DEVIATION_SCALE      : float = 2.0
+    # Sub-metric Weights
+    SUBMETRIC_WEIGHTS    : dict  = None
+    def __post_init__(self):
+        if self.SUBMETRIC_WEIGHTS is None:
+            object.__setattr__(self, 'SUBMETRIC_WEIGHTS', {'hf_anomaly' : 0.4,
+                                                           'roughness'  : 0.3,
+                                                           'deviation'  : 0.3,
+                                                          }
+                              )
+# Noise Analysis Parameters
+@dataclass(frozen = True)
+class NoiseAnalysisParams:
+    """
+    Parameters for noise pattern analysis
+    """
+    # NEUTRAL SCORE
+    NEUTRAL_SCORE            : float = 0.5
+    # Patch Configuration
+    PATCH_SIZE               : int   = 32
+    STRIDE                   : int   = 16
+    SAMPLES                  : int   = 100
+    # Variance Thresholds
+    VARIANCE_LOW_THRESHOLD   : float = 1.0     # Skip too uniform patches
+    VARIANCE_HIGH_THRESHOLD  : float = 1000.0  # Skip too structured patches
+    # MAD Conversion
+    MAD_TO_STD_FACTOR        : float = 1.4826  # Gaussian: σ ≈ 1.4826 × MAD
+    # Distribution Analysis
+    MIN_ESTIMATES            : int   = 10
+    MIN_FILTERED_SAMPLES     : int   = 5
+    OUTLIER_PERCENTILE_LOW   : int   = 10
+    OUTLIER_PERCENTILE_HIGH  : int   = 90
+    # CV (Coefficient of Variation) Thresholds
+    CV_UNIFORM_THRESHOLD     : float = 0.15
+    CV_VARIABLE_THRESHOLD    : float = 1.2
+    CV_UNIFORM_SCALE         : float = 5.0
+    CV_VARIABLE_SCALE        : float = 2.0
+    # Noise Level Thresholds
+    LEVEL_CLEAN_THRESHOLD    : float = 1.5
+    LEVEL_LOW_THRESHOLD      : float = 2.5
+    # IQR Analysis
+    IQR_THRESHOLD            : float = 0.3
+    IQR_SCALE                : float = 2.0
+    IQR_PERCENTILE_LOW       : int   = 25
+    IQR_PERCENTILE_HIGH      : int   = 75
+    # Sub-metric Weights
+    SUBMETRIC_WEIGHTS        : dict  = None
+    def __post_init__(self):
+        if self.SUBMETRIC_WEIGHTS is None:
+            object.__setattr__(self, 'SUBMETRIC_WEIGHTS', {'cv_anomaly'          : 0.4,
+                                                           'noise_level_anomaly' : 0.4,
+                                                           'iqr_anomaly'         : 0.2,
+                                                          }
+                              )
+# Texture Analysis Parameters
+@dataclass(frozen = True)
+class TextureAnalysisParams:
+    """
+    Parameters for texture analysis
+    """
+    # Random Seed for reproducibility
+    RANDOM_SEED                : int   = 1234
+    # Neutral Score
+    NEUTRAL_SCORE              : float = 0.5
+    # Patch Configuration
+    PATCH_SIZE                 : int   = 64
+    N_PATCHES                  : int   = 50
+    # Histogram Configuration
+    HISTOGRAM_BINS             : int   = 32
+    HISTOGRAM_RANGE            : tuple = (0, 255)
+    # Edge Detection
+    EDGE_THRESHOLD             : float = 10.0
+    # Smoothness Analysis
+    SMOOTHNESS_THRESHOLD       : float = 0.5
+    SMOOTH_RATIO_THRESHOLD     : float = 0.4
+    SMOOTH_RATIO_SCALE         : float = 2.5
+    # Entropy Analysis
+    ENTROPY_CV_THRESHOLD       : float = 0.15
+    ENTROPY_SCALE              : float = 5.0
+    # Contrast Analysis
+    CONTRAST_CV_LOW            : float = 0.3
+    CONTRAST_CV_HIGH           : float = 1.5
+    CONTRAST_LOW_SCALE         : float = 2.0
+    CONTRAST_HIGH_SCALE        : float = 0.5
+    # Edge Density Analysis
+    EDGE_CV_THRESHOLD          : float = 0.4
+    EDGE_SCALE                 : float = 1.5
+    # Sub-metric Weights
+    SUBMETRIC_WEIGHTS          : dict  = None
+    def __post_init__(self):
+        if self.SUBMETRIC_WEIGHTS is None:
+            object.__setattr__(self, 'SUBMETRIC_WEIGHTS', {'smoothness_anomaly' : 0.35,
+                                                           'entropy_anomaly'    : 0.25,
+                                                           'contrast_anomaly'   : 0.25,
+                                                           'edge_anomaly'       : 0.15,
+                                                          }
+                              )
+# Color Analysis Parameters
+@dataclass(frozen = True)
+class ColorAnalysisParams:
+    """
+    Parameters for color distribution analysis
+    """
+    # Random Seed for reproducibility
+    RANDOM_SEED                  : int   = 1234
+    # Neutral Score
+    NEUTRAL_SCORE                : float = 0.5
+    # Saturation Analysis
+    SAT_HIGH_THRESHOLD           : float = 0.8
+    SAT_VERY_HIGH_THRESHOLD      : float = 0.95
+    SAT_MEAN_THRESHOLD           : float = 0.65
+    SAT_MEAN_SCALE               : float = 3.0
+    HIGH_SAT_RATIO_THRESHOLD     : float = 0.20
+    HIGH_SAT_SCALE               : float = 2.5
+    CLIP_RATIO_THRESHOLD         : float = 0.05
+    CLIP_SCALE                   : float = 10.0
+    # Histogram Analysis
+    HISTOGRAM_BINS               : int   = 64
+    HISTOGRAM_RANGE              : tuple = (0, 1)
+    ROUGHNESS_THRESHOLD          : float = 0.015
+    ROUGHNESS_SCALE              : float = 50.0
+    CLIP_THRESHOLD               : float = 0.10
+    CLIP_SCALE_FACTOR            : float = 5.0
+    # Hue Analysis
+    HUE_SAT_MASK_THRESHOLD       : float = 0.2
+    HUE_MIN_PIXELS               : int   = 100
+    HUE_BINS                     : int   = 36
+    HUE_RANGE                    : tuple = (0, 360)
+    HUE_CONCENTRATION_THRESHOLD  : float = 0.6
+    HUE_CONCENTRATION_SCALE      : float = 2.5
+    HUE_EMPTY_BIN_THRESHOLD      : float = 0.01
+    HUE_GAP_RATIO_THRESHOLD      : float = 0.4
+    HUE_GAP_SCALE                : float = 1.5
+    # Sub-metric Weights
+    SAT_SUBMETRIC_WEIGHTS        : dict  = None
+    HUE_SUBMETRIC_WEIGHTS        : dict  = None
+    MAIN_WEIGHTS                 : dict  = None
+    def __post_init__(self):
+        if self.SAT_SUBMETRIC_WEIGHTS is None:
+            object.__setattr__(self, 'SAT_SUBMETRIC_WEIGHTS', {'mean_anomaly'     : 0.3,
+                                                               'high_sat_anomaly' : 0.4,
+                                                               'clip_anomaly'     : 0.3,
+                                                              }
+                              )
+        if self.HUE_SUBMETRIC_WEIGHTS is None:
+            object.__setattr__(self, 'HUE_SUBMETRIC_WEIGHTS', {'concentration_anomaly' : 0.6,
+                                                               'gap_anomaly'           : 0.4,
+                                                              }
+                              )
+        if self.MAIN_WEIGHTS is None:
+            object.__setattr__(self, 'MAIN_WEIGHTS', {'saturation' : 0.4,
+                                                      'histogram'  : 0.35,
+                                                      'hue'        : 0.25,
+                                                     }
+                              )
+# Singleton instances for parameter classes
+GRADIENT_FIELD_PCA_PARAMS = GradientFieldPCAParams()
+FREQUENCY_ANALYSIS_PARAMS = FrequencyAnalysisParams()
+NOISE_ANALYSIS_PARAMS     = NoiseAnalysisParams()
+TEXTURE_ANALYSIS_PARAMS   = TextureAnalysisParams()
+COLOR_ANALYSIS_PARAMS     = ColorAnalysisParams()

config/schemas.py ADDED Viewed

	@@ -0,0 +1,112 @@

+# Dependencies
+from typing import List
+from typing import Dict
+from pydantic import Field
+from typing import Optional
+from datetime import datetime
+from pydantic import BaseModel
+from config.constants import MetricType
+from config.constants import SignalStatus
+from config.constants import DetectionStatus
+class MetricResult(BaseModel):
+    """
+    Raw metric output for explainability and reporting
+    """
+    metric_type : MetricType
+    score       : float           = Field(..., ge = 0.0, le = 1.0)
+    confidence  : Optional[float] = Field(None, ge = 0.0, le = 1.0)
+    details     : Optional[Dict]  = Field(default_factory = dict)
+    model_config                  = {"json_schema_extra" : {"example" : {"metric_type" : "noise",
+                                                                         "score"       : 0.72,
+                                                                         "confidence"  : 0.81,
+                                                                         "details"     : {"patches_total" : 100,
+                                                                                          "patches_valid" : 42,
+                                                                                          "mean_noise"    : 1.12,
+                                                                                          "cv"            : 0.18
+                                                                                         }
+                                                                        }
+                                                           }
+                                    }
+class DetectionSignal(BaseModel):
+    """
+    Individual detection signal result
+    """
+    name        : str          = Field(..., description = "Metric name")
+    metric_type : MetricType
+    score       : float        = Field(..., ge = 0.0, le = 1.0, description = "Suspicion score (0=natural, 1=suspicious)")
+    status      : SignalStatus
+    explanation : str          = Field(..., description = "Human-readable explanation")
+    model_config               = {"json_schema_extra" : {"example" : {"name"        : "Gradient Pattern",
+                                                                      "metric_type" : "gradient",
+                                                                      "score"       : 0.73,
+                                                                      "status"      : "flagged",
+                                                                      "explanation" : "Detected irregular gradient patterns typical of diffusion models."
+                                                                     }
+                                                        }
+                                 }
+class AnalysisResult(BaseModel):
+    """
+    Single image analysis result
+    """
+    filename        : str
+    overall_score   : float                          = Field(..., ge = 0.0, le = 1.0)
+    status          : DetectionStatus
+    confidence      : int                            = Field(..., ge = 0, le = 100, description = "Confidence percentage")
+    signals         : List[DetectionSignal]
+    metric_results  : Dict[MetricType, MetricResult]
+    processing_time : float                          = Field(..., description = "Processing time in seconds")
+    timestamp       : datetime                       = Field(default_factory = datetime.now)
+    image_size      : tuple[int, int]                = Field(..., description = "Width x Height")
+    model_config                                     =  {"json_schema_extra" : {"example" : {"filename"        : "photo_001.jpg",
+                                                                                             "overall_score"   : 0.73,
+                                                                                             "status"          : "REVIEW_REQUIRED",
+                                                                                             "confidence"      : 73,
+                                                                                             "signals"         : [],
+                                                                                             "processing_time" : 2.34,
+                                                                                             "image_size"      : [1920, 1080]
+                                                                                            }
+                                                                               }
+                                                        }
+class BatchAnalysisResult(BaseModel):
+    """
+    Batch analysis result
+    """
+    total_images          : int
+    processed             : int
+    failed                : int
+    results               : List[AnalysisResult]
+    summary               : Dict[str, float]     = Field(default_factory = dict, description = "Summary statistics")
+    total_processing_time : float
+    timestamp             : datetime             = Field(default_factory = datetime.now)
+class APIResponse(BaseModel):
+    """
+    Standard API response wrapper
+    """
+    success   : bool
+    message   : str
+    data      : Optional[Dict] = None
+    error     : Optional[str]  = None
+    timestamp : datetime       = Field(default_factory = datetime.now)
+class HealthResponse(BaseModel):
+    """
+    Health check response
+    """
+    status    : str
+    version   : str
+    uptime    : float
+    timestamp : datetime = Field(default_factory = datetime.now)

config/settings.py ADDED Viewed

	@@ -0,0 +1,107 @@

+# Dependencies
+from typing import Set
+from pathlib import Path
+from config.constants import MetricType
+from pydantic_settings import BaseSettings
+from pydantic_settings import SettingsConfigDict
+class Settings(BaseSettings):
+    """
+    Application settings with environment variable support
+    """
+    model_config                   = SettingsConfigDict(env_file          = '.env',
+                                                        env_file_encoding = 'utf-8',
+                                                        case_sensitive    = False,
+                                                       )
+    # Application
+    APP_NAME            : str      = "AI Image Screener"
+    VERSION             : str      = "1.0.0"
+    DEBUG               : bool     = False
+    LOG_LEVEL           : str      = "INFO"
+    # Server Configuration
+    HOST                : str      = "localhost"
+    PORT                : int      = 8005
+    WORKERS             : int      = 4
+    # File processing
+    MAX_FILE_SIZE_MB    : int      = 10
+    MAX_BATCH_SIZE      : int      = 50
+    ALLOWED_EXTENSIONS  : Set[str] = {".jpg", ".jpeg", ".png", ".webp"}
+    # Detection thresholds
+    REVIEW_THRESHOLD    : float    = 0.65
+    # Metric weights (must sum to 1.0)
+    GRADIENT_WEIGHT     : float    = 0.30
+    FREQUENCY_WEIGHT    : float    = 0.25
+    NOISE_WEIGHT        : float    = 0.20
+    TEXTURE_WEIGHT      : float    = 0.15
+    COLOR_WEIGHT        : float    = 0.10
+    # Processing
+    ENABLE_CACHING      : bool     = True
+    PROCESSING_TIMEOUT  : int      = 30
+    PARALLEL_PROCESSING : bool     = True
+    MAX_WORKERS         : int      = 4
+    # Paths
+    BASE_DIR            : Path     = Path(__file__).parent.parent
+    UPLOAD_DIR          : Path     = BASE_DIR / "data" / "uploads"
+    REPORTS_DIR         : Path     = BASE_DIR / "data" / "reports"
+    CACHE_DIR           : Path     = BASE_DIR / "data" / "cache"
+    LOGS_DIR            : Path     = BASE_DIR / "logs"
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self._create_directories()
+        self._validate_weights()
+    def _create_directories(self):
+        """
+        Ensure all required directories exist
+        """
+        for directory in [self.UPLOAD_DIR, self.REPORTS_DIR, self.CACHE_DIR, self.LOGS_DIR]:
+            directory.mkdir(parents  = True,
+                            exist_ok = True,
+                           )
+    def _validate_weights(self):
+        """
+        Validate metric weights sum to 1.0
+        """
+        total = (self.GRADIENT_WEIGHT +
+                 self.FREQUENCY_WEIGHT +
+                 self.NOISE_WEIGHT +
+                 self.TEXTURE_WEIGHT +
+                 self.COLOR_WEIGHT
+                )
+        if (not (0.99 <= total <= 1.01)):
+            raise ValueError(f"Metric weights must sum to 1.0, got {total}")
+    @property
+    def max_file_size_bytes(self) -> int:
+        return self.MAX_FILE_SIZE_MB * 1024 * 1024
+    def get_metric_weights(self) -> dict:
+        """
+        Get all metric weights as dictionary
+        """
+        return {MetricType.GRADIENT  : self.GRADIENT_WEIGHT,
+                MetricType.FREQUENCY : self.FREQUENCY_WEIGHT,
+                MetricType.NOISE     : self.NOISE_WEIGHT,
+                MetricType.TEXTURE   : self.TEXTURE_WEIGHT,
+                MetricType.COLOR     : self.COLOR_WEIGHT
+               }
+# Singleton
+settings = Settings()

docs/API_DOCUMENTATION.md ADDED Viewed

	@@ -0,0 +1,712 @@

+# API Documentation
+## Base Information
+**Base URL**: `http://localhost:8005`
+**API Version**: `1.0.0`
+**Protocol**: HTTP/HTTPS
+**Content Type**: `application/json` (default)
+---
+## Table of Contents
+1. [Authentication](#authentication)
+2. [Health Check](#health-check)
+3. [Single Image Analysis](#single-image-analysis)
+4. [Batch Image Analysis](#batch-image-analysis)
+5. [Batch Progress Tracking](#batch-progress-tracking)
+6. [Report Export](#report-export)
+7. [Error Handling](#error-handling)
+8. [Rate Limits](#rate-limits)
+9. [Data Models](#data-models)
+---
+## Authentication
+**Current Version**: No authentication required (intended for internal deployment)
+**Future Versions**: API key authentication planned
+```bash
+# Planned header format
+Authorization: Bearer <api_key>
+```
+---
+## Health Check
+### `GET /health`
+Check if the API server is operational.
+**Request**
+```bash
+curl -X GET http://localhost:8005/health
+```
+**Response** (`200 OK`)
+```json
+{
+  "status": "ok",
+  "version": "1.0.0"
+}
+```
+---
+## Single Image Analysis
+### `POST /analyze/image`
+Analyze a single image for AI-generation indicators.
+**Request**
+```bash
+curl -X POST http://localhost:8005/analyze/image \
+  -F "file=@/path/to/image.jpg"
+```
+**Parameters**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `file` | File | Yes | Image file (JPG/PNG/WEBP, max 10MB) |
+**Response** (`200 OK`)
+```json
+{
+  "success": true,
+  "message": "Image analysis completed",
+  "data": {
+    "filename": "example.jpg",
+    "status": "REVIEW_REQUIRED",
+    "overall_score": 0.73,
+    "confidence": 73,
+    "signals": [
+      {
+        "name": "Gradient Field PCA",
+        "metric_type": "gradient",
+        "score": 0.81,
+        "status": "flagged",
+        "explanation": "Detected irregular gradient patterns typical of diffusion models. Natural photos show consistent lighting gradients shaped by physics."
+      },
+      {
+        "name": "Frequency Analysis",
+        "metric_type": "frequency",
+        "score": 0.68,
+        "status": "warning",
+        "explanation": "Frequency patterns show some irregularities. Requires further review."
+      },
+      {
+        "name": "Noise Analysis",
+        "metric_type": "noise",
+        "score": 0.72,
+        "status": "flagged",
+        "explanation": "Noise pattern is unnaturally uniform. Real camera sensors produce characteristic noise patterns."
+      },
+      {
+        "name": "Texture Analysis",
+        "metric_type": "texture",
+        "score": 0.65,
+        "status": "warning",
+        "explanation": "Some texture regions appear overly uniform. Further analysis recommended."
+      },
+      {
+        "name": "Color Analysis",
+        "metric_type": "color",
+        "score": 0.54,
+        "status": "warning",
+        "explanation": "Some color histogram irregularities detected."
+      }
+    ],
+    "metric_results": {
+      "gradient": {
+        "metric_type": "gradient",
+        "score": 0.81,
+        "confidence": 0.87,
+        "details": {
+          "eigenvalue_ratio": 0.72,
+          "gradient_vectors_sampled": 10000,
+          "threshold": 0.85
+        }
+      },
+      "frequency": {
+        "metric_type": "frequency",
+        "score": 0.68,
+        "confidence": 0.65,
+        "details": {
+          "hf_ratio": 0.38,
+          "hf_anomaly": 0.45,
+          "roughness": 0.032,
+          "spectral_deviation": 0.21
+        }
+      },
+      "noise": {
+        "metric_type": "noise",
+        "score": 0.72,
+        "confidence": 0.78,
+        "details": {
+          "mean_noise": 1.12,
+          "cv": 0.18,
+          "patches_valid": 42,
+          "patches_total": 100
+        }
+      },
+      "texture": {
+        "metric_type": "texture",
+        "score": 0.65,
+        "confidence": 0.71,
+        "details": {
+          "smooth_ratio": 0.45,
+          "contrast_mean": 18.3,
+          "entropy_mean": 4.2,
+          "patches_used": 50
+        }
+      },
+      "color": {
+        "metric_type": "color",
+        "score": 0.54,
+        "confidence": 0.58,
+        "details": {
+          "saturation_stats": {
+            "mean_saturation": 0.68,
+            "high_sat_ratio": 0.23,
+            "very_high_sat_ratio": 0.06
+          },
+          "histogram_stats": {
+            "roughness_mean": 0.021,
+            "channels_analyzed": 3
+          },
+          "hue_stats": {
+            "top3_concentration": 0.58,
+            "gap_ratio": 0.32
+          }
+        }
+      }
+    },
+    "processing_time": 2.34,
+    "image_size": [1920, 1080],
+    "timestamp": "2024-12-19T14:32:15.123456"
+  },
+  "timestamp": "2024-12-19T14:32:15.123456"
+}
+```
+**Status Values**
+- `LIKELY_AUTHENTIC`: Score < 0.65 (default threshold)
+- `REVIEW_REQUIRED`: Score >= 0.65
+**Signal Status Values**
+- `passed`: Score < 0.40
+- `warning`: Score >= 0.40 and < 0.70
+- `flagged`: Score >= 0.70
+---
+## Batch Image Analysis
+### `POST /analyze/batch`
+Analyze multiple images in a single request with parallel processing.
+**Request**
+```bash
+curl -X POST http://localhost:8005/analyze/batch \
+  -F "files=@image1.jpg" \
+  -F "files=@image2.png" \
+  -F "files=@image3.webp"
+```
+**Parameters**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `files` | File[] | Yes | Multiple image files (max 50 per batch) |
+**Response** (`200 OK`)
+```json
+{
+  "success": true,
+  "message": "Batch analysis completed",
+  "data": {
+    "batch_id": "550e8400-e29b-41d4-a716-446655440000",
+    "result": {
+      "total_images": 3,
+      "processed": 3,
+      "failed": 0,
+      "results": [
+        {
+          "filename": "image1.jpg",
+          "status": "REVIEW_REQUIRED",
+          "overall_score": 0.73,
+          "confidence": 73,
+          "signals": [...],
+          "metric_results": {...},
+          "processing_time": 2.1,
+          "image_size": [1920, 1080],
+          "timestamp": "2024-12-19T14:32:15.123456"
+        },
+        {
+          "filename": "image2.png",
+          "status": "LIKELY_AUTHENTIC",
+          "overall_score": 0.42,
+          "confidence": 42,
+          "signals": [...],
+          "metric_results": {...},
+          "processing_time": 2.3,
+          "image_size": [2048, 1536],
+          "timestamp": "2024-12-19T14:32:17.234567"
+        },
+        {
+          "filename": "image3.webp",
+          "status": "LIKELY_AUTHENTIC",
+          "overall_score": 0.38,
+          "confidence": 38,
+          "signals": [...],
+          "metric_results": {...},
+          "processing_time": 1.9,
+          "image_size": [1024, 768],
+          "timestamp": "2024-12-19T14:32:19.345678"
+        }
+      ],
+      "summary": {
+        "likely_authentic": 2,
+        "review_required": 1,
+        "success_rate": 100,
+        "processed": 3,
+        "failed": 0,
+        "avg_score": 0.510,
+        "avg_confidence": 51,
+        "avg_proc_time": 2.10
+      },
+      "total_processing_time": 6.3,
+      "timestamp": "2024-12-19T14:32:19.345678"
+    }
+  },
+  "timestamp": "2024-12-19T14:32:19.345678"
+}
+```
+**Batch Constraints**
+- Maximum images per batch: **50**
+- Maximum file size per image: **10 MB**
+- Timeout per image: **30 seconds**
+- Total batch timeout: **15 minutes**
+---
+## Batch Progress Tracking
+### `GET /batch/{batch_id}/progress`
+Track the progress of a batch analysis job.
+**Request**
+```bash
+curl -X GET http://localhost:8005/batch/550e8400-e29b-41d4-a716-446655440000/progress
+```
+**Response - Processing** (`200 OK`)
+```json
+{
+  "status": "processing",
+  "progress": {
+    "current": 7,
+    "total": 10,
+    "filename": "image_007.jpg"
+  }
+}
+```
+**Response - Completed** (`200 OK`)
+```json
+{
+  "status": "completed",
+  "progress": {
+    "current": 10,
+    "total": 10,
+    "filename": "image_010.jpg"
+  },
+  "result": {
+    "total_images": 10,
+    "processed": 10,
+    "failed": 0,
+    "results": [...],
+    "summary": {...},
+    "total_processing_time": 21.4,
+    "timestamp": "2024-12-19T14:35:22.123456"
+  }
+}
+```
+**Response - Failed** (`200 OK`)
+```json
+{
+  "status": "failed",
+  "error": "Processing timeout exceeded"
+}
+```
+**Status Values**
+- `processing`: Batch is currently being analyzed
+- `completed`: All images processed successfully
+- `failed`: Batch processing encountered fatal error
+- `interrupted`: Processing was manually stopped
+---
+## Report Export
+### CSV Export
+#### `GET /report/csv/{batch_id}` or `POST /report/csv/{batch_id}`
+Download detailed batch analysis as CSV file.
+**Request**
+```bash
+curl -X GET http://localhost:8005/report/csv/550e8400-e29b-41d4-a716-446655440000 \
+  -o report.csv
+```
+**Response**
+- Content-Type: `text/csv`
+- File download with comprehensive analysis data
+- Includes: per-image results, metric breakdowns, forensic details
+**CSV Structure**
+```
+BATCH STATISTICS
+Total Images,10
+Successfully Processed,10
+Failed,0
+...
+ANALYSIS RESULTS
+Filename,Status,Overall Score,Confidence,Processing Time
+image1.jpg,REVIEW_REQUIRED,0.73,73,2.1
+image2.png,LIKELY_AUTHENTIC,0.42,42,2.3
+...
+IMAGE 1 DETAILED ANALYSIS
+Metric Name,Score,Status,Explanation
+Gradient Field PCA,0.81,flagged,Detected irregular gradient patterns...
+...
+```
+---
+### PDF Export
+#### `GET /report/pdf/{batch_id}` or `POST /report/pdf/{batch_id}`
+Download detailed batch analysis as PDF report.
+**Request**
+```bash
+curl -X GET http://localhost:8005/report/pdf/550e8400-e29b-41d4-a716-446655440000 \
+  -o report.pdf
+```
+**Response**
+- Content-Type: `application/pdf`
+- Professional formatted report with:
+  - Executive summary
+  - Per-image analysis sections
+  - Visual metric breakdowns
+  - Forensic details
+  - Recommendations
+---
+## Error Handling
+### Error Response Format
+All errors return a standardized JSON structure:
+```json
+{
+  "success": false,
+  "message": "Error description",
+  "error": "Detailed error message",
+  "timestamp": "2024-12-19T14:32:15.123456"
+}
+```
+### HTTP Status Codes
+| Code | Meaning | Description |
+|------|---------|-------------|
+| `200` | OK | Request successful |
+| `400` | Bad Request | Invalid input (file format, size, etc.) |
+| `404` | Not Found | Batch ID not found |
+| `413` | Payload Too Large | File size exceeds 10MB |
+| `422` | Unprocessable Entity | Validation error |
+| `499` | Client Closed Request | Processing interrupted |
+| `500` | Internal Server Error | Server-side processing error |
+### Common Error Scenarios
+**File Too Large**
+```json
+{
+  "success": false,
+  "message": "Validation error",
+  "error": "File size 12582912 bytes exceeds maximum 10485760 bytes",
+  "timestamp": "2024-12-19T14:32:15.123456"
+}
+```
+**Unsupported Format**
+```json
+{
+  "success": false,
+  "message": "Validation error",
+  "error": "File extension .gif not allowed. Allowed: .jpg, .jpeg, .png, .webp",
+  "timestamp": "2024-12-19T14:32:15.123456"
+}
+```
+**Batch Not Found**
+```json
+{
+  "success": false,
+  "message": "Batch not found",
+  "error": null,
+  "timestamp": "2024-12-19T14:32:15.123456"
+}
+```
+**Processing Timeout**
+```json
+{
+  "success": false,
+  "message": "Processing timeout",
+  "error": "Image analysis exceeded 30 second timeout",
+  "timestamp": "2024-12-19T14:32:45.123456"
+}
+```
+---
+## Rate Limits
+**Current Version**: No rate limiting implemented
+**Recommended Production Limits**:
+- Single image analysis: **60 requests/minute per IP**
+- Batch analysis: **10 requests/minute per IP**
+- Report downloads: **30 requests/minute per IP**
+---
+## Data Models
+### MetricResult
+```typescript
+{
+  metric_type: "gradient" | "frequency" | "noise" | "texture" | "color",
+  score: number,        // 0.0 - 1.0
+  confidence: number,   // 0.0 - 1.0
+  details: object       // Metric-specific forensic data
+}
+```
+### DetectionSignal
+```typescript
+{
+  name: string,
+  metric_type: "gradient" | "frequency" | "noise" | "texture" | "color",
+  score: number,        // 0.0 - 1.0
+  status: "passed" | "warning" | "flagged",
+  explanation: string
+}
+```
+### AnalysisResult
+```typescript
+{
+  filename: string,
+  status: "LIKELY_AUTHENTIC" | "REVIEW_REQUIRED",
+  overall_score: number,      // 0.0 - 1.0
+  confidence: number,         // 0 - 100
+  signals: DetectionSignal[],
+  metric_results: {
+    [key: string]: MetricResult
+  },
+  processing_time: number,    // seconds
+  image_size: [number, number],
+  timestamp: string           // ISO 8601 format
+}
+```
+### BatchAnalysisResult
+```typescript
+{
+  total_images: number,
+  processed: number,
+  failed: number,
+  results: AnalysisResult[],
+  summary: {
+    likely_authentic: number,
+    review_required: number,
+    success_rate: number,     // percentage
+    processed: number,
+    failed: number,
+    avg_score: number,
+    avg_confidence: number,
+    avg_proc_time: number
+  },
+  total_processing_time: number,
+  timestamp: string
+}
+```
+---
+## Usage Examples
+### Python
+```python
+import requests
+# Single image analysis
+with open('image.jpg', 'rb') as f:
+    response = requests.post(
+        'http://localhost:8005/analyze/image',
+        files={'file': f}
+    )
+    result = response.json()
+    print(f"Status: {result['data']['status']}")
+    print(f"Score: {result['data']['overall_score']}")
+# Batch analysis
+files = [
+    ('files', open('img1.jpg', 'rb')),
+    ('files', open('img2.png', 'rb')),
+    ('files', open('img3.webp', 'rb'))
+]
+response = requests.post(
+    'http://localhost:8005/analyze/batch',
+    files=files
+)
+batch_result = response.json()
+batch_id = batch_result['data']['batch_id']
+# Download CSV report
+csv_response = requests.get(f'http://localhost:8005/report/csv/{batch_id}')
+with open('report.csv', 'wb') as f:
+    f.write(csv_response.content)
+```
+### JavaScript (Node.js)
+```javascript
+const FormData = require('form-data');
+const fs = require('fs');
+const axios = require('axios');
+// Single image analysis
+const form = new FormData();
+form.append('file', fs.createReadStream('image.jpg'));
+axios.post('http://localhost:8005/analyze/image', form, {
+  headers: form.getHeaders()
+})
+.then(response => {
+  console.log('Status:', response.data.data.status);
+  console.log('Score:', response.data.data.overall_score);
+})
+.catch(error => {
+  console.error('Error:', error.response.data);
+});
+// Batch analysis
+const batchForm = new FormData();
+batchForm.append('files', fs.createReadStream('img1.jpg'));
+batchForm.append('files', fs.createReadStream('img2.png'));
+axios.post('http://localhost:8005/analyze/batch', batchForm, {
+  headers: batchForm.getHeaders()
+})
+.then(response => {
+  const batchId = response.data.data.batch_id;
+  console.log('Batch ID:', batchId);
+  // Download PDF report
+  return axios.get(`http://localhost:8005/report/pdf/${batchId}`, {
+    responseType: 'arraybuffer'
+  });
+})
+.then(pdfResponse => {
+  fs.writeFileSync('report.pdf', pdfResponse.data);
+  console.log('Report downloaded');
+});
+```
+### cURL
+```bash
+# Single image
+curl -X POST http://localhost:8005/analyze/image \
+  -F "file=@image.jpg" \
+  | jq '.data.status, .data.overall_score'
+# Batch processing
+curl -X POST http://localhost:8005/analyze/batch \
+  -F "files=@img1.jpg" \
+  -F "files=@img2.png" \
+  -F "files=@img3.webp" \
+  | jq '.data.batch_id'
+# Progress tracking
+curl -X GET http://localhost:8005/batch/{batch_id}/progress
+# Download reports
+curl -X GET http://localhost:8005/report/csv/{batch_id} -o report.csv
+curl -X GET http://localhost:8005/report/pdf/{batch_id} -o report.pdf
+```
+---
+## Changelog
+### Version 1.0.0 (Current)
+- Initial API release
+- Single and batch image analysis
+- CSV, JSON, PDF export
+- Progress tracking
+- Multi-metric ensemble detection
+### Planned Features
+- API key authentication
+- Webhook callbacks for async processing
+- Custom threshold configuration per request
+- Historical analysis lookup
+- Metrics-only API endpoints
+---
+*API Documentation Version: 1.0*
+*Last Updated: December 2025*
+*Author: Satyaki Mitra*

docs/ARCHITECTURE.md ADDED Viewed

	@@ -0,0 +1,527 @@

+# Architecture Documentation
+## Table of Contents
+1. [System Overview](#system-overview)
+2. [Overall Architecture](#overall-architecture)
+3. [Data Pipeline](#data-pipeline)
+4. [Component Details](#component-details)
+5. [Product Architecture](#product-architecture)
+6. [Technology Stack](#technology-stack)
+---
+## System Overview
+AI Image Screener is a multi-metric ensemble system designed for first-pass screening of potentially AI-generated images in production workflows. The system processes images through five independent statistical detectors, aggregates their outputs, and provides actionable binary decisions with full explainability.
+**Design Principles:**
+- No single metric dominates decisions
+- All intermediate data preserved for explainability
+- Parallel processing for batch efficiency
+- Zero external ML model dependencies
+- Transparent, auditable decision logic
+---
+## Overall Architecture
+```mermaid
+graph TB
+    subgraph "Frontend Layer"
+        UI[Web UI<br/>Single Page HTML]
+    end
+    subgraph "API Layer"
+        API[FastAPI Server<br/>app.py]
+        CORS[CORS Middleware]
+        ERROR[Error Handler]
+    end
+    subgraph "Processing Layer"
+        VALIDATOR[Image Validator<br/>utils/validators.py]
+        BATCH[Batch Processor<br/>features/batch_processor.py]
+        THRESH[Threshold Manager<br/>features/threshold_manager.py]
+    end
+    subgraph "Detection Layer"
+        AGG[Metrics Aggregator<br/>metrics/aggregator.py]
+        subgraph "Independent Metrics"
+            M1[Gradient PCA<br/>gradient_field_pca.py]
+            M2[Frequency FFT<br/>frequency_analyzer.py]
+            M3[Noise Pattern<br/>noise_analyzer.py]
+            M4[Texture Stats<br/>texture_analyzer.py]
+            M5[Color Distribution<br/>color_analyzer.py]
+        end
+    end
+    subgraph "Reporting Layer"
+        DETAIL[DetailedResultMaker<br/>features/detailed_result_maker.py]
+        CSV[CSV Reporter]
+        JSON[JSON Reporter]
+        PDF[PDF Reporter]
+    end
+    subgraph "Storage Layer"
+        UPLOAD[(Temp Upload<br/>data/uploads/)]
+        CACHE[(Cache<br/>data/cache/)]
+        REPORTS[(Reports<br/>data/reports/)]
+    end
+    UI --> API
+    API --> VALIDATOR
+    VALIDATOR --> BATCH
+    BATCH --> AGG
+    AGG --> M1 & M2 & M3 & M4 & M5
+    M1 & M2 & M3 & M4 & M5 --> AGG
+    AGG --> THRESH
+    THRESH --> DETAIL
+    DETAIL --> CSV & JSON & PDF
+    API -.-> UPLOAD
+    BATCH -.-> CACHE
+    CSV & JSON & PDF -.-> REPORTS
+    style UI fill:#e1f5ff
+    style API fill:#fff4e1
+    style AGG fill:#ffe1e1
+    style DETAIL fill:#e1ffe1
+```
+---
+## Data Pipeline
+```mermaid
+flowchart LR
+    subgraph "Input Stage"
+        A[Image Upload] --> B{Validation}
+        B -->|Pass| C[Temp Storage]
+        B -->|Fail| Z1[Error Response]
+    end
+    subgraph "Preprocessing"
+        C --> D[Load Image<br/>RGB Array]
+        D --> E[Resize if Needed<br/>max 1024px]
+        E --> F[Convert to<br/>Luminance]
+    end
+    subgraph "Parallel Metric Execution"
+        F --> G1[Gradient<br/>Analysis]
+        F --> G2[Frequency<br/>Analysis]
+        F --> G3[Noise<br/>Analysis]
+        F --> G4[Texture<br/>Analysis]
+        F --> G5[Color<br/>Analysis]
+    end
+    subgraph "Score Aggregation"
+        G1 --> H[Weighted<br/>Ensemble]
+        G2 --> H
+        G3 --> H
+        G4 --> H
+        G5 --> H
+        H --> I[Overall Score<br/>0.0 - 1.0]
+    end
+    subgraph "Decision Logic"
+        I --> J{Score vs<br/>Threshold}
+        J -->|>= 0.65| K1[REVIEW<br/>REQUIRED]
+        J -->|< 0.65| K2[LIKELY<br/>AUTHENTIC]
+    end
+    subgraph "Output Stage"
+        K1 --> L[Detailed Result<br/>Assembly]
+        K2 --> L
+        L --> M[Signal Status<br/>Per Metric]
+        M --> N[Explainability<br/>Generation]
+        N --> O[Report Export<br/>CSV/JSON/PDF]
+    end
+    style B fill:#ffcccc
+    style H fill:#cce5ff
+    style J fill:#ffffcc
+    style O fill:#ccffcc
+```
+---
+## Component Details
+### 1. Configuration Layer (`config/`)
+```mermaid
+classDiagram
+    class Settings {
+        +str APP_NAME
+        +float REVIEW_THRESHOLD
+        +dict METRIC_WEIGHTS
+        +int MAX_WORKERS
+        +get_metric_weights()
+        +_validate_weights()
+    }
+    class Constants {
+        <<enumeration>>
+        +MetricType
+        +SignalStatus
+        +DetectionStatus
+        +SIGNAL_THRESHOLDS
+        +METRIC_EXPLANATIONS
+    }
+    class Schemas {
+        +MetricResult
+        +DetectionSignal
+        +AnalysisResult
+        +BatchAnalysisResult
+    }
+    Settings --> Constants: uses
+    Schemas --> Constants: references
+```
+**Key Configuration Files:**
+- `settings.py`: Runtime settings, environment variables, validation
+- `constants.py`: Enums, thresholds, metric parameters, explanations
+- `schemas.py`: Pydantic models for type safety and validation
+---
+### 2. Metrics Layer (`metrics/`)
+```mermaid
+graph TD
+    subgraph "Gradient-Field PCA"
+        A1[RGB → Luminance] --> A2[Sobel Gradients]
+        A2 --> A3[Sample Vectors<br/>n=10000]
+        A3 --> A4[PCA Analysis]
+        A4 --> A5[Eigenvalue Ratio]
+        A5 --> A6{Ratio < 0.85?}
+        A6 -->|Yes| A7[High Suspicion]
+        A6 -->|No| A8[Low Suspicion]
+    end
+    subgraph "Frequency Analysis"
+        B1[Luminance] --> B2[2D FFT]
+        B2 --> B3[Radial Spectrum<br/>64 bins]
+        B3 --> B4[HF Energy Ratio]
+        B4 --> B5[Spectral Roughness]
+        B5 --> B6[Power Law Deviation]
+        B6 --> B7[Weighted Anomaly]
+    end
+    subgraph "Noise Analysis"
+        C1[Luminance] --> C2[Extract Patches<br/>32×32, stride=16]
+        C2 --> C3[Laplacian Filter]
+        C3 --> C4[MAD Estimation]
+        C4 --> C5[CV Analysis]
+        C5 --> C6[IQR Analysis]
+        C6 --> C7[Uniformity Score]
+    end
+    style A1 fill:#ffe1e1
+    style B1 fill:#e1e1ff
+    style C1 fill:#e1ffe1
+```
+**Metric Weights (Default):**
+```
+Gradient:  30%
+Frequency: 25%
+Noise:     20%
+Texture:   15%
+Color:     10%
+```
+---
+### 3. Processing Pipeline
+```mermaid
+sequenceDiagram
+    participant UI
+    participant API
+    participant BatchProcessor
+    participant MetricsAggregator
+    participant Metric1
+    participant Metric2
+    participant ThresholdManager
+    participant DetailedResultMaker
+    UI->>API: Upload Batch (n images)
+    API->>BatchProcessor: process_batch()
+    loop For Each Image
+        BatchProcessor->>MetricsAggregator: analyze_image()
+        par Parallel Execution
+            MetricsAggregator->>Metric1: detect()
+            MetricsAggregator->>Metric2: detect()
+        end
+        Metric1-->>MetricsAggregator: MetricResult(score, confidence, details)
+        Metric2-->>MetricsAggregator: MetricResult(score, confidence, details)
+        MetricsAggregator->>MetricsAggregator: _aggregate_scores()
+        MetricsAggregator->>ThresholdManager: _determine_status()
+        ThresholdManager-->>MetricsAggregator: DetectionStatus
+        MetricsAggregator-->>BatchProcessor: AnalysisResult
+        BatchProcessor->>UI: Progress Update
+    end
+    BatchProcessor->>DetailedResultMaker: extract_detailed_results()
+    DetailedResultMaker-->>BatchProcessor: Detailed Report Data
+    BatchProcessor-->>API: BatchAnalysisResult
+    API-->>UI: JSON Response + batch_id
+```
+---
+### 4. Metric Execution Detail
+```mermaid
+flowchart TB
+    subgraph "Single Metric Execution"
+        A[Input: RGB Image<br/>H×W×3] --> B[Preprocessing<br/>Normalization/Conversion]
+        B --> C[Feature Extraction]
+        C --> D1[Sub-metric 1]
+        C --> D2[Sub-metric 2]
+        C --> D3[Sub-metric 3]
+        D1 --> E[Sub-score 1<br/>0.0 - 1.0]
+        D2 --> F[Sub-score 2<br/>0.0 - 1.0]
+        D3 --> G[Sub-score 3<br/>0.0 - 1.0]
+        E --> H[Weighted Combination]
+        F --> H
+        G --> H
+        H --> I[Final Metric Score]
+        I --> J[Confidence Calculation]
+        J --> K[MetricResult Object]
+        K --> L{Valid?}
+        L -->|Yes| M[Return to Aggregator]
+        L -->|No| N[Return Neutral Score<br/>0.5 + 0 confidence]
+    end
+    style A fill:#e1f5ff
+    style I fill:#ffe1e1
+    style K fill:#e1ffe1
+```
+**Example: Noise Analysis Sub-metrics**
+- CV Anomaly: 40% weight
+- Noise Level Anomaly: 40% weight
+- IQR Anomaly: 20% weight
+---
+## Product Architecture
+```mermaid
+graph TB
+    subgraph "User Interfaces"
+        WEB[Web UI<br/>Browser-based]
+        API_CLIENT[API Clients<br/>Programmatic Access]
+    end
+    subgraph "Core Engine"
+        SCREEN[Screening Engine<br/>Multi-metric Ensemble]
+        THRESH_MGR[Threshold Manager<br/>Sensitivity Control]
+    end
+    subgraph "Reporting System"
+        DETAIL[Detailed Analysis]
+        EXPORT[Multi-format Export<br/>CSV/JSON/PDF]
+    end
+    subgraph "Use Cases"
+        UC1[Content Moderation<br/>Pipelines]
+        UC2[Journalism<br/>Verification]
+        UC3[Stock Photo<br/>Platforms]
+        UC4[Legal/Compliance<br/>Workflows]
+    end
+    WEB --> SCREEN
+    API_CLIENT --> SCREEN
+    SCREEN --> THRESH_MGR
+    THRESH_MGR --> DETAIL
+    DETAIL --> EXPORT
+    EXPORT -.->|Feeds| UC1
+    EXPORT -.->|Feeds| UC2
+    EXPORT -.->|Feeds| UC3
+    EXPORT -.->|Feeds| UC4
+    style SCREEN fill:#ff6b6b
+    style EXPORT fill:#4ecdc4
+    style UC1 fill:#ffe66d
+    style UC2 fill:#ffe66d
+    style UC3 fill:#ffe66d
+    style UC4 fill:#ffe66d
+```
+---
+## Technology Stack
+```mermaid
+graph LR
+    subgraph "Backend"
+        B1[Python 3.11+]
+        B2[FastAPI]
+        B3[Pydantic]
+        B4[NumPy/SciPy]
+        B5[OpenCV]
+        B6[Pillow]
+    end
+    subgraph "Frontend"
+        F1[HTML5]
+        F2[Vanilla JavaScript]
+        F3[CSS3]
+    end
+    subgraph "Reporting"
+        R1[ReportLab PDF]
+        R2[CSV stdlib]
+        R3[JSON stdlib]
+    end
+    subgraph "Infrastructure"
+        I1[Uvicorn ASGI]
+        I2[File-based Storage]
+        I3[In-memory Sessions]
+    end
+    B2 --> B1
+    B3 --> B1
+    B4 --> B1
+    B5 --> B1
+    B6 --> B1
+    F1 --> F2
+    F2 --> F3
+    R1 --> B1
+    R2 --> B1
+    R3 --> B1
+    I1 --> B2
+    I2 --> B1
+    I3 --> B2
+    style B1 fill:#3776ab
+    style B2 fill:#009688
+    style F1 fill:#e34c26
+    style F2 fill:#f0db4f
+```
+**Key Dependencies:**
+- **FastAPI**: Async API framework
+- **NumPy/SciPy**: Numerical computation
+- **OpenCV**: Image processing and filtering
+- **Pillow**: Image loading and validation
+- **ReportLab**: PDF generation
+- **Pydantic**: Data validation and serialization
+---
+## Performance Characteristics
+### Processing Times (Average)
+- Single image analysis: **2-4 seconds**
+- Batch processing (10 images): **15-25 seconds** (parallel)
+- Report generation: **1-3 seconds**
+### Resource Usage
+- Memory per image: **50-150 MB**
+- Max concurrent workers: **4** (configurable)
+- Temp storage: **~10 MB per image**
+### Scalability Considerations
+- **Current**: Single-server deployment
+- **Bottleneck**: CPU-bound metric computation
+- **Future**: Distributed processing via task queue (Celery/RabbitMQ)
+---
+## Security & Privacy
+1. **No data persistence**: Uploaded images deleted after processing
+2. **Local processing**: No external API calls
+3. **Stateless design**: No user tracking
+4. **Input validation**: File type, size, dimension checks
+5. **Timeout protection**: 30s per-image limit
+---
+## Deployment Architecture
+```mermaid
+graph TB
+    subgraph "Production Deployment"
+        LB[Load Balancer<br/>Nginx/Traefik]
+        subgraph "Application Servers"
+            APP1[FastAPI Instance 1<br/>4 workers]
+            APP2[FastAPI Instance 2<br/>4 workers]
+        end
+        subgraph "Shared Storage"
+            NFS[Shared NFS Mount<br/>reports/ cache/]
+        end
+        subgraph "Monitoring"
+            LOGS[Log Aggregation<br/>ELK/Loki]
+            METRICS[Metrics<br/>Prometheus]
+        end
+    end
+    CLIENT[Clients] --> LB
+    LB --> APP1
+    LB --> APP2
+    APP1 -.-> NFS
+    APP2 -.-> NFS
+    APP1 -.-> LOGS
+    APP2 -.-> LOGS
+    APP1 -.-> METRICS
+    APP2 -.-> METRICS
+    style LB fill:#4ecdc4
+    style APP1 fill:#ff6b6b
+    style APP2 fill:#ff6b6b
+    style NFS fill:#95e1d3
+```
+**Recommended Setup:**
+- **Web Server**: Nginx (reverse proxy)
+- **App Server**: Uvicorn (ASGI)
+- **Process Manager**: Systemd or Supervisor
+- **Monitoring**: Prometheus + Grafana
+- **Logging**: Structured JSON logs to ELK stack
+---
+## Future Architecture Considerations
+1. **Message Queue Integration**: Redis/RabbitMQ for async processing
+2. **Database Layer**: PostgreSQL for result persistence and analytics
+3. **Caching Layer**: Redis for threshold/config caching
+4. **Distributed Storage**: S3-compatible storage for reports
+5. **API Gateway**: Kong/Tyk for rate limiting and auth
+---
+*Document Version: 1.0*
+*Last Updated: December 2025*
+*Architecture by: Satyaki Mitra*

docs/Description.md ADDED Viewed

	@@ -0,0 +1,298 @@

+# AI Image Screener
+>*A practical first-pass AI image screening system for modern workflows (2025)*
+---
+## 1. Overview
+**AI Image Screener** is an MVP-grade, **unsupervised image screening system** designed to **identify images that require human review** based on statistical and physical patterns commonly associated with AI-generated imagery.
+This system is **not a “perfect AI detector.”**
+It is intentionally built as a **fast, transparent, first-pass screening tool** that helps teams reduce manual review workload by flagging *obviously suspicious* images at scale.
+The product is particularly suited for:
+- Content moderation pipelines
+- Journalism and media verification
+- Stock image platforms
+- Legal and compliance pre-screening
+- Marketing and brand-protection workflows
+---
+## 2. Core Philosophy
+### What this product *is*
+- A **workflow efficiency tool**
+- A **screening system**, not a verdict engine
+- A **transparent and explainable detector**
+- A **model-agnostic, unsupervised system**
+### What this product *is not*
+- ❌ A definitive “real vs fake” classifier
+- ❌ A black-box deep learning detector
+- ❌ A system claiming near-perfect accuracy on 2025 AI models
+The system is built on a simple principle:
+**saving human time is more valuable than chasing perfect detection.**
+---
+## 3. Problem Statement
+By 2025, high-quality AI image generators (e.g., DALL·E 3, Gemini Imagen 3, Midjourney v6+) produce images that are often **indistinguishable to humans** and increasingly difficult for single-method detectors.
+Most existing tools fail because they:
+- Overpromise accuracy
+- Provide ambiguous outputs (“uncertain”, “maybe AI”)
+- Rely on opaque ML models users do not trust
+- Do not integrate into real operational workflows
+---
+## 4. Product Positioning
+### The key insight
+Users **do not need certainty** — they need **prioritization**.
+Instead of asking:
+> *“Is this image AI or real?”*
+The system answers:
+> *“Does this image require human review?”*
+---
+## 5. Binary UX Model (Critical Design Decision)
+The system intentionally provides **only two outcomes**, ensuring every result is actionable.
+### 🟢 LIKELY AUTHENTIC
+- No significant AI-generation patterns detected
+- Passed all screening checks
+- **Does not guarantee authenticity**
+- No immediate action required
+### 🔴 REVIEW REQUIRED
+- One or more detection signals triggered
+- Patterns consistent with AI generation
+- Confidence score provided for prioritization
+- **Manual verification recommended**
+This avoids the UX failure of ambiguous or “uncertain” results.
+---
+## 6. Detection Strategy
+### *(Multi-Signal, Unsupervised Ensemble)*
+The system runs **multiple independent statistical detectors** on every image.
+Each detector targets a *different failure mode* of AI image generation.
+Each metric produces:
+- A **normalized anomaly score** in `[0.0 – 1.0]`
+- **Rich intermediate details** for explainability and reporting
+### Implemented Metrics (`metrics/`)
+| Metric | File | Purpose |
+|-----|-----|-----|
+| Gradient-Field PCA | `metrics/gradient_field_pca.py` | Detects lighting & gradient inconsistencies typical of diffusion |
+| Frequency Analysis (FFT) | `metrics/frequency_analyzer.py` | Identifies unnatural spectral energy distributions |
+| Noise Pattern Analysis | `metrics/noise_analyzer.py` | Detects missing or artificial sensor noise |
+| Texture Statistics | `metrics/texture_analyzer.py` | Identifies overly smooth or uniform regions |
+| Color Distribution | `metrics/color_analyzer.py` | Flags unnatural saturation and color histograms |
+No single metric is relied upon in isolation.
+---
+## 7. Score Aggregation & Decision Logic
+### Aggregation
+All metric outputs are combined using a **weighted ensemble strategy**:
+- Implemented in: `metrics/aggregator.py`
+- Metric weights are configurable
+- No single signal can dominate the final decision
+- Robust to individual metric failure
+### Thresholding
+Final decisions are derived from calibrated thresholds:
+- 🟢 **LIKELY_AUTHENTIC** → score below review cutoff
+- 🔴 **REVIEW_REQUIRED** → score above cutoff
+Thresholds and sensitivity modes are managed via:
+- `features/threshold_manager.py`
+  - Conservative / Balanced / Aggressive modes
+  - Runtime threshold tuning
+  - A/B calibration support
+---
+## 8. Explainability & Transparency
+Every analysis result includes:
+- Which metrics triggered
+- Severity level per metric (PASSED / WARNING / FLAGGED)
+- Human-readable explanations
+- Optional forensic details for advanced users
+This avoids black-box behavior and builds user trust.
+---
+## 9. Reporting & Export Capabilities
+The system generates **production-ready reports without recomputation**.
+### Reporters (`reporter/`)
+| Format | File | Use Case |
+|-----|-----|-----|
+| CSV | `reporter/csv_reporter.py` | Workflow integration, moderation queues |
+| JSON | `reporter/json_reporter.py` | APIs, automation, auditing |
+| PDF | `reporter/pdf_reporter.py` | Legal, compliance, documentation |
+All reporting is driven by:
+- `features/detailed_result_maker.py`
+  (single source of truth for explanations, findings, and summaries)
+---
+## 10. Technical Architecture
+### High-Level Processing Flow
+```bash
+Upload Image(s)
+      ↓
+Validation & Preprocessing (utils/)
+      ↓
+Parallel Metric Execution (metrics/)
+      ↓
+Score Aggregation (metrics/aggregator.py)
+      ↓
+Threshold Decision (features/threshold_manager.py)
+      ↓
+Detailed Result Assembly (features/detailed_result_maker.py)
+      ↓
+UI / Reports / API Output
+```
+---
+### Backend & Frontend
+**Backend**
+- FastAPI (Python 3.11+)
+- Async batch processing
+- Parallel metric execution
+- File-based caching (image hash)
+- JSON / CSV / PDF outputs
+- Clear API contracts (`docs/API.md`)
+**Frontend**
+- Single-page HTML (inline CSS + JS)
+- Batch upload interface
+- Live per-metric progress indicators
+- Filterable results table
+- One-click export actions
+---
+## 11. Project Structure
+```bash
+ai_image_screener/
+├── app.py
+├── config/
+│   ├── settings.py
+│   ├── constants.py
+│   └── schemas.py
+├── metrics/
+│   ├── gradient_field_pca.py
+│   ├── frequency_analyzer.py
+│   ├── noise_analyzer.py
+│   ├── texture_analyzer.py
+│   ├── color_analyzer.py
+│   └── aggregator.py
+├── features/
+│   ├── batch_processor.py
+│   ├── detailed_result_maker.py
+│   └── threshold_manager.py
+├── reporter/
+│   ├── csv_reporter.py
+│   ├── json_reporter.py
+│   └── pdf_reporter.py
+├── utils/
+│   ├── logger.py
+│   ├── image_processor.py
+│   ├── validators.py
+│   └── helpers.py
+├── data/
+│   ├── uploads/
+│   ├── reports/
+│   └── cache/
+├── ui/
+├── tests/
+└── docs/
+```
+---
+## 12. Performance Expectations *(Honest)*
+| Image Source | Expected Detection Rate |
+|-------------|------------------------|
+| Consumer AI tools (older / free) | 80–90% |
+| Stable Diffusion (older variants) | 70–80% |
+| Midjourney v5 / v6 | 55–70% |
+| DALL·E 3 / Gemini Imagen 3 | 40–55% |
+| Post-processed AI images | 30–45% |
+| False positives on real images | ~10–20% |
+These rates are **appropriate for screening**, not final judgment.
+---
+## 13. Ethical & Legal Positioning
+This system:
+- Never claims **“real”** or **“fake”**
+- Provides **probabilistic screening only**
+- Encourages **human verification**
+- Documents methodology **transparently**
+This makes it suitable for:
+- Legal workflows
+- Journalism
+- Enterprise moderation pipelines
+---
+## 14. Intended Audience
+- Content moderation teams
+- Journalism & media organizations
+- Stock photo platforms
+- Legal & compliance professionals
+- Researchers & educators
+---
+## 15. Final Positioning Statement
+**AI Image Screener is not an AI detector.**
+> It is a **first-pass screening system designed to save human time**.
+> It flags what needs review — **fast, explainable, and at scale**.

docs/TECHNICAL_DOCUMENTATION.md ADDED Viewed

	@@ -0,0 +1,885 @@

+# Case Study Analysis: Statistical Foundations of AI Image Screening
+**Author**: Satyaki Mitra
+**Date**: December 2024
+**Version**: 1.0
+---
+## Table of Contents
+1. [Executive Summary](#executive-summary)
+2. [Problem Formulation](#problem-formulation)
+3. [Metric 1: Gradient-Field PCA](#metric-1-gradient-field-pca)
+4. [Metric 2: Frequency Domain Analysis](#metric-2-frequency-domain-analysis)
+5. [Metric 3: Noise Pattern Analysis](#metric-3-noise-pattern-analysis)
+6. [Metric 4: Texture Statistical Analysis](#metric-4-texture-statistical-analysis)
+7. [Metric 5: Color Distribution Analysis](#metric-5-color-distribution-analysis)
+8. [Ensemble Aggregation Theory](#ensemble-aggregation-theory)
+9. [Threshold Calibration](#threshold-calibration)
+10. [Performance Analysis](#performance-analysis)
+11. [Limitations & Future Work](#limitations--future-work)
+---
+## Executive Summary
+This document provides the mathematical and statistical foundations for the AI Image Screener system. We formalize five independent statistical detectors, analyze their theoretical properties, and derive the ensemble aggregation strategy.
+**Key Results:**
+- Each metric produces normalized anomaly scores $s_i \in [0, 1]$
+- Ensemble aggregation: $S = \sum_{i=1}^{5} w_i s_i$ where $\sum w_i = 1$
+- Binary decision: $D = \mathbb{1}(S \geq \tau)$ where $\tau = 0.65$
+- Expected detection rates: 40–90% depending on generator sophistication
+- False positive rate: 10–20% on natural images
+---
+## Problem Formulation
+### Notation
+| Symbol | Definition |
+|--------|------------|
+| $I \in \mathbb{R}^{H \times W \times 3}$ | RGB input image |
+| $L \in \mathbb{R}^{H \times W}$ | Luminance channel |
+| $s_i \in [0, 1]$ | Score from metric $i$ |
+| $c_i \in [0, 1]$ | Confidence of metric $i$ |
+| $w_i \in [0, 1]$ | Weight of metric $i$ |
+| $S \in [0, 1]$ | Aggregated ensemble score |
+| $\tau$ | Decision threshold |
+| $D \in \{0, 1\}$ | Binary decision (0 = authentic, 1 = review required) |
+### Objective
+Given an image $I$, compute:
+$$D = \begin{cases}
+1 & \text{if } S \geq \tau \text{ (REVIEW REQUIRED)} \\
+0 & \text{if } S < \tau \text{ (LIKELY AUTHENTIC)}
+\end{cases}$$
+where $S$ aggregates evidence from 5 independent statistical tests.
+---
+## Metric 1: Gradient-Field PCA
+### Physical Motivation
+Real photographs capture light reflected from 3D scenes. Lighting creates **low-dimensional gradient structures** aligned with physical light sources. Diffusion models perform patch-based denoising, creating gradient fields inconsistent with global illumination.
+### Mathematical Formulation
+**Step 1: Luminance Conversion**
+Convert RGB to luminance using ITU-R BT.709 standard:
+$$L(x, y) = 0.2126 \cdot R(x, y) + 0.7152 \cdot G(x, y) + 0.0722 \cdot B(x, y)$$
+**Step 2: Gradient Computation**
+Apply Sobel operators:
+$$G_x = L * K_x, \quad G_y = L * K_y$$
+where $K_x$ and $K_y$ are 3×3 Sobel kernels:
+$$K_x = \begin{bmatrix} -1 & 0 & 1 \\ -2 & 0 & 2 \\ -1 & 0 & 1 \end{bmatrix}, \quad K_y = \begin{bmatrix} -1 & -2 & -1 \\ 0 & 0 & 0 \\ 1 & 2 & 1 \end{bmatrix}$$
+**Step 3: Gradient Vector Formation**
+Flatten gradients into vectors:
+$$\mathbf{g}_i = \begin{bmatrix} G_x(i) \\ G_y(i) \end{bmatrix} \in \mathbb{R}^2$$
+Filter by magnitude: $||\mathbf{g}_i|| > \epsilon$ where $\epsilon = 10^{-6}$
+Sample $N = \min(10000, |\{\mathbf{g}_i\}|)$ vectors uniformly.
+**Step 4: PCA Analysis**
+Construct gradient matrix:
+$$\mathbf{G} = [\mathbf{g}_1, \mathbf{g}_2, \ldots, \mathbf{g}_N]^\top \in \mathbb{R}^{N \times 2}$$
+Compute covariance matrix:
+$$\mathbf{C} = \frac{1}{N} \mathbf{G}^\top \mathbf{G} \in \mathbb{R}^{2 \times 2}$$
+Eigenvalue decomposition:
+$$\mathbf{C} = \mathbf{V} \mathbf{\Lambda} \mathbf{V}^\top$$
+where $\lambda_1 \geq \lambda_2 \geq 0$ are eigenvalues.
+**Step 5: Eigenvalue Ratio**
+$$r = \frac{\lambda_1}{\lambda_1 + \lambda_2}$$
+**Interpretation:**
+- $r \to 1$: Gradients concentrated in one direction (consistent lighting)
+- $r \to 0.5$: Isotropic gradients (inconsistent/random)
+**Step 6: Anomaly Score**
+$$s_{\text{gradient}} = \begin{cases}
+\max(0, 1 - r) \cdot 2 & \text{if } r \geq 0.85 \\
+1 - \frac{r}{0.85} & \text{if } r < 0.85
+\end{cases}$$
+**Confidence:**
+$$c_{\text{gradient}} = \text{clip}\left(\frac{|r - 0.85|}{0.85}, 0, 1\right)$$
+### Implementation Reference
+See `metrics/gradient_field_pca.py:GradientFieldPCADetector.detect()`
+---
+## Metric 2: Frequency Domain Analysis
+### Physical Motivation
+Camera lenses act as low-pass filters (diffraction limit). Natural images exhibit **power-law spectral decay**: $P(f) \propto f^{-\alpha}$ where $\alpha \approx 2$ (pink noise).
+AI generators can create:
+1. Excessive high-frequency content (texture hallucination)
+2. Spectral gaps (mode collapse)
+3. Deviation from power-law decay
+### Mathematical Formulation
+**Step 1: 2D Discrete Fourier Transform**
+$$\hat{L}(u, v) = \sum_{x=0}^{W-1} \sum_{y=0}^{H-1} L(x, y) e^{-2\pi i (ux/W + vy/H)}$$
+**Step 2: Magnitude Spectrum**
+$$M(u, v) = |\hat{L}(u, v)|$$
+Apply log scaling for numerical stability:
+$$M_{\log}(u, v) = \log(1 + M(u, v))$$
+Shift zero-frequency to center:
+$$M_{\text{centered}} = \text{fftshift}(M_{\log})$$
+**Step 3: Radial Spectrum**
+Compute radial distance from center $(u_0, v_0) = (W/2, H/2)$:
+$$r(u, v) = \sqrt{(u - u_0)^2 + (v - v_0)^2}$$
+Bin frequencies into $B = 64$ radial bins:
+$$P(k) = \frac{1}{|B_k|} \sum_{(u,v) \in B_k} M_{\text{centered}}(u, v), \quad k = 1, \ldots, B$$
+where $B_k = \{(u, v) : k-1 \leq r(u, v) < k\}$
+**Step 4: Sub-Anomaly 1 - High-Frequency Energy**
+Partition spectrum:
+- Low frequency: $P_{\text{LF}} = \frac{1}{k_{\text{cutoff}}} \sum_{k=1}^{k_{\text{cutoff}}} P(k)$
+- High frequency: $P_{\text{HF}} = \frac{1}{B - k_{\text{cutoff}}} \sum_{k=k_{\text{cutoff}}+1}^{B} P(k)$
+where $k_{\text{cutoff}} = \lfloor 0.6 \cdot B \rfloor = 38$
+Compute ratio:
+$$\rho_{\text{HF}} = \frac{P_{\text{HF}}}{P_{\text{LF}} + \epsilon}$$
+Anomaly score:
+$$a_{\text{HF}} = \begin{cases}
+\min\left(1, (\rho_{\text{HF}} - 0.35) \times 3.0\right) & \text{if } \rho_{\text{HF}} > 0.35 \\
+\min\left(1, (0.08 - \rho_{\text{HF}}) \times 5.0\right) & \text{if } \rho_{\text{HF}} < 0.08 \\
+0 & \text{otherwise}
+\end{cases}$$
+**Step 5: Sub-Anomaly 2 - Spectral Roughness**
+Measure deviation from smooth decay:
+$$\mathcal{R} = \frac{1}{B-1} \sum_{k=1}^{B-1} |P(k+1) - P(k)|$$
+Anomaly score:
+$$a_{\text{rough}} = \text{clip}(\mathcal{R} \times 10.0, 0, 1)$$
+**Step 6: Sub-Anomaly 3 - Power-Law Deviation**
+Fit power law in log-log space:
+$$\log P(k) \approx \beta_0 + \beta_1 \log k$$
+Compute mean absolute deviation:
+$$\mathcal{D} = \frac{1}{B} \sum_{k=1}^{B} |\log P(k) - (\beta_0 + \beta_1 \log k)|$$
+Anomaly score:
+$$a_{\text{dev}} = \text{clip}(\mathcal{D} \times 2.0, 0, 1)$$
+**Step 7: Final Score**
+$$s_{\text{frequency}} = 0.4 \cdot a_{\text{HF}} + 0.3 \cdot a_{\text{rough}} + 0.3 \cdot a_{\text{dev}}$$
+### Implementation Reference
+See `metrics/frequency_analyzer.py:FrequencyAnalyzer.detect()`
+---
+## Metric 3: Noise Pattern Analysis
+### Physical Motivation
+Real camera sensors produce **characteristic noise**:
+1. **Shot noise** (Poisson): $\sigma_{\text{shot}}^2 \propto I$
+2. **Read noise** (Gaussian): $\sigma_{\text{read}}^2 = \text{const}$
+AI models produce:
+- Overly uniform images (too clean)
+- Synthetic noise patterns (too variable)
+- Spatially inconsistent noise
+### Mathematical Formulation
+**Step 1: Patch Extraction**
+Extract overlapping patches $\{P_i\}$ of size $32 \times 32$ with stride $16$.
+**Step 2: Laplacian Filtering**
+Apply Laplacian kernel to isolate high-frequency noise:
+$$K_{\text{Lap}} = \begin{bmatrix} 0 & 1 & 0 \\ 1 & -4 & 1 \\ 0 & 1 & 0 \end{bmatrix}$$
+$$\nabla^2 P_i = P_i * K_{\text{Lap}}$$
+**Step 3: MAD Estimation**
+Compute Median Absolute Deviation (robust to outliers):
+$$\text{MAD}_i = \text{median}(|\nabla^2 P_i - \text{median}(\nabla^2 P_i)|)$$
+Convert to noise standard deviation:
+$$\hat{\sigma}_i = 1.4826 \times \text{MAD}_i$$
+(Factor 1.4826 assumes Gaussian noise: $\sigma \approx 1.4826 \times \text{MAD}$)
+**Step 4: Filtering**
+Retain patches with variance in valid range:
+$$\sigma_{\text{min}}^2 = 1.0, \quad \sigma_{\text{max}}^2 = 1000.0$$
+$$\mathcal{P}_{\text{valid}} = \{i : \sigma_{\text{min}}^2 < \text{Var}(P_i) < \sigma_{\text{max}}^2\}$$
+**Step 5: Sub-Anomaly 1 - Coefficient of Variation**
+$$\text{CV} = \frac{\text{std}(\{\hat{\sigma}_i\})}{\text{mean}(\{\hat{\sigma}_i\}) + \epsilon}$$
+Anomaly:
+$$a_{\text{CV}} = \begin{cases}
+(0.15 - \text{CV}) \times 5.0 & \text{if } \text{CV} < 0.15 \text{ (too uniform)} \\
+\min(1, (\text{CV} - 1.2) \times 2.0) & \text{if } \text{CV} > 1.2 \text{ (too variable)} \\
+0 & \text{otherwise}
+\end{cases}$$
+**Step 6: Sub-Anomaly 2 - Noise Level**
+$$\bar{\sigma} = \text{mean}(\{\hat{\sigma}_i\})$$
+Anomaly:
+$$a_{\text{level}} = \begin{cases}
+\frac{1.5 - \bar{\sigma}}{1.5} & \text{if } \bar{\sigma} < 1.5 \text{ (too clean)} \\
+\frac{2.5 - \bar{\sigma}}{2.5} \times 0.5 & \text{if } 1.5 \leq \bar{\sigma} < 2.5 \\
+0 & \text{otherwise}
+\end{cases}$$
+**Step 7: Sub-Anomaly 3 - IQR Analysis**
+Compute interquartile range:
+$$\text{IQR} = Q_{75} - Q_{25}$$
+IQR ratio:
+$$\rho_{\text{IQR}} = \frac{\text{IQR}}{\bar{\sigma} + \epsilon}$$
+Anomaly:
+$$a_{\text{IQR}} = \begin{cases}
+(0.3 - \rho_{\text{IQR}}) \times 2.0 & \text{if } \rho_{\text{IQR}} < 0.3 \\
+0 & \text{otherwise}
+\end{cases}$$
+**Step 8: Final Score**
+$$s_{\text{noise}} = 0.4 \cdot a_{\text{CV}} + 0.4 \cdot a_{\text{level}} + 0.2 \cdot a_{\text{IQR}}$$
+### Implementation Reference
+See `metrics/noise_analyzer.py:NoiseAnalyzer.detect()`
+---
+## Metric 4: Texture Statistical Analysis
+### Physical Motivation
+Natural scenes have **organic texture variation**:
+- Edges follow fractal statistics
+- Contrast varies locally
+- Entropy reflects information density
+AI models can produce:
+- Overly smooth regions (lack of detail)
+- Repetitive patterns (mode collapse)
+- Uniform texture statistics
+### Mathematical Formulation
+**Step 1: Random Patch Sampling**
+Sample $N = 50$ patches of size $64 \times 64$ uniformly at random.
+**Step 2: Feature Computation per Patch**
+For each patch $P_i$:
+**a) Local Contrast**
+$$c_i = \text{std}(P_i)$$
+**b) Entropy**
+Compute histogram $H$ with 32 bins over $[0, 255]$:
+$$h_k = \frac{|\{p \in P_i : k-1 < p \leq k\}|}{|P_i|}$$
+Shannon entropy:
+$$e_i = -\sum_{k=1}^{32} h_k \log_2(h_k + \epsilon)$$
+**c) Smoothness**
+$$m_i = \frac{1}{1 + \text{Var}(P_i)}$$
+**d) Edge Density**
+Compute gradients:
+$$g_x, g_y = \text{Sobel}(P_i)$$
+$$|\nabla P_i| = \sqrt{g_x^2 + g_y^2}$$
+Edge density:
+$$d_i = \frac{|\{p : |\nabla P_i|(p) > 10\}|}{|P_i|}$$
+**Step 3: Sub-Anomaly 1 - Smoothness**
+Smooth ratio:
+$$\rho_{\text{smooth}} = \frac{|\{i : m_i > 0.5\}|}{N}$$
+Anomaly:
+$$a_{\text{smooth}} = \begin{cases}
+\min(1, (\rho_{\text{smooth}} - 0.4) \times 2.5) & \text{if } \rho_{\text{smooth}} > 0.4 \\
+0 & \text{otherwise}
+\end{cases}$$
+**Step 4: Sub-Anomaly 2 - Entropy CV**
+$$\text{CV}_e = \frac{\text{std}(\{e_i\})}{\text{mean}(\{e_i\}) + \epsilon}$$
+Anomaly:
+$$a_{\text{entropy}} = \begin{cases}
+(0.15 - \text{CV}_e) \times 5.0 & \text{if } \text{CV}_e < 0.15 \\
+0 & \text{otherwise}
+\end{cases}$$
+**Step 5: Sub-Anomaly 3 - Contrast CV**
+$$\text{CV}_c = \frac{\text{std}(\{c_i\})}{\text{mean}(\{c_i\}) + \epsilon}$$
+Anomaly:
+$$a_{\text{contrast}} = \begin{cases}
+(0.3 - \text{CV}_c) \times 2.0 & \text{if } \text{CV}_c < 0.3 \\
+\min(1, (\text{CV}_c - 1.5) \times 0.5) & \text{if } \text{CV}_c > 1.5 \\
+0 & \text{otherwise}
+\end{cases}$$
+**Step 6: Sub-Anomaly 4 - Edge CV**
+$$\text{CV}_d = \frac{\text{std}(\{d_i\})}{\text{mean}(\{d_i\}) + \epsilon}$$
+Anomaly:
+$$a_{\text{edge}} = \begin{cases}
+(0.4 - \text{CV}_d) \times 1.5 & \text{if } \text{CV}_d < 0.4 \\
+0 & \text{otherwise}
+\end{cases}$$
+**Step 7: Final Score**
+$$s_{\text{texture}} = 0.35 \cdot a_{\text{smooth}} + 0.25 \cdot a_{\text{entropy}} + 0.25 \cdot a_{\text{contrast}} + 0.15 \cdot a_{\text{edge}}$$
+### Implementation Reference
+See `metrics/texture_analyzer.py:TextureAnalyzer.detect()`
+---
+## Metric 5: Color Distribution Analysis
+### Physical Motivation
+Physical light sources create **constrained color relationships**:
+- Blackbody radiation spectrum
+- Lambertian reflectance
+- Atmospheric scattering (Rayleigh/Mie)
+AI models can generate:
+- Oversaturated colors (not physically realizable)
+- Unnatural hue clustering
+- Impossible color combinations
+### Mathematical Formulation
+**Step 1: RGB to HSV Conversion**
+For each pixel $(r, g, b) \in [0, 1]^3$:
+$$M = \max(r, g, b), \quad m = \min(r, g, b), \quad \Delta = M - m$$
+Value:
+$$v = M$$
+Saturation:
+$$s = \begin{cases} \Delta / M & \text{if } M \neq 0 \\ 0 & \text{otherwise} \end{cases}$$
+Hue (in degrees):
+$$h = \begin{cases}
+60 \times \left(\frac{g - b}{\Delta} \mod 6\right) & \text{if } M = r \\
+60 \times \left(\frac{b - r}{\Delta} + 2\right) & \text{if } M = g \\
+60 \times \left(\frac{r - g}{\Delta} + 4\right) & \text{if } M = b
+\end{cases}$$
+**Step 2: Saturation Analysis**
+Mean saturation:
+$$\bar{s} = \frac{1}{HW} \sum_{x, y} s(x, y)$$
+High saturation ratio:
+$$\rho_{\text{high}} = \frac{|\{(x, y) : s(x, y) > 0.8\}|}{HW}$$
+Very high saturation ratio:
+$$\rho_{\text{very-high}} = \frac{|\{(x, y) : s(x, y) > 0.95\}|}{HW}$$
+**Sub-Anomalies:**
+$$a_{\text{mean}} = \begin{cases} \min(1, (\bar{s} - 0.65) \times 3.0) & \text{if } \bar{s} > 0.65 \\ 0 & \text{otherwise} \end{cases}$$
+$$a_{\text{high}} = \begin{cases} \min(1, (\rho_{\text{high}} - 0.20) \times 2.5) & \text{if } \rho_{\text{high}} > 0.20 \\ 0 & \text{otherwise} \end{cases}$$
+$$a_{\text{clip}} = \begin{cases} \min(1, (\rho_{\text{very-high}} - 0.05) \times 10.0) & \text{if } \rho_{\text{very-high}} > 0.05 \\ 0 & \text{otherwise} \end{cases}$$
+Saturation score:
+$$s_{\text{sat}} = 0.3 \cdot a_{\text{mean}} + 0.4 \cdot a_{\text{high}} + 0.3 \cdot a_{\text{clip}}$$
+**Step 3: Histogram Analysis**
+For each RGB channel $C \in \{R, G, B\}$:
+Compute histogram $H_C$ with 64 bins over $[0, 1]$:
+$$h_k = \frac{|\{p \in C : k-1 < 64p \leq k\}|}{HW}$$
+Roughness:
+$$\mathcal{R}_C = \frac{1}{63} \sum_{k=1}^{63} |h_{k+1} - h_k|$$
+Clipping detection:
+$$c_{\text{low}} = h_1 + h_2, \quad c_{\text{high}} = h_{63} + h_{64}$$
+**Anomalies (averaged over RGB):**
+$$a_{\text{rough}} = \text{mean}_C \left[\text{clip}((\mathcal{R}_C - 0.015) \times 50.0, 0, 1)\right]$$
+$$a_{\text{clip-low}} = \text{mean}_C \left[\begin{cases} \min(1, (c_{\text{low}} - 0.10) \times 5.0) & \text{if } c_{\text{low}} > 0.10 \\ 0 & \text{otherwise} \end{cases}\right]$$
+$$a_{\text{clip-high}} = \text{mean}_C \left[\begin{cases} \min(1, (c_{\text{high}} - 0.10) \times 5.0) & \text{if } c_{\text{high}} > 0.10 \\ 0 & \text{otherwise} \end{cases}\right]$$
+Histogram score:
+$$s_{\text{hist}} = a_{\text{rough}} \lor a_{\text{clip-low}} \lor a_{\text{clip-high}}$$
+(logical OR: take max if any triggered)
+**Step 4: Hue Analysis**
+Filter pixels with sufficient saturation: $\mathcal{S} = \{(x, y) : s(x, y) > 0.2\}$
+If $|\mathcal{S}| < 100$ pixels, return neutral score.
+Compute hue histogram with 36 bins (10° each):
+$$H_h(k) = \frac{|\{(x, y) \in \mathcal{S} : 10(k-1) \leq h(x, y) < 10k\}|}{|\mathcal{S}|}$$
+Top-3 concentration:
+$$\rho_{\text{top3}} = \sum_{k \in \text{top-3}} H_h(k)$$
+Empty bins:
+$$n_{\text{empty}} = |\{k : H_h(k) < 0.01\}|$$
+Gap ratio:
+$$\rho_{\text{gap}} = \frac{n_{\text{empty}}}{36}$$
+**Anomalies:**
+$$a_{\text{conc}} = \begin{cases} \min(1, (\rho_{\text{top3}} - 0.6) \times 2.5) & \text{if } \rho_{\text{top3}} > 0.6 \\ 0 & \text{otherwise} \end{cases}$$
+$$a_{\text{gap}} = \begin{cases} \min(1, (\rho_{\text{gap}} - 0.4) \times 1.5) & \text{if } \rho_{\text{gap}} > 0.4 \\ 0 & \text{otherwise} \end{cases}$$
+Hue score:
+$$s_{\text{hue}} = 0.6 \cdot a_{\text{conc}} + 0.4 \cdot a_{\text{gap}}$$
+**Step 5: Final Score**
+$$s_{\text{color}} = 0.4 \cdot s_{\text{sat}} + 0.35 \cdot s_{\text{hist}} + 0.25 \cdot s_{\text{hue}}$$
+### Implementation Reference
+See `metrics/color_analyzer.py:ColorAnalyzer.detect()`
+---
+## Ensemble Aggregation Theory
+### Weighted Linear Combination
+Given individual metric scores $\{s_1, s_2, s_3, s_4, s_5\}$ and weights $\{w_1, w_2, w_3, w_4, w_5\}$ where $\sum_{i=1}^{5} w_i = 1$:
+$$S = \sum_{i=1}^{5} w_i s_i$$
+Default weights:
+$$\mathbf{w} = [0.30, 0.25, 0.20, 0.15, 0.10]^\top$$
+### Theoretical Properties
+**Proposition 1 (Boundedness):**
+$$\forall i, \; s_i \in [0, 1] \implies S \in [0, 1]$$
+*Proof:*
+$$S = \sum_{i=1}^{5} w_i s_i \leq \sum_{i=1}^{5} w_i \cdot 1 = 1$$
+$$S = \sum_{i=1}^{5} w_i s_i \geq \sum_{i=1}^{5} w_i \cdot 0 = 0 \quad \square$$
+**Proposition 2 (Robustness to Single Metric Failure):**
+If metric $j$ fails and returns neutral score $s_j = 0.5$, the maximum score deviation is:
+$$\Delta S_{\max} = w_j \cdot 0.5$$
+With default weights:
+$$\Delta S_{\max} \leq 0.30 \times 0.5 = 0.15$$
+*Interpretation:* Even if Gradient PCA (highest weight) fails, score deviates by at most 0.15, preserving decision boundary integrity.
+**Proposition 3 (Monotonicity):**
+$$\forall i, \; \frac{\partial S}{\partial s_i} = w_i > 0$$
+*Interpretation:* Increasing any metric score strictly increases ensemble score (no conflicting signals).
+### Confidence Estimation
+Individual metric confidence $c_i$ measures reliability of $s_i$.
+Aggregate confidence:
+$$C = \text{clip}\left(2 \times |S - 0.5|, 0, 1\right)$$
+*Rationale:* Confidence increases with distance from neutral point (0.5):
+- $S = 0.0$: Very confident authentic ($C = 1.0$)
+- $S = 0.5$: No confidence ($C = 0.0$)
+- $S = 1.0$: Very confident AI-generated ($C = 1.0$)
+### Alternative Aggregation Strategies (Future Work)
+**Weighted Geometric Mean:**
+$$S_{\text{geom}} = \prod_{i=1}^{5} s_i^{w_i}$$
+- *Pro:* Penalizes very low scores (forces consensus)
+- *Con:* Single zero score makes $S_{\text{geom}} = 0$
+**Bayesian Model:**
+$$P(\text{AI} \mid s_1, \ldots, s_5) = \frac{P(s_1, \ldots, s_5 \mid \text{AI}) P(\text{AI})}{P(s_1, \ldots, s_5)}$$
+Assuming conditional independence:
+$$P(\text{AI} \mid \mathbf{s}) \propto P(\text{AI}) \prod_{i=1}^{5} P(s_i \mid \text{AI})$$
+- *Pro:* Principled probabilistic framework
+- *Con:* Requires labeled training data to estimate likelihoods
+**Neural Combiner:**
+Learn non-linear combination function $f : [0, 1]^5 \to [0, 1]$:
+$S_{\text{neural}} = f(s_1, s_2, s_3, s_4, s_5; \theta)$
+- *Pro:* Can learn complex interactions
+- *Con:* Loses interpretability, requires large labeled dataset
+---
+## Threshold Calibration
+### Binary Decision Rule
+$D(I) = \begin{cases}
+1 & \text{if } S(I) \geq \tau \\
+0 & \text{if } S(I) < \tau
+\end{cases}$
+Default threshold: $\tau = 0.65$
+### ROC Analysis Framework
+Define:
+- **True Positive (TP)**: AI image correctly flagged ($D = 1, y = 1$)
+- **False Positive (FP)**: Real image incorrectly flagged ($D = 1, y = 0$)
+- **True Negative (TN)**: Real image correctly passed ($D = 0, y = 0$)
+- **False Negative (FN)**: AI image incorrectly passed ($D = 0, y = 1$)
+True Positive Rate (Sensitivity):
+$\text{TPR}(\tau) = \frac{\text{TP}}{\text{TP} + \text{FN}} = P(S \geq \tau \mid y = 1)$
+False Positive Rate:
+$\text{FPR}(\tau) = \frac{\text{FP}}{\text{FP} + \text{TN}} = P(S \geq \tau \mid y = 0)$
+ROC Curve: $\{(\text{FPR}(\tau), \text{TPR}(\tau)) : \tau \in [0, 1]\}$
+### Threshold Selection Strategies
+**1. Maximize Youden's J:**
+$\tau^* = \arg\max_\tau \left[\text{TPR}(\tau) - \text{FPR}(\tau)\right]$
+**2. Fixed FPR Constraint:**
+$\tau^* = \min\{\tau : \text{FPR}(\tau) \leq \alpha\}$
+where $\alpha$ is acceptable false positive rate (e.g., 10%).
+**3. Cost-Sensitive:**
+$\tau^* = \arg\min_\tau \left[C_{\text{FP}} \cdot \text{FP}(\tau) + C_{\text{FN}} \cdot \text{FN}(\tau)\right]$
+where $C_{\text{FP}}$ = cost of incorrectly flagging real image, $C_{\text{FN}}$ = cost of missing AI image.
+### Current Calibration ($\tau = 0.65$)
+Rationale:
+- Prioritizes **high recall** on AI images (minimize FN)
+- Accepts 10-20% FPR on real images
+- Reflects use case: screening tool (better to review unnecessarily than miss AI content)
+Sensitivity modes:
+- **Conservative** ($\tau = 0.75$): Lower FPR (~5-10%), Lower TPR (~50-70%)
+- **Balanced** ($\tau = 0.65$): Default
+- **Aggressive** ($\tau = 0.55$): Higher TPR (~60-85%), Higher FPR (~20-30%)
+---
+## Performance Analysis
+### Expected Detection Rates (Empirical Estimates)
+Based on statistical properties of different generator classes:
+| Generator Type | Expected TPR | Rationale |
+|----------------|--------------|-----------|
+| DALL-E 2, Stable Diffusion 1.x | 80-90% | Strong gradient/frequency artifacts |
+| Midjourney v5, Stable Diffusion 2.x | 70-80% | Improved but detectable patterns |
+| DALL-E 3, Midjourney v6 | 55-70% | Better physics simulation |
+| Imagen 3, FLUX | 40-55% | State-of-art, near-physical |
+| Post-processed AI | 30-45% | Artifacts removed by editing |
+### False Positive Analysis
+**Sources of FP on Real Photos:**
+1. **HDR Images** (25% of FPs):
+   - Tone mapping creates unnatural gradients
+   - Triggers gradient PCA (low eigenvalue ratio)
+2. **Macro Photography** (20% of FPs):
+   - Shallow depth of field → smooth backgrounds
+   - Triggers texture smoothness detector
+3. **Long Exposure** (15% of FPs):
+   - Motion blur reduces high-frequency content
+   - Triggers frequency analyzer
+4. **Heavy JPEG Compression** (15% of FPs):
+   - Blocks create spectral artifacts
+   - Triggers frequency + noise detectors
+5. **Studio Lighting** (10% of FPs):
+   - Controlled lighting → uniform saturation
+   - Triggers color analyzer
+6. **Other** (15%): Panoramas, stitched images, artistic filters
+**Mitigation Strategies:**
+- Metadata checks: EXIF camera model, lens info
+- Image provenance verification
+- Human review for high-confidence FPs (score close to threshold)
+### Computational Complexity
+| Metric | Time Complexity | Space Complexity |
+|--------|-----------------|------------------|
+| Gradient PCA | $O(HW + N \log N)$ | $O(N)$ where $N = 10000$ |
+| Frequency FFT | $O(HW \log(HW))$ | $O(HW)$ |
+| Noise Analysis | $O(HW \cdot P)$ | $O(P)$ where $P \approx 100$ patches |
+| Texture Analysis | $O(N_p \cdot p^2)$ | $O(N_p \cdot p^2)$ where $N_p = 50$, $p = 64$ |
+| Color Analysis | $O(HW)$ | $O(HW)$ |
+| **Total** | $O(HW \log(HW))$ | $O(HW)$ |
+For typical image $1920 \times 1080$:
+- $HW \approx 2 \times 10^6$ pixels
+- Processing time: 2-4 seconds (single-threaded)
+- Memory: 50-150 MB
+### Scalability
+Batch processing with $n$ images and $w$ workers:
+$T_{\text{batch}} = \frac{n}{w} \cdot T_{\text{single}} + T_{\text{overhead}}$
+Efficiency:
+$\eta = \frac{n \cdot T_{\text{single}}}{T_{\text{batch}}} \approx \frac{w}{1 + \epsilon}$
+where $\epsilon$ represents parallelization overhead ($\epsilon \approx 0.1$ for $w = 4$).
+---
+## Limitations & Future Work
+### Current Limitations
+**1. Statistical Approach Ceiling**
+No statistical detector can keep pace with generative model evolution:
+$\lim_{t \to \infty} \text{TPR}(t) \to \text{TPR}_{\text{base}} \approx 30\%$
+where $t$ is time and generators continuously improve.
+**Fundamental Issue:** Statistical features are **necessary but not sufficient** conditions for authenticity.
+**2. Adversarial Brittleness**
+Simple post-processing defeats all metrics:
+- Add Gaussian noise: $\tilde{I} = I + \mathcal{N}(0, \sigma^2)$ where $\sigma = 2$
+- JPEG compression with quality 85
+- Slight rotation + crop
+Expected TPR drop: 60-80% → 10-30%
+**3. False Positive Problem**
+10-20% FPR is **unacceptable** for many workflows:
+- Content creators unfairly flagged
+- Erosion of user trust
+- Legal liability issues
+**4. No Semantic Understanding**
+System cannot detect:
+- Deepfakes (face swaps)
+- Inpainting (local manipulation)
+- Prompt-guided generation ("photo in the style of...")
+**5. Computational Cost**
+2-4 sec/image too slow for real-time applications (video streaming, live moderation).
+### Future Research Directions
+**1. Hybrid Systems**
+Combine statistical + ML approaches:
+$S_{\text{hybrid}} = \alpha \cdot S_{\text{statistical}} + (1 - \alpha) \cdot S_{\text{ML}}$
+- Statistical: Fast, interpretable, generalizes
+- ML: Learns generator-specific patterns
+**2. Provenance Tracking**
+Blockchain-based image certificates:
+- Cryptographic signatures at capture time
+- Immutable audit trail
+- No detection needed (authenticity verified, not inferred)
+**3. Watermarking Standards**
+Embedded invisible watermarks in AI generators (industry collaboration):
+- Stable Diffusion: `invisible_watermark` library
+- OpenAI: C2PA content credentials
+- Detection becomes trivial lookup
+**4. Active Authentication**
+Real-time verification with camera hardware:
+- Secure enclaves in sensors
+- Tamper-evident metadata
+- Physical unclonable functions (PUFs)
+**5. Human-in-the-Loop**
+Optimize for **human augmentation**, not replacement:
+- Prioritization scores, not binary decisions
+- Explainable evidence, not black-box predictions
+- Confidence intervals, not point estimates
+### Conclusion
+This system represents a **pragmatic engineering solution** to an **unsolvable theoretical problem**. Perfect AI image detection is impossible due to:
+1. Generative models improving faster than detectors
+2. Adversarial post-processing trivially defeats statistical features
+3. Semantic understanding requires AGI-level capabilities
+**Our contribution:** A transparent, explainable screening tool that reduces manual review workload by 40-70% while acknowledging fundamental limitations.
+---
+## References
+1. Gragnaniello et al. (2021). "Are GAN Generated Images Easy to Detect?" *IEEE ICME*.
+2. Dzanic et al. (2020). "Fourier Spectrum Discrepancies in Deep Networks." *NeurIPS*.
+3. Kirchner & Johnson (2019). "SPN-CNN for Image Manipulation Detection." *IEEE WIFS*.
+4. Nataraj et al. (2019). "Detecting GAN Images via Co-occurrence Matrices." *Electronic Imaging*.
+5. Marra et al. (2019). "Do GANs Leave Specific Traces?" *IEEE MIPR*.
+6. Corvi et al. (2023). "From GANs to Diffusion Models." *arXiv:2304.06408*.
+7. Sha et al. (2023). "DE-FAKE: Detection and Attribution of Fake Images." *ACM CCS*.
+8. Wang et al. (2020). "CNN-Generated Images Are Easy to Spot... for Now." *CVPR*.
+---
+*Document Version: 1.0*
+*Author: Satyaki Mitra*
+*Date: December 2025*

features/__init__.py ADDED Viewed

File without changes

features/batch_processor.py ADDED Viewed

	@@ -0,0 +1,299 @@

+# Dependencies
+import time
+from typing import List
+from typing import Dict
+from typing import Tuple
+from pathlib import Path
+from typing import Callable
+from utils.logger import get_logger
+from config.settings import settings
+from config.schemas import AnalysisResult
+from concurrent.futures import TimeoutError
+from concurrent.futures import as_completed
+from config.constants import DetectionStatus
+from config.schemas import BatchAnalysisResult
+from metrics.aggregator import MetricsAggregator
+from concurrent.futures import ThreadPoolExecutor
+from features.threshold_manager import ThresholdManager
+# Setup Logging
+logger = get_logger(__name__)
+class BatchProcessor:
+    """
+    Process multiple images in parallel or sequential mode
+    Features:
+    ---------
+    - Parallel processing using ThreadPoolExecutor
+    - Sequential fallback for single images or disabled parallel mode
+    - Automatic error handling and recovery
+    - Progress tracking and logging
+    """
+    def __init__(self, threshold_manager: ThresholdManager):
+        """
+        Initialize Batch Processor
+        """
+        # Instantiate threshold manager
+        self.threshold_manager = threshold_manager
+        # Initialize aggregator
+        self.aggregator        = MetricsAggregator(threshold_manager = threshold_manager)
+        # Fix number of workers
+        self.max_workers       = settings.MAX_WORKERS if settings.PARALLEL_PROCESSING else 1
+        logger.info(f"BatchProcessor initialized with max_workers={self.max_workers}, parallel={settings.PARALLEL_PROCESSING}")
+    def process_batch(self, image_files: List[Dict[str, any]], on_progress: Callable[[int, int, str], None] | None = None) -> BatchAnalysisResult:
+        """
+        Process multiple images with automatic parallel/sequential switching
+        Arguments:
+        ----------
+            image_files   { list }    : List of dicts with keys:
+                                        - 'path'     : Path object
+                                        - 'filename' : str
+                                        - 'size'     : tuple (width, height)
+            on_progress { Callablel } : Optional callback invoked after each image is processed
+        Returns:
+        --------
+            { BatchAnalysisResult } : Complete batch analysis result
+        """
+        start_time   = time.time()
+        total_images = len(image_files)
+        logger.info(f"Starting batch processing of {total_images} images")
+        # Validate input
+        if (total_images == 0):
+            logger.warning("Empty batch provided")
+            return self._create_empty_batch_result()
+        if (total_images > settings.MAX_BATCH_SIZE):
+            logger.error(f"Batch size {total_images} exceeds maximum {settings.MAX_BATCH_SIZE}")
+            raise ValueError(f"Batch size {total_images} exceeds maximum allowed {settings.MAX_BATCH_SIZE}")
+        # Choose processing strategy
+        if (settings.PARALLEL_PROCESSING and (total_images > 1)):
+            results, failed = self._process_parallel(image_files = image_files,
+                                                     on_progress = on_progress,
+                                                    )
+        else:
+            results, failed = self._process_sequential(image_files = image_files,
+                                                       on_progress = on_progress,
+                                                      )
+        total_time           = time.time() - start_time
+        # Create batch result
+        batch_result         = BatchAnalysisResult(total_images          = total_images,
+                                                   processed             = len(results),
+                                                   failed                = failed,
+                                                   results               = results,
+                                                   total_processing_time = total_time,
+                                                  )
+        # Calculate summary statistics
+        batch_result.summary = self._calculate_summary(results = results,
+                                                       total   = total_images,
+                                                      )
+        logger.info(f"Batch processing complete: {len(results)}/{total_images} successful, {failed} failed in {total_time:.2f}s")
+        return batch_result
+    def _process_parallel(self, image_files: List[Dict], on_progress: Callable[[int, int, str], None] | None = None) -> Tuple[List[AnalysisResult], int]:
+        """
+        Process images in parallel using ThreadPoolExecutor
+        Arguments:
+        ----------
+            image_files   { list }    : List of image file dictionaries
+            on_progress { Callablel } : Optional callback invoked after each image is processed
+        Returns:
+        --------
+            { tuple }            : (results_list, failed_count)
+        """
+        results = list()
+        failed  = 0
+        logger.debug(f"Using parallel processing with {self.max_workers} workers")
+        with ThreadPoolExecutor(max_workers = self.max_workers) as executor:
+            # Submit all tasks
+            future_to_file = {executor.submit(self.process_single,
+                                              image['path'],
+                                              image['filename'],
+                                              image['size'],
+                                             ): image for image in image_files
+                             }
+            # Collect results as they complete
+            completed = 0
+            for future in as_completed(future_to_file):
+                completed += 1
+                image      = future_to_file[future]
+                if on_progress:
+                    on_progress(completed, len(image_files), image["filename"])
+                try:
+                    result = future.result(timeout = settings.PROCESSING_TIMEOUT)
+                    if result:
+                        results.append(result)
+                        logger.debug(f"✓ Completed: {image['filename']}")
+                    else:
+                        failed += 1
+                        logger.warning(f"✗ Failed: {image['filename']} (returned None)")
+                except TimeoutError:
+                    failed += 1
+                    logger.error(f"✗ Timeout: {image['filename']} (exceeded {settings.PROCESSING_TIMEOUT}s)")
+                except Exception as e:
+                    failed += 1
+                    logger.error(f"✗ Error: {image['filename']} - {e}")
+        return results, failed
+    def _process_sequential(self, image_files: List[Dict], on_progress: Callable[[int, int, str], None] | None = None) -> Tuple[List[AnalysisResult], int]:
+        """
+        Process images sequentially (fallback or single image)
+        Arguments:
+        ----------
+            image_files   { list }   : List of image file dictionaries
+            on_progress { Callabel } : Optional callback invoked after each image is processed
+        Returns:
+        --------
+            { tuple }            : (results_list, failed_count)
+        """
+        results = list()
+        failed  = 0
+        logger.debug("Using sequential processing")
+        for idx, image in enumerate(image_files, 1):
+            try:
+                if on_progress:
+                    on_progress(idx, len(image_files), image["filename"])
+                result = self.process_single(image_path = image['path'],
+                                             filename   = image['filename'],
+                                             image_size = image['size'],
+                                            )
+                if result:
+                    results.append(result)
+                    logger.debug(f"✓ Completed: {image['filename']}")
+                else:
+                    failed += 1
+                    logger.warning(f"✗ Failed: {image['filename']} (returned None)")
+            except Exception as e:
+                failed += 1
+                logger.error(f"✗ Error: {image['filename']} - {e}")
+        return results, failed
+    def process_single(self, image_path: Path, filename: str, image_size: Tuple[int, int]) -> AnalysisResult:
+        """
+        Process single image (called by both parallel and sequential)
+        Arguments:
+        ----------
+            image_path { Path }  : Path to image file
+            filename   { str }   : Original filename
+            image_size { tuple } : (width, height)
+        Returns:
+        --------
+            { AnalysisResult }   : Analysis result or None on error
+        """
+        try:
+            return self.aggregator.analyze_image(image_path = image_path,
+                                                 filename   = filename,
+                                                 image_size = image_size,
+                                                )
+        except Exception as e:
+            logger.error(f"Failed to process {filename}: {e}", exc_info = True)
+            return None
+    def _calculate_summary(self, results: List[AnalysisResult], total: int) -> Dict[str, int]:
+        """
+        Calculate summary statistics from results
+        Arguments:
+        ----------
+            results { list } : List of analysis results
+            total   { int }  : Total number of images
+        Returns:
+        --------
+            { dict }         : Summary statistics
+        """
+        # Calculate processing stats
+        likely_authentic = sum(1 for r in results if (r.status == DetectionStatus.LIKELY_AUTHENTIC))
+        review_required  = sum(1 for r in results if (r.status == DetectionStatus.REVIEW_REQUIRED))
+        processed        = len(results)
+        failed           = total - processed
+        success_rate     = int((processed / total * 100) if (total > 0) else 0)
+        # Calculate average scores
+        avg_score        = sum(r.overall_score for r in results) / len(results) if results else 0.0
+        avg_confidence   = sum(r.confidence for r in results) / len(results) if results else 0
+        avg_proc_time    = sum(r.processing_time for r in results) / len(results) if results else 0.0
+        return {"likely_authentic" : likely_authentic,
+                "review_required"  : review_required,
+                "success_rate"     : success_rate,
+                "processed"        : processed,
+                "failed"           : failed,
+                "avg_score"        : round(avg_score, 3),
+                "avg_confidence"   : int(avg_confidence),
+                "avg_proc_time"    : round(avg_proc_time, 2),
+               }
+    def _create_empty_batch_result(self) -> BatchAnalysisResult:
+        """
+        Create empty batch result for edge cases
+        Returns:
+        --------
+            { BatchAnalysisResult } : Empty batch result
+        """
+        return BatchAnalysisResult(total_images          = 0,
+                                   processed             = 0,
+                                   failed                = 0,
+                                   results               = [],
+                                   summary               = {"likely_authentic" : 0,
+                                                            "review_required"  : 0,
+                                                            "success_rate"     : 0,
+                                                           },
+                                   total_processing_time = 0.0,
+                                  )

features/detailed_result_maker.py ADDED Viewed

	@@ -0,0 +1,481 @@

+# Dependencies
+import pandas as pd
+from typing import Dict
+from typing import List
+from typing import Optional
+from utils.logger import get_logger
+from config.constants import MetricType
+from config.constants import SignalStatus
+from config.schemas import AnalysisResult
+from config.constants import SIGNAL_THRESHOLDS
+# Setup Logging
+logger = get_logger(__name__)
+class DetailedResultMaker:
+    """
+    Extract and format detailed analysis results for UI and reporting
+    Purpose:
+    --------
+    - Extracts all intermediate metrics from MetricResult objects
+    - Formats data for tabular display in UI
+    - Provides rich metadata for PDF/CSV reports
+    - No re-computation - just data extraction and formatting
+    Output Formats:
+    ---------------
+    1. Structured dictionaries for UI
+    2. Pandas DataFrames for reports
+    3. Hierarchical JSON for API
+    """
+    def __init__(self, signal_thresholds: dict | None = None):
+        """
+        Initialize Detailed Result Maker
+        """
+        self.metric_display_names = {MetricType.GRADIENT  : "Gradient-Field PCA",
+                                     MetricType.FREQUENCY : "Frequency Domain (FFT)",
+                                     MetricType.NOISE     : "Noise Pattern Analysis",
+                                     MetricType.TEXTURE   : "Texture Statistics",
+                                     MetricType.COLOR     : "Color Distribution",
+                                    }
+        self.signal_thresholds    = signal_thresholds or SIGNAL_THRESHOLDS
+        logger.debug("DetailedResultMaker initialized")
+    def extract_detailed_results(self, analysis_result: AnalysisResult) -> Dict:
+        """
+        Extract all detailed results from AnalysisResult
+        Arguments:
+        ----------
+            analysis_result { AnalysisResult } : Complete analysis result
+        Returns:
+        --------
+            { dict }                           : Comprehensive detailed results
+        """
+        logger.debug(f"Extracting detailed results for: {analysis_result.filename}")
+        detailed = {"filename"         : analysis_result.filename,
+                    "overall_summary"  : self._extract_overall_summary(analysis_result = analysis_result),
+                    "metrics_detailed" : self._extract_all_metrics(analysis_result = analysis_result),
+                    "metadata"         : self._extract_metadata(analysis_result = analysis_result),
+                   }
+        logger.debug(f"Extracted {len(detailed['metrics_detailed'])} metric details")
+        return detailed
+    def create_detailed_table(self, analysis_result: AnalysisResult) -> pd.DataFrame:
+        """
+        Create detailed metrics table as DataFrame
+        Arguments:
+        ----------
+            analysis_result { AnalysisResult } : Complete analysis result
+        Returns:
+        --------
+            { DataFrame }                      : Tabular detailed results
+        """
+        rows = list()
+        for metric_type, metric_result in analysis_result.metric_results.items():
+            display_name = self.metric_display_names.get(metric_type, metric_type.value)
+            row          = {"Metric"      : display_name,
+                            "Score"       : round(metric_result.score, 3),
+                            "Confidence"  : round(metric_result.confidence, 3) if metric_result.confidence is not None else "N/A",
+                            "Status"      : self._score_to_status(score = metric_result.score),
+                           }
+            # Add key details from each metric
+            details      = self._extract_key_details(metric_type   = metric_type,
+                                                     metric_result = metric_result,
+                                                    )
+            row.update(details)
+            rows.append(row)
+        # Dump rows into a pandas dataframe for structured result
+        dataframe = pd.DataFrame(data = rows)
+        logger.debug(f"Created detailed table with {len(dataframe)} rows, {len(dataframe.columns)} columns")
+        return dataframe
+    def create_report_data(self, analysis_result: AnalysisResult) -> Dict:
+        """
+        Create rich data structure for report generation
+        Arguments:
+        ----------
+            analysis_result { AnalysisResult } : Complete analysis result
+        Returns:
+        --------
+            { dict }                           : Report-ready data structure
+        """
+        report_data = {"header"             : self._create_report_header(analysis_result = analysis_result),
+                       "overall_assessment" : self._create_overall_assessment(analysis_result = analysis_result),
+                       "metric_breakdown"   : self._create_metric_breakdown(analysis_result = analysis_result),
+                       "forensic_details"   : self._create_forensic_details(analysis_result = analysis_result),
+                       "recommendations"    : self._create_recommendations(analysis_result = analysis_result),
+                      }
+        logger.debug(f"Created report data for: {analysis_result.filename}")
+        return report_data
+    def _extract_overall_summary(self, analysis_result: AnalysisResult) -> Dict:
+        """
+        Extract overall summary information
+        """
+        timestamp = getattr(analysis_result, "timestamp", None)
+        return {"filename"        : analysis_result.filename,
+                "status"          : analysis_result.status.value,
+                "overall_score"   : round(analysis_result.overall_score, 3),
+                "confidence"      : analysis_result.confidence,
+                "processing_time" : round(analysis_result.processing_time, 2),
+                "image_size"      : f"{analysis_result.image_size[0]}×{analysis_result.image_size[1]}",
+                "timestamp"       : timestamp.isoformat() if timestamp else None,
+               }
+    def _extract_all_metrics(self, analysis_result: AnalysisResult) -> List[Dict]:
+        """
+        Extract detailed information for all metrics
+        """
+        metrics_detailed = list()
+        for metric_type, metric_result in analysis_result.metric_results.items():
+            metric_detail = {"metric_type"    : metric_type.value,
+                             "display_name"   : self.metric_display_names.get(metric_type, metric_type.value),
+                             "score"          : round(metric_result.score, 3),
+                             "confidence"     : round(metric_result.confidence, 3) if metric_result.confidence is not None else None,
+                             "status"         : self._score_to_status(score = metric_result.score),
+                             "details"        : metric_result.details or {},
+                             "interpretation" : self._interpret_metric(metric_type   = metric_type,
+                                                                       metric_result = metric_result,
+                                                                      ),
+                            }
+            metrics_detailed.append(metric_detail)
+        # Sort by score (highest first)
+        metrics_detailed.sort(key = lambda x: x['score'], reverse = True)
+        return metrics_detailed
+    def _extract_metadata(self, analysis_result: AnalysisResult) -> Dict:
+        """
+        Extract processing metadata
+        """
+        return {"total_metrics"   : len(analysis_result.metric_results),
+                "flagged_metrics" : sum(1 for s in analysis_result.signals if s.status.value == 'flagged'),
+                "warning_metrics" : sum(1 for s in analysis_result.signals if s.status.value == 'warning'),
+                "passed_metrics"  : sum(1 for s in analysis_result.signals if s.status.value == 'passed'),
+                "avg_confidence"  : self._calculate_avg_confidence(analysis_result = analysis_result),
+               }
+    def _extract_key_details(self, metric_type: MetricType, metric_result) -> Dict:
+        """
+        Extract key details specific to each metric type
+        """
+        details = metric_result.details or {}
+        if (metric_type == MetricType.GRADIENT):
+            return {"Eigenvalue_Ratio" : details.get('eigenvalue_ratio', 'N/A'),
+                    "Vectors_Sampled"  : details.get('gradient_vectors_sampled', 'N/A'),
+                   }
+        elif (metric_type == MetricType.FREQUENCY):
+            return {"HF_Ratio"        : details.get('hf_ratio', 'N/A'),
+                    "HF_Anomaly"      : details.get('hf_anomaly', 'N/A'),
+                    "Spectrum_Bins"   : details.get('spectrum_bins', 'N/A'),
+                   }
+        elif (metric_type == MetricType.NOISE):
+            return {"Mean_Noise"      : details.get('mean_noise', 'N/A'),
+                    "CV"              : details.get('cv', 'N/A'),
+                    "Patches_Valid"   : details.get('patches_valid', 'N/A'),
+                   }
+        elif (metric_type == MetricType.TEXTURE):
+            return {"Smooth_Ratio"    : details.get('smooth_ratio', 'N/A'),
+                    "Contrast_Mean"   : details.get('contrast_mean', 'N/A'),
+                    "Patches_Used"    : details.get('patches_used', 'N/A'),
+                   }
+        elif (metric_type == MetricType.COLOR):
+            sat_stats = details.get('saturation_stats', {})
+            return {"Mean_Saturation" : sat_stats.get('mean_saturation', 'N/A'),
+                    "High_Sat_Ratio"  : sat_stats.get('high_sat_ratio', 'N/A'),
+                   }
+        return {}
+    def _interpret_metric(self, metric_type: MetricType, metric_result) -> str:
+        """
+        Provide human-readable interpretation of metric result
+        """
+        score   = metric_result.score
+        details = metric_result.details or {}
+        if (metric_type == MetricType.GRADIENT):
+            eig_ratio = details.get('eigenvalue_ratio')
+            if eig_ratio:
+                return f"Eigenvalue ratio of {eig_ratio:.3f} ({'high' if eig_ratio > 0.85 else 'low'} alignment)"
+            return "Gradient structure analysis"
+        elif (metric_type == MetricType.FREQUENCY):
+            hf_ratio = details.get('hf_ratio')
+            if hf_ratio:
+                return f"High-freq ratio: {hf_ratio:.3f} ({'elevated' if hf_ratio > 0.35 else 'low' if hf_ratio < 0.08 else 'normal'})"
+            return "Frequency spectrum analysis"
+        elif (metric_type == MetricType.NOISE):
+            mean_noise = details.get('mean_noise')
+            if mean_noise:
+                return f"Mean noise: {mean_noise:.2f} ({'low' if mean_noise < 1.5 else 'normal'})"
+            return "Noise pattern analysis"
+        elif (metric_type == MetricType.TEXTURE):
+            smooth_ratio = details.get('smooth_ratio')
+            if smooth_ratio is not None:
+                return f"Smooth regions: {smooth_ratio:.1%} ({'excessive' if smooth_ratio > 0.4 else 'normal'})"
+            return "Texture variation analysis"
+        elif (metric_type == MetricType.COLOR):
+            sat_stats = details.get('saturation_stats', {})
+            mean_sat  = sat_stats.get('mean_saturation')
+            if mean_sat:
+                return f"Mean saturation: {mean_sat:.2f} ({'high' if mean_sat > 0.65 else 'normal'})"
+            return "Color distribution analysis"
+        return "Analysis complete"
+    def _create_report_header(self, analysis_result: AnalysisResult) -> Dict:
+        """
+        Create report header section
+        """
+        return {"filename"        : analysis_result.filename,
+                "analysis_date"   : analysis_result.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
+                "image_size"      : f"{analysis_result.image_size[0]} × {analysis_result.image_size[1]} pixels",
+                "processing_time" : f"{analysis_result.processing_time:.2f} seconds",
+               }
+    def _create_overall_assessment(self, analysis_result: AnalysisResult) -> Dict:
+        """
+        Create overall assessment section
+        """
+        return {"status"       : analysis_result.status.value,
+                "score"        : round(analysis_result.overall_score * 100, 1),
+                "confidence"   : analysis_result.confidence,
+                "verdict"      : "REVIEW REQUIRED" if analysis_result.status.value == "REVIEW_REQUIRED" else "LIKELY AUTHENTIC",
+                "risk_level"   : self._calculate_risk_level(score = analysis_result.overall_score),
+               }
+    def _create_metric_breakdown(self, analysis_result: AnalysisResult) -> List[Dict]:
+        """
+        Create detailed metric breakdown for report
+        """
+        breakdown = list()
+        for signal in analysis_result.signals:
+            metric_result = analysis_result.metric_results.get(signal.metric_type)
+            item          = {"metric"       : signal.name,
+                             "score"        : f"{signal.score * 100:.1f}%",
+                             "status"       : signal.status.value.upper(),
+                             "confidence"   : f"{metric_result.confidence * 100:.1f}%" if metric_result.confidence else "N/A",
+                             "explanation"  : signal.explanation,
+                             "key_findings" : self.extract_key_findings(metric_type   = signal.metric_type,
+                                                                        metric_result = metric_result,
+                                                                       ),
+                            }
+            breakdown.append(item)
+        return breakdown
+    def _create_forensic_details(self, analysis_result: AnalysisResult) -> Dict:
+        """
+        Create forensic details section
+        """
+        forensic = dict()
+        for metric_type, metric_result in analysis_result.metric_results.items():
+            metric_name           = self.metric_display_names.get(metric_type, metric_type.value)
+            forensic[metric_name] = metric_result.details or {"note": "No detailed forensics available"}
+        return forensic
+    def _create_recommendations(self, analysis_result: AnalysisResult) -> Dict:
+        """
+        Create recommendations section
+        """
+        score = analysis_result.overall_score
+        if (score >= 0.85):
+            return {"action"      : "Immediate manual verification required",
+                    "priority"    : "HIGH",
+                    "next_steps"  : ["Forensic analysis", "Reverse image search", "Metadata inspection", "Expert review"],
+                    "confidence"  : "Very high likelihood of AI generation",
+                   }
+        elif (score >= 0.70):
+            return {"action"      : "Manual verification recommended",
+                    "priority"    : "MEDIUM",
+                    "next_steps"  : ["Visual inspection", "Compare with authentic samples", "Check source provenance"],
+                    "confidence"  : "High likelihood of AI generation",
+                   }
+        elif (score >= 0.50):
+            return {"action"      : "Optional review suggested",
+                    "priority"    : "LOW",
+                    "next_steps"  : ["May be edited photo", "Verify image source", "Check for inconsistencies"],
+                    "confidence"  : "Moderate indicators present",
+                   }
+        else:
+            return {"action"      : "No immediate action required",
+                    "priority"    : "NONE",
+                    "next_steps"  : ["Proceed with normal workflow"],
+                    "confidence"  : "Low likelihood of AI generation",
+                   }
+    def _score_to_status(self, score: float) -> str:
+        """
+        Convert score to status label
+        """
+        if (score >= self.signal_thresholds[SignalStatus.FLAGGED]):
+            return "FLAGGED"
+        elif (score >= self.signal_thresholds[SignalStatus.WARNING]):
+            return "WARNING"
+        else:
+            return "PASSED"
+    def _calculate_avg_confidence(self, analysis_result: AnalysisResult) -> float:
+        """
+        Calculate average confidence across all metrics
+        """
+        confidences = [mr.confidence for mr in analysis_result.metric_results.values() if mr.confidence is not None]
+        return round(sum(confidences) / len(confidences), 3) if confidences else 0.0
+    def _calculate_risk_level(self, score: float) -> str:
+        """
+        Calculate risk level from score
+        """
+        if (score >= 0.85):
+            return "CRITICAL"
+        elif (score >= 0.70):
+            return "HIGH"
+        elif (score >= 0.50):
+            return "MEDIUM"
+        else:
+            return "LOW"
+    def extract_key_findings(self, metric_type: MetricType, metric_result) -> List[str]:
+        """
+        Extract human-readable key forensic findings for a given metric used by:
+        - Detailed UI views
+        - CSV reports
+        - JSON reports
+        """
+        findings = list()
+        details  = metric_result.details or {}
+        if (metric_type == MetricType.GRADIENT):
+            eig_ratio = details.get('eigenvalue_ratio')
+            if eig_ratio:
+                findings.append(f"Eigenvalue ratio: {eig_ratio:.3f}")
+            vectors = details.get('gradient_vectors_sampled')
+            if vectors:
+                findings.append(f"Analyzed {vectors} gradient vectors")
+        elif (metric_type == MetricType.FREQUENCY):
+            hf_ratio = details.get('hf_ratio')
+            if hf_ratio:
+                findings.append(f"High-frequency ratio: {hf_ratio:.3f}")
+            roughness = details.get('roughness')
+            if roughness:
+                findings.append(f"Spectral roughness: {roughness:.3f}")
+        elif (metric_type == MetricType.NOISE):
+            mean_noise = details.get('mean_noise')
+            if mean_noise:
+                findings.append(f"Mean noise level: {mean_noise:.2f}")
+            cv = details.get('cv')
+            if cv:
+                findings.append(f"Coefficient of variation: {cv:.3f}")
+        elif (metric_type == MetricType.TEXTURE):
+            smooth_ratio = details.get('smooth_ratio')
+            if smooth_ratio:
+                findings.append(f"Smooth patches: {smooth_ratio:.1%}")
+            contrast_mean = details.get('contrast_mean')
+            if contrast_mean:
+                findings.append(f"Average contrast: {contrast_mean:.2f}")
+        elif (metric_type == MetricType.COLOR):
+            sat_stats = details.get('saturation_stats', {})
+            mean_sat  = sat_stats.get('mean_saturation')
+            if mean_sat:
+                findings.append(f"Mean saturation: {mean_sat:.2f}")
+            high_sat = sat_stats.get('high_sat_ratio')
+            if high_sat:
+                findings.append(f"High saturation pixels: {high_sat:.1%}")
+        return findings if findings else ["Analysis complete"]

features/threshold_manager.py ADDED Viewed

	@@ -0,0 +1,277 @@

+# Dependencies
+from typing import Dict
+from utils.logger import get_logger
+from config.settings import settings
+from config.constants import MetricType
+from config.constants import SignalStatus
+from config.constants import SIGNAL_THRESHOLDS
+# Setup Logging
+logger = get_logger(__name__)
+class ThresholdManager:
+    """
+    Manage detection thresholds dynamically
+    Purpose:
+    --------
+    Allows runtime adjustment of detection thresholds for:
+    - A/B testing different sensitivity levels
+    - Calibration based on real-world performance
+    - Custom thresholds for specific use cases
+    - Environment-specific tuning (production vs staging)
+    Note: Changes are runtime-only and not persisted
+    """
+    def __init__(self):
+        """
+        Initialize Threshold Manager with current settings
+        """
+        self._review_threshold  = settings.REVIEW_THRESHOLD
+        self._signal_thresholds = dict(SIGNAL_THRESHOLDS)
+        self._metric_weights    = dict(settings.get_metric_weights())
+        logger.info(f"ThresholdManager initialized: review_threshold={self._review_threshold}")
+    def get_review_threshold(self) -> float:
+        """
+        Get current review threshold
+        Returns:
+        --------
+            { float } : Current threshold [0.0, 1.0]
+        """
+        return self._review_threshold
+    def set_review_threshold(self, new_threshold: float) -> bool:
+        """
+        Set new review threshold
+        Arguments:
+        ----------
+            new_threshold { float } : New threshold value [0.0, 1.0]
+        Returns:
+        --------
+            { bool }                : Success status
+        """
+        if not (0.0 <= new_threshold <= 1.0):
+            logger.error(f"Invalid threshold: {new_threshold} (must be between 0.0 and 1.0)")
+            return False
+        old_threshold          = self._review_threshold
+        self._review_threshold = new_threshold
+        logger.info(f"Review threshold changed: {old_threshold:.2f} → {new_threshold:.2f}")
+        return True
+    def adjust_sensitivity(self, sensitivity: str) -> bool:
+        """
+        Adjust sensitivity using preset levels
+        Arguments:
+        ----------
+            sensitivity { str } : One of 'conservative', 'balanced', 'aggressive'
+        Returns:
+        --------
+            { bool }            : Success status
+        """
+        presets = {'conservative' : 0.75,  # Fewer false positives, may miss some AI
+                   'balanced'     : 0.65,  # Recommended default
+                   'aggressive'   : 0.55,  # Catch more AI, more false positives
+                  }
+        if (sensitivity not in presets):
+            logger.error(f"Invalid sensitivity: {sensitivity}. Must be one of {list(presets.keys())}")
+            return False
+        new_threshold = presets[sensitivity]
+        success       = self.set_review_threshold(new_threshold = new_threshold)
+        if success:
+            logger.info(f"Sensitivity set to '{sensitivity}' (threshold={new_threshold})")
+        return success
+    def get_signal_thresholds(self) -> Dict[SignalStatus, float]:
+        """
+        Get current signal thresholds
+        Returns:
+        --------
+            { dict } : Signal status → threshold mapping
+        """
+        return self._signal_thresholds.copy()
+    def set_signal_threshold(self, status: SignalStatus, threshold: float) -> bool:
+        """
+        Set threshold for specific signal status
+        Arguments:
+        ----------
+            status    { SignalStatus } : Signal status to modify
+            threshold { float }        : New threshold [0.0, 1.0]
+        Returns:
+        --------
+            { bool }                   : Success status
+        """
+        if not (0.0 <= threshold <= 1.0):
+            logger.error(f"Invalid threshold: {threshold}")
+            return False
+        old_threshold                    = self._signal_thresholds.get(status)
+        self._signal_thresholds[status]  = threshold
+        logger.info(f"Signal threshold for {status.value}: {old_threshold:.2f} → {threshold:.2f}")
+        return True
+    def get_metric_weights(self) -> Dict[MetricType, float]:
+        """
+        Get current metric weights
+        Returns:
+        --------
+            { dict } : Metric type → weight mapping
+        """
+        return self._metric_weights.copy()
+    def set_metric_weight(self, metric: MetricType, weight: float) -> bool:
+        """
+        Set weight for specific metric
+        Arguments:
+        ----------
+            metric { MetricType } : Metric to modify
+            weight   { float }    : New weight [0.0, 1.0]
+        Returns:
+        --------
+            { bool }              : Success status
+        """
+        if not (0.0 <= weight <= 1.0):
+            logger.error(f"Invalid weight: {weight}")
+            return False
+        old_weight                   = self._metric_weights.get(metric, 0.0)
+        self._metric_weights[metric] = weight
+        # Validate total weight
+        total_weight                 = sum(self._metric_weights.values())
+        if not (0.99 <= total_weight <= 1.01):
+            logger.warning(f"Total metric weights = {total_weight:.3f} (should sum to 1.0)")
+        logger.info(f"Metric weight for {metric.value}: {old_weight:.2f} → {weight:.2f}")
+        return True
+    def set_all_metric_weights(self, weights: Dict[MetricType, float]) -> bool:
+        """
+        Set all metric weights at once (ensures sum = 1.0)
+        Arguments:
+        ----------
+            weights { dict } : Complete metric weights mapping
+        Returns:
+        --------
+            { bool }         : Success status
+        """
+        # Validate input
+        if (not all(0.0 <= w <= 1.0 for w in weights.values())):
+            logger.error("All weights must be between 0.0 and 1.0")
+            return False
+        total_weight         = sum(weights.values())
+        if not (0.99 <= total_weight <= 1.01):
+            logger.error(f"Weights must sum to 1.0, got {total_weight:.3f}")
+            return False
+        self._metric_weights = dict(weights)
+        logger.info(f"All metric weights updated: {self._metric_weights}")
+        return True
+    def get_recommendations(self, score: float) -> Dict[str, str]:
+        """
+        Get action recommendations based on score
+        Arguments:
+        ----------
+            score { float } : Overall suspicion score [0.0, 1.0]
+        Returns:
+        --------
+            { dict }        : Recommendation details
+        """
+        if (score >= 0.85):
+            return {"priority"   : "HIGH",
+                    "action"     : "Immediate manual verification recommended",
+                    "confidence" : "Very high likelihood of AI generation",
+                    "next_steps" : "Forensic analysis, reverse image search, metadata inspection",
+                   }
+        elif (score >= 0.70):
+            return {"priority"   : "MEDIUM",
+                    "action"     : "Manual verification recommended",
+                    "confidence" : "High likelihood of AI generation",
+                    "next_steps" : "Visual inspection, compare with similar authentic images",
+                   }
+        elif (score >= 0.50):
+            return {"priority"   : "LOW",
+                    "action"     : "Optional review",
+                    "confidence" : "Moderate indicators of AI generation",
+                    "next_steps" : "May be heavily edited real photo, check source",
+                   }
+        else:
+            return {"priority"   : "NONE",
+                    "action"     : "No immediate action needed",
+                    "confidence" : "Low likelihood of AI generation",
+                    "next_steps" : "Likely authentic, proceed normally",
+                   }
+    def get_current_config(self) -> Dict[str, object]:
+        """
+        Get complete current configuration
+        Returns:
+        --------
+            { dict } : All current threshold and weight settings
+        """
+        return {"review_threshold"  : self._review_threshold,
+                "signal_thresholds" : self._signal_thresholds.copy(),
+                "metric_weights"    : self._metric_weights.copy(),
+               }
+    def reset_to_defaults(self) -> None:
+        """
+        Reset all thresholds to default settings
+        """
+        self._review_threshold  = settings.REVIEW_THRESHOLD
+        self._signal_thresholds = dict(SIGNAL_THRESHOLDS)
+        self._metric_weights    = dict(settings.get_metric_weights())
+        logger.info("All thresholds reset to default values")

metrics/__init__.py ADDED Viewed

File without changes

metrics/aggregator.py ADDED Viewed

	@@ -0,0 +1,288 @@

+# Dependencies
+import time
+import numpy as np
+from typing import List
+from pathlib import Path
+from types import MappingProxyType
+from utils.logger import get_logger
+from config.settings import settings
+from config.schemas import MetricResult
+from config.constants import MetricType
+from config.constants import SignalStatus
+from config.schemas import AnalysisResult
+from config.schemas import DetectionSignal
+from config.constants import DetectionStatus
+from config.constants import SIGNAL_THRESHOLDS
+from utils.image_processor import ImageProcessor
+from config.constants import METRIC_EXPLANATIONS
+from metrics.noise_analyzer import NoiseAnalyzer
+from metrics.color_analyzer import ColorAnalyzer
+from metrics.texture_analyzer import TextureAnalyzer
+from features.threshold_manager import ThresholdManager
+from config.constants import IMAGE_RESIZE_MAX_DIMENSION
+from metrics.frequency_analyzer import FrequencyAnalyzer
+from metrics.gradient_field_pca import GradientFieldPCADetector
+# Suppress NumPy warning
+np.seterr(divide  = 'ignore',
+          invalid = 'ignore',
+         )
+# Setup Logging
+logger = get_logger(__name__)
+class MetricsAggregator:
+    """
+    Main detector that orchestrates all detection methods
+    Combines multiple unsupervised metrics:
+    ----------------------------------------
+    1. Gradient-Field PCA
+    2. Frequency Domain Analysis (FFT)
+    3. Noise Pattern Analysis
+    4. Texture Analysis
+    5. Color Distribution Analysis
+    Note: Each metric produces a suspicion score [0.0, 1.0] : scores are combined using weighted average to produce final assessment
+    """
+    def __init__(self, threshold_manager: ThresholdManager | None = None):
+        """
+        Initialize all detectors
+        """
+        logger.info("Initializing AI Image Detector")
+        # Optional runtime threshold manager
+        self.threshold_manager           = threshold_manager
+        self.gradient_field_pca_detector = GradientFieldPCADetector()
+        self.frequency_analyzer          = FrequencyAnalyzer()
+        self.noise_analyzer              = NoiseAnalyzer()
+        self.texture_analyzer            = TextureAnalyzer()
+        self.color_analyzer              = ColorAnalyzer()
+        self.image_processor             = ImageProcessor()
+        # Create detector registry
+        self.detector_registry          = MappingProxyType({MetricType.GRADIENT  : ("Gradient Field PCA", self.gradient_field_pca_detector),
+                                                            MetricType.FREQUENCY : ("Frequency Analysis", self.frequency_analyzer),
+                                                            MetricType.NOISE     : ("Noise Analysis", self.noise_analyzer),
+                                                            MetricType.TEXTURE   : ("Texture Analysis", self.texture_analyzer),
+                                                            MetricType.COLOR     : ("Color Analysis", self.color_analyzer),
+                                                          })
+        # Get metric weights either from runtime UI or default to settings
+        self.weights                    = (self.threshold_manager.get_metric_weights() if self.threshold_manager else settings.get_metric_weights())
+        logger.info(f"Metric weights: {self.weights}")
+    def analyze_image(self, image_path: Path, filename: str, image_size: tuple) -> AnalysisResult:
+        """
+        Analyze single image for AI generation
+        Arguments:
+        ----------
+            image_path { Path }  : Path to image file
+            filename   { str }   : Original filename
+            image_size { tuple } : (width, height) tuple
+        Returns:
+        --------
+            { AnalysisResult }   : AnalysisResult with detection outcome
+        """
+        logger.info(f"Analyzing image: {filename}")
+        start_time = time.time()
+        try:
+            # Load image
+            image           = self.image_processor.load_image(file_path = image_path)
+            # Resize if needed for performance
+            image           = self.image_processor.resize_if_needed(image         = image,
+                                                                    max_dimension = IMAGE_RESIZE_MAX_DIMENSION,
+                                                                   )
+            # Run all detectors and get raw scores
+            metric_results  = self._run_all_detectors(image = image)
+            # Create signals from scores (aggregator's responsibility)
+            signals         = self._create_signals_from_scores(metric_results = metric_results)
+            # Aggregate results
+            overall_score   = self._aggregate_scores(metric_results = metric_results)
+            # Determine status
+            status          = self._determine_status(overall_score = overall_score)
+            # Calculate processing time
+            processing_time = time.time() - start_time
+            # Create result
+            result          = AnalysisResult(filename        = filename,
+                                             overall_score   = overall_score,
+                                             status          = status,
+                                             confidence      = int(overall_score * 100),
+                                             signals         = signals,
+                                             metric_results  = metric_results,
+                                             processing_time = processing_time,
+                                             image_size      = image_size,
+                                            )
+            logger.info(f"Analysis complete for {filename}: status={status.value}, score={overall_score:.3f}, time={processing_time:.2f}s")
+            return result
+        except Exception as e:
+            logger.error(f"Analysis failed for {filename}: {e}")
+            raise
+    def _run_all_detectors(self, image: np.ndarray) -> dict[MetricType, MetricResult]:
+        """
+        Run all detection methods and collect raw scores
+        Arguments:
+        ----------
+            image { np.ndarray } : RGB image array
+        Returns:
+        --------
+                  { dict }       : Dictionary mapping MetricType to MetricResult
+        """
+        metric_results = dict()
+        # Run eaach detector one by one
+        for metric_type, (detector_name, detector) in self.detector_registry.items():
+            try:
+                result                      = detector.detect(image = image)
+                result.metric_type          = metric_type
+                metric_results[metric_type] = result
+                logger.debug(f"{detector_name} | {metric_type.value} | score={result.score:.3f} | confidence={result.confidence:.3f}")
+            except Exception as e:
+                logger.error(f"{detector.__class__.__name__} failed: {e}")
+                # Same Failure Score by all metrics with same confidence
+                metric_results[metric_type] = MetricResult(metric_type = metric_type,
+                                                           score       = settings.REVIEW_THRESHOLD,
+                                                           confidence  = 0.0,
+                                                           details     = {"error": "detector_failed"},
+                                                          )
+        return metric_results
+    def _create_signals_from_scores(self, metric_results: dict) -> List[DetectionSignal]:
+        """
+        Convert MetricResults to DetectionSignals with status and explanations
+        This is the aggregator's responsibility - metrics don't know about signals
+        Arguments:
+        ----------
+            metric_results { dict }   : Dictionary mapping MetricType to float score
+        Returns:
+        --------
+                    { list }          : List of complete detection signals
+        """
+        signals           = list()
+        signal_thresholds = (self.threshold_manager.get_signal_thresholds() if self.threshold_manager else SIGNAL_THRESHOLDS)
+        for metric_type, result in metric_results.items():
+            # Extract score of the metric
+            score = result.score
+            # Determine status based on thresholds
+            if (score >= signal_thresholds[SignalStatus.FLAGGED]):
+                status   = SignalStatus.FLAGGED
+                severity = 'high'
+            elif (score >= signal_thresholds[SignalStatus.WARNING]):
+                status   = SignalStatus.WARNING
+                severity = 'moderate'
+            else:
+                status   = SignalStatus.PASSED
+                severity = 'normal'
+            # Get explanation from constants
+            explanation = METRIC_EXPLANATIONS[metric_type][severity]
+            # Create signal
+            signal      = DetectionSignal(name        = self.detector_registry[metric_type][0],
+                                          metric_type = metric_type,
+                                          score       = score,
+                                          status      = status,
+                                          explanation = explanation,
+                                         )
+            signals.append(signal)
+        # Sort signals by score (highest first)
+        signals.sort(key = lambda s: s.score, reverse = True)
+        return signals
+    def _aggregate_scores(self, metric_results: dict) -> float:
+        """
+        Aggregate individual metric scores using weighted average
+        Arguments:
+        ----------
+            metric_results { dict } : Dictionary mapping MetricType to float score
+        Returns:
+        --------
+                { float }           : Overall suspicion score [0.0, 1.0]
+        """
+        total_score  = 0.0
+        total_weight = 0.0
+        for metric_type, result in metric_results.items():
+            weight        = self.weights.get(metric_type, 0.0)
+            total_score  += result.score * weight
+            total_weight += weight
+        # Get Aggregated Score
+        if (total_weight > 0):
+            # Normalize
+            overall_score = total_score / total_weight
+        else:
+            # Neutral if no valid weights
+            overall_score = 0.5
+        logger.debug(f"Aggregated score: {overall_score:.3f}")
+        return float(np.clip(overall_score, 0.0, 1.0))
+    def _determine_status(self, overall_score: float) -> DetectionStatus:
+        """
+        Determine binary status from overall score
+        Arguments:
+        ----------
+            overall_score { float } : Aggregated suspicion score
+        Returns:
+        --------
+            { DetectionStatus }     : LIKELY_AUTHENTIC or REVIEW_REQUIRED
+        """
+        # Extract review threshold either from threshold_manager or deault to settings value
+        review_threshold = (self.threshold_manager.get_review_threshold() if self.threshold_manager else settings.REVIEW_THRESHOLD)
+        if (overall_score >= review_threshold):
+            return DetectionStatus.REVIEW_REQUIRED
+        else:
+            return DetectionStatus.LIKELY_AUTHENTIC

metrics/color_analyzer.py ADDED Viewed

	@@ -0,0 +1,352 @@

+# Dependencies
+import numpy as np
+from utils.logger import get_logger
+from config.schemas import MetricResult
+from config.constants import MetricType
+from utils.image_processor import ImageProcessor
+from config.constants import COLOR_ANALYSIS_PARAMS
+# Suppress NumPy warning
+np.seterr(divide  = 'ignore',
+          invalid = 'ignore',
+         )
+# Setup Logging
+logger = get_logger(__name__)
+class ColorAnalyzer:
+    """
+    Color distribution analysis for AI detection
+    Core principle:
+    ---------------
+    - Real photos : Natural color distributions constrained by physics
+    - AI images   : Can create unnatural saturation, hue shifts, or impossible color relationships
+    Method:
+    -------
+    1. Convert to multiple color spaces (RGB, HSV)
+    2. Analyze color histogram distributions
+    3. Check for oversaturation
+    4. Detect unnatural color relationships
+    """
+    def __init__(self):
+        self.image_processor = ImageProcessor()
+    def detect(self, image: np.ndarray) -> MetricResult:
+        """
+        Run color distribution analysis
+        Arguments:
+        ----------
+            image { np.ndarray } : RGB image array (H, W, 3)
+        Returns:
+        --------
+            { MetricResult }     : Structured Color-domain metric result containing:
+                                   - score      : Suspicion score [0.0, 1.0]
+                                   - confidence : Reliability of color analysis evidence
+                                   - details    : Color Analysis forensics and statistics
+        """
+        try:
+            logger.debug(f"Running color analysis on image shape {image.shape}")
+            # Normalize image to [0, 1]
+            image_norm                           = self.image_processor.normalize_image(image = image)
+            # Convert to HSV
+            hsv                                  = self._rgb_to_hsv(rgb = image_norm)
+            # Analyze saturation
+            saturation_score, saturation_details = self._analyze_saturation(hsv = hsv)
+            # Analyze color histogram
+            histogram_score, histogram_details   = self._analyze_color_histogram(rgb = image_norm)
+            # Analyze hue distribution
+            hue_score, hue_details               = self._analyze_hue_distribution(hsv = hsv)
+            # Combine scores
+            weights                              = COLOR_ANALYSIS_PARAMS.MAIN_WEIGHTS
+            final_score                          = (weights['saturation'] * saturation_score + weights['histogram'] * histogram_score + weights['hue'] * hue_score)
+            # Calculate Confidence
+            confidence                           = float(np.clip((abs(final_score - COLOR_ANALYSIS_PARAMS.NEUTRAL_SCORE) * 2.0), 0.0, 1.0))
+            logger.debug(f"Color analysis: saturation={saturation_score:.3f}, histogram={histogram_score:.3f}, hue={hue_score:.3f}, Score={final_score:.3f}")
+            return MetricResult(metric_type = MetricType.COLOR,
+                                score       = float(final_score),
+                                confidence  = confidence,
+                                details     = {"saturation_stats" : saturation_details,
+                                               "histogram_stats"  : histogram_details,
+                                               "hue_stats"        : hue_details,
+                                              },
+                               )
+        except Exception as e:
+            logger.error(f"Color analysis failed: {e}")
+            # Return neutral score on error
+            return MetricResult(metric_type = MetricType.COLOR,
+                                score       = COLOR_ANALYSIS_PARAMS.NEUTRAL_SCORE,
+                                confidence  = 0.0,
+                                details     = {"error": "color_analysis_failed"},
+                               )
+    def _rgb_to_hsv(self, rgb: np.ndarray) -> np.ndarray:
+        """
+        Convert RGB to HSV color space
+        Arguments:
+        ----------
+            rgb { np.ndarray } : RGB image normalized to [0, 1]
+        Returns:
+        --------
+            { np.ndarray }     : HSV image (H in [0, 360], S and V in [0, 1])
+        """
+        r, g, b = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
+        maxc    = np.maximum(np.maximum(r, g), b)
+        minc    = np.minimum(np.minimum(r, g), b)
+        delta   = maxc - minc
+        # Value
+        v       = maxc
+        # Saturation
+        s       = np.where(maxc != 0, delta / maxc, 0)
+        # Hue
+        h       = np.zeros_like(maxc)
+        # Red is max
+        mask    = (maxc == r) & (delta != 0)
+        h[mask] = 60 * (((g[mask] - b[mask]) / delta[mask]) % 6)
+        # Green is max
+        mask    = (maxc == g) & (delta != 0)
+        h[mask] = 60 * (((b[mask] - r[mask]) / delta[mask]) + 2)
+        # Blue is max
+        mask    = (maxc == b) & (delta != 0)
+        h[mask] = 60 * (((r[mask] - g[mask]) / delta[mask]) + 4)
+        hsv     = np.stack([h, s, v], axis = 2)
+        return hsv
+    def _analyze_saturation(self, hsv: np.ndarray) -> tuple[float, dict]:
+        """
+        Analyze saturation distribution for anomalies
+        Real photos: Most pixels have moderate saturation (0.2-0.7)
+        AI images: Can have too many highly saturated pixels (>0.8)
+        Arguments:
+        ----------
+            hsv { np.ndarray } : HSV image
+        Returns:
+        --------
+            { tuple }          : A tuple containing:
+                                 - Suspicion score [0.0, 1.0]
+                                 - Saturation Stats
+        """
+        saturation          = hsv[:, :, 1]
+        if (np.mean(saturation) < 0.05):
+            logger.debug("Low global saturation; skipping saturation analysis")
+            return COLOR_ANALYSIS_PARAMS.NEUTRAL_SCORE, {"reason": "insufficient_color_information"}
+        # Compute saturation statistics
+        mean_sat            = np.mean(saturation)
+        high_sat_ratio      = np.mean(saturation > COLOR_ANALYSIS_PARAMS.SAT_HIGH_THRESHOLD)
+        very_high_sat_ratio = np.mean(saturation > COLOR_ANALYSIS_PARAMS.SAT_VERY_HIGH_THRESHOLD)
+        # Overall saturation level Analysis
+        mean_anomaly        = 0.0
+        if (mean_sat > COLOR_ANALYSIS_PARAMS.SAT_MEAN_THRESHOLD):
+            mean_anomaly = min(1.0, (mean_sat - COLOR_ANALYSIS_PARAMS.SAT_MEAN_THRESHOLD) * COLOR_ANALYSIS_PARAMS.SAT_MEAN_SCALE)
+        # High saturation pixels Analysis
+        high_sat_anomaly = 0.0
+        if (high_sat_ratio > COLOR_ANALYSIS_PARAMS.HIGH_SAT_RATIO_THRESHOLD):
+            high_sat_anomaly = min(1.0, (high_sat_ratio - COLOR_ANALYSIS_PARAMS.HIGH_SAT_RATIO_THRESHOLD) * COLOR_ANALYSIS_PARAMS.HIGH_SAT_SCALE)
+        # Very high saturation Analysis (clipping)
+        clip_anomaly = 0.0
+        if (very_high_sat_ratio > COLOR_ANALYSIS_PARAMS.CLIP_RATIO_THRESHOLD):
+            clip_anomaly = min(1.0, (very_high_sat_ratio - COLOR_ANALYSIS_PARAMS.CLIP_RATIO_THRESHOLD) * COLOR_ANALYSIS_PARAMS.CLIP_SCALE)
+        # Combine Scores
+        weights          = COLOR_ANALYSIS_PARAMS.SAT_SUBMETRIC_WEIGHTS
+        color_score      = (weights['mean_anomaly'] * mean_anomaly + weights['high_sat_anomaly'] * high_sat_anomaly + weights['clip_anomaly'] * clip_anomaly)
+        final_score      = float(np.clip(color_score, 0.0, 1.0))
+        saturation_stats = {"mean_saturation"     : float(mean_sat),
+                            "high_sat_ratio"      : float(high_sat_ratio),
+                            "very_high_sat_ratio" : float(very_high_sat_ratio),
+                            "mean_anomaly"        : float(mean_anomaly),
+                            "high_sat_anomaly"    : float(high_sat_anomaly),
+                            "clip_anomaly"        : float(clip_anomaly),
+                           }
+        logger.debug(f"Saturation - mean: {mean_sat:.3f}, high_ratio: {high_sat_ratio:.3f}, clip_ratio: {very_high_sat_ratio:.3f}")
+        return final_score, saturation_stats
+    def _analyze_color_histogram(self, rgb: np.ndarray) -> tuple[float, dict]:
+        """
+        Analyze RGB histogram distributions for anomalies
+        Arguments:
+        ----------
+            rgb { np.ndarray } : RGB image normalized to [0, 1]
+        Returns:
+        --------
+            { tuple }          : A tuple containing:
+                                 - Suspicion score [0.0, 1.0]
+                                 - Histogram Analysis stats
+        """
+        anomalies      = list()
+        roughness_vals = list()
+        low_clip_vals  = list()
+        high_clip_vals = list()
+        for channel_idx, channel_name in enumerate(['R', 'G', 'B']):
+            channel = rgb[:, :, channel_idx]
+            # Compute histogram
+            hist, bins = np.histogram(channel,
+                                      bins  = COLOR_ANALYSIS_PARAMS.HISTOGRAM_BINS,
+                                      range = COLOR_ANALYSIS_PARAMS.HISTOGRAM_RANGE,
+                                     )
+            hist       = hist / (np.sum(hist) + 1e-10)
+            # Measure histogram roughness
+            hist_diff  = np.abs(np.diff(hist))
+            roughness  = np.mean(hist_diff)
+            roughness_vals.append(roughness)
+            # High roughness = suspicious
+            if (roughness > COLOR_ANALYSIS_PARAMS.ROUGHNESS_THRESHOLD):
+                anomalies.append(np.clip(((roughness - COLOR_ANALYSIS_PARAMS.ROUGHNESS_THRESHOLD) * COLOR_ANALYSIS_PARAMS.ROUGHNESS_SCALE), 0.0, 1.0))
+            # Check for clipping (peaks at extremes)
+            low_clip  = hist[0] + hist[1]
+            high_clip = hist[-1] + hist[-2]
+            # Append values to their respective storages
+            low_clip_vals.append(low_clip)
+            high_clip_vals.append(high_clip)
+            if (low_clip > COLOR_ANALYSIS_PARAMS.CLIP_THRESHOLD):
+                # More than 10% near black
+                anomalies.append(min(1.0, (low_clip - COLOR_ANALYSIS_PARAMS.CLIP_THRESHOLD) * COLOR_ANALYSIS_PARAMS.CLIP_SCALE_FACTOR))
+            if (high_clip > COLOR_ANALYSIS_PARAMS.CLIP_THRESHOLD):
+                # More than 10% near white
+                anomalies.append(min(1.0, (high_clip - COLOR_ANALYSIS_PARAMS.CLIP_THRESHOLD) * COLOR_ANALYSIS_PARAMS.CLIP_SCALE_FACTOR))
+        if (len(anomalies) == 0):
+            logger.debug("No color histogram anomalies detected")
+            return COLOR_ANALYSIS_PARAMS.NEUTRAL_SCORE, {"reason": "insufficient_color_information"}
+        # Take mean of detected anomalies
+        score           = np.mean(anomalies)
+        final_score     = float(np.clip(score, 0.0, 1.0))
+        histogram_stats = {"roughness_mean"    : float(np.mean(roughness_vals)),
+                           "low_clip_mean"     : float(np.mean(low_clip_vals)),
+                           "high_clip_mean"    : float(np.mean(high_clip_vals)),
+                           "channels_analyzed" : 3,
+                          }
+        return final_score, histogram_stats
+    def _analyze_hue_distribution(self, hsv: np.ndarray) -> tuple[float, dict]:
+        """
+        Analyze hue distribution for unnatural patterns
+        Arguments:
+        ----------
+            hsv { np.ndarray } : HSV image
+        Returns:
+        --------
+            { tuple }          : A tuple containing:
+                                 - Suspicion score [0.0, 1.0]
+                                 - hue analysis stats
+        """
+        hue            = hsv[:, :, 0]
+        saturation     = hsv[:, :, 1]
+        # Only consider pixels with sufficient saturation (avoid gray)
+        saturated_mask = saturation > COLOR_ANALYSIS_PARAMS.HUE_SAT_MASK_THRESHOLD
+        if (np.sum(saturated_mask) < COLOR_ANALYSIS_PARAMS.HUE_MIN_PIXELS):
+            # Not enough colored pixels to analyze
+            return COLOR_ANALYSIS_PARAMS.NEUTRAL_SCORE, {"reason": "insufficient_color_information"}
+        hue_saturated         = hue[saturated_mask]
+        # Prevents false positives on monotone objects
+        if (np.ptp(hue_saturated) < 5.0):
+            logger.debug("Hue range too narrow; returning neutral score")
+            return COLOR_ANALYSIS_PARAMS.NEUTRAL_SCORE, {"reason": "insufficient_color_information"}
+        # Compute hue histogram
+        hist, bins            = np.histogram(a     = hue_saturated,
+                                             bins  = COLOR_ANALYSIS_PARAMS.HUE_BINS,
+                                             range = COLOR_ANALYSIS_PARAMS.HUE_RANGE,
+                                            )
+        hist                  = hist / (np.sum(hist) + 1e-10)
+        # Unnatural hue concentration Analysis
+        sorted_hist           = np.sort(hist)[::-1]
+        top3_concentration    = np.sum(sorted_hist[:3])
+        concentration_anomaly = 0.0
+        if (top3_concentration > COLOR_ANALYSIS_PARAMS.HUE_CONCENTRATION_THRESHOLD):
+            # More than 60% in 3 hue bins
+            concentration_anomaly = min(1.0, (top3_concentration - COLOR_ANALYSIS_PARAMS.HUE_CONCENTRATION_THRESHOLD) * COLOR_ANALYSIS_PARAMS.HUE_CONCENTRATION_SCALE)
+        # Hue gaps Analysis
+        zero_bins             = np.sum(hist < COLOR_ANALYSIS_PARAMS.HUE_EMPTY_BIN_THRESHOLD)
+        gap_ratio             = zero_bins / len(hist)
+        gap_anomaly           = 0.0
+        if (gap_ratio > COLOR_ANALYSIS_PARAMS.HUE_GAP_RATIO_THRESHOLD):
+            # More than 40% empty bins
+            gap_anomaly = min(1.0, (gap_ratio - COLOR_ANALYSIS_PARAMS.HUE_GAP_RATIO_THRESHOLD) * COLOR_ANALYSIS_PARAMS.HUE_GAP_SCALE)
+        weights               = COLOR_ANALYSIS_PARAMS.HUE_SUBMETRIC_WEIGHTS
+        score                 = (weights['concentration_anomaly'] * concentration_anomaly + weights['gap_anomaly'] * gap_anomaly)
+        final_score           = float(np.clip(score, 0.0, 1.0))
+        hue_stats             = {"top3_concentration"    : float(top3_concentration),
+                                 "gap_ratio"             : float(gap_ratio),
+                                 "concentration_anomaly" : float(concentration_anomaly),
+                                 "gap_anomaly"           : float(gap_anomaly),
+                                }
+        logger.debug(f"Hue - concentration: {top3_concentration:.3f}, gap_ratio: {gap_ratio:.3f}")
+        return final_score, hue_stats

metrics/frequency_analyzer.py ADDED Viewed

	@@ -0,0 +1,260 @@

+# Dependencies
+import numpy as np
+from scipy import fft
+from utils.logger import get_logger
+from config.schemas import MetricResult
+from config.constants import MetricType
+from utils.image_processor import ImageProcessor
+from config.constants import FREQUENCY_ANALYSIS_PARAMS
+# Suppress NumPy warning
+np.seterr(divide  = 'ignore',
+          invalid = 'ignore',
+         )
+# Setup Logging
+logger = get_logger(__name__)
+class FrequencyAnalyzer:
+    """
+    FFT-based frequency domain analysis for AI detection
+    Core principle:
+    ---------------
+    - Real photos : Smooth frequency falloff (natural optical blur)
+    - AI images   : Unnatural frequency spikes or gaps (artifacts from generation)
+    Method:
+    -------
+    1. Convert to luminance
+    2. Compute 2D FFT
+    3. Compute radial frequency spectrum
+    4. Analyze high-frequency content and distribution patterns
+    """
+    def __init__(self):
+        self.image_processor = ImageProcessor()
+    def detect(self, image: np.ndarray) -> MetricResult:
+        """
+        Run frequency domain analysis
+        Arguments:
+        ----------
+            image { np.ndarray } : RGB image array (H, W, 3)
+        Returns:
+        --------
+            { MetricResult }     : Structured frequency-domain metric result containing:
+                                   - score      : Suspicion score [0.0, 1.0]
+                                   - confidence : Reliability of frequency evidence
+                                   - details    : FFT and spectrum diagnostics
+        """
+        try:
+            logger.debug(f"Running frequency analysis on image shape {image.shape}")
+            # Convert to luminance
+            luminance                   = self.image_processor.rgb_to_luminance(image = image)
+            # Normalize luminance (remove DC component for FFT stability)
+            normalized_luminance        = luminance - np.mean(luminance)
+            if not np.any(normalized_luminance):
+                logger.debug("FFT skipped: zero-variance luminance")
+                return MetricResult(metric_type = MetricType.FREQUENCY,
+                                    score       = FREQUENCY_ANALYSIS_PARAMS.NEUTRAL_SCORE,
+                                    confidence  = 0.0,
+                                    details     = {"reason": "zero_variance_luminance"}
+                                   )
+            # Compute FFT on normalized_luminance
+            fft_magnitude               = self._compute_fft_magnitude(luminance = normalized_luminance)
+            # Analyze radial frequency spectrum
+            radial_spectrum             = self._compute_radial_spectrum(fft_magnitude = fft_magnitude)
+            # Detect anomalies
+            anomaly_score, freq_details = self._analyze_frequency_anomalies(radial_spectrum = radial_spectrum)
+            logger.debug(f"Frequency analysis: Anomaly Score={anomaly_score:.3f}")
+            # Distance from neutral = stronger evidence = higher confidence
+            confidence                  = float(np.clip((abs(anomaly_score - FREQUENCY_ANALYSIS_PARAMS.NEUTRAL_SCORE) * 2.0), 0.0, 1.0))
+            return MetricResult(metric_type = MetricType.FREQUENCY,
+                                score       = float(anomaly_score),
+                                confidence  = confidence,
+                                details     = {"spectrum_bins" : int(len(radial_spectrum)),
+                                                **freq_details,
+                                              }
+                               )
+        except Exception as e:
+            logger.error(f"Frequency analysis failed: {e}")
+            # Return neutral score on error
+            return MetricResult(metric_type = MetricType.FREQUENCY,
+                                score       = FREQUENCY_ANALYSIS_PARAMS.NEUTRAL_SCORE,
+                                confidence  = 0.0,
+                                details     = {"error" : "frequency_analysis_failed"},
+                               )
+    def _compute_fft_magnitude(self, luminance: np.ndarray) -> np.ndarray:
+        """
+        Compute 2D FFT magnitude spectrum
+        Arguments:
+        ----------
+            luminance { np.ndarray } : Luminance channel (H, W)
+        Returns:
+        --------
+            { np.ndarray }           : FFT magnitude spectrum (centered)
+        """
+        # Compute 2D FFT
+        f             = fft.fft2(luminance)
+        # Shift zero frequency to center
+        f_shifted     = fft.fftshift(f)
+        # Compute magnitude spectrum
+        magnitude     = np.abs(f_shifted)
+        # Log scale for better visualization
+        magnitude_log = np.log1p(magnitude)
+        return magnitude_log
+    def _compute_radial_spectrum(self, fft_magnitude: np.ndarray) -> np.ndarray:
+        """
+        Compute radial average of frequency spectrum
+        Arguments:
+        ----------
+            fft_magnitude { np.ndarray } : FFT magnitude spectrum
+        Returns:
+        --------
+            { np.ndarray }               : Radial spectrum (1D array)
+        """
+        h, w                       = fft_magnitude.shape
+        center_y, center_x         = h // 2, w // 2
+        # Create coordinate grids
+        y, x                       = np.ogrid[:h, :w]
+        # Compute radial distances from center
+        r                          = np.sqrt((x - center_x)**2 + (y - center_y)**2).astype(int)
+        # Maximum radius
+        max_radius                 = min(center_x, center_y)
+        # Compute radial bins
+        bins                       = np.linspace(0, max_radius, FREQUENCY_ANALYSIS_PARAMS.BINS + 1)
+        radial_spectrum            = np.zeros(FREQUENCY_ANALYSIS_PARAMS.BINS)
+        # Average magnitude in each radial bin
+        for i in range(FREQUENCY_ANALYSIS_PARAMS.BINS):
+            mask = (r >= bins[i]) & (r < bins[i + 1])
+            if np.any(mask):
+                radial_spectrum[i] = np.mean(fft_magnitude[mask])
+        return radial_spectrum
+    def _analyze_frequency_anomalies(self, radial_spectrum: np.ndarray) -> tuple[float, dict]:
+        """
+        Analyze frequency spectrum for AI generation artifacts
+        Checks:
+        -------
+        1. High-frequency content (AI images often have unnatural HF energy)
+        2. Frequency distribution smoothness
+        3. Spectral slope deviation from natural images
+        Arguments:
+        ----------
+            radial_spectrum { np.ndarray } : Radial frequency spectrum
+        Returns:
+        --------
+                { tuple }                  : A tuple containing
+                                             - Suspicion score [0.0, 1.0], and
+                                             - frequency details in a dictionary
+        """
+        if (len(radial_spectrum) < FREQUENCY_ANALYSIS_PARAMS.MIN_SPECTRUM_SAMPLES):
+            return (FREQUENCY_ANALYSIS_PARAMS.NEUTRAL_SCORE,
+                    {"reason"        : "insufficient_frequency_samples",
+                     "spectrum_bins" : int(len(radial_spectrum)),
+                    }
+                   )
+        # Normalize spectrum
+        spectrum_norm    = radial_spectrum / (np.max(radial_spectrum) + 1e-10)
+        # High-frequency Energy Analysis
+        high_freq_start  = int(len(spectrum_norm) * FREQUENCY_ANALYSIS_PARAMS.HIGH_FREQ_THRESHOLD)
+        if (high_freq_start >= len(spectrum_norm) - 1):
+            return (FREQUENCY_ANALYSIS_PARAMS.NEUTRAL_SCORE,
+                    {"reason" : "invalid_frequency_partition"}
+                   )
+        high_freq_energy = np.mean(spectrum_norm[high_freq_start:])
+        low_freq_energy  = np.mean(spectrum_norm[:high_freq_start])
+        hf_ratio         = high_freq_energy / (low_freq_energy + 1e-10)
+        # Natural images : HF ratio typically 0.1-0.3
+        # AI images      : Can be higher (0.3-0.6) or lower (<0.1)
+        hf_anomaly       = 0.0
+        if (hf_ratio > FREQUENCY_ANALYSIS_PARAMS.HF_RATIO_UPPER):
+            hf_anomaly  = min(1.0, (hf_ratio - FREQUENCY_ANALYSIS_PARAMS.HF_RATIO_UPPER) * FREQUENCY_ANALYSIS_PARAMS.HF_UPPER_SCALE)
+        elif (hf_ratio < FREQUENCY_ANALYSIS_PARAMS.HF_RATIO_LOWER):
+            hf_anomaly  = min(1.0, (FREQUENCY_ANALYSIS_PARAMS.HF_RATIO_LOWER - hf_ratio) * FREQUENCY_ANALYSIS_PARAMS.HF_LOWER_SCALE)
+        # Spectral Smoothness Analysis
+        spectral_diff   = np.abs(np.diff(spectrum_norm))
+        roughness       = np.mean(spectral_diff)
+        roughness_score = np.clip(roughness * FREQUENCY_ANALYSIS_PARAMS.ROUGHNESS_SCALE, 0.0, 1.0)
+        # Power Law Deviation Analysis
+        x               = np.arange(1, len(spectrum_norm) + 1)
+        log_spectrum    = np.log(spectrum_norm + 1e-10)
+        log_x           = np.log(x)
+        # Linear fit in log-log space
+        coeffs          = np.polyfit(log_x, log_spectrum, 1)
+        fitted          = np.polyval(coeffs, log_x)
+        deviation       = np.mean(np.abs(log_spectrum - fitted))
+        deviation_score = np.clip(deviation * FREQUENCY_ANALYSIS_PARAMS.DEVIATION_SCALE, 0.0, 1.0)
+        # Combine scores
+        weights         = FREQUENCY_ANALYSIS_PARAMS.SUBMETRIC_WEIGHTS
+        combined_score  = (weights['hf_anomaly'] * hf_anomaly + weights['roughness'] * roughness_score + weights['deviation'] * deviation_score)
+        final_score     = float(np.clip(combined_score, 0.0, 1.0))
+        frequency_dict  = {"low_freq_energy"     : float(low_freq_energy),
+                           "high_freq_energy"    : float(high_freq_energy),
+                           "hf_ratio"            : float(hf_ratio),
+                           "hf_anomaly"          : float(hf_anomaly),
+                           "roughness"           : float(roughness),
+                           "roughness_score"     : float(roughness_score),
+                           "spectral_deviation"  : float(deviation),
+                           "deviation_score"     : float(deviation_score),
+                           "high_freq_start_bin" : int(high_freq_start),
+                          }
+        logger.debug(f"FFT scores - HF anomaly: {hf_anomaly:.3f}, roughness: {roughness_score:.3f}, deviation: {deviation_score:.3f}")
+        return (final_score, frequency_dict)

metrics/gradient_field_pca.py ADDED Viewed

	@@ -0,0 +1,236 @@

+# Dependencies
+import numpy as np
+from utils.logger import get_logger
+from config.schemas import MetricResult
+from config.constants import MetricType
+from utils.image_processor import ImageProcessor
+from config.constants import GRADIENT_FIELD_PCA_PARAMS
+# Suppress NumPy warning
+np.seterr(divide  = 'ignore',
+          invalid = 'ignore',
+         )
+# Setup Logging
+logger = get_logger(__name__)
+class GradientFieldPCADetector:
+    """
+    Detects AI-generated images by analyzing gradient field consistency. Real photos have consistent gradient
+    patterns shaped by physics (lighting, optics). Diffusion models struggle to maintain physically consistent
+    gradients due to denoising
+    Core principle:
+    ---------------
+    - Real photos : Gradients align with physical light sources (low-dimensional structure)
+    - AI images   : Gradients are inconsistent due to patch-based denoising (high-dimensional)
+    Method:
+    -------
+    1. Convert to luminance
+    2. Compute Sobel gradients (Gx, Gy)
+    3. Flatten to gradient vectors per pixel
+    4. Compute covariance matrix
+    5. PCA eigenvalue analysis
+    """
+    def __init__(self):
+        """
+        Initialize Gradient-Field PCA Detector class
+        """
+        self._range          = np.random.default_rng(seed = GRADIENT_FIELD_PCA_PARAMS.RANDOM_SEED)
+        self.image_processor = ImageProcessor()
+    def detect(self, image: np.ndarray) -> MetricResult:
+        """
+        Run gradient PCA detection
+        Arguments:
+        ----------
+            image { np.ndarray } : RGB image array (H, W, 3)
+        Returns:
+        --------
+            { MetricResult }     : Structured metric result containing:
+                                   - score      : Suspicion score [0.0, 1.0] (0 = natural, 1 = suspicious)
+                                   - confidence : Confidence of this metric's assessment [0.0, 1.0]
+                                   - details    : Explainability metadata for UI and reports
+        """
+        try:
+            logger.debug(f"Running gradient PCA detection on image shape {image.shape}")
+            # Convert image to luminance
+            luminance             = self.image_processor.rgb_to_luminance(image = image)
+            # Compute gradients
+            gx, gy                = self.image_processor.compute_gradients(luminance = luminance)
+            # Flatten and sample gradient vectors
+            gradient_vectors      = self._prepare_and_sample_gradients(gx = gx,
+                                                                       gy = gy,
+                                                                      )
+            # Perform PCA
+            eigenvalue_ratio      = self._compute_eigenvalue_ratio(gradient_vectors = gradient_vectors)
+            if ((len(gradient_vectors) < GRADIENT_FIELD_PCA_PARAMS.MIN_SAMPLES) or (eigenvalue_ratio == GRADIENT_FIELD_PCA_PARAMS.NEUTRAL_SCORE)):
+                return MetricResult(metric_type = MetricType.GRADIENT,
+                                    score       = GRADIENT_FIELD_PCA_PARAMS.NEUTRAL_SCORE,
+                                    confidence  = 0.0,
+                                    details     = {"reason"           : "insufficient_gradient_information",
+                                                   "original_pixels"  : int(gx.size),
+                                                   "filtered_vectors" : int(len(gradient_vectors)),
+                                                  },
+                                   )
+            # Convert to suspicion score
+            suspicion_score       = self._eigenvalue_to_suspicion(eigenvalue_ratio = eigenvalue_ratio)
+            # Confidence inverted relative to suspicion: High eigenvalue_ratio = natural, High suspicion_score = AI-like
+            confidence            = abs(eigenvalue_ratio - GRADIENT_FIELD_PCA_PARAMS.EIGENVALUE_RATIO_THRESHOLD)
+            normalized_confidence = np.clip((confidence / GRADIENT_FIELD_PCA_PARAMS.EIGENVALUE_RATIO_THRESHOLD), 0.0, 1.0)
+            logger.debug(f"Gradient PCA: eigenvalue_ratio={eigenvalue_ratio:.3f}, suspicion_score={suspicion_score:.3f}")
+            return MetricResult(metric_type = MetricType.GRADIENT,
+                                score       = float(suspicion_score),
+                                confidence  = float(normalized_confidence),
+                                details     = {"gradient_vectors_sampled" : len(gradient_vectors),
+                                               "eigenvalue_ratio"         : float(eigenvalue_ratio),
+                                               "threshold"                : GRADIENT_FIELD_PCA_PARAMS.EIGENVALUE_RATIO_THRESHOLD,
+                                               "original_pixels"          : int(gx.size),
+                                               "filtered_vectors"         : int(len(gradient_vectors)),
+                                              },
+                               )
+        except Exception as e:
+            logger.error(f"Gradient PCA detection failed: {e}")
+            # Return neutral score on error
+            return MetricResult(metric_type = MetricType.GRADIENT,
+                                score       = GRADIENT_FIELD_PCA_PARAMS.NEUTRAL_SCORE,
+                                confidence  = 0.0,
+                                details     = {"error" : "Gradient PCA detection failed"},
+                               )
+    def _prepare_and_sample_gradients(self, gx: np.ndarray, gy: np.ndarray) -> np.ndarray:
+        """
+        Flatten gradients into vectors and sample
+        Arguments:
+        ----------
+            gx { np.ndarray } : Gradient in x direction
+            gy { np.ndarray } : Gradient in y direction
+        Returns:
+        --------
+            { np.ndarray }    : Array of gradient vectors (N, 2) where N <= SAMPLE_SIZE
+        """
+        # Flatten to vectors
+        gx_flat                   = gx.flatten()
+        gy_flat                   = gy.flatten()
+        # Stack into (N, 2) array
+        gradient_vectors          = np.stack([gx_flat, gy_flat], axis = 1)
+        original_n                = len(gradient_vectors)
+        # Remove zero gradients (uniform regions)
+        magnitude                 = np.linalg.norm(gradient_vectors, axis = 1)
+        non_zero_mask             = (magnitude > GRADIENT_FIELD_PCA_PARAMS.MAGNITUDE_THRESHOLD)
+        finite_mask               = np.isfinite(gradient_vectors).all(axis = 1)
+        # Filtering Gradient Vector
+        filtered_gradient_vectors = gradient_vectors[non_zero_mask & finite_mask]
+        filtered_n                = len(filtered_gradient_vectors)
+        # Sample if too many points without replacement
+        if (len(filtered_gradient_vectors) > GRADIENT_FIELD_PCA_PARAMS.SAMPLE_SIZE):
+            indices                   = self._range.choice(a       = len(filtered_gradient_vectors),
+                                                           size    = GRADIENT_FIELD_PCA_PARAMS.SAMPLE_SIZE,
+                                                           replace = False,
+                                                          )
+            sampled_gradient_vectors  = filtered_gradient_vectors[indices]
+        else:
+            sampled_gradient_vectors  = filtered_gradient_vectors
+        sampled_n = len(sampled_gradient_vectors)
+        logger.debug(f"Gradient PCA sampling: original={original_n}, filtered={filtered_n}, sampled={sampled_n}")
+        return sampled_gradient_vectors
+    def _compute_eigenvalue_ratio(self, gradient_vectors: np.ndarray) -> float:
+        """
+        Compute ratio of first eigenvalue to total variance
+        -  Lower ratio  = more diffuse structure = suspicious
+        -  Higher ratio = concentrated structure = natural
+        Arguments:
+        ----------
+            gradient_vectors { np.ndarray } : Array of gradient vectors (N, 2)
+        Returns:
+        --------
+                     { float }              : Ratio of first eigenvalue to sum of eigenvalues
+        """
+        if (len(gradient_vectors) < GRADIENT_FIELD_PCA_PARAMS.MIN_SAMPLES):
+            logger.warning("Insufficient gradient samples for PCA")
+            return GRADIENT_FIELD_PCA_PARAMS.NEUTRAL_SCORE
+        # Compute covariance matrix
+        covariance       = np.cov(m    = gradient_vectors.T,
+                                  bias = True,
+                                 )
+        # Compute eigenvalues
+        eigenvalues      = np.linalg.eigvalsh(covariance)
+        # Sort in descending order
+        eigenvalues      = np.sort(eigenvalues)[::-1]
+        # Ratio of largest eigenvalue to sum
+        total_variance   = np.sum(eigenvalues)
+        if (total_variance < GRADIENT_FIELD_PCA_PARAMS.VARIANCE_THRESHOLD):
+            return GRADIENT_FIELD_PCA_PARAMS.NEUTRAL_SCORE
+        eigenvalue_ratio = eigenvalues[0] / total_variance
+        return float(eigenvalue_ratio)
+    def _eigenvalue_to_suspicion(self, eigenvalue_ratio: float) -> float:
+        """
+        Convert eigenvalue ratio to suspicion score
+        - Real photos : High ratio (0.85-0.95) -> Low suspicion
+        - AI images   : Low ratio (0.50-0.75) -> High suspicion
+        Arguments:
+        ----------
+            eigenvalue_ratio { float } : PCA eigenvalue ratio
+        Returns:
+        --------
+                    { float }          : Suspicion score [0.0, 1.0]
+        """
+        # Invert and scale: higher ratio = lower suspicion
+        # Real photos typically have ratio > 0.85 & AI images typically have ratio < 0.75
+        if (eigenvalue_ratio >= GRADIENT_FIELD_PCA_PARAMS.EIGENVALUE_RATIO_THRESHOLD):
+            # Strong gradient alignment = likely real
+            suspicion = max(0.0, (1.0 - eigenvalue_ratio) * 2.0)
+        else:
+            # Weak alignment = suspicious
+            suspicion = 1.0 - (eigenvalue_ratio / GRADIENT_FIELD_PCA_PARAMS.EIGENVALUE_RATIO_THRESHOLD)
+        return float(np.clip(suspicion, 0.0, 1.0))

metrics/noise_analyzer.py ADDED Viewed

	@@ -0,0 +1,335 @@

+# Dependencies
+import numpy as np
+from utils.logger import get_logger
+from config.schemas import MetricResult
+from config.constants import MetricType
+from utils.image_processor import ImageProcessor
+from config.constants import NOISE_ANALYSIS_PARAMS
+# Suppress NumPy warning
+np.seterr(divide  = 'ignore',
+          invalid = 'ignore',
+         )
+# Setup Logging
+logger = get_logger(__name__)
+class NoiseAnalyzer:
+    """
+    Noise pattern analysis for AI detection
+    Core principle:
+    ---------------
+    - Real photos : Sensor noise follows Poisson distribution (shot noise) + Gaussian (read noise)
+    - AI images   : Too uniform, artificially smooth, or completely missing noise
+    Method:
+    -------
+    1. Extract local patches
+    2. Estimate noise variance in each patch
+    3. Analyze noise consistency and distribution
+    4. Check for unnatural uniformity
+    """
+    def __init__(self):
+        self.image_processor = ImageProcessor()
+    def detect(self, image: np.ndarray) -> MetricResult:
+        """
+        Run noise pattern analysis
+        Arguments:
+        ----------
+            image { np.ndarray } : RGB image array (H, W, 3)
+        Returns:
+        --------
+            { MetricResult }     : Structured Noise-domain metric result containing:
+                                   - score      : Suspicion score [0.0, 1.0]
+                                   - confidence : Reliability of noise evidence
+                                   - details    : Noise related diagnostics
+        """
+        try:
+            logger.debug(f"Running noise analysis on image shape {image.shape}")
+            # Convert to luminance
+            luminance                  = self.image_processor.rgb_to_luminance(image = image)
+            # Extract patches
+            patches                    = self._extract_patches(luminance = luminance)
+            if (len(patches) == 0):
+                logger.warning("No patches extracted for noise analysis")
+                return MetricResult(metric_type = MetricType.NOISE,
+                                    score       = NOISE_ANALYSIS_PARAMS.NEUTRAL_SCORE,
+                                    confidence  = 0.0,
+                                    details     = {"reason": "no_patches_extracted"},
+                                   )
+            # Estimate noise in each patch
+            noise_estimates, mad_values, laplacian_energy = self._estimate_noise_per_patch(patches = patches)
+            # Filter Noise Estimates, MAD and Laplacian Energy for finite values only
+            filtered_mask                                 = np.isfinite(noise_estimates)
+            filtered_noise_estimates                      = noise_estimates[filtered_mask]
+            filtered_mad                                  = mad_values[filtered_mask]
+            filtered_laplacian_energy                     = laplacian_energy[filtered_mask]
+            if (len(filtered_noise_estimates) < NOISE_ANALYSIS_PARAMS.MIN_ESTIMATES):
+                logger.debug("Insufficient valid noise estimates after filtering")
+                return MetricResult(metric_type = MetricType.NOISE,
+                                    score       = NOISE_ANALYSIS_PARAMS.NEUTRAL_SCORE,
+                                    confidence  = 0.0,
+                                    details     = {"reason"        : "insufficient_noise_estimates",
+                                                   "patches_total" : int(len(patches)),
+                                                   "patches_valid" : int(len(filtered_noise_estimates)),
+                                                  },
+                                   )
+            logger.debug(f"Noise patches: total={len(patches)}, valid={len(filtered_noise_estimates)}")
+            # Analyze noise distribution
+            noise_score, noise_details                    = self._analyze_noise_distribution(noise_estimates  = filtered_noise_estimates,
+                                                                                             mad_values       = filtered_mad,
+                                                                                             laplacian_energy = filtered_laplacian_energy,
+                                                                                            )
+            # Confidence: distance from neutral
+            confidence                                    = float(np.clip((abs(noise_score - NOISE_ANALYSIS_PARAMS.NEUTRAL_SCORE) * 2.0), 0.0, 1.0))
+            logger.debug(f"Noise analysis: score={noise_score:.3f}, patches={len(patches)}, valid={len(filtered_noise_estimates)}")
+            return MetricResult(metric_type = MetricType.NOISE,
+                                score       = float(noise_score),
+                                confidence  = confidence,
+                                details     = {"patches_total" : int(len(patches)),
+                                               "patches_valid" : int(len(filtered_noise_estimates)),
+                                               **noise_details,
+                                              },
+                               )
+        except Exception as e:
+            logger.error(f"Noise analysis failed: {e}")
+            # Return neutral score on error
+            return MetricResult(metric_type = MetricType.NOISE,
+                                score       = NOISE_ANALYSIS_PARAMS.NEUTRAL_SCORE,
+                                confidence  = 0.0,
+                                details     = {"error": "noise_analysis_failed"},
+                               )
+    def _extract_patches(self, luminance: np.ndarray) -> np.ndarray:
+        """
+        Extract patches from image for local noise estimation
+        Arguments:
+        ----------
+            luminance { np.ndarray } : Luminance channel (H, W)
+        Returns:
+        --------
+            { np.ndarray }           : Array of patches
+        """
+        patches = self.image_processor.extract_patches(image       = luminance,
+                                                       patch_size  = NOISE_ANALYSIS_PARAMS.PATCH_SIZE,
+                                                       stride      = NOISE_ANALYSIS_PARAMS.STRIDE,
+                                                       max_patches = NOISE_ANALYSIS_PARAMS.SAMPLES,
+                                                      )
+        return patches
+    def _estimate_noise_per_patch(self, patches: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+        """
+        Estimate noise variance in each patch using median absolute deviation
+        Uses Median Absolute Deviation (MAD) which is robust to edges/textures
+        Arguments:
+        ----------
+            patches { np.ndarray } : Array of image patches (N, patch_size, patch_size)
+        Returns:
+        --------
+                 { tuple }         : A tuple containing
+                                     - Array of noise estimates per patch
+                                     - Array of MAD values
+                                     - Array of Laplacian Energy Values
+        """
+        noise_estimates          = list()
+        mad_values               = list()
+        laplacian_energy_values  = list()
+        for patch in patches:
+            # Skip patches with too much structure (edges, textures)
+            variance = np.var(patch)
+            if (variance < NOISE_ANALYSIS_PARAMS.VARIANCE_LOW_THRESHOLD):
+                # Too uniform, skip
+                continue
+            if (variance > NOISE_ANALYSIS_PARAMS.VARIANCE_HIGH_THRESHOLD):
+                # Too much structure, skip
+                continue
+            # Use Median Absolute Deviation for robust noise estimation
+            laplacian   = self._apply_laplacian(patch = patch)
+            mad         = np.median(np.abs(laplacian - np.median(laplacian)))
+            # Convert MAD to noise standard deviation estimate: For Gaussian noise: σ ≈ 1.4826 × MAD
+            noise_std   = NOISE_ANALYSIS_PARAMS.MAD_TO_STD_FACTOR * mad
+            # Calculate Laplacian Energy
+            lap_energy  = float(np.mean(laplacian ** 2))
+            # Append corresponding values to their storages
+            mad_values.append(mad)
+            noise_estimates.append(noise_std)
+            laplacian_energy_values.append(lap_energy)
+        return np.array(noise_estimates), np.array(mad_values), np.array(laplacian_energy_values)
+    def _apply_laplacian(self, patch: np.ndarray) -> np.ndarray:
+        """
+        Apply Laplacian filter to isolate high-frequency noise
+        Arguments:
+        ----------
+            patch { np.ndarray } : Image patch
+        Returns:
+        --------
+            { np.ndarray }       : Laplacian-filtered patch
+        """
+        # Simple 3x3 Laplacian kernel
+        kernel = np.array([[0,  1, 0],
+                          [1, -4, 1],
+                          [0,  1, 0]],
+                         )
+        # Pad patch
+        padded = np.pad(patch, 1, mode = 'reflect')
+        # Apply convolution
+        h, w   = patch.shape
+        result = np.zeros_like(patch)
+        for i in range(h):
+            for j in range(w):
+                region        = padded[i:i+3, j:j+3]
+                result[i, j]  = np.sum(region * kernel)
+        return result
+    def _analyze_noise_distribution(self, noise_estimates: np.ndarray, mad_values: np.ndarray, laplacian_energy: np.ndarray,) -> tuple[float, dict]:
+        """
+        Analyze noise distribution for anomalies
+        Checks:
+        -------
+        1. Coefficient of variation (consistency)
+        2. Overall noise level (too low = suspicious)
+        3. Distribution shape (too uniform = suspicious)
+        Arguments:
+        ----------
+            noise_estimates  { np.ndarray } : Array of noise standard deviations
+            mad_values       { np.ndarray } : Array of MAD values
+            laplacian_energy { np.ndarray } : Array of Laplacian Energy Values
+        Returns:
+        --------
+                    { tuple }              : A tuple containing:
+                                             - Suspicion score [0.0, 1.0]
+                                             - Noise Distribution detailed diagnostics
+        """
+        if (len(noise_estimates) < NOISE_ANALYSIS_PARAMS.MIN_ESTIMATES):
+            return (NOISE_ANALYSIS_PARAMS.NEUTRAL_SCORE,
+                    {"reason": "insufficient_noise_samples"},
+                   )
+        # Remove outliers (keep middle 80%)
+        q10                 = np.percentile(noise_estimates, NOISE_ANALYSIS_PARAMS.OUTLIER_PERCENTILE_LOW)
+        q90                 = np.percentile(noise_estimates, NOISE_ANALYSIS_PARAMS.OUTLIER_PERCENTILE_HIGH)
+        filtered            = noise_estimates[(noise_estimates >= q10) & (noise_estimates <= q90)]
+        if (len(filtered) < NOISE_ANALYSIS_PARAMS.MIN_FILTERED_SAMPLES):
+            return (NOISE_ANALYSIS_PARAMS.NEUTRAL_SCORE,
+                    {"reason": "insufficient_filtered_samples"},
+                   )
+        mean_noise          = np.mean(filtered)
+        std_noise           = np.std(filtered)
+        # Coefficient of Variation (CV) Analysis
+        cv                  = std_noise / (mean_noise + 1e-10)
+        cv_anomaly          = 0.0
+        if (cv < NOISE_ANALYSIS_PARAMS.CV_UNIFORM_THRESHOLD):
+            # Too uniform
+            cv_anomaly = (NOISE_ANALYSIS_PARAMS.CV_UNIFORM_THRESHOLD - cv) * NOISE_ANALYSIS_PARAMS.CV_UNIFORM_SCALE
+        elif (cv > NOISE_ANALYSIS_PARAMS.CV_VARIABLE_THRESHOLD):
+            # Too variable
+            cv_anomaly = min(1.0, (cv - NOISE_ANALYSIS_PARAMS.CV_VARIABLE_THRESHOLD) * NOISE_ANALYSIS_PARAMS.CV_VARIABLE_SCALE)
+        # Overall noise level Analysis
+        noise_level_anomaly = 0.0
+        if (mean_noise < NOISE_ANALYSIS_PARAMS.LEVEL_CLEAN_THRESHOLD):
+            # Too clean
+            noise_level_anomaly = (NOISE_ANALYSIS_PARAMS.LEVEL_CLEAN_THRESHOLD - mean_noise) / NOISE_ANALYSIS_PARAMS.LEVEL_CLEAN_THRESHOLD
+        elif (mean_noise < NOISE_ANALYSIS_PARAMS.LEVEL_LOW_THRESHOLD):
+            # Slightly low
+            noise_level_anomaly = (NOISE_ANALYSIS_PARAMS.LEVEL_LOW_THRESHOLD - mean_noise) / NOISE_ANALYSIS_PARAMS.LEVEL_LOW_THRESHOLD * 0.5
+        # Distribution shape Analysis
+        q25                 = np.percentile(filtered, NOISE_ANALYSIS_PARAMS.IQR_PERCENTILE_LOW)
+        q75                 = np.percentile(filtered, NOISE_ANALYSIS_PARAMS.IQR_PERCENTILE_HIGH)
+        iqr                 = q75 - q25
+        iqr_ratio           = iqr / (mean_noise + 1e-10)
+        iqr_anomaly         = 0.0
+        if (iqr_ratio < NOISE_ANALYSIS_PARAMS.IQR_THRESHOLD):
+            iqr_anomaly = (NOISE_ANALYSIS_PARAMS.IQR_THRESHOLD - iqr_ratio) * NOISE_ANALYSIS_PARAMS.IQR_SCALE
+        # Clip sub-anomalies for safety
+        cv_anomaly          = np.clip(cv_anomaly, 0.0, 1.0)
+        noise_level_anomaly = np.clip(noise_level_anomaly, 0.0, 1.0)
+        iqr_anomaly         = np.clip(iqr_anomaly, 0.0, 1.0)
+        # Combine scores
+        weights             = NOISE_ANALYSIS_PARAMS.SUBMETRIC_WEIGHTS
+        combined_score      = (weights['cv_anomaly'] * cv_anomaly + weights['noise_level_anomaly'] * noise_level_anomaly + weights['iqr_anomaly'] * iqr_anomaly)
+        final_score         = float(np.clip(combined_score, 0.0, 1.0))
+        # Calculate Forensic Stats
+        mad_mean            = float(np.mean(mad_values)) if len(mad_values) else 0.0
+        laplacian_energy_mu = float(np.mean(laplacian_energy)) if len(laplacian_energy) else 0.0
+        noise_details_dict  = {"mean_noise"          : float(mean_noise),
+                               "std_noise"           : float(std_noise),
+                               "cv"                  : float(cv),
+                               "cv_anomaly"          : float(cv_anomaly),
+                               "noise_level_anomaly" : float(noise_level_anomaly),
+                               "iqr_ratio"           : float(iqr_ratio),
+                               "iqr_anomaly"         : float(iqr_anomaly),
+                               "mad_mean"            : mad_mean,
+                               "laplacian_energy"    : laplacian_energy_mu,
+                              }
+        logger.debug(f"Noise scores - CV: {cv:.3f}, mean: {mean_noise:.3f}, IQR ratio: {iqr_ratio:.3f}")
+        return final_score, noise_details_dict

metrics/texture_analyzer.py ADDED Viewed

	@@ -0,0 +1,308 @@

+# Dependencies
+import numpy as np
+from scipy.stats import entropy
+from utils.logger import get_logger
+from config.schemas import MetricResult
+from config.constants import MetricType
+from utils.image_processor import ImageProcessor
+from config.constants import TEXTURE_ANALYSIS_PARAMS
+# Suppress NumPy warning
+np.seterr(divide  = 'ignore',
+          invalid = 'ignore',
+         )
+# Setup Logging
+logger = get_logger(__name__)
+class TextureAnalyzer:
+    """
+    Statistical texture analysis for AI detection
+    Core principle:
+    ---------------
+    - Real photos : Natural texture variation (random but structured)
+    - AI images   : Either too smooth or repetitive patterns
+    Method:
+    -------
+    1. Extract local patches
+    2. Compute texture features (contrast, entropy)
+    3. Analyze texture consistency and distribution
+    4. Detect unnaturally smooth regions
+    """
+    def __init__(self):
+        """
+        Initialize TextureAnalyzer Class
+        """
+        self.patch_size      = TEXTURE_ANALYSIS_PARAMS.PATCH_SIZE
+        self.n_patches       = TEXTURE_ANALYSIS_PARAMS.N_PATCHES
+        self.image_processor = ImageProcessor()
+        self._rng            = np.random.default_rng(seed = TEXTURE_ANALYSIS_PARAMS.RANDOM_SEED)
+    def detect(self, image: np.ndarray) -> MetricResult:
+        """
+        Run texture analysis
+        Arguments:
+        ----------
+            image { np.ndarray } : RGB image array (H, W, 3)
+        Returns:
+        --------
+            { MetricResult }     : Structured Texture-domain metric result containing:
+                                   - score      : Suspicion score [0.0, 1.0]
+                                   - confidence : Reliability of texture evidence
+                                   - details    : Texture forensics and statistics
+        """
+        try:
+            logger.debug(f"Running texture analysis on image shape {image.shape}")
+            # Convert to luminance
+            luminance                          = self.image_processor.rgb_to_luminance(image = image)
+            # Extract patches
+            patches                            = self._extract_patches(luminance = luminance)
+            if (len(patches) == 0):
+                logger.warning("No patches extracted for texture analysis")
+                return MetricResult(metric_type = MetricType.TEXTURE,
+                                    score       = TEXTURE_ANALYSIS_PARAMS.NEUTRAL_SCORE,
+                                    confidence  = 0.0,
+                                    details     = {"reason": "no_patches_extracted"},
+                                   )
+            # Compute texture features
+            texture_features, texture_metadata = self._compute_texture_features(patches = patches)
+            # Analyze for anomalies
+            texture_score, texture_details     = self._analyze_texture_anomalies(features = texture_features,
+                                                                                 metadata = texture_metadata,
+                                                                                )
+            # Calculate Confidence
+            confidence                         = float(np.clip((abs(texture_score - TEXTURE_ANALYSIS_PARAMS.NEUTRAL_SCORE) * 2.0), 0.0, 1.0))
+            logger.debug(f"Texture analysis: Texture Score={texture_score:.3f}, patches={len(patches)}")
+            return MetricResult(metric_type = MetricType.TEXTURE,
+                                score       = float(texture_score),
+                                confidence  = confidence,
+                                details     = {"patches_total" : int(len(patches)),
+                                               **texture_metadata,
+                                               **texture_details,
+                                              },
+                               )
+        except Exception as e:
+            logger.error(f"Texture analysis failed: {e}")
+            # Return neutral score on error
+            return MetricResult(metric_type = MetricType.TEXTURE,
+                                score       = TEXTURE_ANALYSIS_PARAMS.NEUTRAL_SCORE,
+                                confidence  = 0.0,
+                                details     = {"error": "texture_analysis_failed"},
+                               )
+    def _extract_patches(self, luminance: np.ndarray) -> np.ndarray:
+        """
+        Extract random patches from image
+        """
+        h, w = luminance.shape
+        if ((h < self.patch_size) or (w < self.patch_size)):
+            logger.warning(f"Image too small for patch size {self.patch_size}")
+            return np.array([])
+        patches = list()
+        for _ in range(self.n_patches):
+            y     = self._rng.integers(0, h - self.patch_size)
+            x     = self._rng.integers(0, w - self.patch_size)
+            patch = luminance[y:y+self.patch_size, x:x+self.patch_size]
+            patches.append(patch)
+        return np.array(patches)
+    def _compute_texture_features(self, patches: np.ndarray) -> tuple[dict, dict]:
+        """
+        Compute texture features for each patch
+        Features:
+        ---------
+        1. Local contrast (standard deviation)
+        2. Entropy (randomness)
+        3. Smoothness (inverse of variance)
+        4. Edge density
+        Arguments:
+        ----------
+            patches { np.ndarray } : Array of patches
+        Returns:
+        --------
+            { tuple }              : A tuple containing
+                                     - A dictionary of feature arrays
+                                     - A dictionary of texture analysis metadata
+        """
+        contrasts       = list()
+        entropies       = list()
+        smoothnesses    = list()
+        edge_densities  = list()
+        uniform_skipped = 0
+        for patch in patches:
+            pmin = patch.min()
+            pmax = patch.max()
+            if ((pmax - pmin < 1e-6)):
+                # skip fully uniform patch entirely
+                uniform_skipped += 1
+                continue
+            # Contrast (std deviation)
+            contrast = np.std(patch)
+            contrasts.append(contrast)
+            # Entropy (using histogram)
+            hist, _  = np.histogram(patch,
+                                    bins  = TEXTURE_ANALYSIS_PARAMS.HISTOGRAM_BINS,
+                                    range = TEXTURE_ANALYSIS_PARAMS.HISTOGRAM_RANGE,
+                                   )
+            hist     = hist / (np.sum(hist) + 1e-10)
+            ent      = entropy(hist + 1e-10)
+            entropies.append(ent)
+            # Smoothness (inverse of variance, scaled)
+            variance   = np.var(patch)
+            smoothness = 1.0 / (1.0 + variance)
+            smoothnesses.append(smoothness)
+            # Edge density (using Sobel)
+            gx, gy       = self.image_processor.compute_gradients(luminance = patch)
+            gradient_mag = np.sqrt(gx**2 + gy**2)
+            edge_density = np.mean(gradient_mag > TEXTURE_ANALYSIS_PARAMS.EDGE_THRESHOLD)
+            edge_densities.append(edge_density)
+        # Construct results in proper format
+        features = {"contrast"     : np.array(contrasts),
+                    "entropy"      : np.array(entropies),
+                    "smoothness"   : np.array(smoothnesses),
+                    "edge_density" : np.array(edge_densities),
+                   }
+        metadata = {"patches_used"            : int(len(contrasts)),
+                    "uniform_patches_skipped" : int(uniform_skipped),
+                   }
+        return features, metadata
+    def _analyze_texture_anomalies(self, features: dict, metadata: dict) -> tuple[float, dict]:
+        """
+        Analyze texture features for AI generation artifacts
+        Checks:
+        -------
+        1. Excessive smoothness (too many overly smooth patches)
+        2. Entropy distribution (too uniform = suspicious)
+        3. Contrast consistency
+        Arguments:
+        ----------
+            features { dict } : Dictionary of texture features
+            metadata { dict } : Dictionary of texture analysis metadata
+        Returns:
+        --------
+            { tuple }         : A tuple containing:
+                                - Suspicion score [0.0, 1.0]
+                                - Texture statistics
+        """
+        contrast     = features['contrast']
+        entropy_vals = features['entropy']
+        smoothness   = features['smoothness']
+        edge_density = features['edge_density']
+        if ((len(contrast) == 0) or (len(entropy_vals) == 0) or (len(smoothness) == 0) or (len(edge_density) == 0)):
+            logger.debug("All texture features filtered out; returning neutral score")
+            return (TEXTURE_ANALYSIS_PARAMS.NEUTRAL_SCORE,
+                    {"reason": "all_texture_features_filtered"},
+                   )
+        # Early exit: all patches nearly uniform
+        if (np.all(contrast < 1e-6)):
+            logger.debug("All texture patches near-uniform; returning neutral score")
+            return (TEXTURE_ANALYSIS_PARAMS.NEUTRAL_SCORE,
+                    {"reason": "all_patches_near_uniform"},
+                   )
+        # Smoothness Analysis
+        smooth_ratio       = np.mean(smoothness > TEXTURE_ANALYSIS_PARAMS.SMOOTHNESS_THRESHOLD)
+        smoothness_anomaly = 0.0
+        if (smooth_ratio > TEXTURE_ANALYSIS_PARAMS.SMOOTH_RATIO_THRESHOLD):
+            # More than 40% very smooth patches
+            smoothness_anomaly = min(1.0, (smooth_ratio - TEXTURE_ANALYSIS_PARAMS.SMOOTH_RATIO_THRESHOLD) * TEXTURE_ANALYSIS_PARAMS.SMOOTH_RATIO_SCALE)
+        # Entropy distribution Analysis
+        entropy_cv      = np.std(entropy_vals) / (np.mean(entropy_vals) + 1e-10)
+        entropy_anomaly = 0.0
+        if (entropy_cv < TEXTURE_ANALYSIS_PARAMS.ENTROPY_CV_THRESHOLD):
+            # Too uniform
+            entropy_anomaly = (TEXTURE_ANALYSIS_PARAMS.ENTROPY_CV_THRESHOLD - entropy_cv) * TEXTURE_ANALYSIS_PARAMS.ENTROPY_SCALE
+        # Contrast distribution Analysis
+        contrast_cv      = np.std(contrast) / (np.mean(contrast) + 1e-10)
+        contrast_anomaly = 0.0
+        if (contrast_cv < TEXTURE_ANALYSIS_PARAMS.CONTRAST_CV_LOW):
+            # Too uniform
+            contrast_anomaly = (TEXTURE_ANALYSIS_PARAMS.CONTRAST_CV_LOW - contrast_cv) * TEXTURE_ANALYSIS_PARAMS.CONTRAST_LOW_SCALE
+        elif (contrast_cv > TEXTURE_ANALYSIS_PARAMS.CONTRAST_CV_HIGH):
+            # Too variable (suspicious)
+            contrast_anomaly = min(1.0, (contrast_cv - TEXTURE_ANALYSIS_PARAMS.CONTRAST_CV_HIGH) * TEXTURE_ANALYSIS_PARAMS.CONTRAST_HIGH_SCALE)
+        # Edge density consistency Analysis
+        edge_cv      = np.std(edge_density) / (np.mean(edge_density) + 1e-10)
+        edge_anomaly = 0.0
+        if (edge_cv < TEXTURE_ANALYSIS_PARAMS.EDGE_CV_THRESHOLD):
+            edge_anomaly = (TEXTURE_ANALYSIS_PARAMS.EDGE_CV_THRESHOLD - edge_cv) * TEXTURE_ANALYSIS_PARAMS.EDGE_SCALE
+        # Clipping Sub-anomalies
+        smoothness_anomaly = np.clip(smoothness_anomaly, 0.0, 1.0)
+        entropy_anomaly    = np.clip(entropy_anomaly, 0.0, 1.0)
+        contrast_anomaly   = np.clip(contrast_anomaly, 0.0, 1.0)
+        edge_anomaly       = np.clip(edge_anomaly, 0.0, 1.0)
+        # Combine scores
+        weights            = TEXTURE_ANALYSIS_PARAMS.SUBMETRIC_WEIGHTS
+        texture_score      = (weights['smoothness_anomaly'] * smoothness_anomaly + weights['entropy_anomaly'] * entropy_anomaly + weights['contrast_anomaly'] * contrast_anomaly + weights['edge_anomaly'] * edge_anomaly)
+        final_score        = float(np.clip(texture_score, 0.0, 1.0))
+        detailed_stats     = {"smooth_ratio"      : float(smooth_ratio),
+                              "entropy_mean"      : float(np.mean(entropy_vals)),
+                              "entropy_cv"        : float(entropy_cv),
+                              "contrast_mean"     : float(np.mean(contrast)),
+                              "contrast_cv"       : float(contrast_cv),
+                              "edge_density_mean" : float(np.mean(edge_density)),
+                              "edge_cv"           : float(edge_cv),
+                             }
+        logger.debug(f"Texture scores - smoothness: {smoothness_anomaly:.3f}, entropy: {entropy_anomaly:.3f}, contrast: {contrast_anomaly:.3f}, edge: {edge_anomaly:.3f}")
+        return final_score, detailed_stats

notebooks/.ipynb_checkpoints/Unified_Dataset_Builder-checkpoint.ipynb ADDED Viewed

	@@ -0,0 +1,725 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "e2d654dc-c431-420e-810a-a985de9172fd",
+   "metadata": {},
+   "source": [
+    "# Unified AI vs Real Image Dataset Builder\n",
+    "\n",
+    "This notebook builds a **clean, labeled, unified dataset** for evaluating\n",
+    "AI image detection systems.\n",
+    "\n",
+    "### Supported sources\n",
+    "- HuggingFace datasets (DiffusionDB, COCO, OpenImages)\n",
+    "- Kaggle public datasets (Midjourney, AI vs Real)\n",
+    "- Unified output format:\n",
+    "  - Normalized PNG images\n",
+    "  - Size-limited (≤1024px)\n",
+    "  - Central metadata CSV\n",
+    "\n",
+    "### Output Structure\n",
+    "\n",
+    "```bash\n",
+    "tests/dataset/\n",
+    "├── ai/\n",
+    "├── real/\n",
+    "├── raw_downloads/\n",
+    "├── metadata/dataset_index.csv\n",
+    "```\n",
+    "\n",
+    "> ⚠️ All datasets used are **public & legally accessible**.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e8b43897-9ce5-4f20-8798-7b3aebdf1b36",
+   "metadata": {},
+   "source": [
+    "## Required Dependencies\n",
+    "\n",
+    "Before running, ensure:\n",
+    "\n",
+    "```bash\n",
+    "pip install datasets pillow tqdm kaggle pycocotools\n",
+    "```\n",
+    "\n",
+    "Also configure Kaggle:\n",
+    "\n",
+    "```bash\n",
+    "~/.kaggle/kaggle.json\n",
+    "```\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "00b9f50c-6158-47e9-89cf-5c279d9c63bb",
+   "metadata": {},
+   "source": [
+    "## Imports & Config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "9147ace7-162f-4b0d-bd6d-0d92b9bad61e",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# ===============================\n",
+    "# Imports & Global Configuration\n",
+    "# ===============================\n",
+    "import os\n",
+    "import csv\n",
+    "import uuid\n",
+    "import subprocess\n",
+    "from PIL import Image\n",
+    "from tqdm import tqdm\n",
+    "from pathlib import Path\n",
+    "from datasets import load_dataset\n",
+    "\n",
+    "\n",
+    "# ===============================\n",
+    "# Directory Configuration\n",
+    "# ===============================\n",
+    "BASE_DIR       = Path(\"tests/dataset\")\n",
+    "AI_DIR         = BASE_DIR / \"ai\"\n",
+    "REAL_DIR       = BASE_DIR / \"real\"\n",
+    "RAW_DIR        = BASE_DIR / \"raw_downloads\"\n",
+    "META_DIR       = BASE_DIR / \"metadata\"\n",
+    "\n",
+    "META_FILE      = META_DIR / \"dataset_index.csv\"\n",
+    "\n",
+    "TARGET_PER_DS  = 1000\n",
+    "IMAGE_SIZE_MAX = 1024\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "329d1c09-0e9c-4bc2-8935-bd50941611c8",
+   "metadata": {},
+   "source": [
+    "## Utility Functions\n",
+    "\n",
+    "These helpers:\n",
+    "- Ensure directory structure\n",
+    "- Normalize images (RGB, resize, PNG)\n",
+    "- Write metadata rows safely"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "b352e981-e456-40cf-be84-a1eb0f01ea7c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def ensure_dirs():\n",
+    "    for d in [AI_DIR, REAL_DIR, RAW_DIR, META_DIR]:\n",
+    "        d.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "\n",
+    "def normalize_and_save(image: Image.Image, path: Path):\n",
+    "    \"\"\"\n",
+    "    Normalize image to RGB PNG and limit size\n",
+    "    \"\"\"\n",
+    "    image = image.convert(\"RGB\")\n",
+    "    image.thumbnail((IMAGE_SIZE_MAX, IMAGE_SIZE_MAX))\n",
+    "    image.save(path, \n",
+    "               format   = \"PNG\", \n",
+    "               optimize = True,\n",
+    "              )\n",
+    "\n",
+    "\n",
+    "def write_meta(writer, **row):\n",
+    "    writer.writerow(row)\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "34c3bc3b-6bb6-414d-b3fe-85bc43d832c7",
+   "metadata": {},
+   "source": [
+    "## Dataset Registry\n",
+    "\n",
+    "Defines **where data comes from** and **how it is labeled**."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "74106705-e2d6-411c-8193-8e02f5ee0fdc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# HuggingFace datasets (safe & stable)\n",
+    "AI_DATASETS     = [{\"name\"      : \"diffusiondb\",\n",
+    "                    \"hf_id\"     : \"poloclub/diffusiondb\",\n",
+    "                    \"config\"    : \"2m_first_1k\",\n",
+    "                    \"split\"     : \"train\",\n",
+    "                    \"image_key\" : \"image\",\n",
+    "                    \"label\"     : \"ai\",\n",
+    "                    \"family\"    : \"diffusion\"\n",
+    "                  }]\n",
+    "        \n",
+    "\n",
+    "REAL_DATASETS   = [{\"name\"      : \"mscoco_2017\",\n",
+    "                    \"hf_id\"     : \"shunk031/MSCOCO\",\n",
+    "                    \"hf_kwargs\" : {\"year\": 2017,\n",
+    "                                   \"coco_task\": \"instances\"\n",
+    "                                  },\n",
+    "                    \"split\"     : \"train\",\n",
+    "                    \"image_key\" : \"image\",\n",
+    "                    \"label\"     : \"real\",\n",
+    "                    \"family\"    : \"photographic\",\n",
+    "                    \"streaming\" : False\n",
+    "                  }]\n",
+    "\n",
+    "# Kaggle datasets (public, non-scraped)\n",
+    "KAGGLE_DATASETS = [{\"name\"      : \"ai_vs_real\",\n",
+    "                    \"kaggle_id\" : \"tristanzhang32/ai-generated-images-vs-real-images\",\n",
+    "                    \"label\"     : \"ai\",\n",
+    "                    \"family\"    : \"mixed\"\n",
+    "                   },\n",
+    "                   {\"name\"      : \"midjourney\",\n",
+    "                    \"kaggle_id\" : \"cyanex1702/midjourney-imagesprompt\",\n",
+    "                    \"label\"     : \"ai\",\n",
+    "                    \"family\"    : \"diffusion\"\n",
+    "                   }\n",
+    "                  ]\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1f4c6f3b-2a35-415b-9a35-ee52fd3d85be",
+   "metadata": {},
+   "source": [
+    "## HuggingFace Dataset Processor\n",
+    "\n",
+    "Loads datasets via `datasets.load_dataset()` and saves images in unified format."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "a9ea5276-65bb-49f5-a656-c00ceeb1f4d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def process_hf_dataset(ds_cfg, root_dir, writer):\n",
+    "    print(f\"\\n▶ Loading HF dataset: {ds_cfg['name']}\")\n",
+    "\n",
+    "    ds      = load_dataset(ds_cfg[\"hf_id\"],\n",
+    "                           **ds_cfg.get(\"hf_kwargs\", {}),\n",
+    "                           name      = ds_cfg.get(\"config\"),\n",
+    "                           split     = ds_cfg[\"split\"],\n",
+    "                           streaming = ds_cfg.get(\"streaming\", False),\n",
+    "                          )\n",
+    "\n",
+    "    out_dir = root_dir / ds_cfg[\"name\"]\n",
+    "    out_dir.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "    count   = 0\n",
+    "    \n",
+    "    for row in tqdm(ds):\n",
+    "        if (count >= TARGET_PER_DS):\n",
+    "            break\n",
+    "\n",
+    "        try:\n",
+    "            image = row.get(ds_cfg[\"image_key\"])\n",
+    "            if not isinstance(image, Image.Image):\n",
+    "                continue\n",
+    "\n",
+    "            uid   = uuid.uuid4().hex\n",
+    "            path  = out_dir / f\"{uid}.png\"\n",
+    "\n",
+    "            normalize_and_save(image, path)\n",
+    "\n",
+    "            write_meta(writer,\n",
+    "                       id       = uid,\n",
+    "                       filename = str(path),\n",
+    "                       label    = ds_cfg[\"label\"],\n",
+    "                       family   = ds_cfg[\"family\"],\n",
+    "                       source   = ds_cfg[\"name\"],\n",
+    "                      )\n",
+    "\n",
+    "            count += 1\n",
+    "\n",
+    "        except Exception:\n",
+    "            continue\n",
+    "            "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fb6d23a0-fa98-4351-9e4e-99265a51e8ef",
+   "metadata": {},
+   "source": [
+    "## Kaggle Dataset Downloader\n",
+    "\n",
+    "Requires:\n",
+    "- Kaggle account\n",
+    "- ~/.kaggle/kaggle.json configured\n",
+    "\n",
+    "No scraping. Fully legal."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "c6eca5e6-0469-4af6-8af8-afe3036cb0a8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def download_kaggle_dataset(kaggle_id: str, out_dir: Path):\n",
+    "    out_dir.mkdir(parents = True, exist_ok = True)\n",
+    "\n",
+    "    if any(out_dir.iterdir()):\n",
+    "        print(f\"✔ Kaggle dataset already present: {kaggle_id}\")\n",
+    "        return\n",
+    "\n",
+    "    print(f\"⬇ Downloading Kaggle dataset: {kaggle_id}\")\n",
+    "\n",
+    "    subprocess.run([\"kaggle\", \"datasets\", \"download\",\n",
+    "                    kaggle_id,\n",
+    "                    \"-p\", str(out_dir),\n",
+    "                    \"--unzip\"\n",
+    "                   ],\n",
+    "                   check = True,\n",
+    "                  )\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7c971767-d20a-4fa3-949a-a655d712b2c1",
+   "metadata": {},
+   "source": [
+    "## Folder Ingestor\n",
+    "\n",
+    "Converts **any folder of images** into the unified dataset format. \n",
+    "Used for Kaggle & future web sources."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "b648832e-5025-4851-af21-382051167a04",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "IMAGE_EXTS = {\".png\", \".jpg\", \".jpeg\", \".webp\"}\n",
+    "\n",
+    "def ingest_image_folder(src_dir, out_dir, writer, label, family, source):\n",
+    "    images = [p for p in src_dir.rglob(\"*\") if p.suffix.lower() in IMAGE_EXTS]\n",
+    "\n",
+    "    out_dir.mkdir(parents = True, exist_ok = True)\n",
+    "\n",
+    "    for image_path in tqdm(images[:TARGET_PER_DS]):\n",
+    "        try:\n",
+    "            image = Image.open(image_path)\n",
+    "\n",
+    "            uid   = uuid.uuid4().hex\n",
+    "            dst   = out_dir / f\"{uid}.png\"\n",
+    "\n",
+    "            normalize_and_save(image, dst)\n",
+    "\n",
+    "            write_meta(writer,\n",
+    "                       id       = uid,\n",
+    "                       filename = str(dst),\n",
+    "                       label    = label,\n",
+    "                       family   = family,\n",
+    "                       source   = source,\n",
+    "                      )\n",
+    "            \n",
+    "        except Exception:\n",
+    "            continue\n",
+    "            "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "53fccdc4-e593-4dbf-a71b-e5b826e4a27a",
+   "metadata": {},
+   "source": [
+    "## Main Pipeline Execution\n",
+    "\n",
+    "This cell:\n",
+    "- Builds directories\n",
+    "- Processes HF datasets\n",
+    "- Downloads & ingests Kaggle datasets\n",
+    "- Writes unified metadata CSV"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dd8ef771-f39f-4d9d-8eaf-626ecc211141",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "▶ Loading HF dataset: diffusiondb\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      " 46%|████████████████████████████████████████████████████████████████▊                                                                           | 463/1000 [02:43<04:08,  2.16it/s]"
+     ]
+    }
+   ],
+   "source": [
+    "def main():\n",
+    "    ensure_dirs()\n",
+    "\n",
+    "    with open(META_FILE, \"w\", newline = \"\") as f:\n",
+    "        writer = csv.DictWriter(f, fieldnames=[\"id\", \"filename\", \"label\", \"family\", \"source\"])\n",
+    "        writer.writeheader()\n",
+    "\n",
+    "        # HuggingFace datasets\n",
+    "        for ds in AI_DATASETS:\n",
+    "            process_hf_dataset(ds, AI_DIR, writer)\n",
+    "\n",
+    "        for ds in REAL_DATASETS:\n",
+    "            process_hf_dataset(ds, REAL_DIR, writer)\n",
+    "\n",
+    "        # Kaggle datasets\n",
+    "        for ds in KAGGLE_DATASETS:\n",
+    "            raw_path = RAW_DIR / ds[\"name\"]\n",
+    "            download_kaggle_dataset(ds[\"kaggle_id\"], raw_path)\n",
+    "\n",
+    "            # AI images\n",
+    "            ingest_image_folder(src_dir = raw_path / \"ai\",\n",
+    "                                out_dir = AI_DIR / ds[\"name\"],\n",
+    "                                writer  = writer,\n",
+    "                                label   = \"ai\",\n",
+    "                                family  = ds[\"family\"],\n",
+    "                                source  = ds[\"name\"],\n",
+    "                               )\n",
+    "\n",
+    "            # REAL images\n",
+    "            ingest_image_folder(src_dir = raw_path / \"real\",\n",
+    "                                out_dir = REAL_DIR / ds[\"name\"],\n",
+    "                                writer  = writer,\n",
+    "                                label   = \"real\",\n",
+    "                                family  = \"photographic\",\n",
+    "                                source  = ds[\"name\"],\n",
+    "                               )\n",
+    "\n",
+    "    print(\"\\n✅ Dataset build complete\")\n",
+    "    print(f\"📄 Metadata saved at: {META_FILE}\")\n",
+    "\n",
+    "\n",
+    "# ===============================\n",
+    "# Entry Point\n",
+    "# ===============================\n",
+    "if __name__ == \"__main__\":\n",
+    "    main()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dd6e0834-7757-4daf-a8bc-37d58bc8debd",
+   "metadata": {},
+   "source": [
+    "# Post-Processing Attack Generator\n",
+    "\n",
+    "This notebook applies **real-world post-processing attacks** to an existing\n",
+    "image dataset to evaluate robustness of AI-image detectors.\n",
+    "\n",
+    "### Attacks Implemented\n",
+    "- JPEG recompression (quality loss)\n",
+    "- Resize / rescale (down + up)\n",
+    "- Gaussian blur\n",
+    "\n",
+    "### Why this matters\n",
+    "Most AI images in the wild are:\n",
+    "- Screenshot\n",
+    "- Re-encoded\n",
+    "- Uploaded to social media\n",
+    "- Slightly blurred or resized\n",
+    "\n",
+    "If a detector fails here, it fails in production."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cd680866-0f5c-4930-9262-5521317044fd",
+   "metadata": {},
+   "source": [
+    "## Imports & Config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b62168b8-aa38-47c6-8a00-0bb31e8774fa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ===============================\n",
+    "# Imports\n",
+    "# ===============================\n",
+    "\n",
+    "import csv\n",
+    "import uuid\n",
+    "from PIL import Image\n",
+    "from tqdm import tqdm\n",
+    "from io import BytesIO\n",
+    "from pathlib import Path\n",
+    "from PIL import ImageFilter\n",
+    "\n",
+    "\n",
+    "# ===============================\n",
+    "# Configuration\n",
+    "# ===============================\n",
+    "\n",
+    "BASE_DIR       = Path(\"tests/dataset\")\n",
+    "ATTACK_DIR     = BASE_DIR / \"attacked\"\n",
+    "META_IN        = BASE_DIR / \"metadata/dataset_index.csv\"\n",
+    "META_OUT       = BASE_DIR / \"metadata/dataset_index_attacked.csv\"\n",
+    "\n",
+    "ATTACK_DIR.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "JPEG_QUALITIES = [95, 75, 50]\n",
+    "RESIZE_SCALES  = [0.75, 0.5]\n",
+    "BLUR_RADII     = [0.8, 1.5]\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3c1de132-8245-42c7-9a82-63d6f0c27270",
+   "metadata": {},
+   "source": [
+    "## Load Existing Metadata\n",
+    "\n",
+    "We read the existing unified dataset index and create\n",
+    "new samples **derived from originals**."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a49e5629-ba32-4736-b0ab-e81084f58b78",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_metadata(path):\n",
+    "    with open(path, newline=\"\") as f:\n",
+    "        return list(csv.DictReader(f))\n",
+    "\n",
+    "\n",
+    "records = load_metadata(META_IN)\n",
+    "print(f\"Loaded {len(records)} original samples\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "44a0e31a-abdf-4564-8696-90aef3fc5ec4",
+   "metadata": {},
+   "source": [
+    "## Attack Primitives\n",
+    "\n",
+    "Each function:\n",
+    "- Takes a PIL Image\n",
+    "- Returns a new PIL Image\n",
+    "- Does **not** modify the original"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c6027902-897a-4a3b-a806-e715fea43050",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def jpeg_attack(image: Image.Image, quality: int) -> Image.Image:\n",
+    "    \"\"\"\n",
+    "    Simulate JPEG recompression\n",
+    "    \"\"\"\n",
+    "    buf = BytesIO()\n",
+    "    image.save(buf, \n",
+    "               format  = \"JPEG\", \n",
+    "               quality = quality,\n",
+    "              )\n",
+    "    \n",
+    "    buf.seek(0)\n",
+    "    return Image.open(buf).convert(\"RGB\")\n",
+    "\n",
+    "\n",
+    "def resize_attack(image: Image.Image, scale: float) -> Image.Image:\n",
+    "    \"\"\"\n",
+    "    Downscale and upscale image\n",
+    "    \"\"\"\n",
+    "    w, h         = image.size\n",
+    "    new_w, new_h = int(w * scale), int(h * scale)\n",
+    "    image_small  = image.resize((new_w, new_h), Image.BICUBIC)\n",
+    "    \n",
+    "    return image_small.resize((w, h), Image.BICUBIC)\n",
+    "\n",
+    "\n",
+    "def blur_attack(image: Image.Image, radius: float) -> Image.Image:\n",
+    "    \"\"\"\n",
+    "    Apply Gaussian blur\n",
+    "    \"\"\"\n",
+    "    return image.filter(ImageFilter.GaussianBlur(radius))\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "62d3ca44-b497-4397-bd35-04db9041d1e4",
+   "metadata": {},
+   "source": [
+    "## Attack Application Pipeline\n",
+    "\n",
+    "For each original image:\n",
+    "- Apply all attack variants\n",
+    "- Save attacked images\n",
+    "- Write **attack-aware metadata**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c702ab79-68b1-4191-8e87-f26ad0227348",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def apply_attacks(records, writer):\n",
+    "    for r in tqdm(records):\n",
+    "        src_path = Path(r[\"filename\"])\n",
+    "        \n",
+    "        if not src_path.exists():\n",
+    "            continue\n",
+    "\n",
+    "        try:\n",
+    "            img = Image.open(src_path).convert(\"RGB\")\n",
+    "            \n",
+    "        except Exception:\n",
+    "            continue\n",
+    "\n",
+    "        base_name = src_path.stem\n",
+    "        label     = r[\"label\"]\n",
+    "\n",
+    "        out_base  = ATTACK_DIR / r[\"source\"]\n",
+    "        out_base.mkdir(parents = True, exist_ok = True)\n",
+    "\n",
+    "        # --- JPEG ---\n",
+    "        for q in JPEG_QUALITIES:\n",
+    "            attacked = jpeg_attack(img, q)\n",
+    "            uid      = uuid.uuid4().hex\n",
+    "            out_path = out_base / f\"{uid}.png\"\n",
+    "\n",
+    "            attacked.save(out_path, optimize = True)\n",
+    "\n",
+    "            writer.writerow({**r,\n",
+    "                             \"id\"        : uid,\n",
+    "                             \"filename\"  : str(out_path),\n",
+    "                             \"attack\"    : f\"jpeg_q{q}\",\n",
+    "                             \"parent_id\" : r[\"id\"]\n",
+    "                           })\n",
+    "\n",
+    "        # --- Resize ---\n",
+    "        for s in RESIZE_SCALES:\n",
+    "            attacked = resize_attack(img, s)\n",
+    "            uid      = uuid.uuid4().hex\n",
+    "            out_path = out_base / f\"{uid}.png\"\n",
+    "\n",
+    "            attacked.save(out_path, optimize = True)\n",
+    "\n",
+    "            writer.writerow({**r,\n",
+    "                             \"id\"        : uid,\n",
+    "                             \"filename\"  : str(out_path),\n",
+    "                             \"attack\"    : f\"resize_{int(s*100)}\",\n",
+    "                             \"parent_id\" : r[\"id\"]\n",
+    "                           })\n",
+    "\n",
+    "        # --- Blur ---\n",
+    "        for b in BLUR_RADII:\n",
+    "            attacked = blur_attack(img, b)\n",
+    "            uid      = uuid.uuid4().hex\n",
+    "            out_path = out_base / f\"{uid}.png\"\n",
+    "\n",
+    "            attacked.save(out_path, optimize = True)\n",
+    "\n",
+    "            writer.writerow({**r,\n",
+    "                             \"id\"        : uid,\n",
+    "                             \"filename\"  : str(out_path),\n",
+    "                             \"attack\"    : f\"blur_{b}\",\n",
+    "                             \"parent_id\" : r[\"id\"]\n",
+    "                           })\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3736496a-7710-4593-86fd-818b2d58d535",
+   "metadata": {},
+   "source": [
+    "## Write Attack Metadata\n",
+    "\n",
+    "We preserve:\n",
+    "- Original label (ai / real)\n",
+    "- Source family\n",
+    "- Parent image ID\n",
+    "- Attack type\n",
+    "\n",
+    "This allows **per-attack evaluation later**."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "17f29f49-4137-4752-a098-1eba404ce352",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open(META_OUT, \"w\", newline = \"\") as f:\n",
+    "    fieldnames = list(records[0].keys()) + [\"attack\", \"parent_id\"]\n",
+    "    writer     = csv.DictWriter(f, fieldnames = fieldnames)\n",
+    "    writer.writeheader()\n",
+    "\n",
+    "    apply_attacks(records, writer)\n",
+    "\n",
+    "print(\"✅ Post-processing attacks generated\")\n",
+    "print(f\"Metadata saved to: {META_OUT}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f20b8f36-af23-49b8-8c6b-d93cf2a7ba07",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

notebooks/Unified_Dataset_Builder.ipynb ADDED Viewed

	@@ -0,0 +1,797 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "e2d654dc-c431-420e-810a-a985de9172fd",
+   "metadata": {},
+   "source": [
+    "# Unified AI vs Real Image Dataset Builder\n",
+    "\n",
+    "This notebook builds a **clean, labeled, unified dataset** for evaluating\n",
+    "AI image detection systems.\n",
+    "\n",
+    "### Supported sources\n",
+    "- HuggingFace datasets (DiffusionDB, COCO, OpenImages)\n",
+    "- Kaggle public datasets (Midjourney, AI vs Real)\n",
+    "- Unified output format:\n",
+    "  - Normalized PNG images\n",
+    "  - Size-limited (≤1024px)\n",
+    "  - Central metadata CSV\n",
+    "\n",
+    "### Output Structure\n",
+    "\n",
+    "```bash\n",
+    "tests/dataset/\n",
+    "├── ai/\n",
+    "├── real/\n",
+    "├── raw_downloads/\n",
+    "├── metadata/dataset_index.csv\n",
+    "```\n",
+    "\n",
+    "> ⚠️ All datasets used are **public & legally accessible**.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e8b43897-9ce5-4f20-8798-7b3aebdf1b36",
+   "metadata": {},
+   "source": [
+    "## Required Dependencies\n",
+    "\n",
+    "Before running, ensure:\n",
+    "\n",
+    "```bash\n",
+    "pip install datasets pillow tqdm kaggle pycocotools\n",
+    "```\n",
+    "\n",
+    "Also configure Kaggle:\n",
+    "\n",
+    "```bash\n",
+    "~/.kaggle/kaggle.json\n",
+    "```\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "00b9f50c-6158-47e9-89cf-5c279d9c63bb",
+   "metadata": {},
+   "source": [
+    "## Imports & Config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "9147ace7-162f-4b0d-bd6d-0d92b9bad61e",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# ===============================\n",
+    "# Imports & Global Configuration\n",
+    "# ===============================\n",
+    "import os\n",
+    "import csv\n",
+    "import uuid\n",
+    "import subprocess\n",
+    "from PIL import Image\n",
+    "from tqdm import tqdm\n",
+    "from pathlib import Path\n",
+    "from datasets import load_dataset\n",
+    "\n",
+    "\n",
+    "# ===============================\n",
+    "# Directory Configuration\n",
+    "# ===============================\n",
+    "BASE_DIR       = Path(\"tests/dataset\")\n",
+    "AI_DIR         = BASE_DIR / \"ai\"\n",
+    "REAL_DIR       = BASE_DIR / \"real\"\n",
+    "RAW_DIR        = BASE_DIR / \"raw_downloads\"\n",
+    "META_DIR       = BASE_DIR / \"metadata\"\n",
+    "\n",
+    "META_FILE      = META_DIR / \"dataset_index.csv\"\n",
+    "\n",
+    "TARGET_PER_DS  = 1000\n",
+    "IMAGE_SIZE_MAX = 1024\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "329d1c09-0e9c-4bc2-8935-bd50941611c8",
+   "metadata": {},
+   "source": [
+    "## Utility Functions\n",
+    "\n",
+    "These helpers:\n",
+    "- Ensure directory structure\n",
+    "- Normalize images (RGB, resize, PNG)\n",
+    "- Write metadata rows safely"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "b352e981-e456-40cf-be84-a1eb0f01ea7c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def ensure_dirs():\n",
+    "    for d in [AI_DIR, REAL_DIR, RAW_DIR, META_DIR]:\n",
+    "        d.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "\n",
+    "def normalize_and_save(image: Image.Image, path: Path):\n",
+    "    \"\"\"\n",
+    "    Normalize image to RGB PNG and limit size\n",
+    "    \"\"\"\n",
+    "    image = image.convert(\"RGB\")\n",
+    "    image.thumbnail((IMAGE_SIZE_MAX, IMAGE_SIZE_MAX))\n",
+    "    image.save(path, \n",
+    "               format   = \"PNG\", \n",
+    "               optimize = True,\n",
+    "              )\n",
+    "\n",
+    "\n",
+    "def write_meta(writer, **row):\n",
+    "    writer.writerow(row)\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "34c3bc3b-6bb6-414d-b3fe-85bc43d832c7",
+   "metadata": {},
+   "source": [
+    "## Dataset Registry\n",
+    "\n",
+    "Defines **where data comes from** and **how it is labeled**."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "74106705-e2d6-411c-8193-8e02f5ee0fdc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# HuggingFace datasets (safe & stable)\n",
+    "AI_DATASETS     = [{\"name\"      : \"diffusiondb\",\n",
+    "                    \"hf_id\"     : \"poloclub/diffusiondb\",\n",
+    "                    \"config\"    : \"2m_first_1k\",\n",
+    "                    \"split\"     : \"train\",\n",
+    "                    \"image_key\" : \"image\",\n",
+    "                    \"label\"     : \"ai\",\n",
+    "                    \"family\"    : \"diffusion\"\n",
+    "                  }]\n",
+    "        \n",
+    "\n",
+    "REAL_DATASETS   = [{\"name\"      : \"mscoco_2017\",\n",
+    "                    \"hf_id\"     : \"shunk031/MSCOCO\",\n",
+    "                    \"hf_kwargs\" : {\"year\": 2017,\n",
+    "                                   \"coco_task\": \"instances\"\n",
+    "                                  },\n",
+    "                    \"split\"     : \"train\",\n",
+    "                    \"image_key\" : \"image\",\n",
+    "                    \"label\"     : \"real\",\n",
+    "                    \"family\"    : \"photographic\",\n",
+    "                    \"streaming\" : False\n",
+    "                  }]\n",
+    "\n",
+    "# Kaggle datasets (public, non-scraped)\n",
+    "KAGGLE_DATASETS = [{\"name\"      : \"ai_vs_real\",\n",
+    "                    \"kaggle_id\" : \"tristanzhang32/ai-generated-images-vs-real-images\",\n",
+    "                    \"label\"     : \"ai\",\n",
+    "                    \"family\"    : \"mixed\"\n",
+    "                   },\n",
+    "                   {\"name\"      : \"midjourney\",\n",
+    "                    \"kaggle_id\" : \"cyanex1702/midjourney-imagesprompt\",\n",
+    "                    \"label\"     : \"ai\",\n",
+    "                    \"family\"    : \"diffusion\"\n",
+    "                   }\n",
+    "                  ]\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1f4c6f3b-2a35-415b-9a35-ee52fd3d85be",
+   "metadata": {},
+   "source": [
+    "## HuggingFace Dataset Processor\n",
+    "\n",
+    "Loads datasets via `datasets.load_dataset()` and saves images in unified format."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "a9ea5276-65bb-49f5-a656-c00ceeb1f4d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def process_hf_dataset(ds_cfg, root_dir, writer):\n",
+    "    print(f\"\\n▶ Loading HF dataset: {ds_cfg['name']}\")\n",
+    "\n",
+    "    ds      = load_dataset(ds_cfg[\"hf_id\"],\n",
+    "                           **ds_cfg.get(\"hf_kwargs\", {}),\n",
+    "                           name      = ds_cfg.get(\"config\"),\n",
+    "                           split     = ds_cfg[\"split\"],\n",
+    "                           streaming = ds_cfg.get(\"streaming\", False),\n",
+    "                          )\n",
+    "\n",
+    "    out_dir = root_dir / ds_cfg[\"name\"]\n",
+    "    out_dir.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "    count   = 0\n",
+    "    \n",
+    "    for row in tqdm(ds):\n",
+    "        if (count >= TARGET_PER_DS):\n",
+    "            break\n",
+    "\n",
+    "        try:\n",
+    "            image = row.get(ds_cfg[\"image_key\"])\n",
+    "            if not isinstance(image, Image.Image):\n",
+    "                continue\n",
+    "\n",
+    "            uid   = uuid.uuid4().hex\n",
+    "            path  = out_dir / f\"{uid}.png\"\n",
+    "\n",
+    "            normalize_and_save(image, path)\n",
+    "\n",
+    "            write_meta(writer,\n",
+    "                       id       = uid,\n",
+    "                       filename = str(path),\n",
+    "                       label    = ds_cfg[\"label\"],\n",
+    "                       family   = ds_cfg[\"family\"],\n",
+    "                       source   = ds_cfg[\"name\"],\n",
+    "                      )\n",
+    "\n",
+    "            count += 1\n",
+    "\n",
+    "        except Exception:\n",
+    "            continue\n",
+    "            "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fb6d23a0-fa98-4351-9e4e-99265a51e8ef",
+   "metadata": {},
+   "source": [
+    "## Kaggle Dataset Downloader\n",
+    "\n",
+    "Requires:\n",
+    "- Kaggle account\n",
+    "- ~/.kaggle/kaggle.json configured\n",
+    "\n",
+    "No scraping. Fully legal."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "c6eca5e6-0469-4af6-8af8-afe3036cb0a8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def download_kaggle_dataset(kaggle_id: str, out_dir: Path):\n",
+    "    out_dir.mkdir(parents = True, exist_ok = True)\n",
+    "\n",
+    "    if any(out_dir.iterdir()):\n",
+    "        print(f\"✔ Kaggle dataset already present: {kaggle_id}\")\n",
+    "        return\n",
+    "\n",
+    "    print(f\"⬇ Downloading Kaggle dataset: {kaggle_id}\")\n",
+    "\n",
+    "    subprocess.run([\"kaggle\", \"datasets\", \"download\",\n",
+    "                    kaggle_id,\n",
+    "                    \"-p\", str(out_dir),\n",
+    "                    \"--unzip\"\n",
+    "                   ],\n",
+    "                   check = True,\n",
+    "                  )\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7c971767-d20a-4fa3-949a-a655d712b2c1",
+   "metadata": {},
+   "source": [
+    "## Folder Ingestor\n",
+    "\n",
+    "Converts **any folder of images** into the unified dataset format. \n",
+    "Used for Kaggle & future web sources."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "b648832e-5025-4851-af21-382051167a04",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "IMAGE_EXTS = {\".png\", \".jpg\", \".jpeg\", \".webp\"}\n",
+    "\n",
+    "def ingest_image_folder(src_dir, out_dir, writer, label, family, source):\n",
+    "    images = [p for p in src_dir.rglob(\"*\") if p.suffix.lower() in IMAGE_EXTS]\n",
+    "\n",
+    "    out_dir.mkdir(parents = True, exist_ok = True)\n",
+    "\n",
+    "    for image_path in tqdm(images[:TARGET_PER_DS]):\n",
+    "        try:\n",
+    "            image = Image.open(image_path)\n",
+    "\n",
+    "            uid   = uuid.uuid4().hex\n",
+    "            dst   = out_dir / f\"{uid}.png\"\n",
+    "\n",
+    "            normalize_and_save(image, dst)\n",
+    "\n",
+    "            write_meta(writer,\n",
+    "                       id       = uid,\n",
+    "                       filename = str(dst),\n",
+    "                       label    = label,\n",
+    "                       family   = family,\n",
+    "                       source   = source,\n",
+    "                      )\n",
+    "            \n",
+    "        except Exception:\n",
+    "            continue\n",
+    "            "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "53fccdc4-e593-4dbf-a71b-e5b826e4a27a",
+   "metadata": {},
+   "source": [
+    "## Main Pipeline Execution\n",
+    "\n",
+    "This cell:\n",
+    "- Builds directories\n",
+    "- Processes HF datasets\n",
+    "- Downloads & ingests Kaggle datasets\n",
+    "- Writes unified metadata CSV"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "dd8ef771-f39f-4d9d-8eaf-626ecc211141",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "▶ Loading HF dataset: diffusiondb\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [05:24<00:00,  3.08it/s]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "▶ Loading HF dataset: mscoco_2017\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "15b93e24384a49da9e46dceda9bc3f6b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e34eabfceb61496ebbd9336c9ed060f3",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data:   0%|          | 0.00/19.3G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[7], line 46\u001b[0m\n\u001b[1;32m     42\u001b[0m \u001b[38;5;66;03m# ===============================\u001b[39;00m\n\u001b[1;32m     43\u001b[0m \u001b[38;5;66;03m# Entry Point\u001b[39;00m\n\u001b[1;32m     44\u001b[0m \u001b[38;5;66;03m# ===============================\u001b[39;00m\n\u001b[1;32m     45\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__main__\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m---> 46\u001b[0m     \u001b[43mmain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+      "Cell \u001b[0;32mIn[7], line 13\u001b[0m, in \u001b[0;36mmain\u001b[0;34m()\u001b[0m\n\u001b[1;32m     10\u001b[0m     process_hf_dataset(ds, AI_DIR, writer)\n\u001b[1;32m     12\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ds \u001b[38;5;129;01min\u001b[39;00m REAL_DATASETS:\n\u001b[0;32m---> 13\u001b[0m     \u001b[43mprocess_hf_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mds\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mREAL_DIR\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwriter\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     15\u001b[0m \u001b[38;5;66;03m# Kaggle datasets\u001b[39;00m\n\u001b[1;32m     16\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ds \u001b[38;5;129;01min\u001b[39;00m KAGGLE_DATASETS:\n",
+      "Cell \u001b[0;32mIn[4], line 4\u001b[0m, in \u001b[0;36mprocess_hf_dataset\u001b[0;34m(ds_cfg, root_dir, writer)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mprocess_hf_dataset\u001b[39m(ds_cfg, root_dir, writer):\n\u001b[1;32m      2\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m��� Loading HF dataset: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mds_cfg[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 4\u001b[0m     ds      \u001b[38;5;241m=\u001b[39m \u001b[43mload_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mds_cfg\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhf_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      5\u001b[0m \u001b[43m                           \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mds_cfg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhf_kwargs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      6\u001b[0m \u001b[43m                           \u001b[49m\u001b[43mname\u001b[49m\u001b[43m      \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mds_cfg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mconfig\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      7\u001b[0m \u001b[43m                           \u001b[49m\u001b[43msplit\u001b[49m\u001b[43m     \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mds_cfg\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msplit\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      8\u001b[0m \u001b[43m                           \u001b[49m\u001b[43mstreaming\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mds_cfg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstreaming\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      9\u001b[0m \u001b[43m                          \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     11\u001b[0m     out_dir \u001b[38;5;241m=\u001b[39m root_dir \u001b[38;5;241m/\u001b[39m ds_cfg[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m     12\u001b[0m     out_dir\u001b[38;5;241m.\u001b[39mmkdir(parents\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/load.py:2153\u001b[0m, in \u001b[0;36mload_dataset\u001b[0;34m(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, token, use_auth_token, task, streaming, num_proc, storage_options, **config_kwargs)\u001b[0m\n\u001b[1;32m   2150\u001b[0m try_from_hf_gcs \u001b[38;5;241m=\u001b[39m path \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m _PACKAGED_DATASETS_MODULES\n\u001b[1;32m   2152\u001b[0m \u001b[38;5;66;03m# Download and prepare data\u001b[39;00m\n\u001b[0;32m-> 2153\u001b[0m \u001b[43mbuilder_instance\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_and_prepare\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   2154\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2155\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2156\u001b[0m \u001b[43m    \u001b[49m\u001b[43mverification_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverification_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2157\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtry_from_hf_gcs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtry_from_hf_gcs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2158\u001b[0m \u001b[43m    \u001b[49m\u001b[43mnum_proc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnum_proc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2159\u001b[0m \u001b[43m    \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2160\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2162\u001b[0m \u001b[38;5;66;03m# Build dataset for splits\u001b[39;00m\n\u001b[1;32m   2163\u001b[0m keep_in_memory \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m   2164\u001b[0m     keep_in_memory \u001b[38;5;28;01mif\u001b[39;00m keep_in_memory \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m is_small_dataset(builder_instance\u001b[38;5;241m.\u001b[39minfo\u001b[38;5;241m.\u001b[39mdataset_size)\n\u001b[1;32m   2165\u001b[0m )\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/builder.py:954\u001b[0m, in \u001b[0;36mDatasetBuilder.download_and_prepare\u001b[0;34m(self, output_dir, download_config, download_mode, verification_mode, ignore_verifications, try_from_hf_gcs, dl_manager, base_path, use_auth_token, file_format, max_shard_size, num_proc, storage_options, **download_and_prepare_kwargs)\u001b[0m\n\u001b[1;32m    952\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m num_proc \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    953\u001b[0m         prepare_split_kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnum_proc\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m num_proc\n\u001b[0;32m--> 954\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_download_and_prepare\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    955\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdl_manager\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdl_manager\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    956\u001b[0m \u001b[43m        \u001b[49m\u001b[43mverification_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverification_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    957\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mprepare_split_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    958\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdownload_and_prepare_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    959\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    960\u001b[0m \u001b[38;5;66;03m# Sync info\u001b[39;00m\n\u001b[1;32m    961\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minfo\u001b[38;5;241m.\u001b[39mdataset_size \u001b[38;5;241m=\u001b[39m \u001b[38;5;28msum\u001b[39m(split\u001b[38;5;241m.\u001b[39mnum_bytes \u001b[38;5;28;01mfor\u001b[39;00m split \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minfo\u001b[38;5;241m.\u001b[39msplits\u001b[38;5;241m.\u001b[39mvalues())\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/builder.py:1717\u001b[0m, in \u001b[0;36mGeneratorBasedBuilder._download_and_prepare\u001b[0;34m(self, dl_manager, verification_mode, **prepare_splits_kwargs)\u001b[0m\n\u001b[1;32m   1716\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_download_and_prepare\u001b[39m(\u001b[38;5;28mself\u001b[39m, dl_manager, verification_mode, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mprepare_splits_kwargs):\n\u001b[0;32m-> 1717\u001b[0m     \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_download_and_prepare\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1718\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdl_manager\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1719\u001b[0m \u001b[43m        \u001b[49m\u001b[43mverification_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1720\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcheck_duplicate_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverification_mode\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mVerificationMode\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mBASIC_CHECKS\u001b[49m\n\u001b[1;32m   1721\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mverification_mode\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mVerificationMode\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mALL_CHECKS\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1722\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mprepare_splits_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1723\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/builder.py:1027\u001b[0m, in \u001b[0;36mDatasetBuilder._download_and_prepare\u001b[0;34m(self, dl_manager, verification_mode, **prepare_split_kwargs)\u001b[0m\n\u001b[1;32m   1025\u001b[0m split_dict \u001b[38;5;241m=\u001b[39m SplitDict(dataset_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdataset_name)\n\u001b[1;32m   1026\u001b[0m split_generators_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_split_generators_kwargs(prepare_split_kwargs)\n\u001b[0;32m-> 1027\u001b[0m split_generators \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_split_generators\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdl_manager\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43msplit_generators_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1029\u001b[0m \u001b[38;5;66;03m# Checksums verification\u001b[39;00m\n\u001b[1;32m   1030\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m verification_mode \u001b[38;5;241m==\u001b[39m VerificationMode\u001b[38;5;241m.\u001b[39mALL_CHECKS \u001b[38;5;129;01mand\u001b[39;00m dl_manager\u001b[38;5;241m.\u001b[39mrecord_checksums:\n",
+      "File \u001b[0;32m~/.cache/huggingface/modules/datasets_modules/datasets/shunk031--MSCOCO/9a9d3cb1e5e1927e03f5448bc4e3dd95d17101d142ba4b94d6973770757f535f/MSCOCO.py:977\u001b[0m, in \u001b[0;36mMsCocoDataset._split_generators\u001b[0;34m(self, dl_manager)\u001b[0m\n\u001b[1;32m    976\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_split_generators\u001b[39m(\u001b[38;5;28mself\u001b[39m, dl_manager: ds\u001b[38;5;241m.\u001b[39mDownloadManager):\n\u001b[0;32m--> 977\u001b[0m     file_paths \u001b[38;5;241m=\u001b[39m \u001b[43mdl_manager\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_and_extract\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_URLS\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43myear\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    979\u001b[0m     imgs \u001b[38;5;241m=\u001b[39m file_paths[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mimages\u001b[39m\u001b[38;5;124m\"\u001b[39m]  \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m    980\u001b[0m     anns \u001b[38;5;241m=\u001b[39m file_paths[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mannotations\u001b[39m\u001b[38;5;124m\"\u001b[39m]  \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/download/download_manager.py:565\u001b[0m, in \u001b[0;36mDownloadManager.download_and_extract\u001b[0;34m(self, url_or_urls)\u001b[0m\n\u001b[1;32m    549\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mdownload_and_extract\u001b[39m(\u001b[38;5;28mself\u001b[39m, url_or_urls):\n\u001b[1;32m    550\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Download and extract given `url_or_urls`.\u001b[39;00m\n\u001b[1;32m    551\u001b[0m \n\u001b[1;32m    552\u001b[0m \u001b[38;5;124;03m    Is roughly equivalent to:\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    563\u001b[0m \u001b[38;5;124;03m        extracted_path(s): `str`, extracted paths of given URL(s).\u001b[39;00m\n\u001b[1;32m    564\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m--> 565\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mextract(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl_or_urls\u001b[49m\u001b[43m)\u001b[49m)\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/download/download_manager.py:428\u001b[0m, in \u001b[0;36mDownloadManager.download\u001b[0;34m(self, url_or_urls)\u001b[0m\n\u001b[1;32m    425\u001b[0m download_func \u001b[38;5;241m=\u001b[39m partial(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_download, download_config\u001b[38;5;241m=\u001b[39mdownload_config)\n\u001b[1;32m    427\u001b[0m start_time \u001b[38;5;241m=\u001b[39m datetime\u001b[38;5;241m.\u001b[39mnow()\n\u001b[0;32m--> 428\u001b[0m downloaded_path_or_paths \u001b[38;5;241m=\u001b[39m \u001b[43mmap_nested\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    429\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdownload_func\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    430\u001b[0m \u001b[43m    \u001b[49m\u001b[43murl_or_urls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    431\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmap_tuple\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m    432\u001b[0m \u001b[43m    \u001b[49m\u001b[43mnum_proc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnum_proc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    433\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdisable_tqdm\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mis_progress_bar_enabled\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    434\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdesc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mDownloading data files\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    435\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    436\u001b[0m duration \u001b[38;5;241m=\u001b[39m datetime\u001b[38;5;241m.\u001b[39mnow() \u001b[38;5;241m-\u001b[39m start_time\n\u001b[1;32m    437\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloading took \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mduration\u001b[38;5;241m.\u001b[39mtotal_seconds()\u001b[38;5;250m \u001b[39m\u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;241m60\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m min\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/py_utils.py:464\u001b[0m, in \u001b[0;36mmap_nested\u001b[0;34m(function, data_struct, dict_only, map_list, map_tuple, map_numpy, num_proc, parallel_min_length, types, disable_tqdm, desc)\u001b[0m\n\u001b[1;32m    462\u001b[0m     num_proc \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m    463\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m num_proc \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m num_proc \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(iterable) \u001b[38;5;241m<\u001b[39m parallel_min_length:\n\u001b[0;32m--> 464\u001b[0m     mapped \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m    465\u001b[0m         _single_map_nested((function, obj, types, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m    466\u001b[0m         \u001b[38;5;28;01mfor\u001b[39;00m obj \u001b[38;5;129;01min\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mtqdm(iterable, disable\u001b[38;5;241m=\u001b[39mdisable_tqdm, desc\u001b[38;5;241m=\u001b[39mdesc)\n\u001b[1;32m    467\u001b[0m     ]\n\u001b[1;32m    468\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    469\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m warnings\u001b[38;5;241m.\u001b[39mcatch_warnings():\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/py_utils.py:465\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    462\u001b[0m     num_proc \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m    463\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m num_proc \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m num_proc \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(iterable) \u001b[38;5;241m<\u001b[39m parallel_min_length:\n\u001b[1;32m    464\u001b[0m     mapped \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m--> 465\u001b[0m         \u001b[43m_single_map_nested\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtypes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    466\u001b[0m         \u001b[38;5;28;01mfor\u001b[39;00m obj \u001b[38;5;129;01min\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mtqdm(iterable, disable\u001b[38;5;241m=\u001b[39mdisable_tqdm, desc\u001b[38;5;241m=\u001b[39mdesc)\n\u001b[1;32m    467\u001b[0m     ]\n\u001b[1;32m    468\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    469\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m warnings\u001b[38;5;241m.\u001b[39mcatch_warnings():\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/py_utils.py:382\u001b[0m, in \u001b[0;36m_single_map_nested\u001b[0;34m(args)\u001b[0m\n\u001b[1;32m    380\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mtqdm(pbar_iterable, disable\u001b[38;5;241m=\u001b[39mdisable_tqdm, position\u001b[38;5;241m=\u001b[39mrank, unit\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mobj\u001b[39m\u001b[38;5;124m\"\u001b[39m, desc\u001b[38;5;241m=\u001b[39mpbar_desc) \u001b[38;5;28;01mas\u001b[39;00m pbar:\n\u001b[1;32m    381\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_struct, \u001b[38;5;28mdict\u001b[39m):\n\u001b[0;32m--> 382\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m {k: _single_map_nested((function, v, types, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m)) \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m pbar}\n\u001b[1;32m    383\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    384\u001b[0m         mapped \u001b[38;5;241m=\u001b[39m [_single_map_nested((function, v, types, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m)) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m pbar]\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/py_utils.py:382\u001b[0m, in \u001b[0;36m<dictcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    380\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mtqdm(pbar_iterable, disable\u001b[38;5;241m=\u001b[39mdisable_tqdm, position\u001b[38;5;241m=\u001b[39mrank, unit\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mobj\u001b[39m\u001b[38;5;124m\"\u001b[39m, desc\u001b[38;5;241m=\u001b[39mpbar_desc) \u001b[38;5;28;01mas\u001b[39;00m pbar:\n\u001b[1;32m    381\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_struct, \u001b[38;5;28mdict\u001b[39m):\n\u001b[0;32m--> 382\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m {k: \u001b[43m_single_map_nested\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtypes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m pbar}\n\u001b[1;32m    383\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    384\u001b[0m         mapped \u001b[38;5;241m=\u001b[39m [_single_map_nested((function, v, types, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m)) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m pbar]\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/py_utils.py:367\u001b[0m, in \u001b[0;36m_single_map_nested\u001b[0;34m(args)\u001b[0m\n\u001b[1;32m    365\u001b[0m \u001b[38;5;66;03m# Singleton first to spare some computation\u001b[39;00m\n\u001b[1;32m    366\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_struct, \u001b[38;5;28mdict\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_struct, types):\n\u001b[0;32m--> 367\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunction\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata_struct\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    369\u001b[0m \u001b[38;5;66;03m# Reduce logging to keep things readable in multiprocessing with tqdm\u001b[39;00m\n\u001b[1;32m    370\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m rank \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mget_verbosity() \u001b[38;5;241m<\u001b[39m logging\u001b[38;5;241m.\u001b[39mWARNING:\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/download/download_manager.py:454\u001b[0m, in \u001b[0;36mDownloadManager._download\u001b[0;34m(self, url_or_filename, download_config)\u001b[0m\n\u001b[1;32m    451\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_relative_path(url_or_filename):\n\u001b[1;32m    452\u001b[0m     \u001b[38;5;66;03m# append the relative path to the base_path\u001b[39;00m\n\u001b[1;32m    453\u001b[0m     url_or_filename \u001b[38;5;241m=\u001b[39m url_or_path_join(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_base_path, url_or_filename)\n\u001b[0;32m--> 454\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcached_path\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl_or_filename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/file_utils.py:182\u001b[0m, in \u001b[0;36mcached_path\u001b[0;34m(url_or_filename, download_config, **download_kwargs)\u001b[0m\n\u001b[1;32m    178\u001b[0m     url_or_filename \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(url_or_filename)\n\u001b[1;32m    180\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_remote_url(url_or_filename):\n\u001b[1;32m    181\u001b[0m     \u001b[38;5;66;03m# URL, so get it from the cache (downloading if necessary)\u001b[39;00m\n\u001b[0;32m--> 182\u001b[0m     output_path \u001b[38;5;241m=\u001b[39m \u001b[43mget_from_cache\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    183\u001b[0m \u001b[43m        \u001b[49m\u001b[43murl_or_filename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    184\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    185\u001b[0m \u001b[43m        \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    186\u001b[0m \u001b[43m        \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    187\u001b[0m \u001b[43m        \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    188\u001b[0m \u001b[43m        \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    189\u001b[0m \u001b[43m        \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    190\u001b[0m \u001b[43m        \u001b[49m\u001b[43muse_etag\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muse_etag\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    191\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmax_retries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    192\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    193\u001b[0m \u001b[43m        \u001b[49m\u001b[43mignore_url_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mignore_url_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    194\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    195\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdownload_desc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_desc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    196\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    197\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(url_or_filename):\n\u001b[1;32m    198\u001b[0m     \u001b[38;5;66;03m# File, and it exists.\u001b[39;00m\n\u001b[1;32m    199\u001b[0m     output_path \u001b[38;5;241m=\u001b[39m url_or_filename\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/file_utils.py:644\u001b[0m, in \u001b[0;36mget_from_cache\u001b[0;34m(url, cache_dir, force_download, proxies, etag_timeout, resume_download, user_agent, local_files_only, use_etag, max_retries, token, use_auth_token, ignore_url_params, storage_options, download_desc)\u001b[0m\n\u001b[1;32m    642\u001b[0m         fsspec_get(url, temp_file, storage_options\u001b[38;5;241m=\u001b[39mstorage_options, desc\u001b[38;5;241m=\u001b[39mdownload_desc)\n\u001b[1;32m    643\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 644\u001b[0m         \u001b[43mhttp_get\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    645\u001b[0m \u001b[43m            \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    646\u001b[0m \u001b[43m            \u001b[49m\u001b[43mtemp_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    647\u001b[0m \u001b[43m            \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    648\u001b[0m \u001b[43m            \u001b[49m\u001b[43mresume_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    649\u001b[0m \u001b[43m            \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    650\u001b[0m \u001b[43m            \u001b[49m\u001b[43mcookies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcookies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    651\u001b[0m \u001b[43m            \u001b[49m\u001b[43mmax_retries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    652\u001b[0m \u001b[43m            \u001b[49m\u001b[43mdesc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_desc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    653\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    655\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstoring \u001b[39m\u001b[38;5;132;01m{\u001b[39;00murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m in cache at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcache_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    656\u001b[0m shutil\u001b[38;5;241m.\u001b[39mmove(temp_file\u001b[38;5;241m.\u001b[39mname, cache_path)\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/datasets/utils/file_utils.py:419\u001b[0m, in \u001b[0;36mhttp_get\u001b[0;34m(url, temp_file, proxies, resume_size, headers, cookies, timeout, max_retries, desc)\u001b[0m\n\u001b[1;32m    410\u001b[0m total \u001b[38;5;241m=\u001b[39m resume_size \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mint\u001b[39m(content_length) \u001b[38;5;28;01mif\u001b[39;00m content_length \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    411\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mtqdm(\n\u001b[1;32m    412\u001b[0m     unit\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mB\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m    413\u001b[0m     unit_scale\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    417\u001b[0m     disable\u001b[38;5;241m=\u001b[39m\u001b[38;5;129;01mnot\u001b[39;00m logging\u001b[38;5;241m.\u001b[39mis_progress_bar_enabled(),\n\u001b[1;32m    418\u001b[0m ) \u001b[38;5;28;01mas\u001b[39;00m progress:\n\u001b[0;32m--> 419\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m chunk \u001b[38;5;129;01min\u001b[39;00m response\u001b[38;5;241m.\u001b[39miter_content(chunk_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1024\u001b[39m):\n\u001b[1;32m    420\u001b[0m         progress\u001b[38;5;241m.\u001b[39mupdate(\u001b[38;5;28mlen\u001b[39m(chunk))\n\u001b[1;32m    421\u001b[0m         temp_file\u001b[38;5;241m.\u001b[39mwrite(chunk)\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/requests/models.py:816\u001b[0m, in \u001b[0;36mResponse.iter_content.<locals>.generate\u001b[0;34m()\u001b[0m\n\u001b[1;32m    814\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m    815\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 816\u001b[0m         \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw\u001b[38;5;241m.\u001b[39mstream(chunk_size, decode_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m    817\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m ProtocolError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m    818\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m ChunkedEncodingError(e)\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/urllib3/response.py:1091\u001b[0m, in \u001b[0;36mHTTPResponse.stream\u001b[0;34m(self, amt, decode_content)\u001b[0m\n\u001b[1;32m   1089\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1090\u001b[0m     \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_fp_closed(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m-> 1091\u001b[0m         data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mamt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1093\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m data:\n\u001b[1;32m   1094\u001b[0m             \u001b[38;5;28;01myield\u001b[39;00m data\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/urllib3/response.py:980\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt, decode_content, cache_content)\u001b[0m\n\u001b[1;32m    977\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m amt:\n\u001b[1;32m    978\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer\u001b[38;5;241m.\u001b[39mget(amt)\n\u001b[0;32m--> 980\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raw_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    982\u001b[0m flush_decoder \u001b[38;5;241m=\u001b[39m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m (amt \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m data)\n\u001b[1;32m    984\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m data \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/urllib3/response.py:904\u001b[0m, in \u001b[0;36mHTTPResponse._raw_read\u001b[0;34m(self, amt, read1)\u001b[0m\n\u001b[1;32m    901\u001b[0m fp_closed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclosed\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m    903\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_error_catcher():\n\u001b[0;32m--> 904\u001b[0m     data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mread1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mread1\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m fp_closed \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    905\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m amt \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m data:\n\u001b[1;32m    906\u001b[0m         \u001b[38;5;66;03m# Platform-specific: Buggy versions of Python.\u001b[39;00m\n\u001b[1;32m    907\u001b[0m         \u001b[38;5;66;03m# Close the connection when no data is returned\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    912\u001b[0m         \u001b[38;5;66;03m# not properly close the connection in all cases. There is\u001b[39;00m\n\u001b[1;32m    913\u001b[0m         \u001b[38;5;66;03m# no harm in redundantly calling close.\u001b[39;00m\n\u001b[1;32m    914\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mclose()\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/site-packages/urllib3/response.py:887\u001b[0m, in \u001b[0;36mHTTPResponse._fp_read\u001b[0;34m(self, amt, read1)\u001b[0m\n\u001b[1;32m    884\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread1(amt) \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread1()\n\u001b[1;32m    885\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    886\u001b[0m     \u001b[38;5;66;03m# StringIO doesn't like amt=None\u001b[39;00m\n\u001b[0;32m--> 887\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread()\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/http/client.py:466\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt)\u001b[0m\n\u001b[1;32m    463\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m amt \u001b[38;5;241m>\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength:\n\u001b[1;32m    464\u001b[0m     \u001b[38;5;66;03m# clip the read to the \"end of response\"\u001b[39;00m\n\u001b[1;32m    465\u001b[0m     amt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength\n\u001b[0;32m--> 466\u001b[0m s \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    467\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m s \u001b[38;5;129;01mand\u001b[39;00m amt:\n\u001b[1;32m    468\u001b[0m     \u001b[38;5;66;03m# Ideally, we would raise IncompleteRead if the content-length\u001b[39;00m\n\u001b[1;32m    469\u001b[0m     \u001b[38;5;66;03m# wasn't satisfied, but it might break compatibility.\u001b[39;00m\n\u001b[1;32m    470\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_close_conn()\n",
+      "File \u001b[0;32m~/.conda/envs/mvp_env/lib/python3.10/socket.py:717\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m    715\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m    716\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 717\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    718\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m    719\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "def main():\n",
+    "    ensure_dirs()\n",
+    "\n",
+    "    with open(META_FILE, \"w\", newline = \"\") as f:\n",
+    "        writer = csv.DictWriter(f, fieldnames=[\"id\", \"filename\", \"label\", \"family\", \"source\"])\n",
+    "        writer.writeheader()\n",
+    "\n",
+    "        # HuggingFace datasets\n",
+    "        for ds in AI_DATASETS:\n",
+    "            process_hf_dataset(ds, AI_DIR, writer)\n",
+    "\n",
+    "        for ds in REAL_DATASETS:\n",
+    "            process_hf_dataset(ds, REAL_DIR, writer)\n",
+    "\n",
+    "        # Kaggle datasets\n",
+    "        for ds in KAGGLE_DATASETS:\n",
+    "            raw_path = RAW_DIR / ds[\"name\"]\n",
+    "            download_kaggle_dataset(ds[\"kaggle_id\"], raw_path)\n",
+    "\n",
+    "            # AI images\n",
+    "            ingest_image_folder(src_dir = raw_path / \"ai\",\n",
+    "                                out_dir = AI_DIR / ds[\"name\"],\n",
+    "                                writer  = writer,\n",
+    "                                label   = \"ai\",\n",
+    "                                family  = ds[\"family\"],\n",
+    "                                source  = ds[\"name\"],\n",
+    "                               )\n",
+    "\n",
+    "            # REAL images\n",
+    "            ingest_image_folder(src_dir = raw_path / \"real\",\n",
+    "                                out_dir = REAL_DIR / ds[\"name\"],\n",
+    "                                writer  = writer,\n",
+    "                                label   = \"real\",\n",
+    "                                family  = \"photographic\",\n",
+    "                                source  = ds[\"name\"],\n",
+    "                               )\n",
+    "\n",
+    "    print(\"\\n✅ Dataset build complete\")\n",
+    "    print(f\"📄 Metadata saved at: {META_FILE}\")\n",
+    "\n",
+    "\n",
+    "# ===============================\n",
+    "# Entry Point\n",
+    "# ===============================\n",
+    "if __name__ == \"__main__\":\n",
+    "    main()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dd6e0834-7757-4daf-a8bc-37d58bc8debd",
+   "metadata": {},
+   "source": [
+    "# Post-Processing Attack Generator\n",
+    "\n",
+    "This notebook applies **real-world post-processing attacks** to an existing\n",
+    "image dataset to evaluate robustness of AI-image detectors.\n",
+    "\n",
+    "### Attacks Implemented\n",
+    "- JPEG recompression (quality loss)\n",
+    "- Resize / rescale (down + up)\n",
+    "- Gaussian blur\n",
+    "\n",
+    "### Why this matters\n",
+    "Most AI images in the wild are:\n",
+    "- Screenshot\n",
+    "- Re-encoded\n",
+    "- Uploaded to social media\n",
+    "- Slightly blurred or resized\n",
+    "\n",
+    "If a detector fails here, it fails in production."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cd680866-0f5c-4930-9262-5521317044fd",
+   "metadata": {},
+   "source": [
+    "## Imports & Config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b62168b8-aa38-47c6-8a00-0bb31e8774fa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ===============================\n",
+    "# Imports\n",
+    "# ===============================\n",
+    "\n",
+    "import csv\n",
+    "import uuid\n",
+    "from PIL import Image\n",
+    "from tqdm import tqdm\n",
+    "from io import BytesIO\n",
+    "from pathlib import Path\n",
+    "from PIL import ImageFilter\n",
+    "\n",
+    "\n",
+    "# ===============================\n",
+    "# Configuration\n",
+    "# ===============================\n",
+    "\n",
+    "BASE_DIR       = Path(\"tests/dataset\")\n",
+    "ATTACK_DIR     = BASE_DIR / \"attacked\"\n",
+    "META_IN        = BASE_DIR / \"metadata/dataset_index.csv\"\n",
+    "META_OUT       = BASE_DIR / \"metadata/dataset_index_attacked.csv\"\n",
+    "\n",
+    "ATTACK_DIR.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "JPEG_QUALITIES = [95, 75, 50]\n",
+    "RESIZE_SCALES  = [0.75, 0.5]\n",
+    "BLUR_RADII     = [0.8, 1.5]\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3c1de132-8245-42c7-9a82-63d6f0c27270",
+   "metadata": {},
+   "source": [
+    "## Load Existing Metadata\n",
+    "\n",
+    "We read the existing unified dataset index and create\n",
+    "new samples **derived from originals**."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a49e5629-ba32-4736-b0ab-e81084f58b78",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_metadata(path):\n",
+    "    with open(path, newline=\"\") as f:\n",
+    "        return list(csv.DictReader(f))\n",
+    "\n",
+    "\n",
+    "records = load_metadata(META_IN)\n",
+    "print(f\"Loaded {len(records)} original samples\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "44a0e31a-abdf-4564-8696-90aef3fc5ec4",
+   "metadata": {},
+   "source": [
+    "## Attack Primitives\n",
+    "\n",
+    "Each function:\n",
+    "- Takes a PIL Image\n",
+    "- Returns a new PIL Image\n",
+    "- Does **not** modify the original"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c6027902-897a-4a3b-a806-e715fea43050",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def jpeg_attack(image: Image.Image, quality: int) -> Image.Image:\n",
+    "    \"\"\"\n",
+    "    Simulate JPEG recompression\n",
+    "    \"\"\"\n",
+    "    buf = BytesIO()\n",
+    "    image.save(buf, \n",
+    "               format  = \"JPEG\", \n",
+    "               quality = quality,\n",
+    "              )\n",
+    "    \n",
+    "    buf.seek(0)\n",
+    "    return Image.open(buf).convert(\"RGB\")\n",
+    "\n",
+    "\n",
+    "def resize_attack(image: Image.Image, scale: float) -> Image.Image:\n",
+    "    \"\"\"\n",
+    "    Downscale and upscale image\n",
+    "    \"\"\"\n",
+    "    w, h         = image.size\n",
+    "    new_w, new_h = int(w * scale), int(h * scale)\n",
+    "    image_small  = image.resize((new_w, new_h), Image.BICUBIC)\n",
+    "    \n",
+    "    return image_small.resize((w, h), Image.BICUBIC)\n",
+    "\n",
+    "\n",
+    "def blur_attack(image: Image.Image, radius: float) -> Image.Image:\n",
+    "    \"\"\"\n",
+    "    Apply Gaussian blur\n",
+    "    \"\"\"\n",
+    "    return image.filter(ImageFilter.GaussianBlur(radius))\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "62d3ca44-b497-4397-bd35-04db9041d1e4",
+   "metadata": {},
+   "source": [
+    "## Attack Application Pipeline\n",
+    "\n",
+    "For each original image:\n",
+    "- Apply all attack variants\n",
+    "- Save attacked images\n",
+    "- Write **attack-aware metadata**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c702ab79-68b1-4191-8e87-f26ad0227348",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def apply_attacks(records, writer):\n",
+    "    for r in tqdm(records):\n",
+    "        src_path = Path(r[\"filename\"])\n",
+    "        \n",
+    "        if not src_path.exists():\n",
+    "            continue\n",
+    "\n",
+    "        try:\n",
+    "            img = Image.open(src_path).convert(\"RGB\")\n",
+    "            \n",
+    "        except Exception:\n",
+    "            continue\n",
+    "\n",
+    "        base_name = src_path.stem\n",
+    "        label     = r[\"label\"]\n",
+    "\n",
+    "        out_base  = ATTACK_DIR / r[\"source\"]\n",
+    "        out_base.mkdir(parents = True, exist_ok = True)\n",
+    "\n",
+    "        # --- JPEG ---\n",
+    "        for q in JPEG_QUALITIES:\n",
+    "            attacked = jpeg_attack(img, q)\n",
+    "            uid      = uuid.uuid4().hex\n",
+    "            out_path = out_base / f\"{uid}.png\"\n",
+    "\n",
+    "            attacked.save(out_path, optimize = True)\n",
+    "\n",
+    "            writer.writerow({**r,\n",
+    "                             \"id\"        : uid,\n",
+    "                             \"filename\"  : str(out_path),\n",
+    "                             \"attack\"    : f\"jpeg_q{q}\",\n",
+    "                             \"parent_id\" : r[\"id\"]\n",
+    "                           })\n",
+    "\n",
+    "        # --- Resize ---\n",
+    "        for s in RESIZE_SCALES:\n",
+    "            attacked = resize_attack(img, s)\n",
+    "            uid      = uuid.uuid4().hex\n",
+    "            out_path = out_base / f\"{uid}.png\"\n",
+    "\n",
+    "            attacked.save(out_path, optimize = True)\n",
+    "\n",
+    "            writer.writerow({**r,\n",
+    "                             \"id\"        : uid,\n",
+    "                             \"filename\"  : str(out_path),\n",
+    "                             \"attack\"    : f\"resize_{int(s*100)}\",\n",
+    "                             \"parent_id\" : r[\"id\"]\n",
+    "                           })\n",
+    "\n",
+    "        # --- Blur ---\n",
+    "        for b in BLUR_RADII:\n",
+    "            attacked = blur_attack(img, b)\n",
+    "            uid      = uuid.uuid4().hex\n",
+    "            out_path = out_base / f\"{uid}.png\"\n",
+    "\n",
+    "            attacked.save(out_path, optimize = True)\n",
+    "\n",
+    "            writer.writerow({**r,\n",
+    "                             \"id\"        : uid,\n",
+    "                             \"filename\"  : str(out_path),\n",
+    "                             \"attack\"    : f\"blur_{b}\",\n",
+    "                             \"parent_id\" : r[\"id\"]\n",
+    "                           })\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3736496a-7710-4593-86fd-818b2d58d535",
+   "metadata": {},
+   "source": [
+    "## Write Attack Metadata\n",
+    "\n",
+    "We preserve:\n",
+    "- Original label (ai / real)\n",
+    "- Source family\n",
+    "- Parent image ID\n",
+    "- Attack type\n",
+    "\n",
+    "This allows **per-attack evaluation later**."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "17f29f49-4137-4752-a098-1eba404ce352",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open(META_OUT, \"w\", newline = \"\") as f:\n",
+    "    fieldnames = list(records[0].keys()) + [\"attack\", \"parent_id\"]\n",
+    "    writer     = csv.DictWriter(f, fieldnames = fieldnames)\n",
+    "    writer.writeheader()\n",
+    "\n",
+    "    apply_attacks(records, writer)\n",
+    "\n",
+    "print(\"✅ Post-processing attacks generated\")\n",
+    "print(f\"Metadata saved to: {META_OUT}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f20b8f36-af23-49b8-8c6b-d93cf2a7ba07",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

reporter/__init__.py ADDED Viewed

File without changes

reporter/csv_reporter.py ADDED Viewed

	@@ -0,0 +1,462 @@

+# Dependencies
+import csv
+from pathlib import Path
+from typing import Optional
+from datetime import datetime
+from utils.logger import get_logger
+from config.settings import settings
+from config.constants import MetricType
+from config.schemas import AnalysisResult
+from utils.helpers import generate_unique_id
+from config.constants import DetectionStatus
+from config.schemas import BatchAnalysisResult
+from features.detailed_result_maker import DetailedResultMaker
+# Setup Logging
+logger = get_logger(__name__)
+class CSVReporter:
+    """
+    Professional CSV report generator
+    Features:
+    ---------
+    - Single image detailed reports
+    - Batch summary reports with statistics
+    - Detailed forensic data export
+    - Excel-compatible formatting
+    - UTF-8 encoding with BOM for international compatibility
+    """
+    def __init__(self):
+        """
+        Initialize CSV Reporter
+        """
+        self.detailed_maker = DetailedResultMaker()
+        logger.debug("CSVReporter initialized")
+    def export_batch_summary(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
+        """
+        Export batch analysis summary as CSV
+        Arguments:
+        ----------
+            batch_result { BatchAnalysisResult } : Complete batch analysis result
+            output_dir   { Path }                : Output directory (defaults to settings.REPORTS_DIR)
+        Returns:
+        --------
+                       { Path }                  : Path to generated CSV file
+        """
+        output_dir  = output_dir or settings.REPORTS_DIR
+        report_id   = generate_unique_id()
+        filename    = f"batch_summary_{report_id}.csv"
+        output_path = output_dir / filename
+        logger.info(f"Generating batch summary CSV: {filename}")
+        try:
+            with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
+                writer = csv.writer(f)
+                # Report Header
+                self._write_report_header(writer      = writer,
+                                          report_type = "Batch Analysis Summary",
+                                          timestamp   = batch_result.timestamp,
+                                         )
+                # Batch Statistics
+                self._write_batch_statistics(writer       = writer,
+                                             batch_result = batch_result,
+                                            )
+                # Main Results Table
+                self._write_batch_results_table(writer       = writer,
+                                                batch_result = batch_result,
+                                               )
+                # Footer
+                self._write_footer(writer = writer)
+            logger.info(f"Batch summary CSV generated: {output_path}")
+            return output_path
+        except Exception as e:
+            logger.error(f"Failed to generate batch summary CSV: {e}")
+            raise
+    def export_batch_detailed(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
+        """
+        Export detailed batch analysis with forensic data
+        Arguments:
+        ----------
+            batch_result { BatchAnalysisResult } : Complete batch analysis result
+            output_dir   { Path }                : Output directory (defaults to settings.REPORTS_DIR)
+        Returns:
+        --------
+                      { Path }                   : Path to generated CSV file
+        """
+        output_dir  = output_dir or settings.REPORTS_DIR
+        report_id   = generate_unique_id()
+        filename    = f"batch_detailed_{report_id}.csv"
+        output_path = output_dir / filename
+        logger.info(f"Generating detailed batch CSV: {filename}")
+        try:
+            with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
+                writer = csv.writer(f)
+                # Report Header
+                self._write_report_header(writer      = writer,
+                                          report_type = "Detailed Batch Analysis",
+                                          timestamp   = batch_result.timestamp,
+                                         )
+                # Process each image with full details
+                for idx, result in enumerate(batch_result.results, 1):
+                    self._write_detailed_image_section(writer        = writer,
+                                                       result        = result,
+                                                       image_number  = idx,
+                                                       total_images  = batch_result.processed,
+                                                      )
+                    # Add separator between images
+                    if (idx < batch_result.processed):
+                        writer.writerow([])
+                        writer.writerow(['=' * 100])
+                        writer.writerow([])
+                # Footer
+                self._write_footer(writer = writer)
+            logger.info(f"Detailed batch CSV generated: {output_path}")
+            return output_path
+        except Exception as e:
+            logger.error(f"Failed to generate detailed batch CSV: {e}")
+            raise
+    def export_single_detailed(self, result: AnalysisResult, output_dir: Optional[Path] = None) -> Path:
+        """
+        Export single image detailed analysis as CSV
+        Arguments:
+        ----------
+            result     { AnalysisResult } : Single image analysis result
+            output_dir { Path }           : Output directory (defaults to settings.REPORTS_DIR)
+        Returns:
+        --------
+                     { Path }             : Path to generated CSV file
+        """
+        output_dir  = output_dir or settings.REPORTS_DIR
+        report_id   = generate_unique_id()
+        filename    = f"single_analysis_{report_id}.csv"
+        output_path = output_dir / filename
+        logger.info(f"Generating single image CSV: {filename}")
+        try:
+            with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
+                writer = csv.writer(f)
+                # Report Header
+                self._write_report_header(writer     = writer,
+                                          report_type = "Single Image Analysis",
+                                          timestamp   = result.timestamp,
+                                         )
+                # Image Details
+                self._write_detailed_image_section(writer       = writer,
+                                                   result       = result,
+                                                   image_number = 1,
+                                                   total_images = 1,
+                                                  )
+                # Footer
+                self._write_footer(writer = writer)
+            logger.info(f"Single image CSV generated: {output_path}")
+            return output_path
+        except Exception as e:
+            logger.error(f"Failed to generate single image CSV: {e}")
+            raise
+    def export_metrics_comparison(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
+        """
+        Export metrics comparison table across all images
+        Arguments:
+        ----------
+            batch_result { BatchAnalysisResult } : Complete batch analysis result
+            output_dir   { Path }                : Output directory (defaults to settings.REPORTS_DIR)
+        Returns:
+        --------
+                       { Path }                  : Path to generated CSV file
+        """
+        output_dir  = output_dir or settings.REPORTS_DIR
+        report_id   = generate_unique_id()
+        filename    = f"metrics_comparison_{report_id}.csv"
+        output_path = output_dir / filename
+        logger.info(f"Generating metrics comparison CSV: {filename}")
+        try:
+            with open(output_path, 'w', newline = '', encoding = 'utf-8-sig') as f:
+                writer = csv.writer(f)
+                # Report Header
+                self._write_report_header(writer     = writer,
+                                          report_type = "Metrics Comparison",
+                                          timestamp   = batch_result.timestamp,
+                                         )
+                # Comparison Table Header
+                writer.writerow(['Metrics Comparison Across All Images'])
+                writer.writerow([])
+                header = ['Filename',
+                          'Overall Score',
+                          'Analysis Status',
+                          'Gradient Analysis Score',
+                          'Gradient Analysis Confidence',
+                          'Frequency Analysis Score',
+                          'Frequency Analysis Confidence',
+                          'Noise Analysis Score',
+                          'Noise Analysis Confidence',
+                          'Texture Analysis Score',
+                          'Texture Analysis Confidence',
+                          'Color Analysis Score',
+                          'Color Analysis Confidence',
+                          'Processing Time',
+                         ]
+                writer.writerow(header)
+                # Data rows
+                for result in batch_result.results:
+                    row = [result.filename,
+                           f"{result.overall_score:.3f}",
+                           result.status.value,
+                          ]
+                    # Add each metric's score and confidence
+                    for metric_type in [MetricType.GRADIENT, MetricType.FREQUENCY, MetricType.NOISE, MetricType.TEXTURE, MetricType.COLOR]:
+                        metric_result = result.metric_results.get(metric_type)
+                        if metric_result:
+                            row.append(f"{metric_result.score:.3f}")
+                            row.append(f"{metric_result.confidence:.3f}" if metric_result.confidence is not None else "N/A")
+                        else:
+                            row.extend(["N/A", "N/A"])
+                    row.append(f"{result.processing_time:.2f}s")
+                    writer.writerow(row)
+                # Footer
+                writer.writerow([])
+                self._write_footer(writer = writer)
+            logger.info(f"Metrics comparison CSV generated: {output_path}")
+            return output_path
+        except Exception as e:
+            logger.error(f"Failed to generate metrics comparison CSV: {e}")
+            raise
+    def _write_report_header(self, writer, report_type: str, timestamp: datetime) -> None:
+        """
+        Write CSV report header
+        """
+        writer.writerow(['=' * 100])
+        writer.writerow([f'AI Image Screener - {report_type}'])
+        writer.writerow([f'Generated: {timestamp.strftime("%Y-%m-%d %H:%M:%S")}'])
+        writer.writerow([f'Version: {settings.VERSION}'])
+        writer.writerow(['=' * 100])
+        writer.writerow([])
+    def _write_batch_statistics(self, writer, batch_result: BatchAnalysisResult) -> None:
+        """
+        Write batch statistics section
+        """
+        writer.writerow(['BATCH STATISTICS'])
+        writer.writerow([])
+        stats = [['Total Images', batch_result.total_images],
+                 ['Successfully Processed', batch_result.processed],
+                 ['Failed', batch_result.failed],
+                 ['Success Rate', f"{batch_result.summary.get('success_rate', 0)}%"],
+                 ['' , ''],
+                 ['Likely Authentic', batch_result.summary.get('likely_authentic', 0)],
+                 ['Review Required', batch_result.summary.get('review_required', 0)],
+                 ['', ''],
+                 ['Average Score', f"{batch_result.summary.get('avg_score', 0):.3f}"],
+                 ['Average Confidence', f"{batch_result.summary.get('avg_confidence', 0)}%"],
+                 ['Total Processing Time', f"{batch_result.total_processing_time:.2f}s"],
+                 ['Average Time per Image', f"{batch_result.summary.get('avg_proc_time', 0):.2f}s"],
+                ]
+        for row in stats:
+            writer.writerow(row)
+        writer.writerow([])
+        writer.writerow(['=' * 100])
+        writer.writerow([])
+    def _write_batch_results_table(self, writer, batch_result: BatchAnalysisResult) -> None:
+        """
+        Write batch results main table
+        """
+        writer.writerow(['ANALYSIS RESULTS'])
+        writer.writerow([])
+        # Table Header
+        header = ['Filename',
+                  'Image Size',
+                  'Analysis Status',
+                  'Overall Score',
+                  'Analysis Confidence (%)',
+                  'Top Warning Signals',
+                  'Recommendation',
+                  'Processing Time (s)',
+                 ]
+        writer.writerow(header)
+        # Data rows
+        for result in batch_result.results:
+            # Get top warning signals
+            top_signals = [s.name for s in result.signals if s.status.value in ['flagged', 'warning']][:2]
+            signals_str = "; ".join(top_signals) if top_signals else "All tests passed"
+            # Recommendation
+            if (result.status == DetectionStatus.REVIEW_REQUIRED):
+                recommendation = "Manual verification recommended"
+            else:
+                recommendation = "No further action needed"
+            row = [result.filename,
+                   f"{result.image_size[0]}×{result.image_size[1]}",
+                   result.status.value,
+                   f"{result.overall_score:.3f}",
+                   f"{result.confidence}%",
+                   signals_str,
+                   recommendation,
+                   f"{result.processing_time:.2f}",
+                  ]
+            writer.writerow(row)
+        writer.writerow([])
+    def _write_detailed_image_section(self, writer, result: AnalysisResult, image_number: int, total_images: int) -> None:
+        """
+        Write detailed section for single image
+        """
+        writer.writerow([f'IMAGE {image_number} OF {total_images}'])
+        writer.writerow([])
+        # Basic Information
+        writer.writerow(['BASIC INFORMATION'])
+        writer.writerow(['Filename', result.filename])
+        writer.writerow(['Status', result.status.value])
+        writer.writerow(['Overall Score', f"{result.overall_score:.3f}"])
+        writer.writerow(['Confidence', f"{result.confidence}%"])
+        writer.writerow(['Image Size', f"{result.image_size[0]}×{result.image_size[1]}"])
+        writer.writerow(['Processing Time', f"{result.processing_time:.2f}s"])
+        writer.writerow(['Timestamp', result.timestamp.isoformat()])
+        writer.writerow([])
+        # Detection Signals
+        writer.writerow(['DETECTION SIGNALS'])
+        writer.writerow([])
+        writer.writerow(['Metric Name', 'Metric Score', 'Analysis Status', 'Metric Confidence', 'Metric Explanation'])
+        for signal in result.signals:
+            metric_result  = result.metric_results.get(signal.metric_type)
+            confidence_str = f"{metric_result.confidence:.3f}" if metric_result.confidence is not None else "N/A"
+            writer.writerow([signal.name,
+                             f"{signal.score:.3f}",
+                             signal.status.value.upper(),
+                             confidence_str,
+                             signal.explanation.replace("\n", " "),
+                           ])
+        writer.writerow([])
+        # Detailed Forensics
+        writer.writerow(['FORENSIC DETAILS'])
+        writer.writerow([])
+        for metric_type in MetricType:
+            metric_result = result.metric_results.get(metric_type)
+            if not metric_result:
+                continue
+            metric_name = self.detailed_maker.metric_display_names.get(metric_type, metric_type.value)
+            writer.writerow([f'--- {metric_name} ---'])
+            writer.writerow(['Score', f"{metric_result.score:.3f}"])
+            writer.writerow(['Confidence', f"{metric_result.confidence:.3f}" if metric_result.confidence is not None else "N/A"])
+            # Write details
+            if metric_result.details:
+                for key, value in metric_result.details.items():
+                    if isinstance(value, dict):
+                        writer.writerow([f"  {key}:", ""])
+                        for sub_key, sub_value in value.items():
+                            writer.writerow([f"    {sub_key}", str(sub_value)])
+                    else:
+                        writer.writerow([f"  {key}", str(value)])
+            writer.writerow([])
+        # Recommendation
+        writer.writerow(['RECOMMENDATION'])
+        writer.writerow([])
+        if (result.status == DetectionStatus.REVIEW_REQUIRED):
+            writer.writerow(['Action', 'Manual verification recommended'])
+            writer.writerow(['Priority', 'HIGH' if (result.overall_score >= 0.85) else 'MEDIUM'])
+            writer.writerow(['Next Steps', 'Forensic analysis, reverse image search, metadata inspection'])
+        else:
+            writer.writerow(['Action', 'No immediate action needed'])
+            writer.writerow(['Priority', 'LOW'])
+            writer.writerow(['Next Steps', 'Proceed with normal workflow'])
+        writer.writerow([])
+    def _write_footer(self, writer) -> None:
+        """
+        Write CSV report footer
+        """
+        writer.writerow(['=' * 100])
+        writer.writerow(['Report generated by AI Image Screener'])
+        writer.writerow(['For questions or support, contact: support@aiimagescreener.com'])
+        writer.writerow(['DISCLAIMER: Results are indicative and should be verified manually for critical applications'])
+        writer.writerow(['=' * 100])

reporter/json_reporter.py ADDED Viewed

	@@ -0,0 +1,349 @@

+# Dependencies
+import json
+from typing import Dict
+from typing import List
+from pathlib import Path
+from typing import Optional
+from datetime import datetime
+from utils.logger import get_logger
+from config.settings import settings
+from config.schemas import AnalysisResult
+from utils.helpers import generate_unique_id
+from config.schemas import BatchAnalysisResult
+from features.detailed_result_maker import DetailedResultMaker
+# Setup Logging
+logger = get_logger(__name__)
+class JSONReporter:
+    """
+    Professional JSON report generator
+    Features:
+    ---------
+    - Machine-readable structured format
+    - API-friendly output
+    - Complete data preservation
+    - Pretty-printed for readability
+    - Nested structure for complex data
+    """
+    def __init__(self):
+        """
+        Initialize JSON Reporter
+        """
+        self.detailed_maker = DetailedResultMaker()
+        logger.debug("JSONReporter initialized")
+    def export_batch(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None, include_detailed: bool = True) -> Path:
+        """
+        Export batch analysis as JSON
+        Arguments:
+        ----------
+            batch_result     { BatchAnalysisResult } : Complete batch analysis result
+            output_dir       { Path }                : Output directory (defaults to settings.REPORTS_DIR)
+            include_detailed { bool }                : Include detailed forensic data
+        Returns:
+        --------
+                        { Path }                     : Path to generated JSON file
+        """
+        output_dir  = output_dir or settings.REPORTS_DIR
+        report_id   = generate_unique_id()
+        filename    = f"batch_report_{report_id}.json"
+        output_path = output_dir / filename
+        output_dir.mkdir(parents = True, exist_ok = True)
+        logger.info(f"Generating batch JSON: {filename}")
+        try:
+            # Build JSON structure
+            data = self._build_batch_json(batch_result     = batch_result,
+                                          include_detailed = include_detailed,
+                                         )
+            # Write to file
+            with open(output_path, 'w', encoding = 'utf-8') as f:
+                json.dump(obj          = data,
+                          fp           = f,
+                          indent       = 4,
+                          ensure_ascii = False,
+                          default      = str,
+                         )
+            logger.info(f"Batch JSON generated: {output_path}")
+            return output_path
+        except Exception as e:
+            logger.error(f"Failed to generate batch JSON: {e}")
+            raise
+    def export_single(self, result: AnalysisResult, output_dir: Optional[Path] = None, include_detailed: bool = True) -> Path:
+        """
+        Export single image analysis as JSON
+        Arguments:
+        ----------
+            result           { AnalysisResult } : Single image analysis result
+            output_dir            { Path }      : Output directory (defaults to settings.REPORTS_DIR)
+            include_detailed      { bool }      : Include detailed forensic data
+        Returns:
+        --------
+                      { Path }                  : Path to generated JSON file
+        """
+        output_dir  = output_dir or settings.REPORTS_DIR
+        report_id   = generate_unique_id()
+        filename    = f"single_report_{report_id}.json"
+        output_path = output_dir / filename
+        output_dir.mkdir(parents = True, exist_ok = True)
+        logger.info(f"Generating single image JSON: {filename}")
+        try:
+            # Build JSON structure
+            data = self._build_single_json(result           = result,
+                                           include_detailed = include_detailed,
+                                          )
+            # Write to file
+            with open(output_path, 'w', encoding = 'utf-8') as f:
+                json.dump(obj          = data,
+                          fp           = f,
+                          indent       = 4,
+                          ensure_ascii = False,
+                          default      = str,
+                         )
+            logger.info(f"Single image JSON generated: {output_path}")
+            return output_path
+        except Exception as e:
+            logger.error(f"Failed to generate single image JSON: {e}")
+            raise
+    def export_api_response(self, result: AnalysisResult) -> Dict:
+        """
+        Generate API-friendly JSON response (in-memory, no file)
+        Arguments:
+        ----------
+            result { AnalysisResult } : Analysis result
+        Returns:
+        --------
+                   { dict }           : API response dictionary
+        """
+        return {"success"   : True,
+                "timestamp" : datetime.now().isoformat(),
+                "version"   : settings.VERSION,
+                "data"      : self._build_single_json(result           = result,
+                                                      include_detailed = False,
+                                                     ),
+               }
+    def _build_batch_json(self, batch_result: BatchAnalysisResult, include_detailed: bool) -> Dict:
+        """
+        Build complete batch JSON structure
+        """
+        data = {"report_metadata" : self._build_metadata(report_type = "Batch Analysis",
+                                                         timestamp   = batch_result.timestamp,
+                                                        ),
+                "batch_summary"   : self._build_batch_summary(batch_result = batch_result),
+                "results"         : [],
+               }
+        # Add each image result
+        for result in batch_result.results:
+            image_data = self._build_image_data(result           = result,
+                                                include_detailed = include_detailed,
+                                               )
+            data["results"].append(image_data)
+        return data
+    def _build_single_json(self, result: AnalysisResult, include_detailed: bool) -> Dict:
+        """
+        Build single image JSON structure
+        """
+        data = {"report_metadata" : self._build_metadata(report_type = "Single Image Analysis",
+                                                         timestamp   = result.timestamp,
+                                                        ),
+                "analysis"        : self._build_image_data(result           = result,
+                                                           include_detailed = include_detailed,
+                                                          ),
+               }
+        return data
+    def _build_metadata(self, report_type: str, timestamp: datetime) -> Dict:
+        """
+        Build report metadata section
+        """
+        return {"report_type"    : report_type,
+                "generated_at"   : timestamp.isoformat(),
+                "generator"      : "AI Image Screener",
+                "version"        : settings.VERSION,
+                "format_version" : "1.0",
+               }
+    def _build_batch_summary(self, batch_result: BatchAnalysisResult) -> Dict:
+        """
+        Build batch summary section
+        """
+        return {"total_images"          : batch_result.total_images,
+                "processed"             : batch_result.processed,
+                "failed"                : batch_result.failed,
+                "success_rate"          : batch_result.summary.get('success_rate', 0),
+                "statistics"            : {"likely_authentic" : batch_result.summary.get('likely_authentic', 0),
+                                           "review_required"  : batch_result.summary.get('review_required', 0),
+                                           "avg_score"        : batch_result.summary.get('avg_score', 0.0),
+                                           "avg_confidence"   : batch_result.summary.get('avg_confidence', 0),
+                                           "avg_proc_time"    : batch_result.summary.get('avg_proc_time', 0.0),
+                                          },
+                "total_processing_time" : round(batch_result.total_processing_time, 2),
+               }
+    def _build_image_data(self, result: AnalysisResult, include_detailed: bool) -> Dict:
+        """
+        Build complete image data structure
+        """
+        image_data = {"filename"     : result.filename,
+                      "status"       : result.status.value,
+                      "overall"      : {"score"           : round(result.overall_score, 3),
+                                        "confidence"      : result.confidence,
+                                        "interpretation"  : self._interpret_score(score = result.overall_score),
+                                       },
+                      "image_info"   : {"size"            : {"width"  : result.image_size[0],
+                                                             "height" : result.image_size[1],
+                                                            },
+                                        "processing_time" : round(result.processing_time, 2),
+                                        "timestamp"       : result.timestamp.isoformat(),
+                                       },
+                      "signals"      : self._build_signals_data(result = result),
+                     }
+        # Add detailed forensics if requested
+        if include_detailed:
+            image_data["forensics"]       = self._build_forensics_data(result = result)
+            image_data["recommendations"] = self._build_recommendations(result = result)
+        return image_data
+    def _build_signals_data(self, result: AnalysisResult) -> List[Dict]:
+        """
+        Build signals data structure
+        """
+        signals = list()
+        for signal in result.signals:
+            metric_result = result.metric_results.get(signal.metric_type)
+            signal_data   = {"metric_name" : signal.name,
+                             "metric_type" : signal.metric_type.value,
+                             "score"       : round(signal.score, 3),
+                             "status"      : signal.status.value,
+                             "confidence"  : round(metric_result.confidence, 3) if (metric_result and metric_result.confidence is not None) else None,
+                             "explanation" : signal.explanation,
+                            }
+            signals.append(signal_data)
+        return signals
+    def _build_forensics_data(self, result: AnalysisResult) -> Dict:
+        """
+        Build detailed forensics data structure
+        """
+        forensics = dict()
+        for metric_type, metric_result in result.metric_results.items():
+            metric_name                  = self.detailed_maker.metric_display_names.get(metric_type, metric_type.value)
+            forensics[metric_type.value] = {"display_name" : metric_name,
+                                            "score"        : round(metric_result.score, 3),
+                                            "confidence"   : round(metric_result.confidence, 3) if (metric_result and metric_result.confidence is not None) else None,
+                                            "details"      : metric_result.details or {},
+                                            "key_findings" : self.detailed_maker.extract_key_findings(metric_type   = metric_type,
+                                                                                                      metric_result = metric_result,
+                                                                                                     ),
+                                           }
+        return forensics
+    def _build_recommendations(self, result: AnalysisResult) -> Dict:
+        """
+        Build recommendations structure
+        """
+        score = result.overall_score
+        if (score >= 0.85):
+            return {"action"      : "Immediate manual verification required",
+                    "priority"    : "HIGH",
+                    "risk_level"  : "CRITICAL",
+                    "next_steps"  : ["Forensic analysis", "Reverse image search", "Metadata inspection"],
+                    "confidence"  : "Very high likelihood of AI generation",
+                   }
+        elif (score >= 0.70):
+            return {"action"      : "Manual verification recommended",
+                    "priority"    : "MEDIUM",
+                    "risk_level"  : "HIGH",
+                    "next_steps"  : ["Visual inspection", "Compare with authentic samples"],
+                    "confidence"  : "High likelihood of AI generation",
+                   }
+        elif (score >= 0.50):
+            return {"action"      : "Optional review suggested",
+                    "priority"    : "LOW",
+                    "risk_level"  : "MEDIUM",
+                    "next_steps"  : ["Verify image source", "Check for inconsistencies"],
+                    "confidence"  : "Moderate indicators present",
+                   }
+        else:
+            return {"action"      : "No immediate action required",
+                    "priority"    : "NONE",
+                    "risk_level"  : "LOW",
+                    "next_steps"  : ["Proceed with normal workflow"],
+                    "confidence"  : "Low likelihood of AI generation",
+                   }
+    def _interpret_score(self, score: float) -> str:
+        """
+        Interpret score for human readability
+        """
+        if (score >= 0.85):
+            return "Very high suspicion"
+        elif (score >= 0.70):
+            return "High suspicion"
+        elif (score >= 0.50):
+            return "Moderate suspicion"
+        elif (score >= 0.30):
+            return "Low suspicion"
+        else:
+            return "Very low suspicion"

reporter/pdf_reporter.py ADDED Viewed

	@@ -0,0 +1,843 @@

+# Dependencies
+from pathlib import Path
+from typing import Optional, List, Dict, Any
+from datetime import datetime
+from utils.logger import get_logger
+from config.settings import settings
+from reportlab.platypus import Table, Spacer, Paragraph, PageBreak, Image as RLImage
+from reportlab.lib import colors
+from reportlab.lib.pagesizes import A4, LETTER
+from reportlab.lib.enums import TA_LEFT, TA_RIGHT, TA_CENTER, TA_JUSTIFY
+from reportlab.platypus import TableStyle
+from config.schemas import AnalysisResult
+from utils.helpers import generate_unique_id
+from config.constants import DetectionStatus
+from config.schemas import BatchAnalysisResult
+from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
+from reportlab.platypus import SimpleDocTemplate
+from features.detailed_result_maker import DetailedResultMaker
+from reportlab.lib.units import inch
+from reportlab.pdfgen import canvas
+import textwrap
+# Setup Logging
+logger = get_logger(__name__)
+class PDFReporter:
+    """
+    Professional-Grade PDF Report Generator for AI Image Analysis
+    Features:
+    ---------
+    - Comprehensive single image reports with full forensic details
+    - Multi-page batch reports with executive summary
+    - Enhanced visual hierarchy and color coding
+    - Detailed metric breakdowns with explanations
+    - Professional formatting and layout
+    - Statistical summaries and insights
+    """
+    # Enhanced Color Scheme
+    COLOR_PRIMARY = colors.HexColor('#0D47A1')        # Deep Blue
+    COLOR_SUCCESS = colors.HexColor('#1B5E20')        # Dark Green
+    COLOR_WARNING = colors.HexColor('#E65100')        # Deep Orange
+    COLOR_DANGER = colors.HexColor('#B71C1C')         # Dark Red
+    COLOR_INFO = colors.HexColor('#01579B')           # Light Blue
+    COLOR_NEUTRAL = colors.HexColor('#424242')        # Dark Grey
+    COLOR_HEADER_BG = colors.HexColor('#1565C0')      # Blue
+    COLOR_SUBHEADER_BG = colors.HexColor('#1976D2')   # Lighter Blue
+    COLOR_ALT_ROW = colors.HexColor('#F5F5F5')        # Light Grey
+    COLOR_LIGHT_BLUE = colors.HexColor('#E3F2FD')     # Very Light Blue
+    COLOR_LIGHT_GREEN = colors.HexColor('#E8F5E9')    # Very Light Green
+    COLOR_LIGHT_ORANGE = colors.HexColor('#FFF3E0')   # Very Light Orange
+    COLOR_LIGHT_RED = colors.HexColor('#FFEBEE')      # Very Light Red
+    def __init__(self):
+        self.detailed_maker = DetailedResultMaker()
+        self.styles = self._build_styles()
+        logger.debug("Enhanced PDFReporter initialized")
+    def export_single(self, result: AnalysisResult, output_dir: Optional[Path] = None) -> Path:
+        """Export comprehensive single image analysis report"""
+        output_dir = output_dir or settings.REPORTS_DIR
+        output_dir.mkdir(parents=True, exist_ok=True)
+        report_id = generate_unique_id()
+        filename = f"ai_screener_report_{report_id}.pdf"
+        output_path = output_dir / filename
+        logger.info(f"Generating comprehensive single image PDF: {filename}")
+        doc = SimpleDocTemplate(
+            str(output_path),
+            pagesize=LETTER,
+            rightMargin=30,
+            leftMargin=30,
+            topMargin=20,
+            bottomMargin=35
+        )
+        story = []
+        self._add_professional_header(story, "AI Image Analysis Report")
+        self._add_executive_summary_single(story, result)
+        story.append(PageBreak())
+        self._add_detailed_metrics_analysis(story, result)
+        story.append(PageBreak())
+        self._add_forensic_breakdown(story, result)
+        self._add_recommendations(story, result)
+        self._add_professional_footer(story)
+        doc.build(story, onFirstPage=self._add_watermark, onLaterPages=self._add_watermark)
+        logger.info(f"Single image report generated: {output_path}")
+        return output_path
+    def export_batch(self, batch_result: BatchAnalysisResult, output_dir: Optional[Path] = None) -> Path:
+        """Export comprehensive batch analysis report"""
+        output_dir = output_dir or settings.REPORTS_DIR
+        output_dir.mkdir(parents=True, exist_ok=True)
+        report_id = generate_unique_id()
+        filename = f"ai_screener_report_{report_id}.pdf"
+        output_path = output_dir / filename
+        num_images = len(batch_result.results)
+        logger.info(f"Generating batch PDF report: {filename} ({num_images} images)")
+        doc = SimpleDocTemplate(
+            str(output_path),
+            pagesize=LETTER,
+            rightMargin=30,
+            leftMargin=30,
+            topMargin=20,
+            bottomMargin=35
+        )
+        story = []
+        self._add_professional_header(story, "Batch Image Analysis Report")
+        self._add_batch_executive_summary(story, batch_result)
+        story.append(PageBreak())
+        self._add_batch_overview_table(story, batch_result.results)
+        story.append(PageBreak())
+        self._add_batch_metrics_analysis(story, batch_result.results)
+        story.append(PageBreak())
+        self._add_individual_results_summary(story, batch_result.results)
+        self._add_batch_recommendations(story, batch_result)
+        self._add_professional_footer(story)
+        doc.build(story, onFirstPage=self._add_watermark, onLaterPages=self._add_watermark)
+        logger.info(f"Batch report generated: {output_path}")
+        return output_path
+    def _build_styles(self):
+        """Build comprehensive style definitions"""
+        styles = getSampleStyleSheet()
+        styles.add(ParagraphStyle(
+            name='ReportTitle',
+            fontSize=18,
+            textColor=self.COLOR_PRIMARY,
+            alignment=TA_CENTER,
+            spaceAfter=4,
+            spaceBefore=2,
+            fontName='Helvetica-Bold'
+        ))
+        styles.add(ParagraphStyle(
+            name='ReportSubtitle',
+            fontSize=10,
+            textColor=self.COLOR_NEUTRAL,
+            alignment=TA_CENTER,
+            spaceAfter=6,
+            fontName='Helvetica'
+        ))
+        styles.add(ParagraphStyle(
+            name='SectionTitle',
+            fontSize=13,
+            textColor=self.COLOR_PRIMARY,
+            spaceBefore=10,
+            spaceAfter=6,
+            fontName='Helvetica-Bold'
+        ))
+        styles.add(ParagraphStyle(
+            name='SectionHeader',
+            fontSize=11,
+            textColor=self.COLOR_PRIMARY,
+            spaceBefore=8,
+            spaceAfter=5,
+            fontName='Helvetica-Bold'
+        ))
+        styles.add(ParagraphStyle(
+            name='SubHeader',
+            fontSize=9.5,
+            textColor=self.COLOR_PRIMARY,
+            spaceBefore=5,
+            spaceAfter=3,
+            fontName='Helvetica-Bold'
+        ))
+        styles.add(ParagraphStyle(
+            name='CustomBodyText',
+            fontSize=9,
+            leading=12,
+            alignment=TA_JUSTIFY,
+            spaceAfter=6
+        ))
+        styles.add(ParagraphStyle(
+            name='TableCell',
+            fontSize=8,
+            leading=10
+        ))
+        styles.add(ParagraphStyle(
+            name='TableCellSmall',
+            fontSize=7.5,
+            leading=9
+        ))
+        styles.add(ParagraphStyle(
+            name='TableHeader',
+            fontSize=8.5,
+            textColor=colors.white,
+            fontName='Helvetica-Bold',
+            leading=10,
+            alignment=TA_CENTER
+        ))
+        styles.add(ParagraphStyle(
+            name='Footer',
+            fontSize=7.5,
+            textColor=colors.grey,
+            alignment=TA_CENTER,
+            spaceAfter=2
+        ))
+        styles.add(ParagraphStyle(
+            name='Timestamp',
+            fontSize=8,
+            textColor=self.COLOR_NEUTRAL,
+            alignment=TA_CENTER,
+            spaceAfter=8
+        ))
+        return styles
+    def _add_watermark(self, canvas, doc):
+        """Add professional watermark"""
+        canvas.saveState()
+        canvas.setFont('Helvetica-Bold', 70)
+        canvas.setFillColorRGB(0.85, 0.85, 0.85, alpha=0.15)
+        canvas.rotate(45)
+        canvas.drawString(2.5*inch, -0.5*inch, "AI IMAGE SCREENER")
+        canvas.restoreState()
+    def _add_professional_header(self, story, title: str):
+        """Professional header with branding"""
+        story.append(Paragraph("🔍 AI IMAGE SCREENER", self.styles['ReportTitle']))
+        story.append(Spacer(1, 3))
+        timestamp_text = f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} | Version: {settings.VERSION}"
+        story.append(Paragraph(timestamp_text, self.styles['Timestamp']))
+        story.append(Paragraph(title, self.styles['SectionTitle']))
+        story.append(Spacer(1, 10))
+    def _add_executive_summary_single(self, story, result: AnalysisResult):
+        """Executive summary for single image"""
+        story.append(Paragraph("Executive Summary", self.styles['SectionTitle']))
+        story.append(Spacer(1, 5))
+        # Key findings box
+        status_color = self.COLOR_DANGER if result.status == DetectionStatus.REVIEW_REQUIRED else self.COLOR_SUCCESS
+        status_bg = self.COLOR_LIGHT_RED if result.status == DetectionStatus.REVIEW_REQUIRED else self.COLOR_LIGHT_GREEN
+        status_text = "⚠️ REVIEW REQUIRED" if result.status == DetectionStatus.REVIEW_REQUIRED else "✅ LIKELY AUTHENTIC"
+        key_findings = [
+            [Paragraph("<b>Overall Assessment</b>", self.styles['TableHeader'])],
+            [Paragraph(f"<font size=12 color='{status_color.hexval()}'><b>{status_text}</b></font>", self.styles['CustomBodyText'])],
+            [Paragraph(f"<b>Confidence:</b> {result.confidence}%", self.styles['CustomBodyText'])],
+            [Paragraph(f"<b>Overall Score:</b> {result.overall_score:.4f}", self.styles['CustomBodyText'])]
+        ]
+        findings_table = Table(key_findings, colWidths=[530])
+        findings_table.setStyle(TableStyle([
+            ('BACKGROUND', (0, 0), (-1, 0), self.COLOR_INFO),
+            ('BACKGROUND', (0, 1), (-1, -1), status_bg),
+            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
+            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
+            ('LEFTPADDING', (0, 0), (-1, -1), 12),
+            ('RIGHTPADDING', (0, 0), (-1, -1), 12),
+            ('TOPPADDING', (0, 0), (-1, -1), 8),
+            ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
+            ('BOX', (0, 0), (-1, -1), 1.5, self.COLOR_PRIMARY)
+        ]))
+        story.append(findings_table)
+        story.append(Spacer(1, 12))
+        # Image information
+        story.append(Paragraph("Image Information", self.styles['SectionHeader']))
+        info_data = [
+            [Paragraph("<b>Property</b>", self.styles['TableHeader']),
+             Paragraph("<b>Value</b>", self.styles['TableHeader'])],
+            [Paragraph("Filename", self.styles['TableCell']),
+             Paragraph(result.filename, self.styles['TableCell'])],
+            [Paragraph("Dimensions", self.styles['TableCell']),
+             Paragraph(f"{result.image_size[0]} × {result.image_size[1]} pixels", self.styles['TableCell'])],
+            [Paragraph("Aspect Ratio", self.styles['TableCell']),
+             Paragraph(f"{result.image_size[0]/result.image_size[1]:.2f}:1", self.styles['TableCell'])],
+            [Paragraph("Processing Time", self.styles['TableCell']),
+             Paragraph(f"{result.processing_time:.3f} seconds", self.styles['TableCell'])],
+            [Paragraph("Analysis Date", self.styles['TableCell']),
+             Paragraph(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), self.styles['TableCell'])]
+        ]
+        info_table = Table(info_data, colWidths=[180, 350])
+        info_table.setStyle(self._get_standard_table_style(len(info_data)))
+        story.append(info_table)
+        story.append(Spacer(1, 12))
+        # Detection signals summary
+        story.append(Paragraph("Detection Signals Summary", self.styles['SectionHeader']))
+        flagged = sum(1 for s in result.signals if s.status.value == 'flagged')
+        warning = sum(1 for s in result.signals if s.status.value == 'warning')
+        passed = sum(1 for s in result.signals if s.status.value == 'passed')
+        signals_data = [
+            [Paragraph("<b>Status</b>", self.styles['TableHeader']),
+             Paragraph("<b>Count</b>", self.styles['TableHeader']),
+             Paragraph("<b>Percentage</b>", self.styles['TableHeader'])],
+            [Paragraph("🔴 Flagged", self.styles['TableCell']),
+             Paragraph(f"<font color='red'><b>{flagged}</b></font>", self.styles['TableCell']),
+             Paragraph(f"{flagged/len(result.signals)*100:.1f}%", self.styles['TableCell'])],
+            [Paragraph("🟡 Warning", self.styles['TableCell']),
+             Paragraph(f"<font color='orange'><b>{warning}</b></font>", self.styles['TableCell']),
+             Paragraph(f"{warning/len(result.signals)*100:.1f}%", self.styles['TableCell'])],
+            [Paragraph("🟢 Passed", self.styles['TableCell']),
+             Paragraph(f"<font color='green'><b>{passed}</b></font>", self.styles['TableCell']),
+             Paragraph(f"{passed/len(result.signals)*100:.1f}%", self.styles['TableCell'])]
+        ]
+        signals_table = Table(signals_data, colWidths=[200, 165, 165])
+        signals_table.setStyle(self._get_standard_table_style(len(signals_data)))
+        story.append(signals_table)
+    def _add_detailed_metrics_analysis(self, story, result: AnalysisResult):
+        """Comprehensive metrics analysis"""
+        story.append(Paragraph("Detailed Metrics Analysis", self.styles['SectionTitle']))
+        story.append(Spacer(1, 8))
+        # All detection signals with full details
+        story.append(Paragraph("Detection Signals Breakdown", self.styles['SectionHeader']))
+        signal_data = [
+            [Paragraph("<b>Metric</b>", self.styles['TableHeader']),
+             Paragraph("<b>Score</b>", self.styles['TableHeader']),
+             Paragraph("<b>Status</b>", self.styles['TableHeader']),
+             Paragraph("<b>Explanation</b>", self.styles['TableHeader'])]
+        ]
+        for signal in result.signals:
+            status_badge = self._get_status_badge_html(signal.status.value)
+            # Wrap long explanations
+            explanation = signal.explanation
+            if len(explanation) > 120:
+                explanation = explanation[:120] + "..."
+            signal_data.append([
+                Paragraph(f"<b>{signal.name}</b>", self.styles['TableCell']),
+                Paragraph(f"{signal.score:.4f}", self.styles['TableCell']),
+                Paragraph(status_badge, self.styles['TableCell']),
+                Paragraph(explanation, self.styles['TableCellSmall'])
+            ])
+        signal_table = Table(signal_data, colWidths=[120, 60, 80, 270])
+        signal_table.setStyle(self._get_signal_table_style(len(signal_data)))
+        story.append(signal_table)
+    def _add_forensic_breakdown(self, story, result: AnalysisResult):
+        """Detailed forensic analysis breakdown"""
+        story.append(Paragraph("Forensic Analysis Breakdown", self.styles['SectionTitle']))
+        story.append(Spacer(1, 8))
+        for metric_type, metric_result in result.metric_results.items():
+            metric_name = self.detailed_maker.metric_display_names.get(metric_type, metric_type.value)
+            details = metric_result.details or {}
+            # Skip if error
+            if 'error' in details:
+                continue
+            story.append(Paragraph(metric_name, self.styles['SectionHeader']))
+            # Metric overview
+            overview_data = [
+                [Paragraph("<b>Property</b>", self.styles['TableHeader']),
+                 Paragraph("<b>Value</b>", self.styles['TableHeader'])],
+                [Paragraph("Score", self.styles['TableCell']),
+                 Paragraph(f"<b>{metric_result.score:.4f}</b>", self.styles['TableCell'])],
+                [Paragraph("Confidence", self.styles['TableCell']),
+                 Paragraph(f"{metric_result.confidence:.4f}" if metric_result.confidence else "N/A", self.styles['TableCell'])],
+                [Paragraph("Status", self.styles['TableCell']),
+                 Paragraph(self._get_metric_status_html(metric_result.score), self.styles['TableCell'])]
+            ]
+            overview_table = Table(overview_data, colWidths=[130, 400])
+            overview_table.setStyle(self._get_standard_table_style(len(overview_data)))
+            story.append(overview_table)
+            story.append(Spacer(1, 5))
+            # Detailed parameters
+            if details and len(details) > 0:
+                story.append(Paragraph("Detailed Parameters:", self.styles['SubHeader']))
+                param_data = [[Paragraph("<b>Parameter</b>", self.styles['TableHeader']),
+                              Paragraph("<b>Value</b>", self.styles['TableHeader'])]]
+                for key, value in details.items():
+                    if key in ['error', 'reason']:
+                        continue
+                    if isinstance(value, dict):
+                        for sub_key, sub_value in value.items():
+                            if sub_key not in ['reason', 'error']:
+                                formatted_value = self._format_value(sub_value)
+                                param_data.append([
+                                    Paragraph(f"  └─ {sub_key}", self.styles['TableCellSmall']),
+                                    Paragraph(formatted_value, self.styles['TableCellSmall'])
+                                ])
+                    else:
+                        formatted_value = self._format_value(value)
+                        param_data.append([
+                            Paragraph(key, self.styles['TableCell']),
+                            Paragraph(formatted_value, self.styles['TableCell'])
+                        ])
+                param_table = Table(param_data, colWidths=[200, 330])
+                param_table.setStyle(TableStyle([
+                    ('BACKGROUND', (0, 0), (-1, 0), self.COLOR_SUBHEADER_BG),
+                    ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
+                    ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
+                    ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, self.COLOR_ALT_ROW]),
+                    ('VALIGN', (0, 0), (-1, -1), 'TOP'),
+                    ('LEFTPADDING', (0, 0), (-1, -1), 8),
+                    ('RIGHTPADDING', (0, 0), (-1, -1), 8),
+                    ('TOPPADDING', (0, 0), (-1, -1), 4),
+                    ('BOTTOMPADDING', (0, 0), (-1, -1), 4)
+                ]))
+                story.append(param_table)
+            story.append(Spacer(1, 8))
+    def _add_recommendations(self, story, result: AnalysisResult):
+        """Add actionable recommendations"""
+        story.append(Paragraph("Recommendations & Next Steps", self.styles['SectionTitle']))
+        story.append(Spacer(1, 8))
+        if result.status == DetectionStatus.REVIEW_REQUIRED:
+            rec_text = """
+            <b>⚠️ MANUAL REVIEW REQUIRED</b><br/>
+            This image has been flagged for manual review based on multiple detection signals.
+            Recommended actions:<br/>
+            • Conduct visual inspection by trained personnel<br/>
+            • Cross-reference with source verification<br/>
+            • Consider additional forensic analysis if high stakes<br/>
+            • Document findings for audit trail
+            """
+            rec_color = self.COLOR_LIGHT_RED
+            border_color = self.COLOR_DANGER
+        else:
+            rec_text = """
+            <b>✅ NO IMMEDIATE ACTION REQUIRED</b><br/>
+            This image appears to be authentic based on current analysis. However:<br/>
+            • Continue monitoring for evolving AI techniques<br/>
+            • Consider periodic re-screening for critical assets<br/>
+            • Maintain chain of custody documentation<br/>
+            • Stay updated on latest detection methodologies
+            """
+            rec_color = self.COLOR_LIGHT_GREEN
+            border_color = self.COLOR_SUCCESS
+        rec_table = Table([[Paragraph(rec_text, self.styles['CustomBodyText'])]], colWidths=[530])
+        rec_table.setStyle(TableStyle([
+            ('BACKGROUND', (0, 0), (-1, -1), rec_color),
+            ('BOX', (0, 0), (-1, -1), 2, border_color),
+            ('LEFTPADDING', (0, 0), (-1, -1), 15),
+            ('RIGHTPADDING', (0, 0), (-1, -1), 15),
+            ('TOPPADDING', (0, 0), (-1, -1), 12),
+            ('BOTTOMPADDING', (0, 0), (-1, -1), 12)
+        ]))
+        story.append(rec_table)
+    def _add_batch_executive_summary(self, story, batch_result: BatchAnalysisResult):
+        """Executive summary for batch analysis"""
+        story.append(Paragraph("Executive Summary", self.styles['SectionTitle']))
+        story.append(Spacer(1, 8))
+        # Key metrics
+        summary_data = [
+            [Paragraph("<b>Metric</b>", self.styles['TableHeader']),
+             Paragraph("<b>Value</b>", self.styles['TableHeader']),
+             Paragraph("<b>Details</b>", self.styles['TableHeader'])],
+            [Paragraph("Total Images", self.styles['TableCell']),
+             Paragraph(f"<b>{batch_result.total_images}</b>", self.styles['TableCell']),
+             Paragraph("Images submitted for analysis", self.styles['TableCellSmall'])],
+            [Paragraph("Successfully Processed", self.styles['TableCell']),
+             Paragraph(f"<font color='green'><b>{batch_result.processed}</b></font>", self.styles['TableCell']),
+             Paragraph(f"{batch_result.summary.get('success_rate', 0)}% success rate", self.styles['TableCellSmall'])],
+            [Paragraph("Failed", self.styles['TableCell']),
+             Paragraph(f"<font color='red'><b>{batch_result.failed}</b></font>", self.styles['TableCell']),
+             Paragraph("Processing errors encountered", self.styles['TableCellSmall'])],
+            [Paragraph("Likely Authentic", self.styles['TableCell']),
+             Paragraph(f"<font color='green'><b>{batch_result.summary.get('likely_authentic', 0)}</b></font>", self.styles['TableCell']),
+             Paragraph("Images passing authenticity checks", self.styles['TableCellSmall'])],
+            [Paragraph("Review Required", self.styles['TableCell']),
+             Paragraph(f"<font color='red'><b>{batch_result.summary.get('review_required', 0)}</b></font>", self.styles['TableCell']),
+             Paragraph("Images flagged for manual review", self.styles['TableCellSmall'])],
+            [Paragraph("Average Score", self.styles['TableCell']),
+             Paragraph(f"<b>{batch_result.summary.get('avg_score', 0):.4f}</b>", self.styles['TableCell']),
+             Paragraph("Mean authenticity score across batch", self.styles['TableCellSmall'])],
+            [Paragraph("Average Processing Time", self.styles['TableCell']),
+             Paragraph(f"<b>{batch_result.summary.get('avg_proc_time', 0):.3f}s</b>", self.styles['TableCell']),
+             Paragraph("Per-image processing duration", self.styles['TableCellSmall'])],
+        ]
+        summary_table = Table(summary_data, colWidths=[150, 130, 250])
+        summary_table.setStyle(self._get_standard_table_style(len(summary_data)))
+        story.append(summary_table)
+    def _add_batch_overview_table(self, story, results: List[AnalysisResult]):
+        """Comprehensive batch overview"""
+        story.append(Paragraph("Batch Overview Matrix", self.styles['SectionTitle']))
+        story.append(Spacer(1, 8))
+        header = [
+            Paragraph("<b>#</b>", self.styles['TableHeader']),
+            Paragraph("<b>Filename</b>", self.styles['TableHeader']),
+            Paragraph("<b>Image Size</b>", self.styles['TableHeader']),
+            Paragraph("<b>Score</b>", self.styles['TableHeader']),
+            Paragraph("<b>Status</b>", self.styles['TableHeader']),
+            Paragraph("<b>Top Signal</b>", self.styles['TableHeader']),
+            Paragraph("<b>Time(s)</b>", self.styles['TableHeader'])
+        ]
+        data = [header]
+        for idx, result in enumerate(results, 1):
+            top_signal = max(result.signals, key=lambda s: s.score)
+            status_badge = self._get_status_badge_short(result.status.value)
+            data.append([
+                Paragraph(str(idx), self.styles['TableCell']),
+                Paragraph(result.filename, self.styles['TableCellSmall']),
+                Paragraph(f"{result.image_size[0]}×{result.image_size[1]}", self.styles['TableCellSmall']),
+                Paragraph(f"<b>{result.overall_score:.3f}</b>", self.styles['TableCell']),
+                Paragraph(status_badge, self.styles['TableCellSmall']),
+                Paragraph(f"{top_signal.name}: {top_signal.score:.2f}", self.styles['TableCellSmall']),
+                Paragraph(f"{result.processing_time:.2f}", self.styles['TableCell'])
+            ])
+        table = Table(data, colWidths=[25, 155, 65, 50, 70, 120, 45])
+        table.setStyle(self._get_pivot_table_style(len(data)))
+        story.append(table)
+    def _add_batch_metrics_analysis(self, story, results: List[AnalysisResult]):
+        """Detailed metrics analysis for batch"""
+        story.append(Paragraph("Metric-wise Analysis", self.styles['SectionTitle']))
+        story.append(Spacer(1, 8))
+        metric_configs = {
+            'gradient': {
+                'name': 'Gradient-Field PCA Analysis',
+                'keys': ['eigenvalue_ratio', 'gradient_vectors_sampled'],
+                'labels': ['Eigenvalue\nRatio', 'Vectors\nSampled']
+            },
+            'frequency': {
+                'name': 'Frequency Domain Analysis (FFT)',
+                'keys': ['hf_ratio', 'roughness', 'spectral_deviation'],
+                'labels': ['HF Ratio', 'Roughness', 'Spec.\nDeviation']
+            },
+            'noise': {
+                'name': 'Noise Pattern Analysis',
+                'keys': ['mean_noise', 'cv', 'patches_valid'],
+                'labels': ['Mean Noise', 'CV', 'Patches\nValid']
+            },
+            'texture': {
+                'name': 'Texture Statistical Analysis',
+                'keys': ['smooth_ratio', 'contrast_mean', 'entropy_mean'],
+                'labels': ['Smooth\nRatio', 'Mean\nContrast', 'Mean\nEntropy']
+            },
+            'color': {
+                'name': 'Color Distribution Analysis',
+                'keys': ['saturation_stats.mean_saturation', 'saturation_stats.high_sat_ratio'],
+                'labels': ['Mean\nSaturation', 'High Saturation\nRatio']
+            }
+        }
+        for metric_key, config in metric_configs.items():
+            story.append(Paragraph(config['name'], self.styles['SectionHeader']))
+            # Build header
+            header = [
+                Paragraph("<b>#</b>", self.styles['TableHeader']),
+                Paragraph("<b>Filename</b>", self.styles['TableHeader']),
+                Paragraph("<b>Score</b>", self.styles['TableHeader']),
+                Paragraph("<b>Confidence</b>", self.styles['TableHeader'])
+            ]
+            for label in config['labels']:
+                header.append(Paragraph(f"<b>{label}</b>", self.styles['TableHeader']))
+            data = [header]
+            for idx, result in enumerate(results, 1):
+                metric_result = result.metric_results.get(metric_key)
+                if not metric_result:
+                    continue
+                details = metric_result.details or {}
+                row = [
+                    Paragraph(str(idx), self.styles['TableCellSmall']),
+                    Paragraph(result.filename, self.styles['TableCellSmall']),
+                    Paragraph(f"<b>{metric_result.score:.3f}</b>", self.styles['TableCellSmall']),
+                    Paragraph(f"{metric_result.confidence:.2f}" if metric_result.confidence else "N/A",
+                             self.styles['TableCellSmall'])
+                ]
+                # Extract values
+                for key in config['keys']:
+                    value = self._extract_nested_value(details, key)
+                    formatted_value = self._format_value(value, decimal_places=3)
+                    row.append(Paragraph(formatted_value, self.styles['TableCellSmall']))
+                data.append(row)
+            # Dynamic column widths
+            num_detail_cols = len(config['labels'])
+            detail_col_width = (530 - 25 - 140 - 45 - 35) // num_detail_cols
+            col_widths = [25, 140, 45, 35] + [detail_col_width] * num_detail_cols
+            table = Table(data, colWidths=col_widths)
+            table.setStyle(self._get_pivot_table_style(len(data)))
+            story.append(table)
+            story.append(Spacer(1, 10))
+    def _add_individual_results_summary(self, story, results: List[AnalysisResult]):
+        """Individual image summaries in batch"""
+        story.append(Paragraph("Individual Image Summaries", self.styles['SectionTitle']))
+        story.append(Spacer(1, 8))
+        for idx, result in enumerate(results, 1):
+            if idx > 1:
+                story.append(Spacer(1, 12))
+            story.append(Paragraph(f"Image {idx}: {result.filename}", self.styles['SectionHeader']))
+            # Quick stats
+            quick_data = [
+                [Paragraph("<b>Property</b>", self.styles['TableHeader']),
+                 Paragraph("<b>Value</b>", self.styles['TableHeader'])],
+                [Paragraph("Score", self.styles['TableCell']),
+                 Paragraph(f"<b>{result.overall_score:.4f}</b>", self.styles['TableCell'])],
+                [Paragraph("Status", self.styles['TableCell']),
+                 Paragraph(self._get_status_badge_html(result.status.value), self.styles['TableCell'])],
+                [Paragraph("Confidence", self.styles['TableCell']),
+                 Paragraph(f"{result.confidence}%", self.styles['TableCell'])],
+                [Paragraph("Dimensions", self.styles['TableCell']),
+                 Paragraph(f"{result.image_size[0]} × {result.image_size[1]}", self.styles['TableCell'])],
+            ]
+            quick_table = Table(quick_data, colWidths=[120, 410])
+            quick_table.setStyle(self._get_standard_table_style(len(quick_data)))
+            story.append(quick_table)
+            story.append(Spacer(1, 5))
+            # Top 3 signals
+            story.append(Paragraph("Top Detection Signals:", self.styles['SubHeader']))
+            top_signals = sorted(result.signals, key=lambda s: s.score, reverse=True)[:3]
+            signal_data = [[
+                Paragraph("<b>Signal</b>", self.styles['TableHeader']),
+                Paragraph("<b>Score</b>", self.styles['TableHeader']),
+                Paragraph("<b>Status</b>", self.styles['TableHeader'])
+            ]]
+            for signal in top_signals:
+                signal_data.append([
+                    Paragraph(signal.name, self.styles['TableCellSmall']),
+                    Paragraph(f"{signal.score:.3f}", self.styles['TableCellSmall']),
+                    Paragraph(self._get_status_badge_html(signal.status.value), self.styles['TableCellSmall'])
+                ])
+            signal_table = Table(signal_data, colWidths=[200, 165, 165])
+            signal_table.setStyle(self._get_standard_table_style(len(signal_data)))
+            story.append(signal_table)
+    def _add_batch_recommendations(self, story, batch_result: BatchAnalysisResult):
+        """Batch-level recommendations"""
+        story.append(Paragraph("Batch Analysis Recommendations", self.styles['SectionTitle']))
+        story.append(Spacer(1, 8))
+        review_count = batch_result.summary.get('review_required', 0)
+        total = batch_result.total_images
+        if review_count > 0:
+            rec_text = f"""
+            <b>⚠️ ACTION REQUIRED</b><br/>
+            {review_count} out of {total} images require manual review ({review_count/total*100:.1f}%).<br/>
+            <br/>
+            <b>Recommended Actions:</b><br/>
+            • Prioritize high-risk images for immediate review<br/>
+            • Assign qualified personnel for verification<br/>
+            • Document review findings and decisions<br/>
+            • Consider additional forensic analysis for flagged images<br/>
+            • Update screening protocols based on findings
+            """
+            rec_color = self.COLOR_LIGHT_ORANGE
+            border_color = self.COLOR_WARNING
+        else:
+            rec_text = f"""
+            <b>✅ BATCH PASSED SCREENING</b><br/>
+            All {total} images appear to be authentic based on current analysis.<br/>
+            <br/>
+            <b>Recommended Actions:</b><br/>
+            • Archive results for audit trail<br/>
+            • Maintain periodic re-screening schedule<br/>
+            • Monitor for evolving AI generation techniques<br/>
+            • Update detection models regularly<br/>
+            • Document chain of custody
+            """
+            rec_color = self.COLOR_LIGHT_GREEN
+            border_color = self.COLOR_SUCCESS
+        rec_table = Table([[Paragraph(rec_text, self.styles['CustomBodyText'])]], colWidths=[530])
+        rec_table.setStyle(TableStyle([
+            ('BACKGROUND', (0, 0), (-1, -1), rec_color),
+            ('BOX', (0, 0), (-1, -1), 2, border_color),
+            ('LEFTPADDING', (0, 0), (-1, -1), 15),
+            ('RIGHTPADDING', (0, 0), (-1, -1), 15),
+            ('TOPPADDING', (0, 0), (-1, -1), 12),
+            ('BOTTOMPADDING', (0, 0), (-1, -1), 12)
+        ]))
+        story.append(rec_table)
+    def _add_professional_footer(self, story):
+        """Professional footer with disclaimers"""
+        story.append(Spacer(1, 15))
+        disclaimer_lines = [
+            "⚠️ <b>DISCLAIMER</b>: This report provides probabilistic screening results based on current AI detection methodologies, not definitive verdicts.",
+            "Results should be manually verified for critical applications. False positive rate: ~10-20%. Accuracy may vary with image quality and AI generation techniques.",
+            "This analysis should be used as one component of a comprehensive verification process, not as the sole basis for decision-making.",
+            "© 2025 AI Image Screener | Confidential Report | For Authorized Use Only"
+        ]
+        for line in disclaimer_lines:
+            story.append(Paragraph(line, self.styles['Footer']))
+            story.append(Spacer(1, 2))
+    # Helper methods
+    def _get_standard_table_style(self, num_rows):
+        """Standard table styling"""
+        return TableStyle([
+            ('BACKGROUND', (0, 0), (-1, 0), self.COLOR_HEADER_BG),
+            ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
+            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
+            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
+            ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, self.COLOR_ALT_ROW]),
+            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
+            ('LEFTPADDING', (0, 0), (-1, -1), 8),
+            ('RIGHTPADDING', (0, 0), (-1, -1), 8),
+            ('TOPPADDING', (0, 0), (-1, -1), 5),
+            ('BOTTOMPADDING', (0, 0), (-1, -1), 5)
+        ])
+    def _get_signal_table_style(self, num_rows):
+        """Signal table styling with color coding"""
+        return TableStyle([
+            ('BACKGROUND', (0, 0), (-1, 0), self.COLOR_HEADER_BG),
+            ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
+            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
+            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
+            ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, self.COLOR_ALT_ROW]),
+            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
+            ('LEFTPADDING', (0, 0), (-1, -1), 6),
+            ('RIGHTPADDING', (0, 0), (-1, -1), 6),
+            ('TOPPADDING', (0, 0), (-1, -1), 5),
+            ('BOTTOMPADDING', (0, 0), (-1, -1), 5)
+        ])
+    def _get_pivot_table_style(self, num_rows):
+        """Pivot table styling"""
+        return TableStyle([
+            ('BACKGROUND', (0, 0), (-1, 0), self.COLOR_HEADER_BG),
+            ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
+            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
+            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
+            ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, self.COLOR_ALT_ROW]),
+            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
+            ('ALIGN', (0, 0), (0, -1), 'CENTER'),
+            ('LEFTPADDING', (0, 0), (-1, -1), 4),
+            ('RIGHTPADDING', (0, 0), (-1, -1), 4),
+            ('TOPPADDING', (0, 0), (-1, -1), 4),
+            ('BOTTOMPADDING', (0, 0), (-1, -1), 4)
+        ])
+    def _get_status_badge_html(self, status: str) -> str:
+        """Generate status badge HTML"""
+        if status == "REVIEW_REQUIRED" or status == "flagged":
+            return "<font color='#B71C1C'><b>🔴 FLAGGED</b></font>"
+        elif status == "warning":
+            return "<font color='#E65100'><b>🟡 WARNING</b></font>"
+        else:
+            return "<font color='#1B5E20'><b>🟢 PASSED</b></font>"
+    def _get_status_badge_short(self, status: str) -> str:
+        """Short status badge"""
+        if status == "REVIEW_REQUIRED":
+            return "<font color='#B71C1C'><b>⚠️ REVIEW REQUIRED</b></font>"
+        else:
+            return "<font color='#1B5E20'><b>✓ LIKELY AUTHENTIC</b></font>"
+    def _get_metric_status_html(self, score: float) -> str:
+        """Metric status based on score"""
+        if score > 0.7:
+            return "<font color='#B71C1C'><b>High Risk</b></font>"
+        elif score > 0.5:
+            return "<font color='#E65100'><b>Moderate Risk</b></font>"
+        else:
+            return "<font color='#1B5E20'><b>Low Risk</b></font>"
+    def _format_value(self, value: Any, decimal_places: int = 4) -> str:
+        """Format value for display"""
+        if value is None or (isinstance(value, dict) and 'reason' in value):
+            return "N/A"
+        elif isinstance(value, float):
+            return f"{value:.{decimal_places}f}"
+        elif isinstance(value, (int, str, bool)):
+            return str(value)
+        else:
+            return "N/A"
+    def _extract_nested_value(self, details: dict, key: str) -> Any:
+        """Extract nested dictionary values"""
+        if '.' in key:
+            parts = key.split('.')
+            value = details
+            for part in parts:
+                if isinstance(value, dict):
+                    value = value.get(part, None)
+                else:
+                    return None
+            return value
+        else:
+            return details.get(key, None)

requirements.txt ADDED Viewed

	@@ -0,0 +1,72 @@

+# =========================================
+# AI Image Screener - Requirements
+# Python 3.11+
+# =========================================
+# Core Web Framework
+fastapi==0.104.1
+uvicorn[standard]==0.24.0
+python-multipart==0.0.6
+# Data Validation & Settings
+pydantic==2.5.0
+pydantic-settings==2.1.0
+python-dotenv==1.0.0
+# Image Processing
+opencv-python-headless==4.8.1.78
+Pillow==10.1.0
+numpy==1.26.2
+scipy==1.11.4
+pandas==2.0.3
+# File Type Detection
+python-magic==0.4.27
+# PDF Generation
+reportlab==4.0.7
+# ASGI Server Production
+gunicorn==21.2.0
+# Logging & Monitoring
+colorama==0.4.6
+# Security
+python-jose[cryptography]==3.3.0
+passlib[bcrypt]==1.7.4
+# CORS & Middleware
+starlette==0.27.0
+# Testing (optional but recommended)
+pytest==7.4.3
+pytest-cov==4.1.0
+pytest-asyncio==0.21.1
+httpx==0.25.2
+# Code Quality (optional)
+black==23.12.0
+flake8==6.1.0
+isort==5.13.2
+mypy==1.7.1
+# Development Tools (optional)
+ipython==8.18.1
+ipdb==0.13.13
+# =========================================
+# Platform-Specific Notes:
+# =========================================
+#
+# Linux (Ubuntu/Debian):
+#   sudo apt-get install -y libmagic1
+#
+# macOS:
+#   brew install libmagic
+#
+# Windows:
+#   pip install python-magic-bin==0.4.14
+#   (alternative to python-magic for Windows)
+#
+# =========================================

setup.sh ADDED Viewed

	@@ -0,0 +1,138 @@

+#!/bin/bash
+# =========================================
+# AI Image Screener - Setup Script
+# Run this after cloning the repository
+# =========================================
+set -e  # Exit on error
+echo "================================================"
+echo "AI Image Screener - Setup"
+echo "================================================"
+echo ""
+# Check Python version
+echo "📌 Checking Python version..."
+python_version=$(python3 --version 2>&1 | awk '{print $2}')
+required_version="3.11"
+if [ "$(printf '%s\n' "$required_version" "$python_version" | sort -V | head -n1)" != "$required_version" ]; then
+    echo "❌ Error: Python 3.11+ required (found $python_version)"
+    exit 1
+fi
+echo "✅ Python $python_version detected"
+echo ""
+# Create virtual environment
+echo "📦 Creating virtual environment..."
+if [ ! -d "venv" ]; then
+    python3 -m venv venv
+    echo "✅ Virtual environment created"
+else
+    echo "⚠️  Virtual environment already exists"
+fi
+echo ""
+# Activate virtual environment
+echo "🔌 Activating virtual environment..."
+source venv/bin/activate || {
+    echo "❌ Failed to activate virtual environment"
+    exit 1
+}
+echo "✅ Virtual environment activated"
+echo ""
+# Upgrade pip
+echo "⬆️  Upgrading pip..."
+pip install --upgrade pip setuptools wheel > /dev/null 2>&1
+echo "✅ pip upgraded"
+echo ""
+# Install dependencies
+echo "📚 Installing dependencies..."
+if [ -f "requirements.txt" ]; then
+    pip install -r requirements.txt
+    echo "✅ Dependencies installed"
+else
+    echo "❌ Error: requirements.txt not found"
+    exit 1
+fi
+echo ""
+# Create directories
+echo "📁 Creating required directories..."
+mkdir -p data/uploads data/reports data/cache logs
+touch data/uploads/.gitkeep
+touch data/reports/.gitkeep
+touch data/cache/.gitkeep
+touch logs/.gitkeep
+echo "✅ Directories created"
+echo ""
+# Create .env file if not exists
+echo "⚙️  Setting up environment..."
+if [ ! -f ".env" ]; then
+    if [ -f ".env.example" ]; then
+        cp .env.example .env
+        echo "✅ Created .env from .env.example"
+        echo "   ⚠️  Please review and update .env with your settings"
+    else
+        echo "⚠️  .env.example not found, skipping .env creation"
+    fi
+else
+    echo "⚠️  .env already exists"
+fi
+echo ""
+# Check system dependencies
+echo "🔍 Checking system dependencies..."
+missing_deps=()
+if ! command -v identify &> /dev/null; then
+    missing_deps+=("ImageMagick")
+fi
+if [ ${#missing_deps[@]} -gt 0 ]; then
+    echo "⚠️  Optional dependencies missing:"
+    for dep in "${missing_deps[@]}"; do
+        echo "   - $dep"
+    done
+    echo "   The app will work, but some features may be limited."
+else
+    echo "✅ All optional dependencies present"
+fi
+echo ""
+# Test import
+echo "🧪 Testing installation..."
+if python3 -c "import fastapi, cv2, numpy, scipy, PIL, reportlab" 2>/dev/null; then
+    echo "✅ All core packages import successfully"
+else
+    echo "❌ Some packages failed to import"
+    echo "   Try: pip install -r requirements.txt"
+    exit 1
+fi
+echo ""
+echo "================================================"
+echo "✨ Setup Complete!"
+echo "================================================"
+echo ""
+echo "Next steps:"
+echo ""
+echo "1. Review and update .env file (optional)"
+echo "2. Start the server:"
+echo "   $ source venv/bin/activate"
+echo "   $ python app.py"
+echo ""
+echo "3. Open browser:"
+echo "   http://localhost:8005"
+echo ""
+echo "4. Or build Docker image:"
+echo "   $ docker build -t ai-image-screener ."
+echo "   $ docker run -p 7860:7860 ai-image-screener"
+echo ""
+echo "📖 Documentation: docs/"
+echo "🐛 Issues: https://github.com/satyakimitra/ai-image-screener/issues"
+echo ""

ui/index.html ADDED Viewed

	@@ -0,0 +1,2248 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>AI Image Screener</title>
+    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
+    <link rel="icon" type="image/x-icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>🔍</text></svg>">
+    <style>
+        * {
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+        }
+        :root {
+            --primary: #2d3748;
+            --primary-light: #4a5568;
+            --primary-dark: #1a202c;
+            --secondary: #718096;
+            --accent: #38a169;
+            --accent-light: #68d391;
+            --accent-dark: #2f855a;
+            --warning: #d69e2e;
+            --danger: #e53e3e;
+            --background: #f7fafc;
+            --card-bg: #ffffff;
+            --border: #e2e8f0;
+            --text: #2d3748;
+            --text-light: #718096;
+            --shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06);
+            --shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
+        }
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
+            background-color: var(--background);
+            color: var(--text);
+            line-height: 1.6;
+            min-height: 100vh;
+        }
+        .container {
+            max-width: 1200px;
+            margin: 0 auto;
+            padding: 10px;
+        }
+        /* Header */
+        header {
+            background: linear-gradient(135deg, var(--primary-dark) 0%, #2d3748 100%);
+            color: white;
+            padding: 1.5rem 0;
+            margin-bottom: 1rem;
+            border-radius: 0 0 1rem 1rem;
+            box-shadow: var(--shadow-lg);
+        }
+        .header-content {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            flex-wrap: wrap;
+            gap: 1rem;
+        }
+        .logo {
+            display: flex;
+            align-items: center;
+            gap: 0.75rem;
+        }
+        .logo-icon {
+            width: 40px;
+            height: 40px;
+            background: linear-gradient(135deg, var(--accent) 0%, var(--accent-light) 100%);
+            border-radius: 8px;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            font-size: 1.25rem;
+        }
+        .logo-text h1 {
+            font-size: 1.5rem;
+            font-weight: 600;
+        }
+        .logo-text .tagline {
+            font-size: 0.875rem;
+            opacity: 0.8;
+            margin-top: 0.125rem;
+        }
+        /* Hero Section */
+        .hero {
+            background: linear-gradient(135deg, #2d3748 0%, #4a5568 100%);
+            border-radius: 1rem;
+            padding: 1.5rem 1.5rem;
+            text-align: center;
+            margin-bottom: 1rem;
+            color: white;
+            box-shadow: var(--shadow-lg);
+        }
+        .hero h2 {
+            font-size: 2.5rem;
+            margin-bottom: 1rem;
+            color: white;
+        }
+        .hero-subtitle {
+            font-size: 1.25rem;
+            color: rgba(255, 255, 255, 0.9);
+            margin-bottom: 2rem;
+            max-width: 800px;
+            margin-left: auto;
+            margin-right: auto;
+        }
+        .performance-badge {
+            display: inline-block;
+            padding: 0.75rem 1.5rem;
+            background-color: rgba(255, 255, 255, 0.1);
+            color: white;
+            border: 1px solid rgba(255, 255, 255, 0.2);
+            border-radius: 2rem;
+            font-size: 0.875rem;
+            margin-bottom: 1.5rem;
+            backdrop-filter: blur(10px);
+        }
+        .cta-button {
+            background: linear-gradient(135deg, var(--accent) 0%, var(--accent-dark) 100%);
+            color: white;
+            border: none;
+            padding: 1rem 2.5rem;
+            font-size: 1.125rem;
+            border-radius: 0.5rem;
+            cursor: pointer;
+            font-weight: 600;
+            transition: all 0.3s;
+            display: inline-flex;
+            align-items: center;
+            justify-content: center;
+            gap: 0.75rem;
+            box-shadow: 0 4px 6px rgba(56, 161, 105, 0.2);
+            min-width: 200px;
+            margin: 0 auto;
+        }
+        .cta-button:hover {
+            transform: translateY(-2px);
+            box-shadow: 0 6px 12px rgba(56, 161, 105, 0.3);
+        }
+        /* Tab Navigation */
+        .tabs {
+            display: flex;
+            gap: 0.5rem;
+            margin-bottom: 2rem;
+            border-bottom: 2px solid var(--border);
+            padding-bottom: 0;
+            background-color: white;
+            border-radius: 0.5rem;
+            padding: 0.5rem;
+            box-shadow: var(--shadow);
+        }
+        .tab-button {
+            padding: 1rem 2rem;
+            background: none;
+            border: none;
+            border-bottom: 3px solid transparent;
+            color: var(--text-light);
+            font-weight: 600;
+            cursor: pointer;
+            transition: all 0.3s;
+            position: relative;
+            flex: 1;
+            text-align: center;
+            border-radius: 0.25rem;
+        }
+        .tab-button.active {
+            color: var(--accent);
+            border-bottom-color: var(--accent);
+            background-color: rgba(56, 161, 105, 0.05);
+        }
+        .tab-button:hover:not(.active) {
+            color: var(--primary);
+            background-color: rgba(0, 0, 0, 0.02);
+        }
+        .tab-content {
+            display: none;
+            animation: fadeIn 0.5s ease;
+        }
+        .tab-content.active {
+            display: block;
+        }
+        @keyframes fadeIn {
+            from { opacity: 0; transform: translateY(10px); }
+            to { opacity: 1; transform: translateY(0); }
+        }
+        /* Features Grid */
+        .features-grid {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
+            gap: 1.5rem;
+            margin-bottom: 2rem;
+        }
+        .feature-card {
+            background-color: white;
+            border-radius: 1rem;
+            padding: 1.5rem;
+            border: 1px solid var(--border);
+            transition: all 0.3s;
+        }
+        .feature-card:hover {
+            transform: translateY(-5px);
+            box-shadow: var(--shadow-lg);
+            border-color: var(--accent-light);
+        }
+        .feature-icon {
+            font-size: 2rem;
+            color: var(--accent);
+            margin-bottom: 1rem;
+        }
+        /* Metrics Grid - Updated for Detailed Cards */
+        .metrics-grid {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(350px, 1fr));
+            gap: 1.5rem;
+            margin-bottom: 2rem;
+        }
+        @media (max-width: 768px) {
+            .metrics-grid {
+                grid-template-columns: 1fr;
+            }
+        }
+        .metric-card {
+            background-color: white;
+            border-radius: 1rem;
+            padding: 1.5rem;
+            border: 1px solid var(--border);
+            transition: all 0.3s;
+            display: flex;
+            flex-direction: column;
+            height: 100%;
+        }
+        .metric-card:hover {
+            transform: translateY(-5px);
+            box-shadow: var(--shadow-lg);
+        }
+        .metric-header {
+            display: flex;
+            align-items: center;
+            gap: 1rem;
+            margin-bottom: 1rem;
+            padding-bottom: 1rem;
+            border-bottom: 1px solid var(--border);
+        }
+        .metric-icon {
+            width: 3rem;
+            height: 3rem;
+            border-radius: 0.75rem;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            color: white;
+            font-size: 1.5rem;
+            flex-shrink: 0;
+        }
+        .metric-title {
+            font-size: 1.25rem;
+            font-weight: 600;
+            color: var(--primary);
+        }
+        .metric-weight {
+            display: inline-block;
+            padding: 0.25rem 0.75rem;
+            background-color: rgba(56, 161, 105, 0.1);
+            color: var(--accent);
+            border-radius: 2rem;
+            font-size: 0.875rem;
+            font-weight: 600;
+            margin-left: auto;
+        }
+        .metric-description {
+            color: var(--text-light);
+            margin-bottom: 1rem;
+            line-height: 1.6;
+        }
+        .metric-details {
+            margin-top: auto;
+            padding-top: 1rem;
+            border-top: 1px solid var(--border);
+        }
+        .detail-item {
+            display: flex;
+            justify-content: space-between;
+            margin-bottom: 0.5rem;
+            font-size: 0.875rem;
+        }
+        .detail-label {
+            color: var(--text-light);
+        }
+        .detail-value {
+            color: var(--primary);
+            font-weight: 500;
+        }
+        /* How-to-use Steps */
+        .steps-grid {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
+            gap: 1.5rem;
+            margin-bottom: 2rem;
+        }
+        .step-card {
+            text-align: center;
+            padding: 2rem;
+            background-color: white;
+            border-radius: 1rem;
+            border: 1px solid var(--border);
+            transition: all 0.3s;
+        }
+        .step-card:hover {
+            transform: translateY(-5px);
+            border-color: var(--accent);
+        }
+        .step-number {
+            display: inline-flex;
+            align-items: center;
+            justify-content: center;
+            width: 3rem;
+            height: 3rem;
+            background: linear-gradient(135deg, var(--accent) 0%, var(--accent-light) 100%);
+            color: white;
+            border-radius: 50%;
+            font-size: 1.5rem;
+            font-weight: bold;
+            margin-bottom: 1rem;
+        }
+        /* Cards */
+        .card {
+            background-color: var(--card-bg);
+            border-radius: 1rem;
+            font-size: 1.00rem;
+            box-shadow: var(--shadow);
+            padding: 1.0rem;
+            margin-bottom: 0.5rem;
+            border: 1px solid var(--border);
+        }
+        .card-header {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            margin-bottom: 0.5rem;
+            padding-bottom: 0.75rem;
+            border-bottom: 0.5px solid var(--border);
+        }
+        .card-title {
+            font-size: 1.25rem;
+            font-weight: 600;
+            display: flex;
+            align-items: center;
+            gap: 0.5rem;
+        }
+        /* Upload Section */
+        .upload-area {
+            border: 2px dashed var(--border);
+            border-radius: 1rem;
+            padding: 3rem 1.5rem;
+            text-align: center;
+            transition: all 0.3s ease;
+            cursor: pointer;
+            margin-bottom: 1rem;
+            background-color: #f8fafc;
+        }
+        .upload-area:hover, .upload-area.dragover {
+            border-color: var(--accent);
+            background-color: rgba(56, 161, 105, 0.05);
+        }
+        .upload-icon {
+            font-size: 3rem;
+            color: var(--accent);
+            margin-bottom: 1rem;
+        }
+        .upload-button {
+            background-color: var(--accent);
+            color: white;
+            border: none;
+            padding: 0.75rem 1.5rem;
+            border-radius: 0.5rem;
+            font-weight: 600;
+            cursor: pointer;
+            transition: all 0.3s;
+            display: inline-flex;
+            align-items: center;
+            justify-content: center;
+            gap: 0.5rem;
+        }
+        .upload-button:hover {
+            background-color: var(--accent-dark);
+            transform: translateY(-2px);
+        }
+        /* Thumbnail Grid */
+        .thumbnail-grid {
+            display: grid;
+            grid-template-columns: repeat(auto-fill, minmax(120px, 1fr));
+            gap: 1rem;
+            margin-top: 1rem;
+            max-height: 300px;
+            overflow-y: auto;
+            padding: 0.5rem;
+        }
+        .thumbnail-item {
+            position: relative;
+            border-radius: 0.5rem;
+            overflow: hidden;
+            border: 2px solid var(--border);
+            transition: all 0.3s;
+            height: 120px;
+        }
+        .thumbnail-item:hover {
+            border-color: var(--accent);
+            transform: translateY(-2px);
+        }
+        .thumbnail-img {
+            width: 100%;
+            height: 100%;
+            object-fit: cover;
+        }
+        .thumbnail-overlay {
+            position: absolute;
+            bottom: 0;
+            left: 0;
+            right: 0;
+            background: linear-gradient(transparent, rgba(0, 0, 0, 0.7));
+            padding: 0.5rem;
+            color: white;
+            font-size: 0.75rem;
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+        }
+        .remove-thumbnail {
+            background: rgba(229, 62, 62, 0.8);
+            border: none;
+            color: white;
+            width: 24px;
+            height: 24px;
+            border-radius: 50%;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            cursor: pointer;
+            transition: all 0.3s;
+        }
+        .remove-thumbnail:hover {
+            background: var(--danger);
+            transform: scale(1.1);
+        }
+        /* Start Analysis Button - Centered */
+        .start-analysis-btn {
+            background: linear-gradient(135deg, var(--accent) 0%, var(--accent-dark) 100%);
+            color: white;
+            border: none;
+            padding: 1rem 2rem;
+            font-size: 1.125rem;
+            border-radius: 0.5rem;
+            cursor: pointer;
+            font-weight: 600;
+            transition: all 0.3s;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            gap: 0.75rem;
+            width: 100%;
+            margin-top: 1.5rem;
+            box-shadow: 0 4px 6px rgba(56, 161, 105, 0.2);
+        }
+        .start-analysis-btn:hover:not(:disabled) {
+            transform: translateY(-2px);
+            box-shadow: 0 6px 12px rgba(56, 161, 105, 0.3);
+        }
+        .start-analysis-btn:disabled {
+            opacity: 0.5;
+            cursor: not-allowed;
+            transform: none !important;
+        }
+        .start-analysis-btn .btn-content {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            gap: 0.75rem;
+        }
+        /* Progress Section */
+        .progress-container {
+            margin-top: 1rem;
+            padding: 1rem;
+            background-color: white;
+            border-radius: 0.5rem;
+            box-shadow: var(--shadow);
+            border: 1px solid var(--border);
+        }
+        .progress-header {
+            display: flex;
+            justify-content: space-between;
+            margin-bottom: 0.5rem;
+        }
+        .progress-bar {
+            height: 0.5rem;
+            background-color: var(--border);
+            border-radius: 1rem;
+            overflow: hidden;
+            margin-bottom: 0.5rem;
+        }
+        .progress-fill {
+            height: 100%;
+            background: linear-gradient(90deg, var(--accent), var(--accent-light));
+            border-radius: 1rem;
+            width: 0%;
+            transition: width 0.5s ease;
+        }
+        /* Results Section */
+        .results-summary {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
+            gap: 1rem;
+            margin-bottom: 1.5rem;
+        }
+        .summary-card {
+            text-align: center;
+            padding: 1.5rem;
+            border-radius: 1rem;
+            background-color: white;
+            border: 1px solid var(--border);
+            transition: transform 0.3s;
+        }
+        .summary-card:hover {
+            transform: translateY(-3px);
+        }
+        .summary-value {
+            font-size: 2rem;
+            font-weight: 700;
+            margin-bottom: 0.25rem;
+        }
+        .summary-label {
+            font-size: 0.875rem;
+            color: var(--text-light);
+        }
+        .results-table-container {
+            overflow-x: auto;
+            margin-top: 1.5rem;
+            border-radius: 0.5rem;
+            border: 1px solid var(--border);
+            background-color: white;
+        }
+        .results-table {
+            width: 100%;
+            border-collapse: collapse;
+        }
+        .results-table th {
+            background-color: #f8fafc;
+            color: var(--text);
+            padding: 1rem;
+            text-align: left;
+            font-weight: 600;
+            border-bottom: 1px solid var(--border);
+        }
+        .results-table td {
+            padding: 1rem;
+            border-bottom: 1px solid var(--border);
+            vertical-align: middle;
+        }
+        .results-table tr:hover {
+            background-color: #f8fafc;
+        }
+        .status-badge {
+            display: inline-block;
+            padding: 0.25rem 0.75rem;
+            border-radius: 2rem;
+            font-size: 0.75rem;
+            font-weight: 600;
+            white-space: nowrap;
+        }
+        .status-authentic {
+            background-color: rgba(56, 161, 105, 0.1);
+            color: var(--accent);
+            border: 1px solid rgba(56, 161, 105, 0.3);
+        }
+        .status-review {
+            background-color: rgba(214, 158, 46, 0.1);
+            color: var(--warning);
+            border: 1px solid rgba(214, 158, 46, 0.3);
+        }
+        .score-indicator {
+            display: flex;
+            align-items: center;
+            gap: 0.5rem;
+            min-width: 150px;
+        }
+        .score-bar {
+            flex: 1;
+            height: 0.5rem;
+            background-color: var(--border);
+            border-radius: 1rem;
+            overflow: hidden;
+        }
+        .score-fill {
+            height: 100%;
+            border-radius: 1rem;
+            transition: width 0.5s ease;
+        }
+        .score-low {
+            background: linear-gradient(90deg, var(--accent), var(--accent-light));
+        }
+        .score-medium {
+            background: linear-gradient(90deg, var(--warning), #ecc94b);
+        }
+        .score-high {
+            background: linear-gradient(90deg, var(--danger), #fc8181);
+        }
+        /* Detailed Analysis */
+        .detailed-analysis {
+            margin-top: 2rem;
+            padding: 1.5rem;
+            background-color: white;
+            border-radius: 1rem;
+            border: 1px solid var(--border);
+        }
+        .analysis-header {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            margin-bottom: 1.5rem;
+            cursor: pointer;
+            padding: 0.5rem;
+            border-radius: 0.5rem;
+            transition: background-color 0.3s;
+        }
+        .analysis-header:hover {
+            background-color: #f8fafc;
+        }
+        .analysis-content {
+            display: none;
+            padding-top: 1rem;
+            border-top: 1px solid var(--border);
+            animation: fadeIn 0.5s ease;
+        }
+        .analysis-content.show {
+            display: block;
+        }
+        .signal-grid {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
+            gap: 1rem;
+            margin-bottom: 1.5rem;
+        }
+        .signal-card {
+            padding: 1rem;
+            border-radius: 0.5rem;
+            border: 1px solid var(--border);
+            background-color: #f8fafc;
+        }
+        .signal-header {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            margin-bottom: 0.5rem;
+        }
+        .signal-badge {
+            padding: 0.25rem 0.5rem;
+            border-radius: 0.375rem;
+            font-size: 0.75rem;
+            font-weight: 500;
+            border: 1px solid;
+            white-space: nowrap;
+        }
+        .signal-passed {
+            background-color: rgba(56, 161, 105, 0.1);
+            color: var(--accent);
+            border-color: rgba(56, 161, 105, 0.3);
+        }
+        .signal-warning {
+            background-color: rgba(214, 158, 46, 0.1);
+            color: var(--warning);
+            border-color: rgba(214, 158, 46, 0.3);
+        }
+        .signal-flagged {
+            background-color: rgba(229, 62, 62, 0.1);
+            color: var(--danger);
+            border-color: rgba(229, 62, 62, 0.3);
+        }
+        /* Footer - Reduced spacing */
+        footer {
+            margin-top: 0.1rem;
+            padding-top: 0.1rem;
+            border-top: 1px solid var(--border);
+            color: var(--text-light);
+            font-size: 0.875rem;
+            text-align: center;
+        }
+        .footer-links {
+            display: flex;
+            justify-content: center;
+            gap: 2rem;
+            margin-bottom: 1rem;
+            flex-wrap: wrap;
+        }
+        .footer-link {
+            color: var(--accent);
+            text-decoration: none;
+            transition: color 0.3s;
+            font-size: 0.875rem;
+        }
+        .footer-link:hover {
+            color: var(--accent-dark);
+            text-decoration: underline;
+        }
+        /* Action buttons */
+        .action-button {
+            padding: 0.5rem 1rem;
+            border: none;
+            border-radius: 0.5rem;
+            font-weight: 500;
+            cursor: pointer;
+            transition: all 0.3s;
+            display: inline-flex;
+            align-items: center;
+            justify-content: center;
+            gap: 0.5rem;
+            font-size: 0.875rem;
+        }
+        .primary-action {
+            background-color: var(--accent);
+            color: white;
+        }
+        .primary-action:hover {
+            background-color: var(--accent-dark);
+        }
+        .secondary-action {
+            background-color: white;
+            color: var(--accent);
+            border: 1px solid var(--accent);
+        }
+        .secondary-action:hover {
+            background-color: rgba(56, 161, 105, 0.1);
+        }
+        /* Loading overlay */
+        .loading-overlay {
+            position: fixed;
+            top: 0;
+            left: 0;
+            right: 0;
+            bottom: 0;
+            background-color: rgba(0, 0, 0, 0.5);
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            z-index: 1000;
+            opacity: 0;
+            visibility: hidden;
+            transition: all 0.3s;
+        }
+        .loading-overlay.active {
+            opacity: 1;
+            visibility: visible;
+        }
+        .loading-spinner {
+            width: 60px;
+            height: 60px;
+            border: 4px solid rgba(255, 255, 255, 0.3);
+            border-radius: 50%;
+            border-top-color: white;
+            animation: spin 1s ease-in-out infinite;
+        }
+        @keyframes spin {
+            to { transform: rotate(360deg); }
+        }
+        /* Toast notification */
+        .toast {
+            position: fixed;
+            top: 20px;
+            right: 20px;
+            padding: 1rem 1.5rem;
+            background-color: white;
+            color: var(--text);
+            border-radius: 0.5rem;
+            box-shadow: var(--shadow-lg);
+            z-index: 1000;
+            transform: translateX(100%);
+            transition: transform 0.3s ease;
+            max-width: 300px;
+            border-left: 4px solid var(--accent);
+        }
+        .toast.show {
+            transform: translateX(0);
+        }
+        .toast.error {
+            border-left-color: var(--danger);
+        }
+        .toast.warning {
+            border-left-color: var(--warning);
+        }
+        /* Utility classes */
+        .hidden {
+            display: none !important;
+        }
+        .visible {
+            display: block !important;
+        }
+        .text-center {
+            text-align: center;
+        }
+        .mt-1 { margin-top: 0.5rem; }
+        .mt-2 { margin-top: 1rem; }
+        .mt-3 { margin-top: 1.5rem; }
+        .mb-1 { margin-bottom: 0.5rem; }
+        .mb-2 { margin-bottom: 1rem; }
+        .mb-3 { margin-bottom: 1.5rem; }
+        /* Responsive adjustments */
+        @media (max-width: 768px) {
+            .hero h2 {
+                font-size: 2rem;
+            }
+            .hero-subtitle {
+                font-size: 1rem;
+            }
+            .tabs {
+                flex-direction: column;
+            }
+            .tab-button {
+                width: 100%;
+                text-align: center;
+            }
+            .metrics-grid {
+                grid-template-columns: 1fr;
+            }
+            .signal-grid {
+                grid-template-columns: 1fr;
+            }
+            .footer-links {
+                flex-direction: column;
+                gap: 0.75rem;
+            }
+        }
+        /* Spinner for loading button */
+        .spinner {
+            display: inline-block;
+            width: 1rem;
+            height: 1rem;
+            border: 2px solid rgba(255, 255, 255, 0.3);
+            border-radius: 50%;
+            border-top-color: white;
+            animation: spin 1s ease-in-out infinite;
+            margin-right: 0.5rem;
+        }
+    </style>
+</head>
+<body>
+    <!-- Loading Overlay -->
+    <div class="loading-overlay" id="loadingOverlay">
+        <div class="loading-spinner"></div>
+    </div>
+    <!-- Toast Notification -->
+    <div class="toast hidden" id="toast"></div>
+    <!-- Header -->
+    <header>
+        <div class="container">
+            <div class="header-content">
+                <div class="logo">
+                    <div class="logo-icon">
+                        <i class="fas fa-filter"></i>
+                    </div>
+                    <div class="logo-text">
+                        <h1>AI Image Screener</h1>
+                        <div class="tagline">First-pass screening for bulk workflows</div>
+                    </div>
+                </div>
+            </div>
+        </div>
+    </header>
+    <!-- Main Content -->
+    <div class="container">
+        <!-- Landing Screen -->
+        <div id="landingScreen">
+            <!-- Hero Section -->
+            <section class="hero">
+                <h2>AI Image Screener</h2>
+                <p class="hero-subtitle">
+                    A practical first-pass AI image screening system designed to identify images that require human review based on statistical and physical patterns.
+                </p>
+                <div class="performance-badge">
+                    <i class="fas fa-chart-line"></i> Screening accuracy: 40-90% detection rate across AI models
+                </div>
+                <br>
+                <button class="cta-button" id="tryNowBtn">
+                    <div class="btn-content">
+                        <i class="fas fa-play-circle"></i> Start Screening
+                    </div>
+                </button>
+            </section>
+            <!-- Tab Navigation -->
+            <div class="tabs">
+                <button class="tab-button active" data-tab="features">Features</button>
+                <button class="tab-button" data-tab="metrics">Detection Metrics</button>
+                <button class="tab-button" data-tab="howto">How to Use</button>
+            </div>
+            <!-- Features Tab -->
+            <div class="tab-content active" id="featuresTab">
+                <div class="features-grid">
+                    <div class="feature-card">
+                        <div class="feature-icon">
+                            <i class="fas fa-bolt"></i>
+                        </div>
+                        <h3>Fast Processing</h3>
+                        <p>Parallel processing for batch analysis with real-time progress tracking</p>
+                    </div>
+                    <div class="feature-card">
+                        <div class="feature-icon">
+                            <i class="fas fa-chart-bar"></i>
+                        </div>
+                        <h3>Multi-Signal Detection</h3>
+                        <p>Five independent statistical detectors with weighted ensemble aggregation</p>
+                    </div>
+                    <div class="feature-card">
+                        <div class="feature-icon">
+                            <i class="fas fa-file-export"></i>
+                        </div>
+                        <h3>Comprehensive Reports</h3>
+                        <p>Export results in CSV, JSON, and PDF formats for integration and documentation</p>
+                    </div>
+                    <div class="feature-card">
+                        <div class="feature-icon">
+                            <i class="fas fa-sliders-h"></i>
+                        </div>
+                        <h3>Adjustable Sensitivity</h3>
+                        <p>Conservative, balanced, and aggressive modes for different use cases</p>
+                    </div>
+                </div>
+                <!-- Caution Notice -->
+                <div class="card">
+                    <div class="card-header">
+                        <h3 class="card-title"><i class="fas fa-exclamation-triangle" style="color: var(--warning);"></i> Important Notice</h3>
+                    </div>
+                    <p style="color: var(--text-light);">
+                        <strong>This is not a perfect AI detector. It's a screening tool that helps reduce manual review workload by flagging suspicious images for human verification.</strong>
+                    </p>
+                </div>
+            </div>
+            <!-- Metrics Tab - Updated with Detailed Cards -->
+            <div class="tab-content" id="metricsTab">
+                <div class="metrics-grid">
+                    <div class="metric-card">
+                        <div class="metric-header">
+                            <div class="metric-icon" style="background: linear-gradient(135deg, #4a5568 0%, #718096 100%);">
+                                <i class="fas fa-wave-square"></i>
+                            </div>
+                            <div>
+                                <div class="metric-title">Gradient-Field PCA</div>
+                            </div>
+                            <span class="metric-weight">Weight: 30%</span>
+                        </div>
+                        <p class="metric-description">
+                            Detects lighting & gradient inconsistencies typical of diffusion models. Analyzes directional light patterns and shadow consistency that often appear unnatural in AI-generated images.
+                        </p>
+                        <div class="metric-details">
+                            <div class="detail-item">
+                                <span class="detail-label">Detection Method</span>
+                                <span class="detail-value">Principal Component Analysis</span>
+                            </div>
+                            <div class="detail-item">
+                                <span class="detail-label">Sensitivity</span>
+                                <span class="detail-value">High for diffusion models</span>
+                            </div>
+                            <div class="detail-item">
+                                <span class="detail-label">Performance</span>
+                                <span class="detail-value">85-95% detection rate</span>
+                            </div>
+                        </div>
+                    </div>
+                    <div class="metric-card">
+                        <div class="metric-header">
+                            <div class="metric-icon" style="background: linear-gradient(135deg, #718096 0%, #a0aec0 100%);">
+                                <i class="fas fa-chart-line"></i>
+                            </div>
+                            <div>
+                                <div class="metric-title">Frequency Analysis</div>
+                            </div>
+                            <span class="metric-weight">Weight: 25%</span>
+                        </div>
+                        <p class="metric-description">
+                            Identifies unnatural spectral energy distributions via FFT analysis. AI-generated images often show characteristic frequency patterns different from camera-captured photos.
+                        </p>
+                        <div class="metric-details">
+                            <div class="detail-item">
+                                <span class="detail-label">Detection Method</span>
+                                <span class="detail-value">Fast Fourier Transform</span>
+                            </div>
+                            <div class="detail-item">
+                                <span class="detail-label">Sensitivity</span>
+                                <span class="detail-value">Medium-High</span>
+                            </div>
+                            <div class="detail-item">
+                                <span class="detail-label">Performance</span>
+                                <span class="detail-value">75-85% detection rate</span>
+                            </div>
+                        </div>
+                    </div>
+                    <div class="metric-card">
+                        <div class="metric-header">
+                            <div class="metric-icon" style="background: linear-gradient(135deg, #38a169 0%, #68d391 100%);">
+                                <i class="fas fa-braille"></i>
+                            </div>
+                            <div>
+                                <div class="metric-title">Noise Pattern Analysis</div>
+                            </div>
+                            <span class="metric-weight">Weight: 20%</span>
+                        </div>
+                        <p class="metric-description">
+                            Detects missing or artificial sensor noise patterns. Real cameras produce characteristic noise while AI models often generate unnaturally uniform or missing noise patterns.
+                        </p>
+                        <div class="metric-details">
+                            <div class="detail-item">
+                                <span class="detail-label">Detection Method</span>
+                                <span class="detail-value">Noise Distribution Analysis</span>
+                            </div>
+                            <div class="detail-item">
+                                <span class="detail-label">Sensitivity</span>
+                                <span class="detail-value">Medium</span>
+                            </div>
+                            <div class="detail-item">
+                                <span class="detail-label">Performance</span>
+                                <span class="detail-value">70-80% detection rate</span>
+                            </div>
+                        </div>
+                    </div>
+                    <div class="metric-card">
+                        <div class="metric-header">
+                            <div class="metric-icon" style="background: linear-gradient(135deg, #d69e2e 0%, #ecc94b 100%);">
+                                <i class="fas fa-text-height"></i>
+                            </div>
+                            <div>
+                                <div class="metric-title">Texture Statistics</div>
+                            </div>
+                            <span class="metric-weight">Weight: 15%</span>
+                        </div>
+                        <p class="metric-description">
+                            Identifies overly smooth or uniform texture regions. AI-generated images often lack the natural texture variation found in real photographs, especially in complex surfaces.
+                        </p>
+                        <div class="metric-details">
+                            <div class="detail-item">
+                                <span class="detail-label">Detection Method</span>
+                                <span class="detail-value">GLCM Texture Analysis</span>
+                            </div>
+                            <div class="detail-item">
+                                <span class="detail-label">Sensitivity</span>
+                                <span class="detail-value">Medium-Low</span>
+                            </div>
+                            <div class="detail-item">
+                                <span class="detail-label">Performance</span>
+                                <span class="detail-value">60-70% detection rate</span>
+                            </div>
+                        </div>
+                    </div>
+                    <div class="metric-card">
+                        <div class="metric-header">
+                            <div class="metric-icon" style="background: linear-gradient(135deg, #e53e3e 0%, #fc8181 100%);">
+                                <i class="fas fa-palette"></i>
+                            </div>
+                            <div>
+                                <div class="metric-title">Color Distribution</div>
+                            </div>
+                            <span class="metric-weight">Weight: 10%</span>
+                        </div>
+                        <p class="metric-description">
+                            Flags unnatural saturation and color histogram patterns. AI models often produce colors that are either oversaturated or have distribution patterns that differ from real photographs.
+                        </p>
+                        <div class="metric-details">
+                            <div class="detail-item">
+                                <span class="detail-label">Detection Method</span>
+                                <span class="detail-value">Color Histogram Analysis</span>
+                            </div>
+                            <div class="detail-item">
+                                <span class="detail-label">Sensitivity</span>
+                                <span class="detail-value">Low-Medium</span>
+                            </div>
+                            <div class="detail-item">
+                                <span class="detail-label">Performance</span>
+                                <span class="detail-value">50-65% detection rate</span>
+                            </div>
+                        </div>
+                    </div>
+                </div>
+            </div>
+            <!-- How-to-use Tab -->
+            <div class="tab-content" id="howtoTab">
+                <div class="steps-grid">
+                    <div class="step-card">
+                        <div class="step-number">1</div>
+                        <h3>Upload Images</h3>
+                        <p>Drag & drop or select images (JPG, PNG, WEBP)</p>
+                    </div>
+                    <div class="step-card">
+                        <div class="step-number">2</div>
+                        <h3>Start Analysis</h3>
+                        <p>Click "Start Analysis" to begin screening</p>
+                    </div>
+                    <div class="step-card">
+                        <div class="step-number">3</div>
+                        <h3>Review Results</h3>
+                        <p>Check flagged images and export reports</p>
+                    </div>
+                </div>
+            </div>
+        </div>
+        <!-- Analysis Screen (Initially Hidden) -->
+        <div id="analysisScreen" class="hidden">
+            <!-- Upload Card -->
+            <div class="card">
+                <div class="card-header">
+                    <h2 class="card-title"><i class="fas fa-cloud-upload-alt"></i> Upload Images</h2>
+                    <button class="action-button secondary-action" id="backHomeBtn">
+                        <i class="fas fa-arrow-left"></i> Back
+                    </button>
+                </div>
+                <div class="upload-area" id="uploadArea">
+                    <div class="upload-icon">
+                        <i class="fas fa-cloud-upload-alt"></i>
+                    </div>
+                    <h3 class="upload-text">Drag & drop images here</h3>
+                    <p class="upload-text">or</p>
+                    <div class="upload-button" id="fileInputBtn">
+                        <i class="fas fa-folder-open"></i> Browse Files
+                    </div>
+                    <input type="file" id="fileInput" multiple accept=".jpg,.jpeg,.png,.webp" style="display: none;">
+                    <p class="text-center mt-2" style="color: var(--text-light); font-size: 0.875rem;">
+                        Supports JPG, JPEG, PNG, WEBP up to 10MB each
+                    </p>
+                </div>
+                <!-- Thumbnail Grid -->
+                <div class="thumbnail-grid" id="thumbnailGrid"></div>
+                <!-- Start Analysis Button - Centered -->
+                <div class="mt-3" id="analyzeButtonContainer" style="display: none;">
+                    <button class="start-analysis-btn" id="analyzeBtn">
+                        <div class="btn-content">
+                            <i class="fas fa-play"></i> Start Analysis
+                        </div>
+                    </button>
+                </div>
+                <div class="progress-container hidden" id="progressContainer">
+                    <div class="progress-header">
+                        <span>Processing</span>
+                        <span id="progressPercent">0%</span>
+                    </div>
+                    <div class="progress-bar">
+                        <div class="progress-fill" id="progressFill"></div>
+                    </div>
+                    <div class="progress-details">
+                        <span id="currentFile" class="current-file">Ready to process</span>
+                        <span id="progressStats">0 / 0</span>
+                    </div>
+                </div>
+            </div>
+            <!-- Results Section -->
+            <div id="resultsSection" class="hidden">
+                <!-- Export Buttons -->
+                <div class="card">
+                    <div class="card-header">
+                        <h2 class="card-title"><i class="fas fa-chart-bar"></i> Analysis Results</h2>
+                        <div class="results-actions">
+                            <button class="action-button secondary-action" id="exportCsvBtn">
+                                <i class="fas fa-file-csv"></i> CSV
+                            </button>
+                            <button class="action-button secondary-action" id="exportPdfBtn">
+                                <i class="fas fa-file-pdf"></i> PDF
+                            </button>
+                            <button class="action-button secondary-action" id="exportJsonBtn">
+                                <i class="fas fa-file-code"></i> JSON
+                            </button>
+                            <button class="action-button secondary-action" id="newAnalysisBtn">
+                                <i class="fas fa-redo"></i> New
+                            </button>
+                        </div>
+                    </div>
+                    <!-- Results Summary -->
+                    <div class="results-summary" id="resultsSummary">
+                        <!-- Summary cards will be populated here -->
+                    </div>
+                    <!-- Results Table -->
+                    <div class="results-table-container">
+                        <table class="results-table" id="resultsTable">
+                            <thead>
+                                <tr>
+                                    <th>Image</th>
+                                    <th>Status</th>
+                                    <th>Score</th>
+                                    <th>Signals</th>
+                                    <th>Details</th>
+                                </tr>
+                            </thead>
+                            <tbody id="resultsTableBody">
+                                <!-- Results will be populated here -->
+                                <tr id="noResultsRow">
+                                    <td colspan="5" class="text-center" style="padding: 3rem; color: var(--text-light);">
+                                        <i class="fas fa-chart-bar" style="font-size: 3rem; margin-bottom: 1rem; opacity: 0.5;"></i>
+                                        <p>No analysis results yet. Upload images and click "Start Analysis" to begin.</p>
+                                    </td>
+                                </tr>
+                            </tbody>
+                        </table>
+                    </div>
+                </div>
+                <!-- Detailed Analysis -->
+                <div class="detailed-analysis">
+                    <div class="analysis-header" id="toggleDetailedAnalysis">
+                        <h3><i class="fas fa-search"></i> Detailed Analysis</h3>
+                        <i class="fas fa-chevron-down" id="detailedAnalysisIcon"></i>
+                    </div>
+                    <div class="analysis-content" id="detailedAnalysisContent">
+                        <!-- Detailed analysis will be populated here -->
+                        <p id="noDetailedAnalysis" class="text-center" style="color: var(--text-light); padding: 2rem;">
+                            <i class="fas fa-eye" style="font-size: 2rem; margin-bottom: 1rem; opacity: 0.5;"></i><br>
+                            Select an image to view detailed analysis
+                        </p>
+                    </div>
+                </div>
+            </div>
+        </div>
+    </div>
+    <!-- Footer with reduced spacing -->
+    <footer>
+        <div class="container">
+            <div class="footer-links">
+                <a href="#" class="footer-link">Documentation</a>
+                <a href="#" class="footer-link">API Reference</a>
+                <a href="#" class="footer-link">Privacy</a>
+                <a href="#" class="footer-link">Support</a>
+            </div>
+            <p>AI Image Screener v1.0.0 © 2025</p>
+        </div>
+    </footer>
+    <script>
+        // API Configuration
+        const API_BASE_URL = window.location.origin;
+        const BATCH_ENDPOINT = '/analyze/batch';
+        const HEALTH_ENDPOINT = '/health';
+        const BATCH_PROGRESS_ENDPOINT = '/batch';
+        const CSV_REPORT_ENDPOINT = '/report/csv';
+        const PDF_REPORT_ENDPOINT = '/report/pdf';
+        // Global state
+        let files = [];
+        let fileDataUrls = {};
+        let currentBatchId = null;
+        let batchResults = null;
+        let pollingInterval = null;
+        let selectedImageIndex = null;
+        // DOM Elements
+        const landingScreen = document.getElementById('landingScreen');
+        const analysisScreen = document.getElementById('analysisScreen');
+        const resultsSection = document.getElementById('resultsSection');
+        const loadingOverlay = document.getElementById('loadingOverlay');
+        const toast = document.getElementById('toast');
+        const tryNowBtn = document.getElementById('tryNowBtn');
+        const backHomeBtn = document.getElementById('backHomeBtn');
+        const newAnalysisBtn = document.getElementById('newAnalysisBtn');
+        const uploadArea = document.getElementById('uploadArea');
+        const fileInput = document.getElementById('fileInput');
+        const fileInputBtn = document.getElementById('fileInputBtn');
+        const thumbnailGrid = document.getElementById('thumbnailGrid');
+        const analyzeBtn = document.getElementById('analyzeBtn');
+        const analyzeButtonContainer = document.getElementById('analyzeButtonContainer');
+        const progressContainer = document.getElementById('progressContainer');
+        const progressFill = document.getElementById('progressFill');
+        const progressPercent = document.getElementById('progressPercent');
+        const currentFile = document.getElementById('currentFile');
+        const progressStats = document.getElementById('progressStats');
+        const resultsSummary = document.getElementById('resultsSummary');
+        const resultsTableBody = document.getElementById('resultsTableBody');
+        const noResultsRow = document.getElementById('noResultsRow');
+        const exportCsvBtn = document.getElementById('exportCsvBtn');
+        const exportPdfBtn = document.getElementById('exportPdfBtn');
+        const exportJsonBtn = document.getElementById('exportJsonBtn');
+        const toggleDetailedAnalysis = document.getElementById('toggleDetailedAnalysis');
+        const detailedAnalysisIcon = document.getElementById('detailedAnalysisIcon');
+        const detailedAnalysisContent = document.getElementById('detailedAnalysisContent');
+        const noDetailedAnalysis = document.getElementById('noDetailedAnalysis');
+        const tabButtons = document.querySelectorAll('.tab-button');
+        const tabContents = document.querySelectorAll('.tab-content');
+        // Initialize
+        document.addEventListener('DOMContentLoaded', () => {
+            setupEventListeners();
+            setupTabs();
+            checkApiHealth();
+        });
+        // Toast notification
+        function showToast(message, type = 'success') {
+            toast.textContent = message;
+            toast.className = `toast ${type} show`;
+            setTimeout(() => {
+                toast.classList.remove('show');
+            }, 3000);
+        }
+        // Loading overlay
+        function showLoading(show) {
+            if (show) {
+                loadingOverlay.classList.add('active');
+            } else {
+                loadingOverlay.classList.remove('active');
+            }
+        }
+        // Tab functionality
+        function setupTabs() {
+            tabButtons.forEach(button => {
+                button.addEventListener('click', () => {
+                    const tabId = button.dataset.tab + 'Tab';
+                    // Remove active class from all buttons and contents
+                    tabButtons.forEach(btn => btn.classList.remove('active'));
+                    tabContents.forEach(content => content.classList.remove('active'));
+                    // Add active class to clicked button and corresponding content
+                    button.classList.add('active');
+                    document.getElementById(tabId).classList.add('active');
+                });
+            });
+        }
+        // Setup event listeners - FIXED FOR ONE-CLICK UPLOAD
+        function setupEventListeners() {
+            // Navigation
+            tryNowBtn.addEventListener('click', showAnalysisScreen);
+            backHomeBtn.addEventListener('click', showLandingScreen);
+            newAnalysisBtn.addEventListener('click', resetAnalysis);
+            // File upload - ONLY ONE CLICK HANDLER
+            fileInputBtn.addEventListener('click', (e) => {
+                e.stopPropagation(); // Prevent bubbling
+                fileInput.click();
+            });
+            // File input change handler
+            fileInput.addEventListener('change', handleFileSelect);
+            // Remove the uploadArea click handler that was causing double triggers
+            // Keep only drag and drop handlers for uploadArea
+            uploadArea.addEventListener('dragover', handleDragOver);
+            uploadArea.addEventListener('dragleave', handleDragLeave);
+            uploadArea.addEventListener('drop', handleDrop);
+            // Analysis
+            analyzeBtn.addEventListener('click', startAnalysis);
+            // Export
+            exportCsvBtn.addEventListener('click', exportCsv);
+            exportPdfBtn.addEventListener('click', exportPdf);
+            exportJsonBtn.addEventListener('click', exportJson);
+            // Detailed analysis toggle
+            toggleDetailedAnalysis.addEventListener('click', () => {
+                detailedAnalysisContent.classList.toggle('show');
+                detailedAnalysisIcon.classList.toggle('fa-chevron-down');
+                detailedAnalysisIcon.classList.toggle('fa-chevron-up');
+            });
+        }
+        // Screen navigation
+        function showLandingScreen() {
+            landingScreen.classList.remove('hidden');
+            analysisScreen.classList.add('hidden');
+            window.scrollTo({ top: 0, behavior: 'smooth' });
+        }
+        function showAnalysisScreen() {
+            landingScreen.classList.add('hidden');
+            analysisScreen.classList.remove('hidden');
+            window.scrollTo({ top: 0, behavior: 'smooth' });
+        }
+        // File handling
+        function handleDragOver(e) {
+            e.preventDefault();
+            uploadArea.classList.add('dragover');
+        }
+        function handleDragLeave(e) {
+            e.preventDefault();
+            uploadArea.classList.remove('dragover');
+        }
+        function handleDrop(e) {
+            e.preventDefault();
+            uploadArea.classList.remove('dragover');
+            const droppedFiles = Array.from(e.dataTransfer.files);
+            if (droppedFiles.length > 0) {
+                processFiles(droppedFiles);
+            }
+        }
+        function handleFileSelect(e) {
+            const selectedFiles = Array.from(e.target.files);
+            if (selectedFiles.length > 0) {
+                processFiles(selectedFiles);
+            }
+            // Clear the input value to allow same file selection
+            e.target.value = '';
+        }
+        async function processFiles(newFiles) {
+            const validFiles = [];
+            for (const file of newFiles) {
+                const validTypes = ['image/jpeg', 'image/jpg', 'image/png', 'image/webp'];
+                const maxSize = 10 * 1024 * 1024;
+                if (!validTypes.includes(file.type)) {
+                    showToast(`File ${file.name} is not a supported image type.`, 'error');
+                    continue;
+                }
+                if (file.size > maxSize) {
+                    showToast(`File ${file.name} exceeds the 10MB size limit.`, 'error');
+                    continue;
+                }
+                validFiles.push(file);
+            }
+            if (validFiles.length > 0) {
+                showLoading(true);
+                try {
+                    // Generate thumbnails
+                    for (const file of validFiles) {
+                        try {
+                            const dataUrl = await createThumbnail(file);
+                            fileDataUrls[file.name] = dataUrl;
+                        } catch (error) {
+                            console.error('Failed to create thumbnail:', error);
+                            fileDataUrls[file.name] = null;
+                        }
+                    }
+                    files.push(...validFiles);
+                    updateThumbnailGrid();
+                    showToast(`Added ${validFiles.length} file(s)`, 'success');
+                } catch (error) {
+                    console.error('Error processing files:', error);
+                    showToast('Error processing files. Please try again.', 'error');
+                } finally {
+                    showLoading(false);
+                }
+            }
+        }
+        function createThumbnail(file) {
+            return new Promise((resolve, reject) => {
+                const reader = new FileReader();
+                reader.onload = (e) => {
+                    const img = new Image();
+                    img.onload = () => {
+                        const canvas = document.createElement('canvas');
+                        const ctx = canvas.getContext('2d');
+                        // Set canvas dimensions for thumbnail
+                        const maxSize = 120;
+                        let width = img.width;
+                        let height = img.height;
+                        if (width > height) {
+                            if (width > maxSize) {
+                                height *= maxSize / width;
+                                width = maxSize;
+                            }
+                        } else {
+                            if (height > maxSize) {
+                                width *= maxSize / height;
+                                height = maxSize;
+                            }
+                        }
+                        canvas.width = width;
+                        canvas.height = height;
+                        ctx.drawImage(img, 0, 0, width, height);
+                        resolve(canvas.toDataURL('image/jpeg', 0.7));
+                    };
+                    img.onerror = reject;
+                    img.src = e.target.result;
+                };
+                reader.onerror = reject;
+                reader.readAsDataURL(file);
+            });
+        }
+        function updateThumbnailGrid() {
+            thumbnailGrid.innerHTML = '';
+            if (files.length === 0) {
+                thumbnailGrid.style.display = 'none';
+                analyzeButtonContainer.style.display = 'none';
+                return;
+            }
+            thumbnailGrid.style.display = 'grid';
+            analyzeButtonContainer.style.display = 'block';
+            files.forEach((file, index) => {
+                const thumbnailItem = document.createElement('div');
+                thumbnailItem.className = 'thumbnail-item';
+                thumbnailItem.dataset.index = index;
+                const dataUrl = fileDataUrls[file.name] || 'data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" width="100" height="100" viewBox="0 0 100 100"><rect width="100" height="100" fill="%23f0f0f0"/><text x="50" y="50" font-family="Arial" font-size="14" text-anchor="middle" fill="%23999">No preview</text></svg>';
+                thumbnailItem.innerHTML = `
+                    <img src="${dataUrl}" alt="${file.name}" class="thumbnail-img">
+                    <div class="thumbnail-overlay">
+                        <span style="overflow: hidden; text-overflow: ellipsis; white-space: nowrap; max-width: 80px;">
+                            ${file.name}
+                        </span>
+                        <button class="remove-thumbnail" data-index="${index}">
+                            <i class="fas fa-times"></i>
+                        </button>
+                    </div>
+                `;
+                thumbnailGrid.appendChild(thumbnailItem);
+            });
+            // Add event listeners to remove buttons
+            document.querySelectorAll('.remove-thumbnail').forEach(btn => {
+                btn.addEventListener('click', (e) => {
+                    e.stopPropagation();
+                    const index = parseInt(e.currentTarget.dataset.index);
+                    removeFile(index);
+                });
+            });
+        }
+        function removeFile(index) {
+            const removedFile = files[index].name;
+            files.splice(index, 1);
+            delete fileDataUrls[removedFile];
+            updateThumbnailGrid();
+            showToast(`Removed ${removedFile}`, 'warning');
+        }
+        // Analysis
+        async function startAnalysis() {
+            if (files.length === 0) return;
+            showLoading(true);
+            analyzeBtn.disabled = true;
+            analyzeBtn.innerHTML = '<span class="spinner"></span> Processing...';
+            progressFill.style.width = '0%';
+            progressPercent.textContent = '0%';
+            currentFile.textContent = 'Starting analysis...';
+            progressStats.textContent = `0 / ${files.length}`;
+            progressContainer.classList.remove('hidden');
+            clearResults();
+            const formData = new FormData();
+            files.forEach(file => {
+                formData.append('files', file);
+            });
+            try {
+                console.log('Sending batch request for', files.length, 'images...');
+                const response = await fetch(BATCH_ENDPOINT, {
+                    method: 'POST',
+                    body: formData
+                });
+                console.log('Response status:', response.status);
+                if (!response.ok) {
+                    const errorText = await response.text();
+                    throw new Error(`HTTP ${response.status}: ${errorText}`);
+                }
+                const apiResponse = await response.json();
+                console.log('API response:', apiResponse);
+                showLoading(false);
+                if (!apiResponse.success) {
+                    throw new Error(apiResponse.message || 'API request failed');
+                }
+                const data = apiResponse.data;
+                console.log('Data:', data);
+                if (data && data.batch_id) {
+                    console.log('Polling mode: batch_id =', data.batch_id);
+                    currentBatchId = data.batch_id;
+                    showToast('Analysis started. Processing in background...', 'success');
+                    startPollingProgress();
+                } else if (data && data.result) {
+                    console.log('Immediate results mode');
+                    progressFill.style.width = '100%';
+                    progressPercent.textContent = '100%';
+                    currentFile.textContent = 'Processing complete!';
+                    progressStats.textContent = `${files.length} / ${files.length}`;
+                    batchResults = data.result;
+                    setTimeout(() => {
+                        displayResults();
+                        resetUI();
+                        resultsSection.classList.remove('hidden');
+                        document.getElementById('resultsSection').scrollIntoView({
+                            behavior: 'smooth',
+                            block: 'start'
+                        });
+                        showToast(`Analysis complete! Processed ${files.length} image(s)`, 'success');
+                    }, 500);
+                } else {
+                    console.error('Unexpected response format:', apiResponse);
+                    throw new Error('Invalid response format from server');
+                }
+            } catch (error) {
+                console.error('Analysis failed:', error);
+                showLoading(false);
+                showToast('Analysis failed: ' + error.message, 'error');
+                resetUI();
+            }
+        }
+        function startPollingProgress() {
+            if (pollingInterval) clearInterval(pollingInterval);
+            pollingInterval = setInterval(async () => {
+                try {
+                    const response = await fetch(`${BATCH_PROGRESS_ENDPOINT}/${currentBatchId}/progress`);
+                    const data = await response.json();
+                    const sessionData = data.data || data;
+                    if (sessionData.status === 'completed') {
+                        clearInterval(pollingInterval);
+                        if (sessionData.result) {
+                            batchResults = sessionData.result;
+                        } else {
+                            batchResults = sessionData;
+                        }
+                        displayResults();
+                        resetUI();
+                        resultsSection.classList.remove('hidden');
+                        document.getElementById('resultsSection').scrollIntoView({
+                            behavior: 'smooth',
+                            block: 'start'
+                        });
+                        showToast('Batch analysis completed!', 'success');
+                    } else if (sessionData.status === 'processing') {
+                        const progress = sessionData.progress;
+                        if (progress) {
+                            const percent = Math.round((progress.current / progress.total) * 100);
+                            progressFill.style.width = `${percent}%`;
+                            progressPercent.textContent = `${percent}%`;
+                            currentFile.textContent = progress.filename || 'Processing...';
+                            progressStats.textContent = `${progress.current} / ${progress.total}`;
+                        }
+                    } else if (sessionData.status === 'failed' || sessionData.status === 'interrupted') {
+                        clearInterval(pollingInterval);
+                        showToast(`Analysis failed: ${sessionData.error || 'Unknown error'}`, 'error');
+                        resetUI();
+                    }
+                } catch (error) {
+                    console.error('Progress polling failed:', error);
+                }
+            }, 1000);
+        }
+        function displayResults() {
+            if (!batchResults) {
+                console.error('No results to display:', batchResults);
+                return;
+            }
+            console.log('Displaying batch results:', batchResults);
+            const results = batchResults.results || [];
+            console.log('Results array:', results);
+            updateSummary(batchResults);
+            resultsTableBody.innerHTML = '';
+            results.forEach((result, index) => {
+                const row = document.createElement('tr');
+                row.dataset.index = index;
+                const resultData = result;
+                const filename = resultData.filename || 'Unknown';
+                const overallScore = resultData.overall_score || 0;
+                const status = resultData.status || 'LIKELY_AUTHENTIC';
+                const confidence = resultData.confidence || 0;
+                const imageSize = resultData.image_size || [0, 0];
+                const signals = resultData.signals || [];
+                const processingTime = resultData.processing_time || 0;
+                const scorePercent = Math.round(overallScore * 100);
+                let scoreClass = 'score-low';
+                let scoreWidth = '30%';
+                if (scorePercent >= 70) {
+                    scoreClass = 'score-high';
+                    scoreWidth = '90%';
+                } else if (scorePercent >= 50) {
+                    scoreClass = 'score-medium';
+                    scoreWidth = '60%';
+                }
+                const flaggedCount = signals.filter(s => s.status === 'flagged').length;
+                const warningCount = signals.filter(s => s.status === 'warning').length;
+                // Format status for display (remove underscores)
+                const displayStatus = status.replace(/_/g, ' ');
+                // Get thumbnail
+                const thumbnailSrc = fileDataUrls[filename] || 'data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" width="40" height="40" viewBox="0 0 40 40"><rect width="40" height="40" fill="%23f0f0f0"/></svg>';
+                row.innerHTML = `
+                    <td style="min-width: 200px;">
+                        <div style="display: flex; align-items: center; gap: 0.75rem;">
+                            <img src="${thumbnailSrc}" alt="${filename}" style="width: 40px; height: 40px; object-fit: cover; border-radius: 0.25rem; border: 1px solid var(--border);">
+                            <div>
+                                <div style="font-weight: 500; font-size: 0.875rem;">${filename}</div>
+                                <div style="font-size: 0.75rem; color: var(--text-light);">
+                                    ${imageSize[0]} × ${imageSize[1]}
+                                </div>
+                            </div>
+                        </div>
+                    </td>
+                    <td>
+                        <span class="status-badge ${status === 'LIKELY_AUTHENTIC' ? 'status-authentic' : 'status-review'}" style="white-space: nowrap;">
+                            ${displayStatus}
+                        </span>
+                    </td>
+                    <td>
+                        <div class="score-indicator">
+                            <span style="min-width: 40px; font-size: 0.875rem;">${scorePercent}%</span>
+                            <div class="score-bar">
+                                <div class="score-fill ${scoreClass}" style="width: ${scoreWidth}"></div>
+                            </div>
+                        </div>
+                    </td>
+                    <td style="min-width: 150px;">
+                        <div style="display: flex; gap: 0.25rem; flex-wrap: wrap;">
+                            ${flaggedCount > 0 ? `<span class="signal-badge signal-flagged" style="font-size: 0.7rem;">${flaggedCount} flagged</span>` : ''}
+                            ${warningCount > 0 ? `<span class="signal-badge signal-warning" style="font-size: 0.7rem;">${warningCount} warning</span>` : ''}
+                            ${signals.length - flaggedCount - warningCount > 0 ?
+                            `<span class="signal-badge signal-passed" style="font-size: 0.7rem;">${signals.length - flaggedCount - warningCount} passed</span>` : ''}
+                        </div>
+                    </td>
+                    <td>
+                        <button class="action-button secondary-action view-detail-btn" data-index="${index}" title="View Details" style="padding: 0.25rem 0.5rem;">
+                            <i class="fas fa-eye"></i>
+                        </button>
+                    </td>
+                `;
+                resultsTableBody.appendChild(row);
+            });
+            noResultsRow.classList.add('hidden');
+            document.querySelectorAll('.view-detail-btn').forEach(btn => {
+                btn.addEventListener('click', (e) => {
+                    e.stopPropagation();
+                    const index = parseInt(e.currentTarget.dataset.index);
+                    showDetailedAnalysis(index);
+                });
+            });
+            document.querySelectorAll('#resultsTableBody tr').forEach(row => {
+                row.addEventListener('click', (e) => {
+                    if (!e.target.closest('.view-detail-btn')) {
+                        const index = parseInt(row.dataset.index);
+                        showDetailedAnalysis(index);
+                    }
+                });
+            });
+        }
+        function updateSummary(batchResult) {
+            const total = batchResult.total_images || 0;
+            const processed = batchResult.processed || batchResult.results?.length || 0;
+            const failed = batchResult.failed || 0;
+            let likelyAuthentic = 0;
+            let reviewRequired = 0;
+            if (batchResult.results) {
+                batchResult.results.forEach(result => {
+                    const resultData = result;
+                    const status = resultData.status || 'LIKELY_AUTHENTIC';
+                    if (status === 'LIKELY_AUTHENTIC') {
+                        likelyAuthentic++;
+                    } else if (status === 'REVIEW_REQUIRED') {
+                        reviewRequired++;
+                    }
+                });
+            }
+            resultsSummary.innerHTML = `
+                <div class="summary-card">
+                    <div class="summary-value">${processed}</div>
+                    <div class="summary-label">Total Processed</div>
+                </div>
+                <div class="summary-card">
+                    <div class="summary-value">${likelyAuthentic}</div>
+                    <div class="summary-label">Likely Authentic</div>
+                </div>
+                <div class="summary-card">
+                    <div class="summary-value">${reviewRequired}</div>
+                    <div class="summary-label">Review Required</div>
+                </div>
+                <div class="summary-card">
+                    <div class="summary-value">${failed}</div>
+                    <div class="summary-label">Failed</div>
+                </div>
+            `;
+        }
+        function showDetailedAnalysis(index) {
+            if (!batchResults || !batchResults.results || !batchResults.results[index]) return;
+            selectedImageIndex = index;
+            const result = batchResults.results[index];
+            const resultData = result;
+            const filename = resultData.filename || 'Unknown';
+            const overallScore = resultData.overall_score || 0;
+            const status = resultData.status || 'LIKELY_AUTHENTIC';
+            const confidence = resultData.confidence || 0;
+            const imageSize = resultData.image_size || [0, 0];
+            const processingTime = resultData.processing_time || 0;
+            const signals = resultData.signals || [];
+            const scorePercent = Math.round(overallScore * 100);
+            const displayStatus = status.replace(/_/g, ' ');
+            // Ensure detailed analysis is expanded
+            detailedAnalysisContent.classList.add('show');
+            detailedAnalysisIcon.classList.remove('fa-chevron-down');
+            detailedAnalysisIcon.classList.add('fa-chevron-up');
+            document.getElementById('detailedAnalysisContent').scrollIntoView({
+                behavior: 'smooth',
+                block: 'start'
+            });
+            // Build signals HTML
+            let signalsHtml = '';
+            if (signals && signals.length > 0) {
+                signals.forEach(signal => {
+                    let statusClass = 'signal-passed';
+                    if (signal.status === 'warning') statusClass = 'signal-warning';
+                    if (signal.status === 'flagged') statusClass = 'signal-flagged';
+                    const signalScore = Math.round((signal.score || 0) * 100);
+                    signalsHtml += `
+                        <div class="signal-card">
+                            <div class="signal-header">
+                                <strong>${signal.name || 'Unknown Metric'}</strong>
+                                <span class="signal-badge ${statusClass}">${signal.status}</span>
+                            </div>
+                            <p style="font-size: 0.875rem; margin-bottom: 0.5rem; color: var(--text-light);">
+                                ${signal.explanation || 'No explanation available.'}
+                            </p>
+                            <div style="display: flex; justify-content: space-between; align-items: center;">
+                                <div style="font-size: 0.75rem; color: var(--text-light);">
+                                    Score: ${signalScore}%
+                                </div>
+                            </div>
+                        </div>
+                    `;
+                });
+            } else {
+                signalsHtml = '<p class="text-center" style="color: var(--text-light);">No detection signals available.</p>';
+            }
+            detailedAnalysisContent.innerHTML = `
+                <div style="margin-bottom: 1.5rem;">
+                    <div style="display: flex; align-items: center; gap: 1rem; margin-bottom: 1rem;">
+                        <img src="${fileDataUrls[filename] || 'data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" width="60" height="60" viewBox="0 0 60 60"><rect width="60" height="60" fill="%23f0f0f0"/></svg>'}"
+                             alt="${filename}"
+                             style="width: 60px; height: 60px; object-fit: cover; border-radius: 0.5rem; border: 1px solid var(--border);">
+                        <div>
+                            <h4 style="margin-bottom: 0.25rem;">${filename}</h4>
+                            <div style="font-size: 0.875rem; color: var(--text-light);">
+                                ${imageSize[0]} × ${imageSize[1]} • ${processingTime.toFixed(2)}s
+                            </div>
+                        </div>
+                    </div>
+                    <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 1rem; margin-bottom: 1.5rem;">
+                        <div style="text-align: center; padding: 1rem; background-color: #f8fafc; border-radius: 0.5rem;">
+                            <div style="font-size: 1.5rem; font-weight: 700; color: ${scorePercent >= 70 ? '#e53e3e' : scorePercent >= 50 ? '#d69e2e' : '#38a169'};">${scorePercent}%</div>
+                            <div style="font-size: 0.875rem; color: var(--text-light);">Score</div>
+                        </div>
+                        <div style="text-align: center; padding: 1rem; background-color: #f8fafc; border-radius: 0.5rem;">
+                            <div style="font-size: 1.5rem; font-weight: 700; color: ${displayStatus.includes('REVIEW') ? '#d69e2e' : '#38a169'};">${displayStatus}</div>
+                            <div style="font-size: 0.875rem; color: var(--text-light);">Verdict</div>
+                        </div>
+                        <div style="text-align: center; padding: 1rem; background-color: #f8fafc; border-radius: 0.5rem;">
+                            <div style="font-size: 1.5rem; font-weight: 700;">${confidence}%</div>
+                            <div style="font-size: 0.875rem; color: var(--text-light);">Confidence</div>
+                        </div>
+                    </div>
+                </div>
+                <h4 style="margin-bottom: 1rem;">Detection Signals</h4>
+                <div class="signal-grid">
+                    ${signalsHtml}
+                </div>
+                <div class="signal-card" style="margin-top: 1.5rem; background-color: ${displayStatus.includes('REVIEW') ? 'rgba(214, 158, 46, 0.1)' : 'rgba(56, 161, 105, 0.1)'}; border-color: ${displayStatus.includes('REVIEW') ? 'rgba(214, 158, 46, 0.3)' : 'rgba(56, 161, 105, 0.3)'};">
+                    <div class="signal-header">
+                        <strong>Recommendation</strong>
+                    </div>
+                    <p style="margin-bottom: 0.5rem;">
+                        ${displayStatus.includes('REVIEW') ? 'Manual verification recommended' : 'No immediate action required'}
+                    </p>
+                    <div style="font-size: 0.875rem; color: var(--text-light);">
+                        Confidence: ${confidence}% likelihood of ${displayStatus.includes('REVIEW') ? 'AI generation' : 'authenticity'}
+                    </div>
+                </div>
+            `;
+        }
+        // Export functions
+        async function exportCsv() {
+            if (!currentBatchId) {
+                showToast('No analysis results to export.', 'warning');
+                return;
+            }
+            showLoading(true);
+            try {
+                // Using GET request since backend now accepts both GET and POST
+                const response = await fetch(`${CSV_REPORT_ENDPOINT}/${currentBatchId}`);
+                if (response.ok) {
+                    // Get the blob data
+                    const blob = await response.blob();
+                    // Create download link
+                    const downloadLink = document.createElement('a');
+                    downloadLink.href = URL.createObjectURL(blob);
+                    downloadLink.download = `ai_screener_report_${currentBatchId}.csv`;
+                    document.body.appendChild(downloadLink);
+                    downloadLink.click();
+                    document.body.removeChild(downloadLink);
+                    showToast('CSV report downloaded successfully.', 'success');
+                } else {
+                    showToast('Failed to generate CSV report.', 'error');
+                }
+            } catch (error) {
+                console.error('CSV export failed:', error);
+                showToast('CSV export failed. Please try again.', 'error');
+            } finally {
+                showLoading(false);
+            }
+        }
+        async function exportPdf() {
+            if (!currentBatchId) {
+                showToast('No analysis results to export.', 'warning');
+                return;
+            }
+            showLoading(true);
+            try {
+                // Using GET request since backend now accepts both GET and POST
+                const response = await fetch(`${PDF_REPORT_ENDPOINT}/${currentBatchId}`);
+                if (response.ok) {
+                    // Get the blob data
+                    const blob = await response.blob();
+                    // Create download link
+                    const downloadLink = document.createElement('a');
+                    downloadLink.href = URL.createObjectURL(blob);
+                    downloadLink.download = `ai_screener_report_${currentBatchId}.pdf`;
+                    document.body.appendChild(downloadLink);
+                    downloadLink.click();
+                    document.body.removeChild(downloadLink);
+                    showToast('PDF report downloaded successfully.', 'success');
+                } else {
+                    showToast('Failed to generate PDF report.', 'error');
+                }
+            } catch (error) {
+                console.error('PDF export failed:', error);
+                showToast('PDF export failed. Please try again.', 'error');
+            } finally {
+                showLoading(false);
+            }
+        }
+        async function exportJson() {
+            if (!batchResults) {
+                showToast('No analysis results to export.', 'warning');
+                return;
+            }
+            showLoading(true);
+            try {
+                const dataStr = JSON.stringify(batchResults, null, 2);
+                const dataBlob = new Blob([dataStr], {type: 'application/json'});
+                const downloadLink = document.createElement('a');
+                downloadLink.href = URL.createObjectURL(dataBlob);
+                downloadLink.download = `ai_image_screener_${new Date().toISOString().split('T')[0]}_${currentBatchId || 'report'}.json`;
+                document.body.appendChild(downloadLink);
+                downloadLink.click();
+                document.body.removeChild(downloadLink);
+                showToast('JSON report downloaded successfully.', 'success');
+            } catch (error) {
+                console.error('JSON export failed:', error);
+                showToast('JSON export failed. Please try again.', 'error');
+            } finally {
+                showLoading(false);
+            }
+        }
+        // Reset functions
+        function resetUI() {
+            analyzeBtn.disabled = false;
+            analyzeBtn.innerHTML = '<div class="btn-content"><i class="fas fa-play"></i> Start Analysis</div>';
+            setTimeout(() => {
+                progressContainer.classList.add('hidden');
+            }, 2000);
+        }
+        function resetAnalysis() {
+            files = [];
+            fileDataUrls = {};
+            batchResults = null;
+            currentBatchId = null;
+            selectedImageIndex = null;
+            updateThumbnailGrid();
+            clearResults();
+            resultsSection.classList.add('hidden');
+            detailedAnalysisContent.innerHTML = '<p id="noDetailedAnalysis" class="text-center" style="color: var(--text-light); padding: 2rem;"><i class="fas fa-eye" style="font-size: 2rem; margin-bottom: 1rem; opacity: 0.5;"></i><br>Select an image to view detailed analysis</p>';
+            window.scrollTo({ top: 0, behavior: 'smooth' });
+            showToast('Analysis reset. Ready for new upload.', 'success');
+        }
+        function clearResults() {
+            resultsSummary.innerHTML = '';
+            resultsTableBody.innerHTML = '';
+            noResultsRow.classList.remove('hidden');
+            if (pollingInterval) {
+                clearInterval(pollingInterval);
+                pollingInterval = null;
+            }
+        }
+        // API health check
+        async function checkApiHealth() {
+            try {
+                const response = await fetch(HEALTH_ENDPOINT);
+                const data = await response.json();
+                if (data.status === 'ok') {
+                    console.log('API connected successfully');
+                }
+            } catch (error) {
+                console.error('API health check failed:', error);
+            }
+        }
+    </script>
+</body>
+</html>

utils/__init__.py ADDED Viewed

	@@ -0,0 +1,19 @@

+from .logger import get_logger
+from .image_processor import ImageProcessor
+from .validators import ImageValidator
+from .helpers import (
+    generate_unique_id,
+    cleanup_old_files,
+    format_filesize,
+    calculate_hash
+)
+__all__ = [
+    'get_logger',
+    'ImageProcessor',
+    'ImageValidator',
+    'generate_unique_id',
+    'cleanup_old_files',
+    'format_filesize',
+    'calculate_hash'
+]

utils/helpers.py ADDED Viewed

	@@ -0,0 +1,108 @@

+# Dependencies
+import re
+import uuid
+import hashlib
+from pathlib import Path
+from datetime import datetime
+from datetime import timedelta
+from utils.logger import get_logger
+# Setup Logging
+logger = get_logger(__name__)
+def generate_unique_id() -> str:
+    """
+    Generate unique ID for files/reports
+    """
+    unique_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
+    return unique_id
+def calculate_hash(file_path: Path) -> str:
+    """
+    Calculate SHA256 hash of file
+    """
+    sha256 = hashlib.sha256()
+    with open(file_path, 'rb') as f:
+        for chunk in iter(lambda: f.read(8192), b''):
+            sha256.update(chunk)
+    hash = sha256.hexdigest()
+    return hash
+def format_filesize(size_bytes: int) -> str:
+    """
+    Format file size in human-readable format
+    """
+    for unit in ['B', 'KB', 'MB', 'GB']:
+        if (size_bytes < 1024.0):
+            return f"{size_bytes:.2f} {unit}"
+        size_bytes /= 1024.0
+    file_size = f"{size_bytes:.2f} TB"
+    return file_size
+def cleanup_old_files(directory: Path, days: int = 7) -> int:
+    """
+    Clean up files older than specified days
+    Arguments:
+    ----------
+        directory { Path } : Directory to clean
+        days      { int }  : Files older than this will be deleted
+    Returns:
+    --------
+            { int }        : Number of files deleted
+    """
+    if not directory.exists():
+        return 0
+    cutoff  = datetime.now() - timedelta(days = days)
+    deleted = 0
+    for file_path in directory.iterdir():
+        if file_path.is_file():
+            file_time = datetime.fromtimestamp(file_path.stat().st_mtime)
+            if (file_time < cutoff):
+                try:
+                    file_path.unlink()
+                    deleted += 1
+                    logger.debug(f"Deleted old file: {file_path.name}")
+                except Exception as e:
+                    logger.error(f"Failed to delete {file_path.name}: {e}")
+    if (deleted > 0):
+        logger.info(f"Cleaned up {deleted} files from {directory.name}")
+    return deleted
+def safe_filename(filename: str) -> str:
+    """
+    Sanitize filename for safe storage
+    """
+    # Remove any path components
+    filename = Path(filename).name
+    # Replace unsafe characters
+    filename = re.sub(r'[^\w\s.-]', '', filename)
+    # Limit length
+    if (len(filename) > 255):
+        name, ext = filename.rsplit('.', 1) if '.' in filename else (filename, '')
+        filename  = name[:250] + ('.' + ext if ext else '')
+    return filename

utils/image_processor.py ADDED Viewed

	@@ -0,0 +1,163 @@

+# Dependencies
+import cv2
+import numpy as np
+from PIL import Image
+from pathlib import Path
+from typing import Tuple
+from typing import Optional
+from utils.logger import get_logger
+from config.constants import LUMINANCE_WEIGHTS
+# Setup Logging
+logger = get_logger(__name__)
+class ImageProcessor:
+    """
+    Image loading and preprocessing utilities
+    """
+    @staticmethod
+    def load_image(file_path: Path) -> np.ndarray:
+        """
+        Load image as numpy array in RGB format
+        Arguments:
+        ----------
+            file_path { Path } : Path of the image file needs to be loaded
+        Returns:
+        --------
+            { np.ndarray }     : Image array in RGB format (H, W, 3)
+        """
+        try:
+            image = cv2.imread(str(file_path))
+            if image is None:
+                raise ValueError(f"Failed to load image: {file_path}")
+            # Convert BGR to RGB
+            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+            logger.debug(f"Loaded image: {file_path.name} shape={image.shape}")
+            return image
+        except Exception as e:
+            logger.error(f"Error loading image {file_path}: {e}")
+            raise
+    @staticmethod
+    def rgb_to_luminance(image: np.ndarray) -> np.ndarray:
+        """
+        Convert RGB image to luminance using ITU-R BT.709 standard
+        Arguments:
+        ----------
+            image { np.ndarray } : RGB image array (H, W, 3)
+        Returns:
+        --------
+             { np.ndarray }      : Luminance array (H, W)
+        """
+        if ((image.ndim != 3) or (image.shape[2] != 3)):
+            raise ValueError(f"Expected RGB image (H, W, 3), got shape {image.shape}")
+        r, g, b   = LUMINANCE_WEIGHTS
+        luminance = r * image[:, :, 0] + g * image[:, :, 1] + b * image[:, :, 2]
+        return luminance.astype(np.float32)
+    @staticmethod
+    def compute_gradients(luminance: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Compute Sobel gradients
+        Arguments:
+        ----------
+            luminance { np.ndarray } : Luminance array (H, W)
+        Returns:
+        --------
+                   { tuple }         : Tuple of (gradient_x, gradient_y)
+        """
+        gx = cv2.Sobel(luminance, cv2.CV_64F, 1, 0, ksize = 3)
+        gy = cv2.Sobel(luminance, cv2.CV_64F, 0, 1, ksize = 3)
+        return gx, gy
+    @staticmethod
+    def normalize_image(image: np.ndarray) -> np.ndarray:
+        """
+        Normalize image to [0, 1] range
+        """
+        normalized_image = image.astype(np.float32) / 255.0
+        return normalized_image
+    @staticmethod
+    def resize_if_needed(image: np.ndarray, max_dimension: int = 2048) -> np.ndarray:
+        """
+        Resize image if larger than max_dimension while maintaining aspect ratio
+        Arguments:
+        ----------
+            image        { np.ndarray } : Input image
+            max_dimension   { int }     : Maximum dimension (width or height)
+        Returns:
+        --------
+                 { np.ndarray }         : Resized image if needed, otherwise original
+        """
+        h, w = image.shape[:2]
+        if (max(h, w) <= max_dimension):
+            return image
+        scale   = max_dimension / max(h, w)
+        new_w   = int(w * scale)
+        new_h   = int(h * scale)
+        resized = cv2.resize(image, (new_w, new_h), interpolation = cv2.INTER_AREA)
+        logger.debug(f"Resized image from {w}x{h} to {new_w}x{new_h}")
+        return resized
+    @staticmethod
+    def extract_patches(image: np.ndarray, patch_size: int, stride: int, max_patches: Optional[int] = None) -> np.ndarray:
+        """
+        Extract patches from image
+        Arguments:
+        ----------
+            image      { np.ndarray } : Input image (H, W) or (H, W, C)
+            patch_size    { int }     : Size of patches
+            stride        { int }     : Stride between patches
+            max_patches   { int }     : Maximum number of patches to extract
+        Returns:
+        --------
+                 { np.ndarray }       : Array of patches
+        """
+        h, w    = image.shape[:2]
+        patches = list()
+        for y in range(0, h - patch_size + 1, stride):
+            for x in range(0, w - patch_size + 1, stride):
+                patch = image[y:y+patch_size, x:x+patch_size]
+                patches.append(patch)
+                if (max_patches and (len(patches) >= max_patches)):
+                    return np.array(patches)
+        return np.array(patches)

utils/logger.py ADDED Viewed

	@@ -0,0 +1,85 @@

+# Dependencies
+import sys
+import logging
+from datetime import datetime
+from config.settings import settings
+class ColoredFormatter(logging.Formatter):
+    """
+    Colored log formatter for better readability
+    """
+    COLORS = {'DEBUG'    : '\033[36m',  # Cyan
+              'INFO'     : '\033[32m',  # Green
+              'WARNING'  : '\033[33m',  # Yellow
+              'ERROR'    : '\033[31m',  # Red
+              'CRITICAL' : '\033[35m',  # Magenta
+              'RESET'    : '\033[0m',
+             }
+    def format(self, record):
+        if sys.stdout.isatty():
+            levelname = record.levelname
+            if (levelname in self.COLORS):
+                record.levelname = f"{self.COLORS[levelname]}{levelname}{self.COLORS['RESET']}"
+        return super().format(record)
+def setup_logger(name: str = None) -> logging.Logger:
+    """
+    Setup logger with console and file handlers
+    Arguments:
+    ----------
+        name   { str }     : Logger name (defaults to root logger)
+    Returns:
+    --------
+        { logging.Logger } : Configured logger instance
+    """
+    logger = logging.getLogger(name or settings.APP_NAME)
+    # Avoid duplicate handlers
+    if logger.handlers:
+        return logger
+    level             = getattr(logging, settings.LOG_LEVEL, logging.INFO)
+    logger.setLevel(level)
+    logger.propagate  = False
+    # Console handler with colors
+    console_handler   = logging.StreamHandler(sys.stdout)
+    console_handler.setLevel(logging.DEBUG if settings.DEBUG else logging.INFO)
+    console_formatter = ColoredFormatter('%(asctime)s | %(levelname)-8s | %(name)s | %(message)s',
+                                         datefmt = '%Y-%m-%d %H:%M:%S'
+                                        )
+    console_handler.setFormatter(console_formatter)
+    logger.addHandler(console_handler)
+    # File handler
+    log_file          = settings.LOGS_DIR / f"app_{datetime.now().strftime('%Y%m%d')}.log"
+    file_handler      = logging.FileHandler(log_file)
+    file_handler.setLevel(logging.DEBUG)
+    file_formatter    = logging.Formatter('%(asctime)s | %(levelname)-8s | %(name)s | %(funcName)s:%(lineno)d | %(message)s',
+                                          datefmt = '%Y-%m-%d %H:%M:%S'
+                                         )
+    file_handler.setFormatter(file_formatter)
+    logger.addHandler(file_handler)
+    return logger
+def get_logger(name: str = None) -> logging.Logger:
+    """
+    Get or create logger instance
+    """
+    return setup_logger(name)

utils/validators.py ADDED Viewed

	@@ -0,0 +1,108 @@

+# Dependencies
+import magic
+from PIL import Image
+from pathlib import Path
+from typing import Tuple
+from utils.logger import get_logger
+from config.settings import settings
+from config.constants import MIN_IMAGE_DIMENSION
+from config.constants import MAX_IMAGE_DIMENSION
+# Setup Logging
+logger = get_logger(__name__)
+class ValidationError(Exception):
+    """
+    Custom validation error
+    """
+    pass
+class ImageValidator:
+    """
+    Validate uploaded images
+    """
+    @staticmethod
+    def validate_file_size(file_size: int) -> None:
+        """
+        Validate file size
+        """
+        if (file_size > settings.max_file_size_bytes):
+            raise ValidationError(f"File size {file_size} bytes exceeds maximum {settings.max_file_size_bytes} bytes")
+        if (file_size == 0):
+            raise ValidationError("File is empty")
+    @staticmethod
+    def validate_file_extension(filename: str) -> None:
+        """
+        Validate file extension
+        """
+        extension = Path(filename).suffix.lower()
+        if (extension not in settings.ALLOWED_EXTENSIONS):
+            raise ValidationError(f"File extension {extension} not allowed. Allowed: {', '.join(settings.ALLOWED_EXTENSIONS)}")
+    @staticmethod
+    def validate_image_content(file_path: Path) -> Tuple[int, int]:
+        """
+        Validate image can be opened and get dimensions
+        """
+        try:
+            with Image.open(file_path) as image:
+                width, height = image.size
+                # Validate dimensions
+                if ((width < MIN_IMAGE_DIMENSION) or (height < MIN_IMAGE_DIMENSION)):
+                    raise ValidationError(f"Image dimensions ({width}x{height}) too small. Minimum: {MIN_IMAGE_DIMENSION}px")
+                if ((width > MAX_IMAGE_DIMENSION) or (height > MAX_IMAGE_DIMENSION)):
+                    raise ValidationError(f"Image dimensions ({width}x{height}) too large. Maximum: {MAX_IMAGE_DIMENSION}px")
+                # Verify format
+                if (image.format.lower() not in ['jpeg', 'png', 'webp']):
+                    raise ValidationError(f"Unsupported image format: {image.format}")
+                return width, height
+        except ValidationError:
+            raise
+        except Exception as e:
+            raise ValidationError(f"Cannot open image: {str(e)}")
+    @staticmethod
+    def validate_mime_type(file_path: Path) -> None:
+        """
+        Validate MIME type matches image
+        """
+        try:
+            mime = magic.from_file(str(file_path), mime = True)
+            if (not mime.startswith('image/')):
+                raise ValidationError(f"File is not an image. MIME type: {mime}")
+        except Exception as e:
+            logger.warning(f"MIME type validation failed: {e}")
+            # Don't fail if python-magic is not available
+    @classmethod
+    def validate_image(cls, file_path: Path, filename: str, file_size: int) -> Tuple[int, int]:
+        """
+        Comprehensive image validation
+        """
+        cls.validate_file_size(file_size)
+        cls.validate_file_extension(filename)
+        dimensions = cls.validate_image_content(file_path)
+        cls.validate_mime_type(file_path)  # Optional, commented out if python-magic not available
+        logger.debug(f"Validated image: {filename} ({dimensions[0]}x{dimensions[1]})")
+        return dimensions