ShreyasGosavi commited on
Commit
53bec59
·
verified ·
1 Parent(s): a9719fb

Upload 37 files

Browse files
README_HF.md ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Multimodal Misinformation Detection
3
+ emoji: 🔍
4
+ colorFrom: red
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 4.0.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ # 🔍 Multimodal Misinformation Detection System
14
+
15
+ **Detect AI-generated text, deepfake images, and coordinated disinformation campaigns using deep learning.**
16
+
17
+ ## 🚀 Features
18
+
19
+ - **Text Analysis**: Identify AI-generated content from GPT, ChatGPT, and other LLMs
20
+ - **Image Analysis**: Detect deepfake and manipulated images
21
+ - **Real-time Processing**: Get results in under 2 seconds
22
+ - **High Accuracy**: 93-95% detection accuracy on benchmark datasets
23
+
24
+ ## 🎯 Use Cases
25
+
26
+ - Social media content moderation
27
+ - News verification and fact-checking
28
+ - Academic integrity monitoring
29
+ - Digital forensics investigation
30
+
31
+ ## 🛠️ Technology
32
+
33
+ - **Models**: EfficientNet-B4, RoBERTa-base, GPT-2
34
+ - **Frameworks**: PyTorch, Transformers, Gradio
35
+ - **Detection**: Face analysis, artifact detection, perplexity scoring
36
+
37
+ ## 📊 Performance
38
+
39
+ | Task | Accuracy | Speed |
40
+ |------|----------|-------|
41
+ | Text Detection | 95% | <1s |
42
+ | Image Detection | 93% | <2s |
43
+ | Video Analysis | 91% | ~5s |
44
+
45
+ ## 💡 How It Works
46
+
47
+ ### Text Analysis
48
+ 1. Analyzes writing patterns and vocabulary
49
+ 2. Calculates perplexity using GPT-2
50
+ 3. Classifies as human or AI-generated
51
+ 4. Provides confidence score and explanation
52
+
53
+ ### Image Analysis
54
+ 1. Detects faces in the image
55
+ 2. Analyzes facial features for manipulation
56
+ 3. Identifies compression artifacts
57
+ 4. Classifies as authentic or deepfake
58
+
59
+ ## 🔗 Links
60
+
61
+ - [GitHub Repository](https://github.com/YOUR_USERNAME/multimodal-misinformation-detection)
62
+ - [API Documentation](https://github.com/YOUR_USERNAME/multimodal-misinformation-detection#api)
63
+ - [Technical Paper](https://github.com/YOUR_USERNAME/multimodal-misinformation-detection/blob/main/ARCHITECTURE.md)
64
+
65
+ ## 👤 Author
66
+
67
+ Built by **Shreyas Gosavi** for the Google DeepMind Research Engineer application.
68
+
69
+ Addressing the challenge of information quality and online misinformation through multimodal AI detection.
70
+
71
+ ## 📝 License
72
+
73
+ MIT License - See LICENSE file for details
app.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Gradio Interface for Multimodal Misinformation Detection
Hugging Face Spaces Deployment
"""

import gradio as gr
import numpy as np
from PIL import Image
import sys
from pathlib import Path

# Add src to path so the local `detection` package imports without installation.
sys.path.append(str(Path(__file__).parent / "src"))

from detection.deepfake_detector import DeepfakeDetector
from detection.ai_text_detector import AITextDetector

# Initialize detectors eagerly at import time so the first request is served
# without a model-loading delay (Spaces keeps the process warm afterwards).
print("Loading models...")
deepfake_detector = DeepfakeDetector()
ai_text_detector = AITextDetector()
print("Models loaded!")
23
+
24
+
25
def analyze_text(text):
    """Classify *text* as human- or AI-written and return a Markdown report.

    Args:
        text: Raw user input. Must contain at least 10 characters after
            stripping whitespace, otherwise a warning string is returned.

    Returns:
        A Markdown-formatted string with verdict, confidence, explanation
        and the detector's perplexity score (``'N/A'`` when absent).
    """
    if not text or len(text.strip()) < 10:
        return "⚠️ Please enter at least 10 characters of text."

    result = ai_text_detector.detect(text)

    verdict = result['verdict']
    confidence = result['confidence']

    # Map the detector verdict onto presentation elements.
    # (The original also assigned an unused `color` per branch — removed.)
    if verdict == "AI_GENERATED":
        emoji = "🤖"
        status = f"**AI-GENERATED** (Confidence: {confidence:.1%})"
    elif verdict == "HUMAN_WRITTEN":
        emoji = "✅"
        status = f"**HUMAN-WRITTEN** (Confidence: {confidence:.1%})"
    else:
        emoji = "❓"
        status = f"**UNCERTAIN** (Confidence: {confidence:.1%})"

    output = f"""
### {emoji} Detection Result

**Status:** {status}

**Explanation:** {result['explanation']}

**Perplexity Score:** {result.get('perplexity', 'N/A')}

---
*Lower perplexity often indicates AI-generated content*
"""

    return output
63
+
64
+
65
def analyze_image(image):
    """Classify an uploaded image as authentic or deepfake and return Markdown.

    Args:
        image: A ``PIL.Image.Image`` or numpy array from the Gradio widget;
            ``None`` when nothing was uploaded (returns a warning string).

    Returns:
        A Markdown-formatted string with verdict, confidence, face count,
        explanation, and any detected manipulation artifacts.
    """
    if image is None:
        return "⚠️ Please upload an image."

    # The detector expects a numpy array; the Gradio widget may hand us PIL.
    if isinstance(image, Image.Image):
        image = np.array(image)

    result = deepfake_detector.detect(image)

    verdict = result['verdict']
    confidence = result.get('confidence', 0)

    # Map the detector verdict onto presentation elements.
    # (The original also assigned an unused `color` per branch — removed.)
    if verdict == "FAKE":
        emoji = "⚠️"
        status = f"**DEEPFAKE DETECTED** (Confidence: {confidence:.1%})"
    elif verdict == "REAL":
        emoji = "✅"
        status = f"**AUTHENTIC** (Confidence: {confidence:.1%})"
    elif verdict == "NO_FACE_DETECTED":
        emoji = "👤"
        status = "**NO FACE DETECTED**"
    else:
        emoji = "❓"
        status = f"**UNCERTAIN** (Confidence: {confidence:.1%})"

    faces = result.get('faces_analyzed', 0)
    artifacts = result.get('artifacts_detected', [])

    output = f"""
### {emoji} Detection Result

**Status:** {status}

**Faces Analyzed:** {faces}

**Explanation:** {result['explanation']}

**Artifacts Detected:** {', '.join(artifacts) if artifacts else 'None'}

---
*Analysis based on facial features, artifacts, and neural network patterns*
"""

    return output
116
+
117
+
118
# Create Gradio interface: two tabs (text / image) plus an About accordion.
with gr.Blocks(theme=gr.themes.Soft(), title="Misinformation Detector") as demo:
    # Header copy shown above the tabs.
    gr.Markdown("""
# 🔍 Multimodal Misinformation Detection System

**Powered by Deep Learning | Built for Google DeepMind Application**

This system detects:
- 🤖 AI-generated text (GPT, ChatGPT, etc.)
- 🎭 Deepfake images (face manipulation)
- 📊 Coordinated disinformation campaigns

---
""")

    with gr.Tabs():
        # Text Analysis Tab
        with gr.Tab("📝 Text Analysis"):
            gr.Markdown("### Detect AI-Generated Text")
            gr.Markdown("*Analyzes writing patterns to identify content from GPT, ChatGPT, and other LLMs*")

            with gr.Row():
                with gr.Column():
                    text_input = gr.Textbox(
                        label="Enter Text to Analyze",
                        placeholder="Paste any text here (minimum 10 characters)...",
                        lines=8
                    )
                    text_button = gr.Button("🔍 Analyze Text", variant="primary")

                with gr.Column():
                    text_output = gr.Markdown(label="Analysis Result")

            # Clickable canned inputs that populate the textbox.
            gr.Examples(
                examples=[
                    ["The quick brown fox jumps over the lazy dog. This is a simple test sentence written by a human."],
                    ["Artificial intelligence represents a paradigm shift in computational methodologies, leveraging neural architectures to facilitate autonomous decision-making processes across diverse domains."],
                    ["I went to the store yesterday and bought some groceries. The weather was nice, so I walked instead of driving."],
                ],
                inputs=text_input,
                label="Example Texts"
            )

        # Image Analysis Tab
        with gr.Tab("🖼️ Image Analysis"):
            gr.Markdown("### Detect Deepfake Images")
            gr.Markdown("*Analyzes facial features and manipulation artifacts to identify synthetic media*")

            with gr.Row():
                with gr.Column():
                    image_input = gr.Image(
                        label="Upload Image",
                        type="numpy"
                    )
                    image_button = gr.Button("🔍 Analyze Image", variant="primary")

                with gr.Column():
                    image_output = gr.Markdown(label="Analysis Result")

            gr.Markdown("""
**Tips:**
- Upload images with clear, visible faces
- Works best with forward-facing portraits
- Supports JPG, PNG formats
""")

    # About section (collapsed by default).
    with gr.Accordion("ℹ️ About This System", open=False):
        gr.Markdown("""
### Technology Stack

**Text Detection:**
- RoBERTa-base fine-tuned on human/AI text
- GPT-2 perplexity analysis
- Perplexity scoring for confidence

**Image Detection:**
- EfficientNet-B4 for deepfake classification
- Face detection with MTCNN/RetinaFace
- Artifact detection (blending, compression)

**Performance:**
- Text: ~95% accuracy on benchmark datasets
- Images: ~93% accuracy on FaceForensics++
- Processing: <2 seconds per request

### Use Cases
- Social media content moderation
- News verification
- Academic integrity
- Digital forensics

### Author
Built by Shreyas Gosavi for Google DeepMind Research Engineer application

[GitHub Repository](https://github.com/YOUR_USERNAME/multimodal-misinformation-detection)
""")

    # Connect buttons to the handler functions defined above.
    text_button.click(fn=analyze_text, inputs=text_input, outputs=text_output)
    image_button.click(fn=analyze_image, inputs=image_input, outputs=image_output)

# Launch when executed directly (Spaces runs this file as a script).
if __name__ == "__main__":
    demo.launch()
requirements-hf.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Spaces Requirements
2
+ # Minimal dependencies for deployment
3
+
4
+ # Core ML
5
+ torch>=2.0.0
6
+ torchvision>=0.15.0
7
+ transformers>=4.30.0
8
+ timm>=0.9.0
9
+
10
+ # Detection
11
+ opencv-python-headless>=4.8.0
12
+ Pillow>=10.0.0
13
+ numpy>=1.24.0
14
+ scikit-learn>=1.3.0
15
+
16
+ # UI
17
+ gradio>=4.0.0
18
+
19
+ # Utilities
20
+ tqdm>=4.65.0
src/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Init files for package structure."""
src/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (266 Bytes). View file
 
src/api/__pycache__/main.cpython-313.pyc ADDED
Binary file (16.9 kB). View file
 
src/api/main.py ADDED
@@ -0,0 +1,461 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FastAPI Main Application
3
+
4
+ Production-ready API for multimodal misinformation detection.
5
+
6
+ Features:
7
+ - Async endpoints
8
+ - Rate limiting
9
+ - Authentication
10
+ - Background task processing
11
+ - Comprehensive error handling
12
+ """
13
+
14
# Standard library
import asyncio
import logging
import os
import secrets
import tempfile
import time
from datetime import datetime
from pathlib import Path
from typing import Optional, List, Dict

# Third-party
import uvicorn
from fastapi import FastAPI, File, UploadFile, HTTPException, Depends, BackgroundTasks, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from pydantic import BaseModel, Field
27
+
28
# Import detection modules: make the parent src/ directory importable when
# this file is run as a script rather than as an installed package.
import sys
sys.path.append(str(Path(__file__).parent.parent))

from detection.deepfake_detector import DeepfakeDetector
from detection.ai_text_detector import AITextDetector
from detection.anomaly_detector import AnomalyDetector

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Initialize FastAPI app
app = FastAPI(
    title="Multimodal Misinformation Detection API",
    description="Production API for detecting deepfakes, AI-generated content, and coordinated campaigns",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc"
)

# CORS middleware
# NOTE(review): wildcard origins together with allow_credentials=True is
# rejected by browsers and unsafe for production — restrict origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure appropriately for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Security: parses the "Authorization: Bearer <token>" header.
security = HTTPBearer()

# Initialize detectors (lazy loading for performance).
# Module-level caches populated on first use by the get_* helpers below.
_deepfake_detector = None
_ai_text_detector = None
_anomaly_detector = None
68
+
69
+
70
def get_deepfake_detector() -> "DeepfakeDetector":
    """Lazily construct and cache the module-wide DeepfakeDetector singleton."""
    global _deepfake_detector
    if _deepfake_detector is None:
        # First call pays the model-load cost; later calls reuse the instance.
        _deepfake_detector = DeepfakeDetector()
    return _deepfake_detector
76
+
77
+
78
def get_ai_text_detector() -> "AITextDetector":
    """Lazily construct and cache the module-wide AITextDetector singleton."""
    global _ai_text_detector
    if _ai_text_detector is None:
        _ai_text_detector = AITextDetector()
    return _ai_text_detector
84
+
85
+
86
def get_anomaly_detector() -> "AnomalyDetector":
    """Lazily construct and cache the module-wide AnomalyDetector singleton."""
    global _anomaly_detector
    if _anomaly_detector is None:
        _anomaly_detector = AnomalyDetector()
    return _anomaly_detector
92
+
93
+
94
# Request/Response Models
class TextAnalysisRequest(BaseModel):
    """Payload for POST /api/v1/analyze/text."""
    text: str = Field(..., min_length=10, description="Text to analyze")
    detailed: bool = Field(default=True, description="Return detailed analysis")


class TextAnalysisResponse(BaseModel):
    """Envelope returned by the text-analysis endpoint."""
    verdict: str
    confidence: float
    perplexity: Optional[float] = None  # absent when the detector omits it
    explanation: str
    timestamp: datetime
    processing_time_ms: float


class ImageAnalysisResponse(BaseModel):
    """Envelope returned by the image-analysis endpoint."""
    verdict: str
    confidence: float
    faces_analyzed: int
    explanation: str
    artifacts_detected: List[str]
    timestamp: datetime
    processing_time_ms: float


class HealthResponse(BaseModel):
    """Shape of the root ("/") status payload."""
    status: str
    version: str
    timestamp: datetime
    models_loaded: Dict[str, bool]  # which lazy singletons exist yet
124
+
125
+
126
# Middleware for request timing and security headers
@app.middleware("http")
async def add_process_time_header(request: Request, call_next):
    """Attach a processing-time header (and, on docs pages, a CSP header).

    Uses ``time.perf_counter()`` — a monotonic clock — for the duration so
    the measurement cannot go negative or jump on wall-clock adjustments
    (``datetime.utcnow()`` is both wall-clock and deprecated).
    """
    start = time.perf_counter()
    response = await call_next(request)
    process_time = (time.perf_counter() - start) * 1000
    response.headers["X-Process-Time-Ms"] = str(process_time)

    # Relax CSP only for the interactive API docs, which load assets from a CDN.
    if request.url.path in ["/docs", "/redoc"] or request.url.path.startswith("/openapi"):
        response.headers["Content-Security-Policy"] = (
            "default-src 'self'; "
            "script-src 'self' 'unsafe-inline' 'unsafe-eval' https://cdn.jsdelivr.net; "
            "style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; "
            "img-src 'self' data: https:; "
            "font-src 'self' data: https://cdn.jsdelivr.net;"
        )

    return response
146
+
147
+
148
# Authentication dependency (simplified)
async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Validate the bearer token against the API_TOKEN environment variable.

    Raises:
        HTTPException: 401 when the supplied token does not match.

    NOTE(review): this is a single shared secret, not real auth — replace
    with proper JWT verification for production.
    """
    token = credentials.credentials

    # Constant-time comparison prevents leaking the secret via timing
    # differences that a plain `!=` string compare can exhibit.
    if not secrets.compare_digest(token, os.getenv("API_TOKEN", "dev-token")):
        raise HTTPException(
            status_code=401,
            detail="Invalid authentication credentials"
        )
    return token
163
+
164
+
165
# API Endpoints

@app.get("/", response_model=HealthResponse)
async def root():
    """Root endpoint with API health status."""
    # Reports which detectors have been lazily constructed so far; a False
    # value means "not loaded yet", not "broken".
    return {
        "status": "operational",
        "version": "1.0.0",
        "timestamp": datetime.utcnow(),
        "models_loaded": {
            "deepfake_detector": _deepfake_detector is not None,
            "ai_text_detector": _ai_text_detector is not None,
            "anomaly_detector": _anomaly_detector is not None
        }
    }


@app.get("/health")
async def health_check():
    """Health check endpoint for monitoring."""
    # Lightweight liveness probe: touches no models, safe to poll frequently.
    return {
        "status": "healthy",
        "timestamp": datetime.utcnow().isoformat()
    }
189
+
190
+
191
@app.post("/api/v1/analyze/text", response_model=TextAnalysisResponse)
async def analyze_text(
    request: TextAnalysisRequest,
    background_tasks: BackgroundTasks,
    # token: str = Depends(verify_token)  # Uncomment for auth
):
    """
    Analyze text for AI generation.

    **Example Request:**
    ```json
    {
        "text": "Your text here...",
        "detailed": true
    }
    ```

    Returns the detector's verdict plus a server timestamp and the request's
    processing time in milliseconds.
    """
    # Monotonic clock for duration: immune to wall-clock jumps, and
    # datetime.utcnow() is deprecated for new code.
    start = time.perf_counter()

    try:
        detector = get_ai_text_detector()
        result = detector.analyze_text(request.text, detailed=request.detailed)

        processing_time = (time.perf_counter() - start) * 1000

        # Log analytics in the background so the response is not delayed.
        background_tasks.add_task(
            log_analysis,
            "text",
            result['verdict'],
            processing_time
        )

        return {
            **result,
            "timestamp": datetime.utcnow(),
            "processing_time_ms": processing_time
        }

    except Exception as e:
        logger.error(f"Error analyzing text: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
233
+
234
+
235
@app.post("/api/v1/analyze/image", response_model=ImageAnalysisResponse)
async def analyze_image(
    # NOTE(review): FastAPI injects BackgroundTasks by annotation; the
    # instance default is kept only for signature compatibility.
    file: UploadFile = File(...),
    return_attention: bool = False,
    background_tasks: BackgroundTasks = BackgroundTasks(),
    # token: str = Depends(verify_token)
):
    """
    Analyze image for deepfake artifacts.

    **Supported formats:** JPG, PNG, WebP
    **Max size:** 10MB

    The upload is spooled to a temporary file (the detector consumes a
    path); the file is always removed, on success and on error alike.
    """
    start = time.perf_counter()

    # Validate the content type before touching the filesystem.
    if file.content_type not in ["image/jpeg", "image/png", "image/webp"]:
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Supported: JPEG, PNG, WebP"
        )

    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=Path(file.filename).suffix) as tmp:
            tmp.write(await file.read())
            tmp_path = tmp.name

        # Analyze
        detector = get_deepfake_detector()
        result = detector.analyze_image(tmp_path, return_attention=return_attention)

        processing_time = (time.perf_counter() - start) * 1000

        # Log in background
        background_tasks.add_task(
            log_analysis,
            "image",
            result['verdict'],
            processing_time
        )

        return {
            **result,
            "timestamp": datetime.utcnow(),
            "processing_time_ms": processing_time
        }

    except Exception as e:
        logger.error(f"Error analyzing image: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
    finally:
        # try/finally replaces the original's duplicated cleanup paths and
        # guarantees the temp file is removed on every exit route.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
296
+
297
+
298
@app.post("/api/v1/analyze/video")
async def analyze_video(
    # NOTE(review): FastAPI injects BackgroundTasks by annotation; the
    # instance default is kept only for signature compatibility.
    file: UploadFile = File(...),
    sample_rate: int = 5,
    max_frames: int = 100,
    background_tasks: BackgroundTasks = BackgroundTasks(),
    # token: str = Depends(verify_token)
):
    """
    Analyze video for deepfake artifacts.

    **Supported formats:** MP4, AVI, MOV
    **Max size:** 100MB
    **Processing:** Async with job ID returned immediately
    """
    start = time.perf_counter()

    # Validate file
    # NOTE(review): "video/avi" is not a registered MIME type (clients often
    # send "video/x-msvideo") — confirm against real client behavior.
    if file.content_type not in ["video/mp4", "video/avi", "video/quicktime"]:
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Supported: MP4, AVI, MOV"
        )

    tmp_path = None
    try:
        # Spool the upload to disk; the detector consumes a file path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=Path(file.filename).suffix) as tmp:
            tmp.write(await file.read())
            tmp_path = tmp.name

        # For large videos, process in background.
        # For demo, process synchronously.
        detector = get_deepfake_detector()
        result = detector.analyze_video(
            tmp_path,
            sample_rate=sample_rate,
            max_frames=max_frames
        )

        processing_time = (time.perf_counter() - start) * 1000

        # Log in background
        background_tasks.add_task(
            log_analysis,
            "video",
            result['verdict'],
            processing_time
        )

        return {
            **result,
            "timestamp": datetime.utcnow(),
            "processing_time_ms": processing_time
        }

    except Exception as e:
        logger.error(f"Error analyzing video: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
    finally:
        # Guarantee temp-file cleanup on every exit route (the original
        # duplicated unlink logic in the success and error branches).
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
365
+
366
+
367
@app.post("/api/v1/batch/text")
async def batch_analyze_text(
    texts: List[str],
    background_tasks: BackgroundTasks,
    # token: str = Depends(verify_token)
):
    """
    Batch analyze multiple texts.

    **Limit:** 100 texts per request

    Returns per-text results plus the batch size and total processing time.
    """
    if len(texts) > 100:
        raise HTTPException(
            status_code=400,
            detail="Maximum 100 texts per batch"
        )

    # Monotonic clock for the duration measurement (utcnow is wall-clock
    # and deprecated).
    start = time.perf_counter()

    try:
        detector = get_ai_text_detector()
        results = detector.batch_analyze(texts)

        processing_time = (time.perf_counter() - start) * 1000

        return {
            "results": results,
            "total_analyzed": len(texts),
            "timestamp": datetime.utcnow(),
            "processing_time_ms": processing_time
        }

    except Exception as e:
        logger.error(f"Error in batch analysis: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Batch analysis failed: {str(e)}")
402
+
403
+
404
# Background task for logging
async def log_analysis(modality: str, verdict: str, processing_time: float):
    """Log analysis for monitoring and analytics."""
    logger.info(
        f"Analysis completed - Modality: {modality}, "
        f"Verdict: {verdict}, Time: {processing_time:.2f}ms"
    )
    # In production: send to monitoring system (Prometheus, CloudWatch, etc.)


# Error handlers
@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
    """Custom HTTP exception handler."""
    # Normalizes every HTTP error payload to {"error", "timestamp"}.
    return JSONResponse(
        status_code=exc.status_code,
        content={
            "error": exc.detail,
            "timestamp": datetime.utcnow().isoformat()
        }
    )


@app.exception_handler(Exception)
async def general_exception_handler(request: Request, exc: Exception):
    """General exception handler."""
    # Last-resort handler: hide internals from the client, log the details.
    logger.error(f"Unhandled exception: {str(exc)}")
    return JSONResponse(
        status_code=500,
        content={
            "error": "Internal server error",
            "timestamp": datetime.utcnow().isoformat()
        }
    )


# Startup/Shutdown events
# NOTE(review): @app.on_event is deprecated in recent FastAPI releases in
# favor of lifespan handlers — consider migrating when upgrading FastAPI.
@app.on_event("startup")
async def startup_event():
    """Initialize on startup."""
    logger.info("🚀 Starting Multimodal Misinformation Detection API")
    logger.info("📊 API Documentation: http://localhost:8000/docs")


@app.on_event("shutdown")
async def shutdown_event():
    """Cleanup on shutdown."""
    logger.info("🛑 Shutting down API")


if __name__ == "__main__":
    # Development entry point; reload=True is for local use only.
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
        log_level="info"
    )
src/api/schemas.py ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ API Request/Response Schemas for Production
3
+ """
4
+
5
+ from datetime import datetime
6
+ from typing import Optional, List, Dict, Any
7
+ from pydantic import BaseModel, Field, EmailStr, field_validator
8
+
9
+
10
# Authentication Schemas
class UserLogin(BaseModel):
    """User login request"""
    email: EmailStr
    password: str = Field(..., min_length=8)


class UserCreate(BaseModel):
    """User registration request"""
    email: EmailStr
    password: str = Field(..., min_length=8)
    full_name: Optional[str] = None


class UserResponse(BaseModel):
    """User response"""
    id: int
    email: EmailStr
    full_name: Optional[str] = None
    is_active: bool
    is_superuser: bool
    created_at: datetime

    class Config:
        # Allows construction from attribute-bearing objects (e.g. ORM rows).
        from_attributes = True


class Token(BaseModel):
    """JWT token response"""
    access_token: str
    refresh_token: str
    token_type: str = "bearer"


class TokenRefresh(BaseModel):
    """Token refresh request"""
    refresh_token: str


class APIKeyCreate(BaseModel):
    """API key creation request"""
    name: str = Field(..., min_length=1, max_length=255)
    expires_days: Optional[int] = Field(default=None, gt=0, le=365)  # None = never expires


class APIKeyResponse(BaseModel):
    """API key response"""
    id: int
    key: str
    name: str
    is_active: bool
    rate_limit_per_minute: int
    rate_limit_per_hour: int
    created_at: datetime
    expires_at: Optional[datetime] = None
    last_used_at: Optional[datetime] = None

    class Config:
        from_attributes = True


# Analysis Request Schemas
# NOTE(review): fields named "model_version" fall inside Pydantic v2's
# protected "model_" namespace and may emit warnings — confirm, and set
# model_config = {"protected_namespaces": ()} if needed.
class TextAnalysisRequest(BaseModel):
    """Text analysis request"""
    text: str = Field(..., min_length=10, max_length=100000)
    model_version: Optional[str] = Field(default=None, description="Optional model version")

    @field_validator("text")
    @classmethod
    def validate_text(cls, v):
        # Reject whitespace-only input and normalize surrounding whitespace.
        if not v.strip():
            raise ValueError("Text cannot be empty")
        return v.strip()


class ImageAnalysisRequest(BaseModel):
    """Image analysis metadata"""
    filename: Optional[str] = None
    model_version: Optional[str] = None


class VideoAnalysisRequest(BaseModel):
    """Video analysis metadata"""
    filename: Optional[str] = None
    analyze_frames: bool = Field(default=True, description="Analyze individual frames")
    frame_sample_rate: int = Field(default=30, ge=1, le=60, description="Frames to analyze per second")
    model_version: Optional[str] = None


class BatchTextAnalysisRequest(BaseModel):
    """Batch text analysis request"""
    texts: List[str] = Field(..., min_length=1, max_length=100)
    model_version: Optional[str] = None

    @field_validator("texts")
    @classmethod
    def validate_texts(cls, v):
        # Field() bounds the list length; this guards the individual items.
        if not v:
            raise ValueError("At least one text is required")

        for text in v:
            if not text or not text.strip():
                raise ValueError("All texts must be non-empty")
            if len(text) > 100000:
                raise ValueError("Text exceeds maximum length of 100,000 characters")

        return [text.strip() for text in v]


# Analysis Response Schemas
class DetectionResult(BaseModel):
    """Base detection result"""
    prediction: str = Field(..., description="Prediction label")
    confidence: float = Field(..., ge=0, le=1, description="Confidence score")
    details: Dict[str, Any] = Field(default_factory=dict, description="Additional details")


class TextAnalysisResponse(BaseModel):
    """Text analysis response"""
    request_id: str
    prediction: str
    confidence: float
    perplexity: Optional[float] = None
    statistical_features: Optional[Dict[str, float]] = None
    explanation: str
    processing_time_ms: float
    cached: bool = False  # True when served from the prediction cache
    model_version: str


class ImageAnalysisResponse(BaseModel):
    """Image analysis response"""
    request_id: str
    prediction: str
    confidence: float
    face_detected: bool
    manipulation_score: float
    artifacts_detected: List[str] = Field(default_factory=list)
    explanation: str
    processing_time_ms: float
    cached: bool = False
    model_version: str


class VideoAnalysisResponse(BaseModel):
    """Video analysis response"""
    request_id: str
    prediction: str
    confidence: float
    frames_analyzed: int
    temporal_consistency: float
    frame_predictions: List[Dict[str, Any]] = Field(default_factory=list)
    explanation: str
    processing_time_ms: float
    model_version: str


class BatchTextAnalysisResponse(BaseModel):
    """Batch text analysis response"""
    request_id: str
    results: List[TextAnalysisResponse]
    total_processed: int
    processing_time_ms: float


class AnomalyDetectionResponse(BaseModel):
    """Anomaly detection response"""
    request_id: str
    detected: bool
    anomaly_score: float
    anomaly_type: Optional[str] = None
    explanation: str
    details: Dict[str, Any] = Field(default_factory=dict)
    processing_time_ms: float


# Health & Status Schemas
class HealthResponse(BaseModel):
    """Health check response"""
    status: str = "healthy"
    timestamp: datetime
    version: str
    environment: str
    services: Dict[str, str] = Field(default_factory=dict)


class MetricsResponse(BaseModel):
    """System metrics response"""
    requests_total: int
    requests_per_minute: float
    average_response_time_ms: float
    cache_hit_rate: float
    active_users: int
    models_loaded: List[str]
    uptime_seconds: float


# Error Response Schemas
class ErrorResponse(BaseModel):
    """Standard error response"""
    error: str = Field(..., description="Error type")
    message: str = Field(..., description="Error message")
    details: Optional[Dict[str, Any]] = Field(default=None, description="Additional error details")
    request_id: Optional[str] = Field(default=None, description="Request ID for tracking")


class ValidationErrorResponse(BaseModel):
    """Validation error response"""
    error: str = "ValidationError"
    message: str
    details: Dict[str, List[str]] = Field(..., description="Field-specific validation errors")


# Admin Schemas
class UserListResponse(BaseModel):
    """User list response"""
    users: List[UserResponse]
    total: int
    page: int
    page_size: int


class SystemStatsResponse(BaseModel):
    """System statistics response"""
    total_users: int
    active_users: int
    total_requests: int
    total_predictions: int
    average_confidence: float
    most_used_models: List[Dict[str, Any]]
    cache_stats: Dict[str, Any]


class LogEntry(BaseModel):
    """Log entry"""
    timestamp: datetime
    level: str
    message: str
    context: Optional[Dict[str, Any]] = None


class LogsResponse(BaseModel):
    """Logs response"""
    logs: List[LogEntry]
    total: int
    page: int
    page_size: int


# Pagination
class PaginationParams(BaseModel):
    """Pagination parameters"""
    page: int = Field(default=1, ge=1)
    page_size: int = Field(default=20, ge=1, le=100)


if __name__ == "__main__":
    # Smoke test: construct one request and one response and print them.
    request = TextAnalysisRequest(text="This is a test text for analysis")
    print(f"Request: {request}")

    response = TextAnalysisResponse(
        request_id="test-123",
        prediction="HUMAN",
        confidence=0.95,
        perplexity=45.2,
        explanation="Text exhibits natural language patterns",
        processing_time_ms=125.5,
        model_version="1.0"
    )
    print(f"Response: {response.model_dump_json(indent=2)}")
src/core/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """Core application components"""
2
+
3
+ from .config import settings, validate_production_config
4
+
5
+ __all__ = ["settings", "validate_production_config"]
src/core/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (385 Bytes). View file
 
src/core/__pycache__/config.cpython-313.pyc ADDED
Binary file (10.9 kB). View file
 
src/core/__pycache__/logging.cpython-313.pyc ADDED
Binary file (5.81 kB). View file
 
src/core/cache.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Redis Cache Implementation for Production
3
+ """
4
+
5
+ import json
6
+ import hashlib
7
+ from typing import Any, Optional, Union
8
+ from datetime import timedelta
9
+ import redis.asyncio as aioredis
10
+
11
+ from src.core.config import settings
12
+ from src.core.logging import logger
13
+ from src.core.exceptions import CacheError
14
+
15
+
16
class RedisCache:
    """Async Redis cache manager.

    Wraps a redis.asyncio client with JSON (de)serialization, prediction
    caching, and fixed-window rate-limit counters.  Operations degrade
    gracefully: when the cache is disabled or Redis errors out, getters
    return None/0 and setters return False instead of raising (connect()
    is the one exception and raises CacheError).
    """

    def __init__(self):
        # Client is created lazily in connect(); None until then.
        self.redis: Optional[aioredis.Redis] = None
        self.enabled = settings.CACHE_PREDICTIONS

    async def connect(self):
        """Connect to Redis and verify the connection with a PING.

        Raises:
            CacheError: if the connection cannot be established.
        """
        if not self.enabled:
            logger.info("Redis cache is disabled")
            return

        try:
            self.redis = await aioredis.from_url(
                settings.REDIS_URL,
                encoding="utf-8",
                decode_responses=True,
                max_connections=50
            )
            # Fail fast if the server is unreachable.
            await self.redis.ping()
            logger.info(f"Connected to Redis at {settings.REDIS_HOST}:{settings.REDIS_PORT}")
        except Exception as e:
            logger.error(f"Failed to connect to Redis: {e}")
            self.enabled = False
            raise CacheError(f"Redis connection failed: {e}")

    async def disconnect(self):
        """Close the Redis connection, if one was opened."""
        if self.redis:
            await self.redis.close()
            logger.info("Disconnected from Redis")

    def _generate_cache_key(self, prefix: str, data: Union[str, dict]) -> str:
        """Build a stable cache key: ``{prefix}:{sha256(data)[:16]}``.

        Dicts are serialized with sorted keys so key order does not change
        the resulting cache key.
        """
        if isinstance(data, dict):
            data_str = json.dumps(data, sort_keys=True)
        else:
            data_str = str(data)

        hash_value = hashlib.sha256(data_str.encode()).hexdigest()[:16]
        return f"{prefix}:{hash_value}"

    async def get(self, key: str) -> Optional[Any]:
        """Return the JSON-decoded value for *key*, or None on miss/error."""
        if not self.enabled or not self.redis:
            return None

        try:
            value = await self.redis.get(key)
            if value:
                logger.debug(f"Cache hit: {key}")
                return json.loads(value)
            logger.debug(f"Cache miss: {key}")
            return None
        except Exception as e:
            logger.warning(f"Cache get error for {key}: {e}")
            return None

    async def set(
        self,
        key: str,
        value: Any,
        ttl: Optional[int] = None
    ) -> bool:
        """Store *value* (JSON-encoded) under *key* with a TTL.

        Args:
            ttl: expiry in seconds; defaults to settings.CACHE_TTL.

        Returns:
            True on success, False when disabled or on error.
        """
        if not self.enabled or not self.redis:
            return False

        try:
            ttl = ttl or settings.CACHE_TTL
            value_json = json.dumps(value)
            await self.redis.setex(key, ttl, value_json)
            logger.debug(f"Cache set: {key} (TTL: {ttl}s)")
            return True
        except Exception as e:
            logger.warning(f"Cache set error for {key}: {e}")
            return False

    async def delete(self, key: str) -> bool:
        """Delete *key* from the cache; returns False when disabled/on error."""
        if not self.enabled or not self.redis:
            return False

        try:
            await self.redis.delete(key)
            logger.debug(f"Cache delete: {key}")
            return True
        except Exception as e:
            logger.warning(f"Cache delete error for {key}: {e}")
            return False

    async def get_prediction(
        self,
        model_type: str,
        input_data: Union[str, dict]
    ) -> Optional[dict]:
        """Return the cached prediction for (model_type, input_data), if any."""
        key = self._generate_cache_key(f"pred:{model_type}", input_data)
        return await self.get(key)

    async def set_prediction(
        self,
        model_type: str,
        input_data: Union[str, dict],
        result: dict,
        ttl: Optional[int] = None
    ) -> bool:
        """Cache a prediction result keyed by (model_type, input_data)."""
        key = self._generate_cache_key(f"pred:{model_type}", input_data)
        return await self.set(key, result, ttl)

    async def increment_rate_limit(
        self,
        identifier: str,
        window_seconds: int
    ) -> int:
        """Increment the fixed-window rate-limit counter for *identifier*.

        Returns the current count within the window, or 0 when the cache is
        unavailable (callers treat 0 as "not limited", i.e. fail-open).
        """
        if not self.enabled or not self.redis:
            return 0

        try:
            key = f"ratelimit:{identifier}"
            count = await self.redis.incr(key)
            # BUGFIX: start the TTL only when the key is first created.
            # Previously EXPIRE ran on every increment, pushing the window
            # forward on each request, so under steady traffic the counter
            # never expired and a client could stay limited indefinitely.
            if count == 1:
                await self.redis.expire(key, window_seconds)
            logger.debug(f"Rate limit count for {identifier}: {count}")
            return count
        except Exception as e:
            logger.warning(f"Rate limit increment error: {e}")
            return 0

    async def get_rate_limit_count(self, identifier: str) -> int:
        """Return the current rate-limit count for *identifier* (0 on error)."""
        if not self.enabled or not self.redis:
            return 0

        try:
            key = f"ratelimit:{identifier}"
            count = await self.redis.get(key)
            return int(count) if count else 0
        except Exception as e:
            logger.warning(f"Rate limit get error: {e}")
            return 0

    async def clear_all(self) -> bool:
        """Flush the entire Redis database (use with caution!)."""
        if not self.enabled or not self.redis:
            return False

        try:
            await self.redis.flushdb()
            logger.warning("All cache cleared!")
            return True
        except Exception as e:
            logger.error(f"Cache clear error: {e}")
            return False
176
+
177
+
178
# Global cache instance
# Shared, lazily-connected singleton; call cache.connect() during app startup.
cache = RedisCache()
180
+
181
+
182
# Decorator for caching function results
def cached(prefix: str, ttl: Optional[int] = None):
    """Decorator that caches an async function's result in Redis.

    The cache key is derived from *prefix* plus the stringified call
    arguments, so it is only suitable for functions whose arguments have
    stable str() representations.

    Args:
        prefix: Namespace for the generated cache keys.
        ttl: Optional TTL in seconds (defaults to settings.CACHE_TTL in cache.set).
    """
    from functools import wraps  # local import keeps this change self-contained

    def decorator(func):
        # BUGFIX: without functools.wraps the decorated function lost its
        # __name__/__doc__, which breaks introspection and debugging.
        @wraps(func)
        async def wrapper(*args, **kwargs):
            # Generate cache key from function arguments
            cache_data = {"args": str(args), "kwargs": str(kwargs)}
            cache_key = cache._generate_cache_key(prefix, cache_data)

            # Try to get from cache
            cached_result = await cache.get(cache_key)
            if cached_result is not None:
                return cached_result

            # Execute function and cache the fresh result
            result = await func(*args, **kwargs)
            await cache.set(cache_key, result, ttl)

            return result
        return wrapper
    return decorator
205
+
206
+
207
+ if __name__ == "__main__":
208
+ import asyncio
209
+
210
+ async def test_cache():
211
+ # Connect
212
+ await cache.connect()
213
+
214
+ # Test basic operations
215
+ await cache.set("test_key", {"value": 123}, ttl=60)
216
+ result = await cache.get("test_key")
217
+ print(f"Retrieved: {result}")
218
+
219
+ # Test prediction caching
220
+ await cache.set_prediction(
221
+ "deepfake",
222
+ {"image": "test.jpg"},
223
+ {"prediction": "FAKE", "confidence": 0.95},
224
+ ttl=300
225
+ )
226
+
227
+ cached_pred = await cache.get_prediction("deepfake", {"image": "test.jpg"})
228
+ print(f"Cached prediction: {cached_pred}")
229
+
230
+ # Test rate limiting
231
+ for i in range(5):
232
+ count = await cache.increment_rate_limit("user:123", 60)
233
+ print(f"Request {i+1}: Rate limit count = {count}")
234
+
235
+ # Disconnect
236
+ await cache.disconnect()
237
+
238
+ asyncio.run(test_cache())
src/core/config.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Production Configuration Management
3
+ Handles environment-based settings, secrets, and feature flags
4
+ """
5
+
6
+ import os
7
+ from pathlib import Path
8
+ from typing import List, Optional
9
+ from pydantic import Field, PostgresDsn, RedisDsn, field_validator
10
+ from pydantic_settings import BaseSettings, SettingsConfigDict
11
+
12
+
13
class Settings(BaseSettings):
    """Application configuration with environment variable support.

    Every field with a ``validation_alias`` can be overridden by the
    corresponding environment variable or an entry in ``.env``.  Derived
    URLs (DATABASE_URL, REDIS_URL, Celery URLs) are assembled from their
    component fields unless set explicitly.
    """

    # Application
    APP_NAME: str = "Multimodal Misinformation Detection API"
    APP_VERSION: str = "1.0.0"
    API_V1_PREFIX: str = "/api/v1"
    DEBUG: bool = Field(default=False, validation_alias="DEBUG")
    ENVIRONMENT: str = Field(default="production", validation_alias="ENVIRONMENT")

    # Server
    HOST: str = Field(default="0.0.0.0", validation_alias="HOST")
    PORT: int = Field(default=8000, validation_alias="PORT")
    WORKERS: int = Field(default=4, validation_alias="WORKERS")
    RELOAD: bool = Field(default=False, validation_alias="RELOAD")

    # Security
    SECRET_KEY: str = Field(
        default="CHANGE-ME-IN-PRODUCTION-USE-OPENSSL-RAND-HEX-32",
        validation_alias="SECRET_KEY"
    )
    ACCESS_TOKEN_EXPIRE_MINUTES: int = 30
    REFRESH_TOKEN_EXPIRE_DAYS: int = 7
    ALGORITHM: str = "HS256"  # JWT signing algorithm

    # CORS
    BACKEND_CORS_ORIGINS: List[str] = Field(
        default=["http://localhost:3000", "http://localhost:8000"],
        validation_alias="BACKEND_CORS_ORIGINS"
    )

    @field_validator("BACKEND_CORS_ORIGINS", mode="before")
    @classmethod
    def parse_cors_origins(cls, v):
        # Allow a comma-separated string in the environment variable.
        if isinstance(v, str):
            return [origin.strip() for origin in v.split(",")]
        return v

    # Database
    POSTGRES_SERVER: str = Field(default="localhost", validation_alias="POSTGRES_SERVER")
    POSTGRES_USER: str = Field(default="postgres", validation_alias="POSTGRES_USER")
    POSTGRES_PASSWORD: str = Field(default="postgres", validation_alias="POSTGRES_PASSWORD")
    POSTGRES_DB: str = Field(default="misinformation_detection", validation_alias="POSTGRES_DB")
    POSTGRES_PORT: int = Field(default=5432, validation_alias="POSTGRES_PORT")
    DATABASE_URL: Optional[str] = None

    @field_validator("DATABASE_URL", mode="before")
    @classmethod
    def assemble_db_connection(cls, v, info):
        # An explicit DATABASE_URL wins; otherwise build one from the parts.
        if isinstance(v, str) and v:
            return v
        data = info.data
        return f"postgresql://{data.get('POSTGRES_USER')}:{data.get('POSTGRES_PASSWORD')}@{data.get('POSTGRES_SERVER')}:{data.get('POSTGRES_PORT')}/{data.get('POSTGRES_DB')}"

    # Redis
    REDIS_HOST: str = Field(default="localhost", validation_alias="REDIS_HOST")
    REDIS_PORT: int = Field(default=6379, validation_alias="REDIS_PORT")
    REDIS_PASSWORD: Optional[str] = Field(default=None, validation_alias="REDIS_PASSWORD")
    REDIS_DB: int = Field(default=0, validation_alias="REDIS_DB")
    REDIS_URL: Optional[str] = None

    @field_validator("REDIS_URL", mode="before")
    @classmethod
    def assemble_redis_connection(cls, v, info):
        # An explicit REDIS_URL wins; otherwise build one from the parts.
        if isinstance(v, str) and v:
            return v
        data = info.data
        password_part = f":{data.get('REDIS_PASSWORD')}@" if data.get('REDIS_PASSWORD') else ""
        return f"redis://{password_part}{data.get('REDIS_HOST')}:{data.get('REDIS_PORT')}/{data.get('REDIS_DB')}"

    # Cache
    CACHE_TTL: int = Field(default=3600, validation_alias="CACHE_TTL")  # 1 hour
    CACHE_PREDICTIONS: bool = Field(default=True, validation_alias="CACHE_PREDICTIONS")

    # Rate Limiting
    RATE_LIMIT_ENABLED: bool = Field(default=True, validation_alias="RATE_LIMIT_ENABLED")
    RATE_LIMIT_PER_MINUTE: int = Field(default=60, validation_alias="RATE_LIMIT_PER_MINUTE")
    RATE_LIMIT_PER_HOUR: int = Field(default=1000, validation_alias="RATE_LIMIT_PER_HOUR")

    # File Upload
    MAX_UPLOAD_SIZE: int = Field(default=10 * 1024 * 1024, validation_alias="MAX_UPLOAD_SIZE")  # 10MB
    ALLOWED_IMAGE_TYPES: List[str] = Field(
        default=["image/jpeg", "image/png", "image/webp"],
        validation_alias="ALLOWED_IMAGE_TYPES"
    )
    ALLOWED_VIDEO_TYPES: List[str] = Field(
        default=["video/mp4", "video/mpeg", "video/quicktime"],
        validation_alias="ALLOWED_VIDEO_TYPES"
    )

    # ML Models
    MODEL_CACHE_DIR: Path = Field(
        default=Path(__file__).parent.parent.parent / "models",
        validation_alias="MODEL_CACHE_DIR"
    )
    DEVICE: str = Field(default="cpu", validation_alias="DEVICE")  # cpu or cuda
    BATCH_SIZE: int = Field(default=32, validation_alias="BATCH_SIZE")

    # Model paths
    DEEPFAKE_MODEL: str = Field(
        default="timm/efficientnet_b4.ra2_in1k",
        validation_alias="DEEPFAKE_MODEL"
    )
    TEXT_CLASSIFIER_MODEL: str = Field(
        default="roberta-base",
        validation_alias="TEXT_CLASSIFIER_MODEL"
    )
    PERPLEXITY_MODEL: str = Field(
        default="gpt2",
        validation_alias="PERPLEXITY_MODEL"
    )

    # Logging
    LOG_LEVEL: str = Field(default="INFO", validation_alias="LOG_LEVEL")
    LOG_FORMAT: str = Field(default="json", validation_alias="LOG_FORMAT")  # json or text
    LOG_FILE: Optional[Path] = Field(default=None, validation_alias="LOG_FILE")

    # Monitoring
    ENABLE_METRICS: bool = Field(default=True, validation_alias="ENABLE_METRICS")
    ENABLE_TRACING: bool = Field(default=False, validation_alias="ENABLE_TRACING")
    METRICS_PORT: int = Field(default=9090, validation_alias="METRICS_PORT")

    # Feature Flags
    ENABLE_VIDEO_ANALYSIS: bool = Field(default=True, validation_alias="ENABLE_VIDEO_ANALYSIS")
    ENABLE_AUDIO_ANALYSIS: bool = Field(default=True, validation_alias="ENABLE_AUDIO_ANALYSIS")
    ENABLE_BATCH_PROCESSING: bool = Field(default=True, validation_alias="ENABLE_BATCH_PROCESSING")
    ENABLE_ASYNC_TASKS: bool = Field(default=True, validation_alias="ENABLE_ASYNC_TASKS")

    # Celery (for async tasks)
    CELERY_BROKER_URL: Optional[str] = None
    CELERY_RESULT_BACKEND: Optional[str] = None

    @field_validator("CELERY_BROKER_URL", mode="before")
    @classmethod
    def set_celery_broker(cls, v, info):
        # Default the Celery broker to the Redis URL assembled above.
        if isinstance(v, str) and v:
            return v
        return info.data.get("REDIS_URL")

    @field_validator("CELERY_RESULT_BACKEND", mode="before")
    @classmethod
    def set_celery_backend(cls, v, info):
        # Default the Celery result backend to the Redis URL assembled above.
        if isinstance(v, str) and v:
            return v
        return info.data.get("REDIS_URL")

    # Email (for notifications)
    SMTP_HOST: Optional[str] = Field(default=None, validation_alias="SMTP_HOST")
    SMTP_PORT: int = Field(default=587, validation_alias="SMTP_PORT")
    SMTP_USER: Optional[str] = Field(default=None, validation_alias="SMTP_USER")
    SMTP_PASSWORD: Optional[str] = Field(default=None, validation_alias="SMTP_PASSWORD")
    EMAILS_FROM_EMAIL: Optional[str] = Field(default=None, validation_alias="EMAILS_FROM_EMAIL")

    # Admin
    FIRST_SUPERUSER_EMAIL: str = Field(
        default="admin@example.com",
        validation_alias="FIRST_SUPERUSER_EMAIL"
    )
    FIRST_SUPERUSER_PASSWORD: str = Field(
        default="changeme",
        validation_alias="FIRST_SUPERUSER_PASSWORD"
    )

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=True,
        extra="allow"
    )

    @property
    def is_production(self) -> bool:
        """Check if running in production environment"""
        return self.ENVIRONMENT.lower() == "production"

    @property
    def is_development(self) -> bool:
        """Check if running in development environment"""
        return self.ENVIRONMENT.lower() == "development"

    @property
    def is_testing(self) -> bool:
        """Check if running in testing environment"""
        return self.ENVIRONMENT.lower() == "testing"
197
+
198
+
199
# Global settings instance
# Instantiated at import time; reads environment variables and .env once.
settings = Settings()
201
+
202
+
203
+ # Validate critical production settings
204
def validate_production_config():
    """Validate that production settings are properly configured.

    No-op outside the production environment.

    Raises:
        ValueError: listing every unsafe default still in place.
    """
    # Guard clause: nothing to check outside production.
    if not settings.is_production:
        return

    errors = []

    if settings.SECRET_KEY == "CHANGE-ME-IN-PRODUCTION-USE-OPENSSL-RAND-HEX-32":
        errors.append("SECRET_KEY must be changed in production")

    if settings.FIRST_SUPERUSER_PASSWORD == "changeme":
        errors.append("FIRST_SUPERUSER_PASSWORD must be changed in production")

    if settings.DEBUG:
        errors.append("DEBUG must be False in production")

    if not settings.POSTGRES_PASSWORD or settings.POSTGRES_PASSWORD == "postgres":
        errors.append("Strong POSTGRES_PASSWORD required in production")

    if errors:
        # Was an f-string with no placeholders; plain literal is correct here.
        raise ValueError(
            "Production configuration errors:\n" + "\n".join(f" - {err}" for err in errors)
        )
225
+
226
+
227
+ if __name__ == "__main__":
228
+ # Test configuration loading
229
+ print(f"Environment: {settings.ENVIRONMENT}")
230
+ print(f"Database URL: {settings.DATABASE_URL}")
231
+ print(f"Redis URL: {settings.REDIS_URL}")
232
+ print(f"Debug Mode: {settings.DEBUG}")
233
+ print(f"Rate Limiting: {settings.RATE_LIMIT_ENABLED}")
src/core/exceptions.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Custom Exception Classes for Production Error Handling
3
+ """
4
+
5
+ from typing import Any, Dict, Optional
6
+ from fastapi import status
7
+
8
+
9
class AppException(Exception):
    """Base application exception.

    Carries an HTTP status code and optional structured details so the
    error-handler middleware can render a consistent JSON error body.
    """

    def __init__(
        self,
        message: str,
        status_code: int = status.HTTP_500_INTERNAL_SERVER_ERROR,
        details: Optional[Dict[str, Any]] = None
    ):
        self.message = message
        self.status_code = status_code
        self.details = details or {}  # never None, simplifies serialization
        super().__init__(self.message)
22
+
23
+
24
class ValidationError(AppException):
    """Validation error exception (HTTP 422)."""

    def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
        super().__init__(
            message=message,
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            details=details
        )
33
+
34
+
35
class AuthenticationError(AppException):
    """Authentication error exception (HTTP 401).

    The details carry a WWW-Authenticate hint for Bearer-token clients.
    """

    def __init__(self, message: str = "Authentication failed"):
        super().__init__(
            message=message,
            status_code=status.HTTP_401_UNAUTHORIZED,
            details={"www_authenticate": "Bearer"}
        )
44
+
45
+
46
class AuthorizationError(AppException):
    """Authorization error exception (HTTP 403)."""

    def __init__(self, message: str = "Insufficient permissions"):
        super().__init__(
            message=message,
            status_code=status.HTTP_403_FORBIDDEN
        )
54
+
55
+
56
class ResourceNotFoundError(AppException):
    """Resource not found exception (HTTP 404).

    Args:
        resource: human-readable resource kind (e.g. "User").
        identifier: lookup key; stringified into the error details.
    """

    def __init__(self, resource: str, identifier: Any):
        super().__init__(
            message=f"{resource} not found",
            status_code=status.HTTP_404_NOT_FOUND,
            details={"resource": resource, "identifier": str(identifier)}
        )
65
+
66
+
67
class RateLimitExceededError(AppException):
    """Rate limit exceeded exception (HTTP 429).

    Args:
        limit: maximum allowed requests in the window.
        window: window description, e.g. "minute".
    """

    def __init__(self, limit: int, window: str):
        super().__init__(
            message=f"Rate limit exceeded: {limit} requests per {window}",
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
            details={"limit": limit, "window": window}
        )
76
+
77
+
78
class ModelLoadError(AppException):
    """ML model loading error (HTTP 503 — service temporarily unavailable)."""

    def __init__(self, model_name: str, reason: str):
        super().__init__(
            message=f"Failed to load model: {model_name}",
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            details={"model": model_name, "reason": reason}
        )
87
+
88
+
89
class PredictionError(AppException):
    """ML prediction error (HTTP 500)."""

    def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
        super().__init__(
            message=message,
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            details=details
        )
98
+
99
+
100
class FileUploadError(AppException):
    """File upload error (HTTP 400 — client sent an invalid file)."""

    def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
        super().__init__(
            message=message,
            status_code=status.HTTP_400_BAD_REQUEST,
            details=details
        )
109
+
110
+
111
class DatabaseError(AppException):
    """Database operation error (HTTP 500)."""

    def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
        super().__init__(
            message=message,
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            details=details
        )
120
+
121
+
122
class CacheError(AppException):
    """Cache operation error (HTTP 500)."""

    def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
        super().__init__(
            message=message,
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            details=details
        )
src/core/logging.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Production-Grade Structured Logging
3
+ """
4
+
5
+ import logging
6
+ import sys
7
+ import json
8
+ from datetime import datetime
9
+ from typing import Any, Dict
10
+ from pathlib import Path
11
+
12
+ from pythonjsonlogger import jsonlogger
13
+
14
+ from .config import settings
15
+
16
+
17
class CustomJsonFormatter(jsonlogger.JsonFormatter):
    """Custom JSON formatter that enriches every record with app context."""

    def add_fields(self, log_record: Dict[str, Any], record: logging.LogRecord, message_dict: dict):
        """Add timestamp, level, application metadata and per-request context."""
        super().add_fields(log_record, record, message_dict)

        # Local import: `timezone` is not imported at module level and this
        # keeps the fix self-contained.
        from datetime import timezone

        # BUGFIX: datetime.utcnow() returns a *naive* datetime and is
        # deprecated since Python 3.12; emit an explicit UTC timestamp
        # (ISO 8601 with a +00:00 offset) instead.
        log_record['timestamp'] = datetime.now(timezone.utc).isoformat()

        # Add log level
        log_record['level'] = record.levelname

        # Add application context
        log_record['app'] = settings.APP_NAME
        log_record['version'] = settings.APP_VERSION
        log_record['environment'] = settings.ENVIRONMENT

        # Add request ID if available (will be set by middleware)
        if hasattr(record, 'request_id'):
            log_record['request_id'] = record.request_id

        # Add user ID if available
        if hasattr(record, 'user_id'):
            log_record['user_id'] = record.user_id
41
+
42
+
43
def setup_logging():
    """Configure application logging.

    Installs a console handler (and an optional file handler when
    settings.LOG_FILE is set) on the *root* logger, formatted as JSON or
    plain text per settings.LOG_FORMAT, and returns the root logger.
    """

    # Create logger
    logger = logging.getLogger()
    logger.setLevel(getattr(logging, settings.LOG_LEVEL.upper()))

    # Remove existing handlers (makes repeated setup idempotent)
    logger.handlers = []

    # Console handler
    console_handler = logging.StreamHandler(sys.stdout)

    if settings.LOG_FORMAT == "json":
        console_formatter = CustomJsonFormatter(
            '%(timestamp)s %(level)s %(name)s %(message)s'
        )
    else:
        console_formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )

    console_handler.setFormatter(console_formatter)
    logger.addHandler(console_handler)

    # File handler (if configured)
    if settings.LOG_FILE:
        log_file = Path(settings.LOG_FILE)
        # Ensure the log directory exists before opening the file.
        log_file.parent.mkdir(parents=True, exist_ok=True)

        file_handler = logging.FileHandler(log_file)

        if settings.LOG_FORMAT == "json":
            file_formatter = CustomJsonFormatter(
                '%(timestamp)s %(level)s %(name)s %(message)s'
            )
        else:
            file_formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            )

        file_handler.setFormatter(file_formatter)
        logger.addHandler(file_handler)

    return logger
88
+
89
+
90
# Create module-level logger
# NOTE: importing this module configures the *root* logger as a side effect.
logger = setup_logging()
92
+
93
+
94
def log_api_request(
    method: str,
    path: str,
    status_code: int,
    duration_ms: float,
    user_id: str = None,
    request_id: str = None
):
    """Emit one structured log line for a completed API request."""
    # Collect the structured fields first, then hand them to the logger.
    fields = {
        "method": method,
        "path": path,
        "status_code": status_code,
        "duration_ms": duration_ms,
        "user_id": user_id,
        "request_id": request_id,
        "event_type": "api_request",
    }
    logger.info("API Request", extra=fields)
115
+
116
+
117
def log_prediction(
    model_type: str,
    input_size: int,
    confidence: float,
    duration_ms: float,
    cached: bool = False,
    user_id: str = None
):
    """Emit one structured log line for a single ML prediction."""
    # Collect the structured fields first, then hand them to the logger.
    fields = {
        "model_type": model_type,
        "input_size": input_size,
        "confidence": confidence,
        "duration_ms": duration_ms,
        "cached": cached,
        "user_id": user_id,
        "event_type": "prediction",
    }
    logger.info("ML Prediction", extra=fields)
138
+
139
+
140
def log_error(
    error: Exception,
    context: Dict[str, Any] = None,
    user_id: str = None,
    request_id: str = None
):
    """Log an exception with its traceback and request context."""
    # Collect the structured fields first, then log with the traceback.
    fields = {
        "error_type": type(error).__name__,
        "error_message": str(error),
        "context": context or {},
        "user_id": user_id,
        "request_id": request_id,
        "event_type": "error",
    }
    logger.error(f"Error: {str(error)}", extra=fields, exc_info=True)
159
+
160
+
161
+ if __name__ == "__main__":
162
+ # Test logging
163
+ logger.info("Application starting")
164
+ logger.debug("Debug message")
165
+ logger.warning("Warning message")
166
+ logger.error("Error message")
167
+
168
+ log_api_request("GET", "/api/v1/health", 200, 5.2, request_id="test-123")
169
+ log_prediction("deepfake", 1024, 0.95, 125.5, cached=False, user_id="user-1")
src/core/middleware.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Middleware for Production Security
3
+ Rate limiting, request logging, security headers, CORS
4
+ """
5
+
6
+ import time
7
+ import uuid
8
+ from typing import Callable
9
+ from fastapi import Request, Response, status
10
+ from fastapi.responses import JSONResponse
11
+ from starlette.middleware.base import BaseHTTPMiddleware
12
+ from starlette.middleware.cors import CORSMiddleware
13
+
14
+ from src.core.config import settings
15
+ from src.core.logging import logger, log_api_request, log_error
16
+ from src.core.exceptions import RateLimitExceededError
17
+ from src.core.cache import cache
18
+
19
+
20
class RequestIDMiddleware(BaseHTTPMiddleware):
    """Attach a unique request ID to every request/response pair."""

    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        # Generate an ID, expose it to downstream handlers via request.state,
        # and echo it back to the client in a response header.
        rid = str(uuid.uuid4())
        request.state.request_id = rid

        response = await call_next(request)

        response.headers["X-Request-ID"] = rid
        return response
31
+
32
+
33
class RequestLoggingMiddleware(BaseHTTPMiddleware):
    """Log all API requests with performance metrics.

    Emits one structured log line per request (method, path, status,
    duration) and adds an X-Response-Time header to the response.
    """

    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        start_time = time.time()

        # Get request ID (set by RequestIDMiddleware, when installed inside us)
        request_id = getattr(request.state, "request_id", None)

        # Process request
        response = await call_next(request)

        # Calculate duration
        duration_ms = (time.time() - start_time) * 1000

        # Log request
        log_api_request(
            method=request.method,
            path=str(request.url.path),
            status_code=response.status_code,
            duration_ms=duration_ms,
            user_id=getattr(request.state, "user_id", None),
            request_id=request_id
        )

        # Add performance header
        response.headers["X-Response-Time"] = f"{duration_ms:.2f}ms"

        return response
62
+
63
+
64
class RateLimitMiddleware(BaseHTTPMiddleware):
    """Rate limiting based on IP address or API key.

    Uses a per-minute counter in Redis (cache.increment_rate_limit).
    NOTE(review): when the limit is hit this *raises* RateLimitExceededError
    and relies on ErrorHandlerMiddleware being registered outside this one
    to turn it into a 429 response — confirm the ordering in
    setup_middleware(). Also, increment_rate_limit returns 0 on any Redis
    error, so the limiter fails open.
    """

    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        if not settings.RATE_LIMIT_ENABLED:
            return await call_next(request)

        # Skip rate limiting for health check
        if request.url.path == "/health":
            return await call_next(request)

        # Get identifier (IP address or user ID)
        client_ip = request.client.host if request.client else "unknown"
        user_id = getattr(request.state, "user_id", None)
        identifier = f"user:{user_id}" if user_id else f"ip:{client_ip}"

        # Check rate limit (per minute)
        count = await cache.increment_rate_limit(identifier, 60)

        if count > settings.RATE_LIMIT_PER_MINUTE:
            logger.warning(f"Rate limit exceeded for {identifier}: {count} requests")
            raise RateLimitExceededError(
                limit=settings.RATE_LIMIT_PER_MINUTE,
                window="minute"
            )

        # Add rate limit headers
        response = await call_next(request)
        response.headers["X-RateLimit-Limit"] = str(settings.RATE_LIMIT_PER_MINUTE)
        response.headers["X-RateLimit-Remaining"] = str(max(0, settings.RATE_LIMIT_PER_MINUTE - count))

        return response
96
+
97
+
98
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
    """Add standard security headers to every response."""

    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        response = await call_next(request)

        # Baseline hardening headers, applied in every environment.
        hardening = {
            "X-Content-Type-Options": "nosniff",
            "X-Frame-Options": "DENY",
            "X-XSS-Protection": "1; mode=block",
            "Strict-Transport-Security": "max-age=31536000; includeSubDomains",
            "Referrer-Policy": "strict-origin-when-cross-origin",
        }
        for header_name, header_value in hardening.items():
            response.headers[header_name] = header_value

        # Content Security Policy (production only)
        if settings.is_production:
            response.headers["Content-Security-Policy"] = (
                "default-src 'self'; "
                "script-src 'self' 'unsafe-inline'; "
                "style-src 'self' 'unsafe-inline'; "
                "img-src 'self' data: https:; "
                "font-src 'self' data:; "
                "connect-src 'self'"
            )

        return response
123
+
124
+
125
class ErrorHandlerMiddleware(BaseHTTPMiddleware):
    """Global error handler.

    Catches any exception escaping downstream handlers/middleware, logs it
    with request context, and renders a consistent JSON error body that
    includes the request ID when available.
    """

    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        try:
            response = await call_next(request)
            return response
        except Exception as e:
            # Log error (with traceback, via log_error's exc_info)
            log_error(
                error=e,
                context={
                    "method": request.method,
                    "path": str(request.url.path),
                    "client": request.client.host if request.client else None
                },
                request_id=getattr(request.state, "request_id", None)
            )

            # Return error response
            # Deferred import — presumably to avoid a circular import at
            # module load; confirm before hoisting to the top of the file.
            from src.core.exceptions import AppException

            if isinstance(e, AppException):
                # Known application error: expose its status code and details.
                return JSONResponse(
                    status_code=e.status_code,
                    content={
                        "error": type(e).__name__,
                        "message": e.message,
                        "details": e.details,
                        "request_id": getattr(request.state, "request_id", None)
                    }
                )
            else:
                # Generic error response — no internals leaked to the client.
                return JSONResponse(
                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                    content={
                        "error": "InternalServerError",
                        "message": "An unexpected error occurred",
                        "request_id": getattr(request.state, "request_id", None)
                    }
                )
167
+
168
+
169
def setup_cors(app):
    """Configure CORS middleware from settings.BACKEND_CORS_ORIGINS.

    Exposes the custom request-ID/timing/rate-limit headers so browser
    clients can read them.
    """
    app.add_middleware(
        CORSMiddleware,
        allow_origins=settings.BACKEND_CORS_ORIGINS,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
        expose_headers=["X-Request-ID", "X-Response-Time", "X-RateLimit-Limit", "X-RateLimit-Remaining"]
    )
179
+
180
+
181
def setup_middleware(app):
    """Setup all middleware in correct order.

    Starlette executes middleware in reverse registration order, so the
    last add_middleware() call is the first to see each request.
    """

    # Order matters! Apply in reverse order of execution

    # Error handling (outermost)
    app.add_middleware(ErrorHandlerMiddleware)

    # Security headers
    app.add_middleware(SecurityHeadersMiddleware)

    # Rate limiting
    app.add_middleware(RateLimitMiddleware)

    # Request logging
    app.add_middleware(RequestLoggingMiddleware)

    # Request ID (innermost)
    app.add_middleware(RequestIDMiddleware)

    # CORS
    setup_cors(app)

    logger.info("Middleware configured successfully")
205
+
206
+
207
+ if __name__ == "__main__":
208
+ print("Middleware module loaded")
209
+ print(f"Rate limiting: {'Enabled' if settings.RATE_LIMIT_ENABLED else 'Disabled'}")
210
+ print(f"CORS origins: {settings.BACKEND_CORS_ORIGINS}")
src/core/security.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Authentication and Authorization
3
+ JWT tokens, API keys, password hashing
4
+ """
5
+
6
+ import secrets
7
+ from datetime import datetime, timedelta
8
+ from typing import Optional, Union
9
+ from jose import JWTError, jwt
10
+ from passlib.context import CryptContext
11
+ from fastapi import Depends, HTTPException, status, Security
12
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials, APIKeyHeader
13
+ from sqlalchemy.orm import Session
14
+
15
+ from src.core.config import settings
16
+ from src.core.exceptions import AuthenticationError, AuthorizationError
17
+ from src.db.models import User, APIKey, get_db
18
+
19
+
20
# Password hashing
# bcrypt via passlib; deprecated="auto" marks hashes from outdated schemes
# so they can be re-hashed on the next successful verify.
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")

# Security schemes
# HTTP Bearer for JWTs; X-API-Key header for API keys. auto_error=False means
# a missing key yields None instead of an immediate error response.
bearer_scheme = HTTPBearer()
api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)
26
+
27
+
28
def verify_password(plain_password: str, hashed_password: str) -> bool:
    """Check *plain_password* against a stored bcrypt hash."""
    is_match = pwd_context.verify(plain_password, hashed_password)
    return is_match
31
+
32
+
33
def get_password_hash(password: str) -> str:
    """Hash *password* with the configured passlib context (bcrypt)."""
    hashed = pwd_context.hash(password)
    return hashed
36
+
37
+
38
def create_access_token(
    data: dict,
    expires_delta: Optional[timedelta] = None
) -> str:
    """Build a signed JWT access token.

    Args:
        data: Claims to embed in the token (not mutated; a copy is encoded).
        expires_delta: Optional custom lifetime; falls back to
            ACCESS_TOKEN_EXPIRE_MINUTES from settings.

    Returns:
        The encoded JWT string.
    """
    lifetime = expires_delta or timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES)
    claims = {**data, "exp": datetime.utcnow() + lifetime}
    return jwt.encode(claims, settings.SECRET_KEY, algorithm=settings.ALGORITHM)
53
+
54
+
55
def create_refresh_token(
    data: dict,
    expires_delta: Optional[timedelta] = None
) -> str:
    """Build a signed JWT refresh token (carries a "type": "refresh" claim).

    Args:
        data: Claims to embed in the token (not mutated; a copy is encoded).
        expires_delta: Optional custom lifetime; falls back to
            REFRESH_TOKEN_EXPIRE_DAYS from settings.

    Returns:
        The encoded JWT string.
    """
    lifetime = expires_delta or timedelta(days=settings.REFRESH_TOKEN_EXPIRE_DAYS)
    claims = {**data, "exp": datetime.utcnow() + lifetime, "type": "refresh"}
    return jwt.encode(claims, settings.SECRET_KEY, algorithm=settings.ALGORITHM)
70
+
71
+
72
def decode_token(token: str) -> dict:
    """Decode *token* and return its claims.

    Raises:
        AuthenticationError: if the token is malformed, has a bad
            signature, or is past its expiry.
    """
    try:
        return jwt.decode(token, settings.SECRET_KEY, algorithms=[settings.ALGORITHM])
    except JWTError:
        # Normalize every jose failure into the app's auth error type.
        raise AuthenticationError("Invalid or expired token")
79
+
80
+
81
def generate_api_key() -> str:
    """Return a cryptographically strong, URL-safe API key (32 random bytes)."""
    token = secrets.token_urlsafe(32)
    return token
84
+
85
+
86
# Dependency: Get current user from JWT token
async def get_current_user(
    credentials: HTTPAuthorizationCredentials = Security(bearer_scheme),
    db: Session = Depends(get_db)
) -> User:
    """Resolve the authenticated user from a Bearer JWT.

    Raises:
        AuthenticationError: if the token is invalid/expired, the "sub"
            claim is missing, or the user does not exist / is inactive.
    """
    # decode_token() already maps every JWTError to AuthenticationError, so
    # the previous try/except JWTError around this call was unreachable and
    # has been removed.
    payload = decode_token(credentials.credentials)

    user_id = payload.get("sub")
    if user_id is None:
        raise AuthenticationError("Invalid token payload")

    user = db.query(User).filter(User.id == user_id).first()
    if user is None:
        raise AuthenticationError("User not found")

    if not user.is_active:
        raise AuthenticationError("User account is inactive")

    return user
112
+
113
+
114
# Dependency: Get current user from API key
async def get_current_user_from_api_key(
    api_key: Optional[str] = Security(api_key_header),
    db: Session = Depends(get_db)
) -> Optional[User]:
    """Get current user from API key.

    Returns None when no X-API-Key header is present (lets callers fall
    back to another scheme). Raises AuthenticationError when the key is
    unknown, inactive, or expired, or when its owner is missing/inactive.
    Side effect: stamps the key's last_used_at and commits.
    """

    if not api_key:
        return None

    # Find API key in database
    # NOTE(review): keys are compared in plaintext against the stored value;
    # confirm whether hashed-at-rest storage was intended.
    api_key_obj = db.query(APIKey).filter(
        APIKey.key == api_key,
        APIKey.is_active == True
    ).first()

    if not api_key_obj:
        raise AuthenticationError("Invalid API key")

    # Check expiration (a NULL expires_at means the key never expires)
    if api_key_obj.expires_at and api_key_obj.expires_at < datetime.utcnow():
        raise AuthenticationError("API key has expired")

    # Update last used timestamp
    api_key_obj.last_used_at = datetime.utcnow()
    db.commit()

    # Get user
    user = db.query(User).filter(User.id == api_key_obj.user_id).first()

    if not user or not user.is_active:
        raise AuthenticationError("User not found or inactive")

    return user
148
+
149
+
150
# Dependency: Get current user (try JWT first, then API key)
async def get_current_user_flexible(
    bearer: Optional[HTTPAuthorizationCredentials] = Security(bearer_scheme, auto_error=False),
    api_key: Optional[str] = Security(api_key_header),
    db: Session = Depends(get_db)
) -> User:
    """Authenticate via a Bearer JWT, falling back to an X-API-Key header.

    Raises:
        AuthenticationError: if neither credential authenticates an
            active user.
    """

    # Try JWT token first
    if bearer:
        try:
            payload = decode_token(bearer.credentials)
            user_id = payload.get("sub")

            user = db.query(User).filter(User.id == user_id).first()
            if user and user.is_active:
                return user
        except Exception:
            # Was a bare `except:`, which also trapped SystemExit and
            # KeyboardInterrupt. An invalid JWT (AuthenticationError from
            # decode_token) simply means we fall through to the API key.
            pass

    # Try API key
    if api_key:
        user = await get_current_user_from_api_key(api_key, db)
        if user:
            return user

    raise AuthenticationError("Authentication required")
178
+
179
+
180
# Dependency: Require superuser
async def get_current_superuser(
    current_user: User = Depends(get_current_user_flexible)
) -> User:
    """Gate a route to superusers: return the user or raise."""
    if current_user.is_superuser:
        return current_user
    raise AuthorizationError("Superuser privileges required")
190
+
191
+
192
# Helper: Authenticate user
def authenticate_user(
    db: Session,
    email: str,
    password: str
) -> Optional[User]:
    """Look up *email* and verify *password*.

    Returns the user on success, otherwise None. Returning None (rather
    than raising) keeps the failure reason opaque to the caller.
    """
    user = db.query(User).filter(User.email == email).first()
    if user and verify_password(password, user.hashed_password):
        return user
    return None
208
+
209
+
210
# Helper: Create user
def create_user(
    db: Session,
    email: str,
    password: str,
    full_name: Optional[str] = None,
    is_superuser: bool = False
) -> User:
    """Insert a new user row and return it.

    Args:
        db: Active database session.
        email: Unique login email.
        password: Plaintext password; stored only as a bcrypt hash.
        full_name: Optional display name.
        is_superuser: Grant admin privileges.

    Raises:
        ValueError: if a user with *email* already exists.
    """
    if db.query(User).filter(User.email == email).first():
        raise ValueError("User with this email already exists")

    new_user = User(
        email=email,
        hashed_password=get_password_hash(password),
        full_name=full_name,
        is_superuser=is_superuser,
        is_active=True
    )
    db.add(new_user)
    db.commit()
    db.refresh(new_user)  # populate server-side defaults (id, timestamps)
    return new_user
239
+
240
+
241
# Helper: Create API key
def create_api_key_for_user(
    db: Session,
    user_id: int,
    name: Optional[str] = None,
    expires_days: Optional[int] = None
) -> APIKey:
    """Mint and persist a new API key for *user_id*.

    Args:
        db: Active database session.
        user_id: Owner of the key.
        name: Optional human-readable label (defaults to "API Key").
        expires_days: Key lifetime in days; None/0 means no expiry.
    """
    expiry = None
    if expires_days:
        expiry = datetime.utcnow() + timedelta(days=expires_days)

    record = APIKey(
        key=generate_api_key(),
        name=name or "API Key",
        user_id=user_id,
        is_active=True,
        rate_limit_per_minute=settings.RATE_LIMIT_PER_MINUTE,
        rate_limit_per_hour=settings.RATE_LIMIT_PER_HOUR,
        expires_at=expiry
    )
    db.add(record)
    db.commit()
    db.refresh(record)
    return record
267
+
268
+
269
if __name__ == "__main__":
    # Manual smoke test: exercises hashing, the JWT round-trip, and key
    # generation. Requires a configured SECRET_KEY in settings.
    # Test password hashing
    password = "test_password_123"
    hashed = get_password_hash(password)
    print(f"Hashed: {hashed}")
    print(f"Verified: {verify_password(password, hashed)}")

    # Test JWT token creation
    token = create_access_token({"sub": 1, "email": "test@example.com"})
    print(f"Token: {token}")

    payload = decode_token(token)
    print(f"Decoded: {payload}")

    # Test API key generation
    api_key = generate_api_key()
    print(f"API Key: {api_key}")
src/db/__init__.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Database package"""
2
+
3
+ from .models import (
4
+ Base,
5
+ User,
6
+ APIKey,
7
+ RequestLog,
8
+ PredictionLog,
9
+ SystemMetric,
10
+ engine,
11
+ SessionLocal,
12
+ get_db,
13
+ create_tables,
14
+ drop_tables
15
+ )
16
+
17
+ __all__ = [
18
+ "Base",
19
+ "User",
20
+ "APIKey",
21
+ "RequestLog",
22
+ "PredictionLog",
23
+ "SystemMetric",
24
+ "engine",
25
+ "SessionLocal",
26
+ "get_db",
27
+ "create_tables",
28
+ "drop_tables"
29
+ ]
src/db/models.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Database Models and Session Management
3
+ """
4
+
5
+ from datetime import datetime
6
+ from typing import Optional
7
+ from sqlalchemy import (
8
+ Boolean, Column, DateTime, Float, Integer, String, Text, JSON, ForeignKey, Index
9
+ )
10
+ from sqlalchemy.ext.declarative import declarative_base
11
+ from sqlalchemy.orm import relationship, Session
12
+ from sqlalchemy import create_engine
13
+ from sqlalchemy.orm import sessionmaker
14
+ from sqlalchemy.pool import QueuePool
15
+
16
+ from src.core.config import settings
17
+
18
# Create declarative base
# Single metadata registry; every ORM model below inherits from it.
Base = declarative_base()
20
+
21
+
22
# Database Models
class User(Base):
    """User model for authentication.

    Owns API keys and request logs; both are deleted with the user
    (delete-orphan cascade).
    """
    __tablename__ = "users"

    id = Column(Integer, primary_key=True, index=True)
    email = Column(String(255), unique=True, index=True, nullable=False)
    hashed_password = Column(String(255), nullable=False)  # never plaintext
    full_name = Column(String(255))
    is_active = Column(Boolean, default=True)
    is_superuser = Column(Boolean, default=False)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    api_keys = relationship("APIKey", back_populates="user", cascade="all, delete-orphan")
    requests = relationship("RequestLog", back_populates="user", cascade="all, delete-orphan")
39
+
40
+
41
class APIKey(Base):
    """API Key model for API authentication.

    NOTE(review): `key` stores the raw token value; if keys are treated
    as secrets, hashed-at-rest storage is worth confirming.
    """
    __tablename__ = "api_keys"

    id = Column(Integer, primary_key=True, index=True)
    key = Column(String(64), unique=True, index=True, nullable=False)
    name = Column(String(255))  # human-readable label
    user_id = Column(Integer, ForeignKey("users.id"), nullable=False)
    is_active = Column(Boolean, default=True)
    # Per-key rate-limit quotas
    rate_limit_per_minute = Column(Integer, default=60)
    rate_limit_per_hour = Column(Integer, default=1000)
    created_at = Column(DateTime, default=datetime.utcnow)
    last_used_at = Column(DateTime)  # stamped on each authenticated use
    expires_at = Column(DateTime)    # NULL means the key never expires

    # Relationships
    user = relationship("User", back_populates="api_keys")

    # Indexes (speeds up the active-key lookup during authentication)
    __table_args__ = (
        Index('idx_apikey_user_active', 'user_id', 'is_active'),
    )
63
+
64
+
65
class RequestLog(Base):
    """Request logging for analytics and debugging."""
    __tablename__ = "request_logs"

    id = Column(Integer, primary_key=True, index=True)
    request_id = Column(String(64), unique=True, index=True)
    # Both nullable: anonymous requests carry neither a user nor an API key.
    user_id = Column(Integer, ForeignKey("users.id"), nullable=True)
    api_key_id = Column(Integer, ForeignKey("api_keys.id"), nullable=True)

    # Request details
    method = Column(String(10))
    path = Column(String(500))
    query_params = Column(JSON)
    status_code = Column(Integer)

    # Performance
    duration_ms = Column(Float)

    # Client info
    ip_address = Column(String(45))  # 45 chars fits a full IPv6 address
    user_agent = Column(Text)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow, index=True)

    # Relationships
    user = relationship("User", back_populates="requests")

    # Indexes (per-user and time-range queries)
    __table_args__ = (
        Index('idx_request_user_created', 'user_id', 'created_at'),
        Index('idx_request_created', 'created_at'),
    )
98
+
99
+
100
class PredictionLog(Base):
    """ML prediction logging for analytics.

    One row per model inference; `details` holds the model-specific
    result payload as JSON.
    """
    __tablename__ = "prediction_logs"

    id = Column(Integer, primary_key=True, index=True)
    request_id = Column(String(64), index=True)  # correlates with RequestLog
    user_id = Column(Integer, ForeignKey("users.id"), nullable=True)

    # Prediction details
    model_type = Column(String(50), index=True)  # deepfake, ai_text, anomaly
    input_type = Column(String(20))  # text, image, video, audio
    input_size = Column(Integer)  # bytes or character count

    # Results
    prediction = Column(String(50))
    confidence = Column(Float)
    details = Column(JSON)

    # Performance
    duration_ms = Column(Float)
    cached = Column(Boolean, default=False)  # True when served from cache

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow, index=True)

    # Indexes (per-model and per-user time-series queries)
    __table_args__ = (
        Index('idx_prediction_model_created', 'model_type', 'created_at'),
        Index('idx_prediction_user_created', 'user_id', 'created_at'),
    )
130
+
131
+
132
class SystemMetric(Base):
    """System performance metrics (generic name/value time series)."""
    __tablename__ = "system_metrics"

    id = Column(Integer, primary_key=True, index=True)
    metric_name = Column(String(100), index=True)
    metric_value = Column(Float)
    labels = Column(JSON)  # arbitrary key/value dimensions for the metric
    created_at = Column(DateTime, default=datetime.utcnow, index=True)

    # Indexes (lookup of one metric over a time range)
    __table_args__ = (
        Index('idx_metric_name_created', 'metric_name', 'created_at'),
    )
146
+
147
+
148
# Database Engine and Session
engine = create_engine(
    settings.DATABASE_URL,
    poolclass=QueuePool,
    pool_size=10,         # persistent connections kept open
    max_overflow=20,      # extra connections allowed under burst load
    pool_pre_ping=True,   # validate connections before use; drops stale ones
    echo=settings.DEBUG   # log emitted SQL when DEBUG is on
)

SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
159
+
160
+
161
# Dependency for FastAPI
def get_db():
    """Yield a database session, always closing it afterwards.

    Generator-style dependency: the session is closed in `finally` even
    when the request handler raises.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
169
+
170
+
171
# Database initialization
def create_tables():
    """Create every table registered on Base's metadata."""
    Base.metadata.create_all(bind=engine)
175
+
176
+
177
def drop_tables():
    """Drop every table registered on Base's metadata (use with caution!)."""
    Base.metadata.drop_all(bind=engine)
180
+
181
+
182
if __name__ == "__main__":
    # Bootstrap the schema when run directly.
    print("Creating database tables...")
    create_tables()
    print("Tables created successfully!")
src/detection/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ """Init file for detection module."""
2
+
3
+ from .deepfake_detector import DeepfakeDetector
4
+ from .ai_text_detector import AITextDetector
5
+ from .anomaly_detector import AnomalyDetector
6
+
7
+ __all__ = ['DeepfakeDetector', 'AITextDetector', 'AnomalyDetector']
src/detection/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (490 Bytes). View file
 
src/detection/__pycache__/ai_text_detector.cpython-313.pyc ADDED
Binary file (15.4 kB). View file
 
src/detection/__pycache__/anomaly_detector.cpython-313.pyc ADDED
Binary file (17.6 kB). View file
 
src/detection/__pycache__/deepfake_detector.cpython-313.pyc ADDED
Binary file (17.1 kB). View file
 
src/detection/ai_text_detector.py ADDED
@@ -0,0 +1,402 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AI Text Detection Module
3
+
4
+ Detects AI-generated text from models like GPT-4, ChatGPT, Gemini, Claude.
5
+
6
+ Uses multiple detection strategies:
7
+ 1. Perplexity analysis
8
+ 2. Token probability distribution
9
+ 3. Stylometric features
10
+ 4. Statistical patterns
11
+ """
12
+
13
+ import torch
14
+ import torch.nn as nn
15
+ from transformers import (
16
+ AutoTokenizer,
17
+ AutoModelForSequenceClassification,
18
+ GPT2LMHeadModel,
19
+ GPT2Tokenizer
20
+ )
21
+ from typing import Dict, List, Tuple
22
+ import numpy as np
23
+ import re
24
+ from collections import Counter
25
+
26
+
27
+ class AITextDetector:
28
+ """
29
+ Detects AI-generated text using multiple approaches.
30
+
31
+ Combines:
32
+ - Fine-tuned BERT classifier
33
+ - Perplexity-based detection
34
+ - Statistical feature analysis
35
+ """
36
+
37
+ def __init__(
38
+ self,
39
+ model_path: str = "models/ai_text_detector.pth",
40
+ device: str = "cuda" if torch.cuda.is_available() else "cpu",
41
+ threshold: float = 0.7
42
+ ):
43
+ """
44
+ Initialize AI text detector.
45
+
46
+ Args:
47
+ model_path: Path to fine-tuned model
48
+ device: Device for inference
49
+ threshold: Detection threshold
50
+ """
51
+ self.device = device
52
+ self.threshold = threshold
53
+
54
+ # Load classifier model
55
+ self.tokenizer = AutoTokenizer.from_pretrained("roberta-base")
56
+ self.classifier = AutoModelForSequenceClassification.from_pretrained(
57
+ "roberta-base",
58
+ num_labels=2
59
+ ).to(device)
60
+
61
+ # Load GPT-2 for perplexity calculation
62
+ self.gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
63
+ self.gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
64
+ self.gpt2_model.eval()
65
+
66
+ self.classifier.eval()
67
+
68
+ print("✓ AI Text Detector initialized")
69
+
70
+ def analyze_text(
71
+ self,
72
+ text: str,
73
+ detailed: bool = True
74
+ ) -> Dict:
75
+ """
76
+ Analyze text for AI generation indicators.
77
+
78
+ Args:
79
+ text: Input text to analyze
80
+ detailed: Return detailed analysis
81
+
82
+ Returns:
83
+ Detection results dictionary
84
+ """
85
+ if len(text.strip()) < 10:
86
+ return {
87
+ 'verdict': 'TOO_SHORT',
88
+ 'confidence': 0.0,
89
+ 'explanation': 'Text too short for reliable analysis (min 10 chars)'
90
+ }
91
+
92
+ # Method 1: Classifier-based detection
93
+ classifier_score = self._classifier_detection(text)
94
+
95
+ # Method 2: Perplexity-based detection
96
+ perplexity = self._calculate_perplexity(text)
97
+ perplexity_score = self._perplexity_to_score(perplexity)
98
+
99
+ # Method 3: Statistical feature analysis
100
+ statistical_score = self._statistical_analysis(text)
101
+
102
+ # Ensemble the scores
103
+ final_score = (
104
+ 0.5 * classifier_score +
105
+ 0.3 * perplexity_score +
106
+ 0.2 * statistical_score
107
+ )
108
+
109
+ is_ai_generated = final_score > self.threshold
110
+
111
+ result = {
112
+ 'verdict': 'AI_GENERATED' if is_ai_generated else 'HUMAN_WRITTEN',
113
+ 'confidence': float(final_score),
114
+ 'threshold': self.threshold,
115
+ 'perplexity': float(perplexity),
116
+ 'explanation': self._generate_explanation(final_score, perplexity)
117
+ }
118
+
119
+ if detailed:
120
+ result['detailed_scores'] = {
121
+ 'classifier': float(classifier_score),
122
+ 'perplexity': float(perplexity_score),
123
+ 'statistical': float(statistical_score)
124
+ }
125
+ result['features'] = self._extract_features(text)
126
+ result['indicators'] = self._identify_indicators(text, final_score)
127
+
128
+ return result
129
+
130
+ def _classifier_detection(self, text: str) -> float:
131
+ """Use fine-tuned classifier for detection."""
132
+ # Tokenize
133
+ inputs = self.tokenizer(
134
+ text,
135
+ return_tensors="pt",
136
+ truncation=True,
137
+ max_length=512,
138
+ padding=True
139
+ ).to(self.device)
140
+
141
+ # Get prediction
142
+ with torch.no_grad():
143
+ outputs = self.classifier(**inputs)
144
+ logits = outputs.logits
145
+ probs = torch.softmax(logits, dim=-1)
146
+ ai_prob = probs[0][1].item() # Probability of AI-generated
147
+
148
+ return ai_prob
149
+
150
+ def _calculate_perplexity(self, text: str) -> float:
151
+ """
152
+ Calculate perplexity using GPT-2.
153
+
154
+ AI-generated text typically has lower perplexity.
155
+ """
156
+ # Tokenize
157
+ encodings = self.gpt2_tokenizer(
158
+ text,
159
+ return_tensors="pt",
160
+ truncation=True,
161
+ max_length=1024
162
+ ).to(self.device)
163
+
164
+ max_length = encodings.input_ids.size(1)
165
+
166
+ # Calculate loss
167
+ with torch.no_grad():
168
+ outputs = self.gpt2_model(**encodings, labels=encodings.input_ids)
169
+ loss = outputs.loss
170
+
171
+ # Perplexity = exp(loss)
172
+ perplexity = torch.exp(loss).item()
173
+
174
+ return perplexity
175
+
176
+ def _perplexity_to_score(self, perplexity: float) -> float:
177
+ """
178
+ Convert perplexity to detection score.
179
+
180
+ Lower perplexity → higher AI probability
181
+ """
182
+ # Typical ranges:
183
+ # Human text: 50-300
184
+ # AI text: 10-80
185
+
186
+ if perplexity < 20:
187
+ return 0.95 # Very likely AI
188
+ elif perplexity < 50:
189
+ return 0.75
190
+ elif perplexity < 100:
191
+ return 0.50
192
+ elif perplexity < 200:
193
+ return 0.25
194
+ else:
195
+ return 0.10 # Likely human
196
+
197
+ def _statistical_analysis(self, text: str) -> float:
198
+ """
199
+ Analyze statistical features of text.
200
+
201
+ AI-generated text often has:
202
+ - More uniform sentence lengths
203
+ - Consistent vocabulary diversity
204
+ - Predictable structure
205
+ """
206
+ features = self._extract_features(text)
207
+
208
+ score = 0.0
209
+ indicators = 0
210
+
211
+ # Check sentence length uniformity
212
+ if features['sentence_length_variance'] < 50:
213
+ score += 0.2
214
+ indicators += 1
215
+
216
+ # Check vocabulary diversity
217
+ if 0.4 < features['vocabulary_diversity'] < 0.6:
218
+ score += 0.2
219
+ indicators += 1
220
+
221
+ # Check average sentence length (AI often uses medium-length sentences)
222
+ if 15 < features['avg_sentence_length'] < 25:
223
+ score += 0.15
224
+ indicators += 1
225
+
226
+ # Check for repetitive patterns
227
+ if features['repetition_ratio'] < 0.05:
228
+ score += 0.15
229
+ indicators += 1
230
+
231
+ # Check for balanced punctuation
232
+ if 0.08 < features['punctuation_ratio'] < 0.15:
233
+ score += 0.15
234
+ indicators += 1
235
+
236
+ # Check for consistent paragraph structure
237
+ if features['avg_paragraph_length'] > 3:
238
+ score += 0.15
239
+ indicators += 1
240
+
241
+ return score
242
+
243
+ def _extract_features(self, text: str) -> Dict:
244
+ """Extract statistical features from text."""
245
+ # Sentence segmentation
246
+ sentences = re.split(r'[.!?]+', text)
247
+ sentences = [s.strip() for s in sentences if s.strip()]
248
+
249
+ # Word tokenization
250
+ words = re.findall(r'\b\w+\b', text.lower())
251
+
252
+ # Calculate features
253
+ sentence_lengths = [len(s.split()) for s in sentences]
254
+
255
+ # Paragraph detection
256
+ paragraphs = text.split('\n\n')
257
+ paragraphs = [p.strip() for p in paragraphs if p.strip()]
258
+
259
+ features = {
260
+ 'total_words': len(words),
261
+ 'total_sentences': len(sentences),
262
+ 'total_paragraphs': len(paragraphs),
263
+ 'avg_sentence_length': np.mean(sentence_lengths) if sentence_lengths else 0,
264
+ 'sentence_length_variance': np.var(sentence_lengths) if sentence_lengths else 0,
265
+ 'vocabulary_diversity': len(set(words)) / len(words) if words else 0,
266
+ 'avg_word_length': np.mean([len(w) for w in words]) if words else 0,
267
+ 'punctuation_ratio': len(re.findall(r'[,.!?;:]', text)) / len(words) if words else 0,
268
+ 'repetition_ratio': self._calculate_repetition(words),
269
+ 'avg_paragraph_length': np.mean([len(p.split()) for p in paragraphs]) if paragraphs else 0
270
+ }
271
+
272
+ return features
273
+
274
+ def _calculate_repetition(self, words: List[str]) -> float:
275
+ """Calculate word repetition ratio."""
276
+ if len(words) < 10:
277
+ return 0.0
278
+
279
+ # Look for repeated 3-grams
280
+ trigrams = [tuple(words[i:i+3]) for i in range(len(words)-2)]
281
+ trigram_counts = Counter(trigrams)
282
+
283
+ # Calculate ratio of repeated trigrams
284
+ repeated = sum(1 for count in trigram_counts.values() if count > 1)
285
+ total = len(trigrams)
286
+
287
+ return repeated / total if total > 0 else 0.0
288
+
289
+ def _identify_indicators(self, text: str, score: float) -> List[str]:
290
+ """Identify specific AI generation indicators."""
291
+ indicators = []
292
+
293
+ features = self._extract_features(text)
294
+ perplexity = self._calculate_perplexity(text)
295
+
296
+ # Low perplexity
297
+ if perplexity < 30:
298
+ indicators.append(f"Very low perplexity ({perplexity:.1f}) suggests high predictability")
299
+
300
+ # Uniform sentence structure
301
+ if features['sentence_length_variance'] < 50:
302
+ indicators.append("Unusually uniform sentence lengths")
303
+
304
+ # Vocabulary consistency
305
+ if 0.4 < features['vocabulary_diversity'] < 0.6:
306
+ indicators.append("Vocabulary diversity typical of AI generation")
307
+
308
+ # Repetitive patterns
309
+ if features['repetition_ratio'] < 0.03:
310
+ indicators.append("Minimal repetition (uncommon in human writing)")
311
+
312
+ # Generic phrases common in AI
313
+ generic_phrases = [
314
+ "it's important to note",
315
+ "it's worth noting",
316
+ "in conclusion",
317
+ "to summarize",
318
+ "additionally",
319
+ "furthermore",
320
+ "moreover",
321
+ "in other words"
322
+ ]
323
+
324
+ text_lower = text.lower()
325
+ found_phrases = [p for p in generic_phrases if p in text_lower]
326
+ if len(found_phrases) >= 2:
327
+ indicators.append(f"Multiple generic transition phrases: {', '.join(found_phrases[:3])}")
328
+
329
+ # Lack of personal pronouns
330
+ personal_pronouns = len(re.findall(r'\b(I|me|my|mine|we|us|our)\b', text, re.IGNORECASE))
331
+ if personal_pronouns == 0 and len(text.split()) > 50:
332
+ indicators.append("Absence of personal pronouns")
333
+
334
+ return indicators
335
+
336
+ def _generate_explanation(self, score: float, perplexity: float) -> str:
337
+ """Generate human-readable explanation."""
338
+ if score > 0.9:
339
+ return (
340
+ f"Strong indicators of AI generation. "
341
+ f"Very low perplexity ({perplexity:.1f}) and multiple statistical markers."
342
+ )
343
+ elif score > 0.7:
344
+ return (
345
+ f"Likely AI-generated. "
346
+ f"Low perplexity ({perplexity:.1f}) and consistent with AI patterns."
347
+ )
348
+ elif score > 0.5:
349
+ return (
350
+ f"Possible AI generation. "
351
+ f"Some indicators present, but not conclusive."
352
+ )
353
+ elif score > 0.3:
354
+ return (
355
+ f"Likely human-written. "
356
+ f"Natural variation in style and structure."
357
+ )
358
+ else:
359
+ return (
360
+ f"Strong indicators of human writing. "
361
+ f"High perplexity ({perplexity:.1f}) and natural language patterns."
362
+ )
363
+
364
+ def batch_analyze(self, texts: List[str]) -> List[Dict]:
365
+ """Analyze multiple texts efficiently."""
366
+ results = []
367
+
368
+ for text in texts:
369
+ result = self.analyze_text(text, detailed=False)
370
+ results.append(result)
371
+
372
+ return results
373
+
374
+
375
+ # Example usage
376
+ if __name__ == "__main__":
377
+ detector = AITextDetector()
378
+
379
+ # Test with sample text
380
+ ai_text = """
381
+ Artificial intelligence has revolutionized numerous industries in recent years.
382
+ It's important to note that machine learning algorithms have become increasingly
383
+ sophisticated. Furthermore, these technologies continue to advance at a rapid pace.
384
+ In conclusion, AI will likely play an even larger role in the future.
385
+ """
386
+
387
+ human_text = """
388
+ I can't believe how much AI has changed things! Last week I was playing around
389
+ with ChatGPT and honestly... it's wild. My boss thinks we should use it for
390
+ everything but idk, seems risky? Anyway, what do you think?
391
+ """
392
+
393
+ print("AI Text Analysis:")
394
+ result = detector.analyze_text(ai_text)
395
+ print(f"Verdict: {result['verdict']}")
396
+ print(f"Confidence: {result['confidence']:.2%}")
397
+ print(f"Indicators: {result['indicators']}\n")
398
+
399
+ print("Human Text Analysis:")
400
+ result = detector.analyze_text(human_text)
401
+ print(f"Verdict: {result['verdict']}")
402
+ print(f"Confidence: {result['confidence']:.2%}")
src/detection/anomaly_detector.py ADDED
@@ -0,0 +1,440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Anomaly Detection Module
3
+
4
+ Detects coordinated inauthentic behavior, bot networks, and suspicious patterns.
5
+
6
+ Key Features:
7
+ 1. Bot account identification
8
+ 2. Coordinated campaign detection
9
+ 3. Viral spread analysis
10
+ 4. Temporal pattern anomalies
11
+ """
12
+
13
+ import numpy as np
14
+ import pandas as pd
15
+ from typing import Dict, List, Tuple, Optional
16
+ from sklearn.ensemble import IsolationForest
17
+ from sklearn.preprocessing import StandardScaler
18
+ import networkx as nx
19
+ from datetime import datetime, timedelta
20
+ import torch
21
+ import torch.nn as nn
22
+
23
+
24
class AnomalyDetector:
    """
    Multi-method anomaly detector for social media content.

    Detects:
    - Bot accounts (behavioral patterns, via Isolation Forest)
    - Coordinated campaigns (temporal clustering + network analysis)
    - Suspicious viral patterns (growth-curve heuristics)
    - Time-series anomalies
    """

    def __init__(
        self,
        contamination: float = 0.1,
        device: str = "cuda" if torch.cuda.is_available() else "cpu"
    ):
        """
        Initialize anomaly detector.

        Args:
            contamination: Expected proportion of anomalies (0-0.5);
                forwarded unchanged to IsolationForest.
            device: Device for deep learning models.
                NOTE(review): stored but not used by any method of this
                class yet — presumably reserved for future torch models.
        """
        self.contamination = contamination
        self.device = device

        # Isolation Forest for bot detection.
        # random_state is pinned so repeated runs give identical verdicts.
        self.bot_detector = IsolationForest(
            contamination=contamination,
            random_state=42,
            n_estimators=100
        )

        # Scaler for feature normalization; re-fit on every
        # detect_bot_accounts() call (per-batch, not incremental).
        self.scaler = StandardScaler()

        print("✓ Anomaly Detector initialized")

    def detect_bot_accounts(
        self,
        user_data: pd.DataFrame,
        return_scores: bool = True
    ) -> Dict:
        """
        Detect bot accounts based on behavioral features.

        Args:
            user_data: DataFrame with user activity data.
                Required columns: user_id, post_count, follower_count,
                following_count, account_age_days, avg_post_interval,
                verified, profile_has_image, bio_length
            return_scores: Also return per-user anomaly scores.

        Returns:
            Detection results with bot predictions (counts, IDs, summary,
            and optionally per-user score records).
        """
        # Extract behavioral features (one row per user).
        features = self._extract_bot_features(user_data)

        # Normalize features. fit_transform refits the scaler, so each
        # call is an independent analysis of its own batch.
        features_scaled = self.scaler.fit_transform(features)

        # Detect anomalies. fit_predict also refits the forest per batch.
        predictions = self.bot_detector.fit_predict(features_scaled)
        anomaly_scores = self.bot_detector.score_samples(features_scaled)

        # IsolationForest convention: -1 = anomaly (bot), 1 = normal
        bot_mask = predictions == -1
        bot_users = user_data.loc[bot_mask, 'user_id'].tolist()

        # Calculate confidence scores.
        # Logistic squash of the anomaly score: score_samples returns
        # lower (more negative) values for more anomalous points, so
        # 1/(1+exp(score)) maps "more anomalous" toward 1.
        scores_normalized = 1 / (1 + np.exp(anomaly_scores))

        result = {
            'total_users': len(user_data),
            'bots_detected': int(np.sum(bot_mask)),
            'bot_percentage': float(np.mean(bot_mask) * 100),
            'bot_user_ids': bot_users,
            'summary': self._generate_bot_summary(user_data[bot_mask])
        }

        if return_scores:
            # One record per user so callers can rank/inspect individuals.
            result['user_scores'] = pd.DataFrame({
                'user_id': user_data['user_id'],
                'is_bot': bot_mask,
                'bot_probability': scores_normalized,
                'anomaly_score': anomaly_scores
            }).to_dict('records')

        return result

    def _extract_bot_features(self, user_data: pd.DataFrame) -> np.ndarray:
        """Extract a (n_users, n_features) matrix for bot detection.

        Each feature is appended only if its source columns are present,
        so the feature set (and its order) depends on the input schema.
        NOTE(review): if none of the expected columns exist, `features`
        is empty and np.column_stack raises — confirm callers always
        supply at least one of the columns below.
        """
        features = []

        # Feature 1: Post frequency (posts per day; +1 avoids div by zero)
        if 'account_age_days' in user_data and 'post_count' in user_data:
            post_frequency = user_data['post_count'] / (user_data['account_age_days'] + 1)
            features.append(post_frequency)

        # Feature 2: Follower/following ratio (+1 avoids div by zero)
        if 'follower_count' in user_data and 'following_count' in user_data:
            ff_ratio = user_data['follower_count'] / (user_data['following_count'] + 1)
            features.append(ff_ratio)

        # Feature 3: Account completeness score (0-3: verified flag,
        # profile image flag, non-trivial bio)
        completeness = 0
        if 'verified' in user_data:
            completeness += user_data['verified'].astype(int)
        if 'profile_has_image' in user_data:
            completeness += user_data['profile_has_image'].astype(int)
        if 'bio_length' in user_data:
            completeness += (user_data['bio_length'] > 20).astype(int)
        features.append(completeness)

        # Feature 4: Posting pattern regularity
        if 'avg_post_interval' in user_data:
            features.append(user_data['avg_post_interval'])

        # Feature 5: Account age
        if 'account_age_days' in user_data:
            features.append(user_data['account_age_days'])

        # Stack per-user feature series into a 2-D array
        feature_array = np.column_stack(features)

        return feature_array

    def _generate_bot_summary(self, bot_data: pd.DataFrame) -> Dict:
        """Generate summary statistics for detected bots.

        `x in bot_data` tests column membership; absent columns yield None
        entries. NOTE(review): assumes 'verified'/'profile_has_image' are
        boolean dtype — `~` on non-bool columns misbehaves; confirm the
        ingestion schema.
        """
        if len(bot_data) == 0:
            return {'message': 'No bots detected'}

        summary = {
            'avg_post_frequency': float(bot_data['post_count'].mean() / (bot_data['account_age_days'].mean() + 1)) if 'post_count' in bot_data else None,
            'avg_account_age_days': float(bot_data['account_age_days'].mean()) if 'account_age_days' in bot_data else None,
            'percent_unverified': float((~bot_data['verified']).mean() * 100) if 'verified' in bot_data else None,
            'percent_no_profile_image': float((~bot_data['profile_has_image']).mean() * 100) if 'profile_has_image' in bot_data else None
        }

        return summary

    def detect_coordinated_campaign(
        self,
        activity_data: pd.DataFrame,
        time_window: str = "1h",
        min_accounts: int = 5
    ) -> Dict:
        """
        Detect coordinated campaigns using network analysis.

        Args:
            activity_data: DataFrame with columns: user_id, content_id,
                timestamp, content_hash, action_type
            time_window: Time window for coordination (e.g., "1h", "30m");
                parsed by _parse_time_window (s/m/h/d suffixes).
            min_accounts: Minimum distinct activity rows for a campaign.

        Returns:
            Detected campaigns plus network metrics and a text summary.
        """
        # Convert time window to timedelta
        time_delta = self._parse_time_window(time_window)

        # Group activities by content: identical content_hash means the
        # same payload was posted/shared.
        content_groups = activity_data.groupby('content_hash')

        campaigns = []

        for content_hash, group in content_groups:
            # Too few participants to call it a campaign
            if len(group) < min_accounts:
                continue

            # Check temporal clustering: span between first and last action
            timestamps = pd.to_datetime(group['timestamp'])
            time_range = (timestamps.max() - timestamps.min()).total_seconds()

            # If all actions fall within the configured window...
            if time_range <= time_delta.total_seconds():
                # ...score how coordinated the burst looks (0-1)
                coordination_score = self._calculate_coordination_score(group)

                # 0.7 is a fixed precision-oriented cutoff
                if coordination_score > 0.7:
                    campaigns.append({
                        'content_hash': content_hash,
                        'participant_count': len(group),
                        'time_range_seconds': time_range,
                        'coordination_score': float(coordination_score),
                        'user_ids': group['user_id'].tolist(),
                        'start_time': timestamps.min().isoformat(),
                        'end_time': timestamps.max().isoformat()
                    })

        # Network analysis over campaign co-participation
        campaign_network = self._build_campaign_network(campaigns)

        return {
            'campaigns_detected': len(campaigns),
            'campaigns': campaigns,
            'network_metrics': campaign_network,
            'explanation': self._explain_campaigns(campaigns)
        }

    def _parse_time_window(self, time_window: str) -> timedelta:
        """Parse a "<int><unit>" window string (unit in s/m/h/d).

        Raises:
            ValueError: If the unit suffix is not recognized.
        """
        unit = time_window[-1]
        value = int(time_window[:-1])

        if unit == 's':
            return timedelta(seconds=value)
        elif unit == 'm':
            return timedelta(minutes=value)
        elif unit == 'h':
            return timedelta(hours=value)
        elif unit == 'd':
            return timedelta(days=value)
        else:
            raise ValueError(f"Unknown time unit: {unit}")

    def _calculate_coordination_score(self, activity_group: pd.DataFrame) -> float:
        """
        Calculate coordination score (0-1) from three additive signals:
        - Temporal clustering (tight timestamp spread, max 0.4)
        - Account similarity (similar account ages, max 0.3)
        - Action synchronization (uniform action types, max 0.3)
        """
        score = 0.0

        # 1. Temporal clustering (max 0.4)
        timestamps = pd.to_datetime(activity_group['timestamp'])
        # astype(int) yields nanoseconds since epoch; /1e9 -> seconds.
        # NOTE(review): datetime->int astype has been deprecated/changed
        # across pandas versions — verify on the pinned pandas release.
        time_std = timestamps.astype(int).std() / 1e9  # Convert to seconds

        if time_std < 60:  # Within 1 minute
            score += 0.4
        elif time_std < 300:  # Within 5 minutes
            score += 0.3
        elif time_std < 3600:  # Within 1 hour
            score += 0.2

        # 2. Account age similarity (max 0.3): bot farms are often created
        # in batches, so near-identical ages are suspicious
        if 'account_age_days' in activity_group:
            age_std = activity_group['account_age_days'].std()
            if age_std < 30:  # Similar account ages
                score += 0.3
            elif age_std < 90:
                score += 0.2

        # 3. Action type uniformity (max 0.3)
        if 'action_type' in activity_group:
            action_entropy = self._calculate_entropy(
                activity_group['action_type'].value_counts(normalize=True)
            )
            # Low entropy = uniform actions = coordinated.
            # NOTE(review): entropy is in bits and not normalized, so with
            # >2 distinct action types it can exceed 1 and make this term
            # negative — confirm that is intended.
            score += 0.3 * (1 - action_entropy)

        return min(score, 1.0)

    def _calculate_entropy(self, probabilities: pd.Series) -> float:
        """Calculate Shannon entropy in bits (epsilon guards log2(0))."""
        return -np.sum(probabilities * np.log2(probabilities + 1e-10))

    def _build_campaign_network(self, campaigns: List[Dict]) -> Dict:
        """Build a co-participation graph of campaign users.

        Nodes are user_ids; an edge joins two users who appeared in the
        same campaign, weighted by how many campaigns they share.
        Returns summary graph metrics, not the graph itself.
        """
        if not campaigns:
            return {'nodes': 0, 'edges': 0, 'components': 0}

        # Create graph
        G = nx.Graph()

        # Add nodes and edges
        for campaign in campaigns:
            users = campaign['user_ids']

            # Add all users
            G.add_nodes_from(users)

            # Connect users who participated in the same campaign
            # (O(k^2) pairs per campaign of k participants)
            for i, user1 in enumerate(users):
                for user2 in users[i+1:]:
                    if G.has_edge(user1, user2):
                        G[user1][user2]['weight'] += 1
                    else:
                        G.add_edge(user1, user2, weight=1)

        # Calculate network metrics
        connected_components = list(nx.connected_components(G))

        metrics = {
            'nodes': G.number_of_nodes(),
            'edges': G.number_of_edges(),
            'connected_components': len(connected_components),
            'largest_component_size': max(len(c) for c in connected_components) if connected_components else 0,
            'avg_clustering_coefficient': nx.average_clustering(G) if G.number_of_nodes() > 0 else 0
        }

        return metrics

    def _explain_campaigns(self, campaigns: List[Dict]) -> str:
        """Generate a one-paragraph explanation for detected campaigns."""
        if not campaigns:
            return "No coordinated campaigns detected."

        total_participants = sum(c['participant_count'] for c in campaigns)
        avg_coordination = np.mean([c['coordination_score'] for c in campaigns])

        return (
            f"Detected {len(campaigns)} coordinated campaign(s) involving "
            f"{total_participants} accounts. Average coordination score: {avg_coordination:.2f}. "
            f"This suggests organized, inauthentic behavior patterns."
        )

    def analyze_viral_spread(
        self,
        spread_data: pd.DataFrame
    ) -> Dict:
        """
        Analyze viral spread patterns for anomalies.

        Args:
            spread_data: DataFrame with columns: timestamp, share_count,
                view_count, engagement_rate.
                NOTE(review): share_count is treated as cumulative (see
                total_shares below) — confirm upstream semantics.

        Returns:
            Viral spread analysis with anomaly list, growth stats, and a
            SUSPICIOUS/NORMAL verdict (SUSPICIOUS requires >= 2 anomalies).
        """
        # Sort by timestamp (returns a copy; caller's frame is untouched)
        spread_data = spread_data.sort_values('timestamp')

        # Per-step relative growth; first row is NaN and pandas mean/std
        # skip NaN below.
        spread_data['growth_rate'] = spread_data['share_count'].pct_change()

        # Detect suspicious patterns
        anomalies = []

        # 1. Sudden spike detection: > 3 sigma above mean growth
        mean_growth = spread_data['growth_rate'].mean()
        std_growth = spread_data['growth_rate'].std()

        spikes = spread_data[
            spread_data['growth_rate'] > mean_growth + 3 * std_growth
        ]

        if len(spikes) > 0:
            anomalies.append({
                'type': 'sudden_spike',
                'description': f'Detected {len(spikes)} sudden spike(s) in sharing activity',
                'timestamps': spikes['timestamp'].tolist()
            })

        # 2. Unnatural growth pattern
        # Real viral content has exponential then logarithmic growth;
        # inorganic content has linear or step-function growth.
        # NOTE(review): Series.corr aligns on index — if spread_data's
        # index is not 0..n-1, this silently misaligns with the
        # range-indexed series; consider reset_index upstream.
        correlation_with_time = spread_data['share_count'].corr(
            pd.Series(range(len(spread_data)))
        )

        if abs(correlation_with_time) > 0.95:  # Too linear
            anomalies.append({
                'type': 'linear_growth',
                'description': 'Unnaturally linear growth pattern (typical of bot-driven spread)',
                'correlation': float(correlation_with_time)
            })

        # 3. Low engagement rate despite high shares (bot shares generate
        # few genuine interactions)
        if 'engagement_rate' in spread_data:
            avg_engagement = spread_data['engagement_rate'].mean()
            if avg_engagement < 0.01:  # Less than 1%
                anomalies.append({
                    'type': 'low_engagement',
                    'description': 'High share count but abnormally low engagement',
                    'avg_engagement_rate': float(avg_engagement)
                })

        return {
            'is_suspicious': len(anomalies) > 0,
            'anomaly_count': len(anomalies),
            'anomalies': anomalies,
            'growth_statistics': {
                # Last cumulative value after sorting by time
                'total_shares': int(spread_data['share_count'].iloc[-1]) if len(spread_data) > 0 else 0,
                'avg_growth_rate': float(mean_growth),
                'max_growth_rate': float(spread_data['growth_rate'].max()),
                'time_to_peak': str(spread_data.loc[spread_data['share_count'].idxmax(), 'timestamp']) if len(spread_data) > 0 else None
            },
            # Single anomaly alone is not enough to flag
            'verdict': 'SUSPICIOUS' if len(anomalies) >= 2 else 'NORMAL',
            'explanation': self._explain_viral_analysis(anomalies)
        }

    def _explain_viral_analysis(self, anomalies: List[Dict]) -> str:
        """Generate a human-readable summary of the viral-spread anomalies."""
        if not anomalies:
            return "Viral spread pattern appears organic and natural."

        explanations = [a['description'] for a in anomalies]
        return "Suspicious patterns detected: " + "; ".join(explanations)
419
+
420
+
421
# Example usage
if __name__ == "__main__":
    detector = AnomalyDetector()

    # Synthetic accounts for the demo: user1/user3 look bot-like (young,
    # hyperactive, bare profiles); the rest look like ordinary users.
    sample_accounts = {
        'user_id': ['user1', 'user2', 'user3', 'user4', 'user5'],
        'post_count': [1000, 50, 800, 30, 20],
        'follower_count': [100, 500, 120, 300, 250],
        'following_count': [5000, 200, 4800, 180, 220],
        'account_age_days': [30, 365, 25, 400, 350],
        'avg_post_interval': [0.1, 8, 0.15, 12, 10],
        'verified': [False, True, False, True, True],
        'profile_has_image': [False, True, False, True, True],
        'bio_length': [5, 150, 8, 120, 100],
    }

    result = detector.detect_bot_accounts(pd.DataFrame(sample_accounts))
    print(f"Bots detected: {result['bots_detected']}")
    print(f"Bot user IDs: {result['bot_user_ids']}")
src/detection/deepfake_detector.py ADDED
@@ -0,0 +1,431 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Deepfake Detection Module
3
+
4
+ This module implements state-of-the-art deepfake detection using:
5
+ 1. EfficientNet-based architecture for face manipulation detection
6
+ 2. Temporal consistency analysis for video deepfakes
7
+ 3. Attention mechanisms for explainability
8
+ 4. Multi-scale feature extraction
9
+ """
10
+
11
+ import torch
12
+ import torch.nn as nn
13
+ import torch.nn.functional as F
14
+ from torchvision import transforms
15
+ from typing import Dict, Tuple, Optional, List
16
+ import numpy as np
17
+ import cv2
18
+ from PIL import Image
19
+ import timm
20
+
21
+ # Simplified imports - use available modules
22
+ try:
23
+ from ..utils.face_detection import detect_faces
24
+ from ..utils.preprocessing import preprocess_image
25
+ except ImportError:
26
+ # If relative imports fail, try absolute
27
+ import sys
28
+ from pathlib import Path
29
+ sys.path.insert(0, str(Path(__file__).parent.parent))
30
+ from utils.face_detection import detect_faces
31
+ from utils.preprocessing import preprocess_image
32
+
33
+
34
class DeepfakeDetector:
    """
    Production-ready deepfake detector with ensemble approach.

    Combines multiple detection strategies:
    - Spatial artifact detection
    - Temporal consistency (for videos)
    - Frequency domain analysis
    - Attention-based feature extraction
    """

    def __init__(
        self,
        model_path: str = "models/deepfake_efficientnet_b4.pth",
        device: str = "cuda" if torch.cuda.is_available() else "cpu",
        threshold: float = 0.5,
        use_ensemble: bool = True
    ):
        """
        Initialize the deepfake detector.

        Args:
            model_path: Path to pre-trained model weights
            device: Device to run inference on (cuda/cpu)
            threshold: Detection threshold (0-1); confidences above it
                are labelled FAKE
            use_ensemble: Whether to average over an ensemble of models
        """
        self.device = device
        self.threshold = threshold
        self.use_ensemble = use_ensemble

        # Load models
        self._load_models(model_path)

        # Image preprocessing: resize, center-crop, standard ImageNet
        # normalization.
        self.transform = transforms.Compose([
            transforms.Resize((380, 380)),
            transforms.CenterCrop(299),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            )
        ])

    def _load_models(self, model_path: str):
        """Load pre-trained models onto self.device.

        Falls back to random initialization (with a warning) when no
        checkpoint exists at model_path.
        """
        # Primary model: EfficientNet-B4, single-logit binary head
        self.primary_model = timm.create_model(
            'efficientnet_b4',
            pretrained=False,
            num_classes=1
        ).to(self.device)

        # Load weights if available
        try:
            checkpoint = torch.load(model_path, map_location=self.device)
            self.primary_model.load_state_dict(checkpoint['model_state_dict'])
            print(f"✓ Loaded model from {model_path}")
        except FileNotFoundError:
            print(f"⚠ Model not found at {model_path}. Using random initialization.")
            print("  Run: python scripts/download_models.py to download pre-trained weights")

        self.primary_model.eval()

        # Secondary models for ensemble
        if self.use_ensemble:
            self.secondary_models = self._load_ensemble_models()

    def _load_ensemble_models(self) -> List[nn.Module]:
        """Load additional models for the ensemble.

        NOTE(review): these backbones are created with pretrained=False
        and no checkpoint is loaded — their outputs are random until
        trained weights are supplied.
        """
        models = []

        # XceptionNet - good for GAN artifacts
        xception = timm.create_model(
            'xception',
            pretrained=False,
            num_classes=1
        ).to(self.device)
        xception.eval()
        models.append(xception)

        # ResNet50 - robust baseline
        resnet = timm.create_model(
            'resnet50',
            pretrained=False,
            num_classes=1
        ).to(self.device)
        resnet.eval()
        models.append(resnet)

        return models

    def analyze_image(
        self,
        image_path,
        return_attention: bool = True
    ) -> Dict:
        """
        Analyze a single image for deepfake artifacts.

        Args:
            image_path: Path to an image file, or an already-loaded
                PIL.Image.Image (analyze_video passes frames directly)
            return_attention: Whether to return attention maps

        Returns:
            Dictionary with detection results
        """
        # Accept either a filesystem path or an in-memory PIL image.
        # Previously this always called Image.open(), which crashed when
        # analyze_video() handed over decoded frames.
        if isinstance(image_path, Image.Image):
            image = image_path.convert('RGB')
        else:
            image = Image.open(image_path).convert('RGB')

        # Detect faces
        faces = detect_faces(image)

        if len(faces) == 0:
            # No face: nothing to classify, return a neutral result
            return {
                'verdict': 'NO_FACE_DETECTED',
                'confidence': 0.0,
                'explanation': 'No faces detected in the image',
                'faces_analyzed': 0,
                'artifacts_detected': []
            }

        # Analyze each detected face independently
        face_results = []
        for face_coords in faces:
            face_crop = self._crop_face(image, face_coords)
            face_results.append(self._analyze_face(face_crop, return_attention))

        # Aggregate: mean confidence over faces, thresholded for verdict
        avg_confidence = np.mean([r['confidence'] for r in face_results])
        is_fake = avg_confidence > self.threshold

        return {
            'verdict': 'FAKE' if is_fake else 'REAL',
            'confidence': float(avg_confidence),
            'threshold': self.threshold,
            'faces_analyzed': len(faces),
            'face_results': face_results,
            'explanation': self._generate_explanation(avg_confidence, face_results),
            'artifacts_detected': self._detect_artifacts(image)
        }

    def _analyze_face(
        self,
        face_image: Image.Image,
        return_attention: bool
    ) -> Dict:
        """Analyze a single face crop; returns confidence and fake flag."""
        # Preprocess
        input_tensor = self.transform(face_image).unsqueeze(0).to(self.device)

        # Primary model inference
        with torch.no_grad():
            logits = self.primary_model(input_tensor)
            confidence = torch.sigmoid(logits).item()

        # Ensemble if enabled: average sigmoid outputs of all models
        if self.use_ensemble:
            ensemble_confidences = [confidence]
            for model in self.secondary_models:
                with torch.no_grad():
                    logits = model(input_tensor)
                    conf = torch.sigmoid(logits).item()
                    ensemble_confidences.append(conf)

            confidence = np.mean(ensemble_confidences)

        result = {
            'confidence': confidence,
            'is_fake': confidence > self.threshold
        }

        # Add attention map if requested
        if return_attention:
            result['attention_map'] = self._generate_attention_map(input_tensor)

        return result

    def _crop_face(
        self,
        image: Image.Image,
        face_coords: Tuple[int, int, int, int]
    ) -> Image.Image:
        """Crop a face from the image with 30% padding, clamped to bounds."""
        x, y, w, h = face_coords

        # Add 30% padding around the detected box so the model sees
        # context (hairline, jaw) where blending artifacts appear.
        padding = int(0.3 * max(w, h))
        x1 = max(0, x - padding)
        y1 = max(0, y - padding)
        x2 = min(image.width, x + w + padding)
        y2 = min(image.height, y + h + padding)

        return image.crop((x1, y1, x2, y2))

    def _generate_attention_map(self, input_tensor: torch.Tensor) -> np.ndarray:
        """Generate a coarse attention map (simplified Grad-CAM stand-in)."""
        # Simplified attention map generation.
        # In production, implement full Grad-CAM.
        # timm models expose backbone features via forward_features();
        # the previous `.features` attribute access is the torchvision
        # API and raised AttributeError on timm backbones.
        with torch.no_grad():
            features = self.primary_model.forward_features(input_tensor)

        # Global average pooling collapses spatial dims to one value
        # per channel.
        attention = F.adaptive_avg_pool2d(features, (1, 1))
        attention = attention.squeeze().cpu().numpy()

        return attention

    def _detect_artifacts(self, image: Image.Image) -> List[str]:
        """Run heuristic artifact checks; returns human-readable findings."""
        artifacts = []

        # Convert to numpy array (H, W, 3 RGB)
        img_array = np.array(image)

        # Check for common artifacts

        # 1. Face boundary inconsistencies
        if self._check_boundary_artifacts(img_array):
            artifacts.append("Face boundary inconsistencies detected")

        # 2. Color inconsistencies
        if self._check_color_artifacts(img_array):
            artifacts.append("Abnormal color distribution in face region")

        # 3. Frequency domain artifacts
        if self._check_frequency_artifacts(img_array):
            artifacts.append("Suspicious frequency patterns detected")

        # 4. Eye/teeth artifacts (common in face-swap)
        if self._check_facial_feature_artifacts(img_array):
            artifacts.append("Inconsistencies in facial features")

        return artifacts

    def _check_boundary_artifacts(self, image: np.ndarray) -> bool:
        """Check for boundary artifacts using Canny edge density."""
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        edges = cv2.Canny(gray, 50, 150)

        # Fraction of pixels lying on an edge
        edge_density = np.sum(edges > 0) / edges.size

        # Suspicious if too many sharp edges (indicates blending seams);
        # 0.15 is an empirical cutoff.
        return edge_density > 0.15

    def _check_color_artifacts(self, image: np.ndarray) -> bool:
        """Check for color inconsistencies in LAB space."""
        # LAB separates lightness (L) from chroma, making blended
        # regions easier to spot than in RGB.
        lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)

        # Per-channel variance over the whole image
        color_var = np.var(lab, axis=(0, 1))

        # Suspicious if L-channel variance is abnormal (empirical cutoff)
        return color_var[0] > 1000  # Threshold for L channel

    def _check_frequency_artifacts(self, image: np.ndarray) -> bool:
        """Check the frequency domain for GAN-style signatures."""
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

        # 2-D FFT, centered so low frequencies sit in the middle
        fft = np.fft.fft2(gray)
        fft_shift = np.fft.fftshift(fft)
        magnitude = np.abs(fft_shift)

        # Energy of the central (low/mid frequency) half-window vs total
        high_freq_energy = np.sum(magnitude[magnitude.shape[0]//4:3*magnitude.shape[0]//4,
                                            magnitude.shape[1]//4:3*magnitude.shape[1]//4])
        total_energy = np.sum(magnitude)

        ratio = high_freq_energy / total_energy

        # GAN-generated images often have atypical frequency ratios;
        # flag both unusually low and unusually high values.
        return ratio < 0.1 or ratio > 0.4

    def _check_facial_feature_artifacts(self, image: np.ndarray) -> bool:
        """Check for artifacts in facial features (placeholder)."""
        # Simplified check - in production, use facial landmark detection
        # and analyze consistency of eyes, nose, mouth.
        # TODO: implement landmark-based consistency analysis.
        return False

    def _generate_explanation(
        self,
        confidence: float,
        face_results: List[Dict]
    ) -> str:
        """Map a confidence value to a human-readable explanation."""
        if confidence > 0.9:
            return "Strong indicators of manipulation detected. Multiple artifacts found."
        elif confidence > 0.7:
            return "Likely manipulated. Several suspicious patterns identified."
        elif confidence > 0.5:
            return "Possible manipulation. Some inconsistencies detected."
        elif confidence > 0.3:
            return "Minor inconsistencies found, but likely authentic."
        else:
            return "No significant manipulation detected. Image appears authentic."

    def analyze_video(
        self,
        video_path: str,
        sample_rate: int = 5,
        max_frames: int = 100
    ) -> Dict:
        """
        Analyze video for deepfake artifacts.

        Args:
            video_path: Path to video file
            sample_rate: Analyze every Nth frame
            max_frames: Maximum frames to analyze

        Returns:
            Dictionary with detection results (per-frame results plus a
            temporal-consistency summary)
        """
        cap = cv2.VideoCapture(video_path)

        frame_results = []
        frame_count = 0
        analyzed_count = 0

        while cap.isOpened() and analyzed_count < max_frames:
            ret, frame = cap.read()
            if not ret:
                break

            # Sample every Nth frame to bound runtime
            if frame_count % sample_rate == 0:
                # OpenCV decodes BGR; the classifier expects RGB
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                pil_image = Image.fromarray(frame_rgb)

                # Analyze frame (analyze_image accepts PIL images directly)
                result = self.analyze_image(pil_image, return_attention=False)
                frame_results.append({
                    'frame_number': frame_count,
                    'confidence': result['confidence'],
                    'verdict': result['verdict']
                })

                analyzed_count += 1

            frame_count += 1

        cap.release()

        # Guard: unreadable video / no sampled frames would otherwise
        # produce NaN statistics from np.mean([]).
        if not frame_results:
            return {
                'verdict': 'NO_FRAMES_ANALYZED',
                'confidence': 0.0,
                'confidence_variance': 0.0,
                'temporal_inconsistency': False,
                'frames_analyzed': 0,
                'total_frames': frame_count,
                'frame_results': [],
                'explanation': 'No frames could be read from the video.'
            }

        # Analyze temporal consistency of per-frame confidences
        confidences = [r['confidence'] for r in frame_results]
        avg_confidence = np.mean(confidences)
        confidence_variance = np.var(confidences)

        # High variance suggests inconsistent manipulation across frames
        temporal_inconsistency = confidence_variance > 0.05

        return {
            'verdict': 'FAKE' if avg_confidence > self.threshold else 'REAL',
            'confidence': float(avg_confidence),
            'confidence_variance': float(confidence_variance),
            'temporal_inconsistency': temporal_inconsistency,
            'frames_analyzed': analyzed_count,
            'total_frames': frame_count,
            'frame_results': frame_results,
            'explanation': self._generate_video_explanation(
                avg_confidence,
                temporal_inconsistency
            )
        }

    def _generate_video_explanation(
        self,
        confidence: float,
        temporal_inconsistency: bool
    ) -> str:
        """Generate explanation for video analysis."""
        base_explanation = self._generate_explanation(confidence, [])

        if temporal_inconsistency:
            base_explanation += " Temporal inconsistencies detected across frames."

        return base_explanation
421
+
422
+
423
# Example usage
if __name__ == "__main__":
    detector = DeepfakeDetector()

    # Run single-image analysis and report the headline fields.
    analysis = detector.analyze_image("test_image.jpg")
    print(f"Verdict: {analysis['verdict']}")
    print(f"Confidence: {analysis['confidence']:.2%}")
    print(f"Explanation: {analysis['explanation']}")
src/models/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Init file for models module."""
src/models/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (268 Bytes). View file
 
src/training/train_deepfake.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Training Pipeline for Deepfake Detection Models
3
+
4
+ Implements:
5
+ - Distributed training (multi-GPU)
6
+ - Mixed precision training
7
+ - Experiment tracking with MLflow
8
+ - Checkpoint management
9
+ - Data augmentation
10
+ """
11
+
12
+ import torch
13
+ import torch.nn as nn
14
+ import torch.optim as optim
15
+ from torch.utils.data import DataLoader, Dataset
16
+ from torch.cuda.amp import autocast, GradScaler
17
+ import pytorch_lightning as pl
18
+ from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
19
+ from pytorch_lightning.loggers import MLFlowLogger
20
+ import timm
21
+ from typing import Dict, Tuple, Optional
22
+ import mlflow
23
+ import numpy as np
24
+ from pathlib import Path
25
+ import albumentations as A
26
+ from albumentations.pytorch import ToTensorV2
27
+ from PIL import Image
28
+ import cv2
29
+
30
+
31
class DeepfakeDataset(Dataset):
    """Dataset for deepfake detection training.

    Loads images from disk with OpenCV (BGR converted to RGB) and applies
    optional Albumentations transforms.
    """

    def __init__(
        self,
        image_paths: list,
        labels: list,
        transform=None,
        mode: str = "train"
    ):
        """
        Args:
            image_paths: List of paths to images
            labels: List of labels (0=real, 1=fake)
            transform: Albumentations transforms
            mode: 'train', 'val', or 'test'

        Raises:
            ValueError: If image_paths and labels differ in length.
        """
        # A silent length mismatch would mispair images and labels; fail fast.
        if len(image_paths) != len(labels):
            raise ValueError(
                f"image_paths ({len(image_paths)}) and labels "
                f"({len(labels)}) must have the same length"
            )
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
        self.mode = mode

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the (image, label) pair for sample ``idx``.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        image_path = self.image_paths[idx]
        image = cv2.imread(str(image_path))
        if image is None:
            # cv2.imread returns None instead of raising; surface a clear
            # error rather than a cryptic cvtColor failure downstream.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        label = self.labels[idx]

        # Apply transforms (Albumentations returns a dict keyed by 'image').
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']

        return image, label
70
+
71
+
72
class DeepfakeDetectionModel(pl.LightningModule):
    """PyTorch Lightning module for binary deepfake detection.

    Wraps a timm backbone with a single-logit head trained with
    BCEWithLogitsLoss (label 1 = fake, 0 = real).
    """

    def __init__(
        self,
        model_name: str = "efficientnet_b4",
        learning_rate: float = 1e-4,
        weight_decay: float = 1e-5,
        num_classes: int = 1
    ):
        """
        Args:
            model_name: timm model identifier for the backbone.
            learning_rate: Initial AdamW learning rate.
            weight_decay: AdamW weight decay.
            num_classes: Number of output logits (1 for binary BCE).
        """
        super().__init__()
        self.save_hyperparameters()

        # Pre-trained backbone with a freshly initialized classifier head.
        self.model = timm.create_model(
            model_name,
            pretrained=True,
            num_classes=num_classes
        )

        # Binary classification on raw logits.
        self.criterion = nn.BCEWithLogitsLoss()

        # NOTE(review): these lists were never appended to anywhere in this
        # file; kept only for backward compatibility with external readers.
        self.train_accuracy = []
        self.val_accuracy = []

    def forward(self, x):
        """Return raw logits of shape (batch, num_classes)."""
        return self.model(x)

    def _shared_step(self, batch):
        """Compute (loss, accuracy) for one batch; shared by train and val."""
        images, labels = batch
        labels = labels.float().unsqueeze(1)

        logits = self(images)
        loss = self.criterion(logits, labels)

        # Threshold sigmoid probabilities at 0.5 for accuracy.
        probs = torch.sigmoid(logits)
        preds = (probs > 0.5).float()
        accuracy = (preds == labels).float().mean()

        return loss, accuracy

    def training_step(self, batch, batch_idx):
        """One optimization step; logs train loss/accuracy."""
        loss, accuracy = self._shared_step(batch)
        self.log('train_loss', loss, prog_bar=True)
        self.log('train_accuracy', accuracy, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """One validation step; logs val loss/accuracy."""
        loss, accuracy = self._shared_step(batch)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_accuracy', accuracy, prog_bar=True)
        return {'val_loss': loss, 'val_accuracy': accuracy}

    def configure_optimizers(self):
        """AdamW with a cosine-annealing learning-rate schedule."""
        optimizer = optim.AdamW(
            self.parameters(),
            lr=self.hparams.learning_rate,
            weight_decay=self.hparams.weight_decay
        )

        scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=10,
            eta_min=1e-6
        )

        return {
            'optimizer': optimizer,
            'lr_scheduler': {
                'scheduler': scheduler,
                'monitor': 'val_loss'
            }
        }
160
+
161
+
162
def get_transforms(mode: str = "train") -> A.Compose:
    """Build the Albumentations pipeline for the given split.

    Args:
        mode: 'train' enables random augmentation; any other value returns
            the deterministic evaluation pipeline.

    Returns:
        An A.Compose callable applied as ``transform(image=...)``.
    """
    # Geometry shared by both splits: resize then center-crop to 299x299.
    head = [A.Resize(380, 380), A.CenterCrop(299, 299)]
    # ImageNet normalization followed by tensor conversion, always last.
    tail = [
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        ),
        ToTensorV2()
    ]

    if mode != "train":
        return A.Compose(head + tail)

    # Random augmentation applied only during training.
    augment = [
        A.HorizontalFlip(p=0.5),
        A.Rotate(limit=15, p=0.5),
        A.ColorJitter(
            brightness=0.2,
            contrast=0.2,
            saturation=0.2,
            hue=0.1,
            p=0.5
        ),
        A.GaussNoise(p=0.3),
    ]
    return A.Compose(head + augment + tail)
195
+
196
+
197
class DeepfakeTrainer:
    """Training pipeline manager.

    Wires together datasets, dataloaders, the Lightning model, checkpoint
    and early-stop callbacks, and MLflow experiment tracking for one run.
    """

    def __init__(
        self,
        config: Dict,
        experiment_name: str = "deepfake-detection"
    ):
        """
        Args:
            config: Training configuration. Required keys: batch_size,
                num_workers, model_name, learning_rate, weight_decay,
                epochs, checkpoint_dir, early_stop_patience. Optional:
                gpus, precision, mlflow_uri.
            experiment_name: MLflow experiment name
        """
        self.config = config
        self.experiment_name = experiment_name

        # Point both the mlflow client and the Lightning logger at the
        # same experiment / tracking server.
        mlflow.set_experiment(experiment_name)
        self.mlflow_logger = MLFlowLogger(
            experiment_name=experiment_name,
            tracking_uri=config.get('mlflow_uri', 'http://localhost:5000')
        )

    def _make_loader(self, data: Tuple[list, list], mode: str) -> DataLoader:
        """Build the Dataset + DataLoader for one split ('train' or 'val')."""
        dataset = DeepfakeDataset(
            *data,
            transform=get_transforms(mode),
            mode=mode
        )
        return DataLoader(
            dataset,
            batch_size=self.config['batch_size'],
            shuffle=(mode == "train"),  # only the training split is shuffled
            num_workers=self.config['num_workers'],
            pin_memory=True
        )

    def _make_callbacks(self) -> Tuple[ModelCheckpoint, EarlyStopping]:
        """Create the best-val-accuracy checkpoint and early-stop callbacks."""
        checkpoint = ModelCheckpoint(
            dirpath=self.config['checkpoint_dir'],
            filename='deepfake-{epoch:02d}-{val_accuracy:.4f}',
            monitor='val_accuracy',
            mode='max',
            save_top_k=3,
            save_last=True
        )
        early_stop = EarlyStopping(
            monitor='val_loss',
            patience=self.config['early_stop_patience'],
            mode='min'
        )
        return checkpoint, early_stop

    def train(
        self,
        train_data: Tuple[list, list],
        val_data: Tuple[list, list]
    ):
        """
        Train the model.

        Args:
            train_data: Tuple of (image_paths, labels)
            val_data: Tuple of (image_paths, labels)

        Returns:
            Tuple of (trained model, pl.Trainer instance).
        """
        with mlflow.start_run():
            # Record the full configuration alongside the run.
            mlflow.log_params(self.config)

            train_loader = self._make_loader(train_data, "train")
            val_loader = self._make_loader(val_data, "val")

            model = DeepfakeDetectionModel(
                model_name=self.config['model_name'],
                learning_rate=self.config['learning_rate'],
                weight_decay=self.config['weight_decay']
            )

            checkpoint_callback, early_stop_callback = self._make_callbacks()

            trainer = pl.Trainer(
                max_epochs=self.config['epochs'],
                accelerator='auto',
                devices=self.config.get('gpus', 1),
                precision=self.config.get('precision', 16),
                logger=self.mlflow_logger,
                callbacks=[checkpoint_callback, early_stop_callback],
                log_every_n_steps=10,
                gradient_clip_val=1.0
            )

            trainer.fit(model, train_loader, val_loader)

            # Persist the best checkpoint as an MLflow artifact.
            best_model_path = checkpoint_callback.best_model_path
            mlflow.log_artifact(best_model_path)

            # (was an f-string with no placeholders)
            print("✓ Training completed!")
            print(f"  Best model: {best_model_path}")
            print(f"  Best val accuracy: {checkpoint_callback.best_model_score:.4f}")

            return model, trainer
314
+
315
+
316
# Example usage
if __name__ == "__main__":
    # Hyperparameters and runtime settings for a single training run.
    config = {
        'model_name': 'efficientnet_b4',
        'batch_size': 32,
        'learning_rate': 1e-4,
        'weight_decay': 1e-5,
        'epochs': 50,
        'num_workers': 4,
        'gpus': 1,
        'precision': 16,
        'checkpoint_dir': 'models/checkpoints',
        'early_stop_patience': 5,
        'mlflow_uri': 'http://localhost:5000'
    }

    # Placeholder file lists — swap in real dataset paths before running.
    train_paths = ['path/to/train/img1.jpg', 'path/to/train/img2.jpg']
    train_labels = [0, 1]  # 0=real, 1=fake

    val_paths = ['path/to/val/img1.jpg', 'path/to/val/img2.jpg']
    val_labels = [0, 1]

    # Build the pipeline manager.
    trainer = DeepfakeTrainer(config)

    # Uncomment to launch training:
    # model, pl_trainer = trainer.train(
    #     train_data=(train_paths, train_labels),
    #     val_data=(val_paths, val_labels)
    # )

    print("Training script ready. Uncomment the training code to run.")
src/utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Init file for utils module."""
src/utils/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (266 Bytes). View file
 
src/utils/__pycache__/face_detection.cpython-313.pyc ADDED
Binary file (1.45 kB). View file
 
src/utils/__pycache__/preprocessing.cpython-313.pyc ADDED
Binary file (1.32 kB). View file
 
src/utils/face_detection.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility module for face detection."""
2
+
3
+ import cv2
4
+ import numpy as np
5
+ from typing import List, Tuple
6
+ from PIL import Image
7
+
8
+
9
def detect_faces(image: Image.Image) -> List[Tuple[int, int, int, int]]:
    """
    Detect faces in an image using OpenCV's frontal-face Haar cascade.

    Args:
        image: PIL Image (any mode; converted to RGB internally)

    Returns:
        List of face bounding boxes (x, y, w, h); empty if none found
    """
    # Force 3-channel RGB so the grayscale conversion below is valid for
    # RGBA, palette, and single-channel inputs as well.
    img_array = np.array(image.convert("RGB"))
    gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)

    # Loading the cascade XML from disk on every call is wasteful; cache
    # the classifier on the function object and reuse it.
    face_cascade = getattr(detect_faces, "_cascade", None)
    if face_cascade is None:
        face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )
        detect_faces._cascade = face_cascade

    # Detect faces
    faces = face_cascade.detectMultiScale(
        gray,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(30, 30)
    )

    # detectMultiScale returns an empty tuple when nothing is found.
    return faces.tolist() if len(faces) > 0 else []
src/utils/preprocessing.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Preprocessing utilities."""
2
+
3
+ import numpy as np
4
+ from PIL import Image
5
+ from typing import Tuple
6
+
7
+
8
def preprocess_image(image: Image.Image, target_size: Tuple[int, int] = (299, 299)) -> np.ndarray:
    """
    Preprocess image for model input: resize, scale to [0, 1], then apply
    ImageNet per-channel normalization.

    Args:
        image: PIL Image (any mode; converted to RGB internally)
        target_size: Target (width, height) dimensions

    Returns:
        float32 array of shape (target_size[1], target_size[0], 3)
    """
    # Force 3-channel RGB so the per-channel mean/std below broadcast
    # correctly for grayscale or RGBA inputs (which would otherwise fail).
    rgb = image.convert("RGB").resize(target_size, Image.LANCZOS)

    # Convert to array, scaled to [0, 1]
    img_array = np.asarray(rgb) / 255.0

    # ImageNet channel statistics
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    img_array = (img_array - mean) / std

    return img_array.astype(np.float32)