Spaces:

Gradii
/

DetectMeBotBackend

Running

App Files Community

Gradii committed 16 days ago

Commit

dcb5a1a

1 Parent(s): 6b15151

first text model

Browse files

Files changed (8) hide show

backend/app/api/routes.py +115 -39
backend/app/models/schemas.py +1 -1
backend/app/services/detector/__init__.py +0 -36
backend/app/services/detector/base.py +0 -38
backend/app/services/detector/mock.py +0 -56
backend/app/services/image_analyzer.py +1 -18
backend/app/services/text_analyzer.py +31 -6
backend/requirements.txt +3 -0

backend/app/api/routes.py CHANGED Viewed

@@ -14,7 +14,6 @@ from app.models.schemas import (
 from app.services.download import download_file
 from app.services.text_analyzer import analyze_text
 from app.services.image_analyzer import analyze_image
-from app.services.detector import get_detector
 from app.core.config import get_settings
 from app.utils.exceptions import DeepfakeDetectionError
@@ -22,6 +21,20 @@ logger = logging.getLogger(__name__)
 router = APIRouter()
 @router.get(
     "/",
@@ -33,14 +46,13 @@ async def health_check() -> HealthResponse:
     settings = get_settings()
     logger.info("Health check endpoint accessed")
-    available_models = ["mock"]
     supported_types = ["text", "image", "video", "file"]
     return HealthResponse(
         status="ok",
         service="Deepfake Detection Service",
         version=settings.APP_VERSION,
-        available_models=available_models,
         supported_types=supported_types,
     )
@@ -58,20 +70,39 @@ async def health_check() -> HealthResponse:
 )
 async def analyze(request: AnalysisRequest) -> AnalysisResponse:
     settings = get_settings()
-    detector_model = None
     if isinstance(request, TextAnalysisRequest):
-        detector_model = request.model or settings.DEFAULT_DETECTOR_MODEL
-        logger.info(f"Received text analysis request, length: {len(request.text)} chars, model: {detector_model}")
         try:
-            detector = get_detector(detector_model)
         except ValueError as e:
-            logger.error(f"Invalid detector model: {str(e)}")
             raise HTTPException(status_code=400, detail=str(e))
-        text_bytes = request.text.encode('utf-8')
-        analysis_result = await detector.detect(text_bytes)
         logger.info(f"Text analysis completed. Result: {analysis_result}")
@@ -79,28 +110,43 @@ async def analyze(request: AnalysisRequest) -> AnalysisResponse:
             is_deepfake=analysis_result["is_deepfake"],
             confidence=analysis_result["confidence"],
             analysis_time=analysis_result["analysis_time"],
-            model_used=detector_model,
             content_type="text",
         )
     elif isinstance(request, ImageAnalysisRequest):
-        detector_model = request.model or settings.DEFAULT_DETECTOR_MODEL
-        logger.info(f"Received image analysis request for URL: {request.image_url}, model: {detector_model}")
-        try:
-            detector = get_detector(detector_model)
-        except ValueError as e:
-            logger.error(f"Invalid detector model: {str(e)}")
-            raise HTTPException(status_code=400, detail=str(e))
         try:
             image_bytes = await download_file(str(request.image_url))
             if not image_bytes:
                 raise HTTPException(status_code=500, detail="Failed to download image")
         except DeepfakeDetectionError as e:
             raise HTTPException(status_code=e.status_code, detail=e.message)
-        analysis_result = await detector.detect(image_bytes)
         logger.info(f"Image analysis completed. Result: {analysis_result}")
@@ -108,28 +154,43 @@ async def analyze(request: AnalysisRequest) -> AnalysisResponse:
             is_deepfake=analysis_result["is_deepfake"],
             confidence=analysis_result["confidence"],
             analysis_time=analysis_result["analysis_time"],
-            model_used=detector_model,
             content_type="image",
         )
     elif isinstance(request, VideoAnalysisRequest):
-        detector_model = request.model or settings.DEFAULT_DETECTOR_MODEL
-        logger.info(f"Received video analysis request for URL: {request.video_url}, model: {detector_model}")
-        try:
-            detector = get_detector(detector_model)
-        except ValueError as e:
-            logger.error(f"Invalid detector model: {str(e)}")
-            raise HTTPException(status_code=400, detail=str(e))
         try:
             video_bytes = await download_file(str(request.video_url))
             if not video_bytes:
                 raise HTTPException(status_code=500, detail="Failed to download video")
         except DeepfakeDetectionError as e:
             raise HTTPException(status_code=e.status_code, detail=e.message)
-        analysis_result = await detector.detect(video_bytes)
         logger.info(f"Video analysis completed. Result: {analysis_result}")
@@ -137,28 +198,43 @@ async def analyze(request: AnalysisRequest) -> AnalysisResponse:
             is_deepfake=analysis_result["is_deepfake"],
             confidence=analysis_result["confidence"],
             analysis_time=analysis_result["analysis_time"],
-            model_used=detector_model,
             content_type="video",
         )
     elif isinstance(request, FileAnalysisRequest):
-        detector_model = request.model or settings.DEFAULT_DETECTOR_MODEL
-        logger.info(f"Received file analysis request for URL: {request.file_url}, model: {detector_model}")
-        try:
-            detector = get_detector(detector_model)
-        except ValueError as e:
-            logger.error(f"Invalid detector model: {str(e)}")
-            raise HTTPException(status_code=400, detail=str(e))
         try:
             file_bytes = await download_file(str(request.file_url))
             if not file_bytes:
                 raise HTTPException(status_code=500, detail="Failed to download file")
         except DeepfakeDetectionError as e:
             raise HTTPException(status_code=e.status_code, detail=e.message)
-        analysis_result = await detector.detect(file_bytes)
         logger.info(f"File analysis completed. Result: {analysis_result}")
@@ -166,7 +242,7 @@ async def analyze(request: AnalysisRequest) -> AnalysisResponse:
             is_deepfake=analysis_result["is_deepfake"],
             confidence=analysis_result["confidence"],
             analysis_time=analysis_result["analysis_time"],
-            model_used=detector_model,
             content_type="file",
         )

 from app.services.download import download_file
 from app.services.text_analyzer import analyze_text
 from app.services.image_analyzer import analyze_image
 from app.core.config import get_settings
 from app.utils.exceptions import DeepfakeDetectionError
 router = APIRouter()
+AVAILABLE_MODELS = {
+    "text": ["yaya36095/xlm-roberta-text-detector"],
+    "image": [],
+    "video": [],
+    "file": [],
+}
+MAX_CONTENT_SIZES = {
+    "text": 5000,
+    "image": 100 * 1024 * 1024,
+    "video": 100 * 1024 * 1024,
+    "file": 100 * 1024 * 1024,
+}
 @router.get(
     "/",
     settings = get_settings()
     logger.info("Health check endpoint accessed")
     supported_types = ["text", "image", "video", "file"]
     return HealthResponse(
         status="ok",
         service="Deepfake Detection Service",
         version=settings.APP_VERSION,
+        available_models=AVAILABLE_MODELS,
         supported_types=supported_types,
     )
 )
 async def analyze(request: AnalysisRequest) -> AnalysisResponse:
     settings = get_settings()
     if isinstance(request, TextAnalysisRequest):
+        content_type = "text"
+        if len(request.text) > MAX_CONTENT_SIZES["text"]:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Text content exceeds maximum length of {MAX_CONTENT_SIZES['text']} characters"
+            )
+        if len(request.text) < 10:
+            raise HTTPException(
+                status_code=400,
+                detail="Text content must be at least 10 characters"
+            )
+        model = request.model or "yaya36095/xlm-roberta-text-detector"
+        if model not in AVAILABLE_MODELS["text"]:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Model '{model}' is not available for text analysis. Available models: {AVAILABLE_MODELS['text']}"
+            )
+        logger.info(f"Received text analysis request, length: {len(request.text)} chars, model: {model}")
         try:
+            analysis_result = await analyze_text(request.text)
         except ValueError as e:
             raise HTTPException(status_code=400, detail=str(e))
+        except Exception as e:
+            logger.error(f"Text analysis error: {str(e)}", exc_info=True)
+            raise HTTPException(status_code=500, detail="Failed to analyze text")
         logger.info(f"Text analysis completed. Result: {analysis_result}")
             is_deepfake=analysis_result["is_deepfake"],
             confidence=analysis_result["confidence"],
             analysis_time=analysis_result["analysis_time"],
+            model_used=model,
             content_type="text",
         )
     elif isinstance(request, ImageAnalysisRequest):
+        content_type = "image"
+        model = request.model
+        if not model:
+            raise HTTPException(
+                status_code=400,
+                detail=f"No model available for image analysis. Available models: {AVAILABLE_MODELS['image']}"
+            )
+        if model not in AVAILABLE_MODELS["image"]:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Model '{model}' is not available for image analysis. Available models: {AVAILABLE_MODELS['image']}"
+            )
+        logger.info(f"Received image analysis request for URL: {request.image_url}, model: {model}")
         try:
             image_bytes = await download_file(str(request.image_url))
             if not image_bytes:
                 raise HTTPException(status_code=500, detail="Failed to download image")
+            if len(image_bytes) > MAX_CONTENT_SIZES["image"]:
+                raise HTTPException(
+                    status_code=400,
+                    detail=f"Image size exceeds maximum of {MAX_CONTENT_SIZES['image']} bytes"
+                )
         except DeepfakeDetectionError as e:
             raise HTTPException(status_code=e.status_code, detail=e.message)
+        analysis_result = await analyze_image(image_bytes)
         logger.info(f"Image analysis completed. Result: {analysis_result}")
             is_deepfake=analysis_result["is_deepfake"],
             confidence=analysis_result["confidence"],
             analysis_time=analysis_result["analysis_time"],
+            model_used=model,
             content_type="image",
         )
     elif isinstance(request, VideoAnalysisRequest):
+        content_type = "video"
+        model = request.model
+        if not model:
+            raise HTTPException(
+                status_code=400,
+                detail=f"No model available for video analysis. Available models: {AVAILABLE_MODELS['video']}"
+            )
+        if model not in AVAILABLE_MODELS["video"]:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Model '{model}' is not available for video analysis. Available models: {AVAILABLE_MODELS['video']}"
+            )
+        logger.info(f"Received video analysis request for URL: {request.video_url}, model: {model}")
         try:
             video_bytes = await download_file(str(request.video_url))
             if not video_bytes:
                 raise HTTPException(status_code=500, detail="Failed to download video")
+            if len(video_bytes) > MAX_CONTENT_SIZES["video"]:
+                raise HTTPException(
+                    status_code=400,
+                    detail=f"Video size exceeds maximum of {MAX_CONTENT_SIZES['video']} bytes"
+                )
         except DeepfakeDetectionError as e:
             raise HTTPException(status_code=e.status_code, detail=e.message)
+        analysis_result = await analyze_image(video_bytes)
         logger.info(f"Video analysis completed. Result: {analysis_result}")
             is_deepfake=analysis_result["is_deepfake"],
             confidence=analysis_result["confidence"],
             analysis_time=analysis_result["analysis_time"],
+            model_used=model,
             content_type="video",
         )
     elif isinstance(request, FileAnalysisRequest):
+        content_type = "file"
+        model = request.model
+        if not model:
+            raise HTTPException(
+                status_code=400,
+                detail=f"No model available for file analysis. Available models: {AVAILABLE_MODELS['file']}"
+            )
+        if model not in AVAILABLE_MODELS["file"]:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Model '{model}' is not available for file analysis. Available models: {AVAILABLE_MODELS['file']}"
+            )
+        logger.info(f"Received file analysis request for URL: {request.file_url}, model: {model}")
         try:
             file_bytes = await download_file(str(request.file_url))
             if not file_bytes:
                 raise HTTPException(status_code=500, detail="Failed to download file")
+            if len(file_bytes) > MAX_CONTENT_SIZES["file"]:
+                raise HTTPException(
+                    status_code=400,
+                    detail=f"File size exceeds maximum of {MAX_CONTENT_SIZES['file']} bytes"
+                )
         except DeepfakeDetectionError as e:
             raise HTTPException(status_code=e.status_code, detail=e.message)
+        analysis_result = await analyze_image(file_bytes)
         logger.info(f"File analysis completed. Result: {analysis_result}")
             is_deepfake=analysis_result["is_deepfake"],
             confidence=analysis_result["confidence"],
             analysis_time=analysis_result["analysis_time"],
+            model_used=model,
             content_type="file",
         )

backend/app/models/schemas.py CHANGED Viewed

@@ -108,5 +108,5 @@ class HealthResponse(BaseModel):
     status: str = Field(..., description="Service status")
     service: str = Field(..., description="Service name")
     version: str = Field(..., description="Service version")
-    available_models: list = Field(..., description="Available detector models")
     supported_types: list = Field(..., description="Supported content types")

     status: str = Field(..., description="Service status")
     service: str = Field(..., description="Service name")
     version: str = Field(..., description="Service version")
+    available_models: dict = Field(..., description="Available detector models per content type")
     supported_types: list = Field(..., description="Supported content types")

backend/app/services/detector/__init__.py CHANGED Viewed

@@ -1,37 +1 @@
-"""Detector models for deepfake detection."""
-from app.services.detector.base import BaseDetector
-from app.services.detector.mock import MockDetector
-__all__ = ["BaseDetector", "MockDetector", "get_detector"]
-def get_detector(model_name: str = "mock") -> BaseDetector:
-    """
-    Factory function to get detector instance by model name.
-    Args:
-        model_name: Name of the detector model
-    Returns:
-        Instance of the requested detector
-    Raises:
-        ValueError: If model is not supported
-    """
-    detectors = {
-        "mock": MockDetector,
-        # Future models:
-        # "deepseek": DeepseekDetector,
-        # "openai": OpenAIDetector,
-        # "huggingface": HuggingFaceDetector,
-    }
-    if model_name not in detectors:
-        available = ", ".join(detectors.keys())
-        raise ValueError(
-            f"Detector model '{model_name}' is not supported. "
-            f"Available models: {available}"
-        )
-    return detectors[model_name]()



1

backend/app/services/detector/base.py DELETED Viewed

@@ -1,38 +0,0 @@
-"""Base detector class defining the interface for all detectors."""
-from abc import ABC, abstractmethod
-from typing import Dict, Any
-class BaseDetector(ABC):
-    """
-    Abstract base class for deepfake detectors.
-    All detector implementations should inherit from this class and implement
-    the detect() method.
-    """
-    def __init__(self, model_name: str):
-        """
-        Initialize the detector.
-        Args:
-            model_name: Name of the detector model
-        """
-        self.model_name = model_name
-    @abstractmethod
-    async def detect(self, file_bytes: bytes) -> Dict[str, Any]:
-        """
-        Detect if file is a deepfake.
-        Args:
-            file_bytes: The file contents as bytes
-        Returns:
-            Dictionary containing:
-            - is_deepfake: Boolean indicating if file is a deepfake
-            - confidence: Float between 0.0 and 1.0
-            - analysis_time: Float representing processing time
-        """
-        pass

backend/app/services/detector/mock.py DELETED Viewed

@@ -1,56 +0,0 @@
-"""Mock detector implementation for testing and development."""
-import asyncio
-import logging
-import time
-from typing import Dict, Any
-from app.services.detector.base import BaseDetector
-logger = logging.getLogger(__name__)
-class MockDetector(BaseDetector):
-    """
-    Mock detector for testing and development.
-    Simulates deepfake detection without requiring actual ML models.
-    """
-    def __init__(self):
-        """Initialize the mock detector."""
-        super().__init__("mock")
-    async def detect(self, file_bytes: bytes) -> Dict[str, Any]:
-        """
-        Simulate deepfake detection with a random result.
-        Args:
-            file_bytes: The file contents as bytes
-        Returns:
-            Dictionary with is_deepfake, confidence, and analysis_time
-        """
-        logger.info("Starting mock deepfake analysis...")
-        start_time = time.time()
-        # Simulate processing delay (1 to 2 seconds)
-        delay = 1.0 + (hash(file_bytes) % 100) / 100.0
-        await asyncio.sleep(delay)
-        analysis_time = time.time() - start_time
-        # Simulate ML model output (deterministic based on file content hash)
-        file_hash = hash(file_bytes) % 100
-        is_deepfake = file_hash > 50  # ~50% chance
-        confidence = (file_hash % 100) / 100.0
-        result = {
-            "is_deepfake": is_deepfake,
-            "confidence": round(confidence, 3),
-            "analysis_time": round(analysis_time, 3),
-        }
-        logger.info(f"Mock analysis completed. Result: {result}")
-        return result

backend/app/services/image_analyzer.py CHANGED Viewed

@@ -6,21 +6,4 @@ logger = logging.getLogger(__name__)
 async def analyze_image(image_bytes: bytes) -> Dict[str, Any]:
-    start_time = time.time()
-    logger.info(f"Starting image analysis, size: {len(image_bytes)} bytes")
-    image_hash = hash(image_bytes) % 100
-    is_deepfake = image_hash > 50
-    confidence = (image_hash % 100) / 100.0
-    analysis_time = time.time() - start_time
-    result = {
-        "is_deepfake": is_deepfake,
-        "confidence": round(confidence, 3),
-        "analysis_time": round(analysis_time, 3),
-    }
-    logger.info(f"Image analysis completed. Result: {result}")
-    return result


6
7
8	async def analyze_image(image_bytes: bytes) -> Dict[str, Any]:
9	+ raise NotImplementedError("Image analysis models not yet configured")

backend/app/services/text_analyzer.py CHANGED Viewed

@@ -1,26 +1,51 @@
 import logging
 import time
 from typing import Dict, Any
 logger = logging.getLogger(__name__)
 async def analyze_text(text: str) -> Dict[str, Any]:
     start_time = time.time()
     logger.info(f"Starting text analysis, length: {len(text)} chars")
-    text_hash = hash(text) % 100
-    is_deepfake = text_hash > 50
-    confidence = (text_hash % 100) / 100.0
     analysis_time = time.time() - start_time
-    result = {
         "is_deepfake": is_deepfake,
         "confidence": round(confidence, 3),
         "analysis_time": round(analysis_time, 3),
     }
-    logger.info(f"Text analysis completed. Result: {result}")
-    return result

 import logging
 import time
 from typing import Dict, Any
+from transformers import pipeline
 logger = logging.getLogger(__name__)
+_text_classifier = None
+def _load_model():
+    global _text_classifier
+    if _text_classifier is None:
+        logger.info("Loading XLM-RoBERTa text detector model...")
+        _text_classifier = pipeline(
+            "text-classification",
+            model="yaya36095/xlm-roberta-text-detector",
+            device=-1
+        )
+        logger.info("Text detector model loaded successfully")
+    return _text_classifier
 async def analyze_text(text: str) -> Dict[str, Any]:
+    if len(text) > 5000:
+        raise ValueError("Text content exceeds maximum length of 5000 characters")
+    if len(text) < 10:
+        raise ValueError("Text content must be at least 10 characters")
     start_time = time.time()
     logger.info(f"Starting text analysis, length: {len(text)} chars")
+    classifier = _load_model()
+    result = classifier(text)
+    label = result[0]["label"]
+    score = result[0]["score"]
+    is_deepfake = label.lower() == "fake"
+    confidence = score
     analysis_time = time.time() - start_time
+    response = {
         "is_deepfake": is_deepfake,
         "confidence": round(confidence, 3),
         "analysis_time": round(analysis_time, 3),
     }
+    logger.info(f"Text analysis completed. Result: {response}")
+    return response

backend/requirements.txt CHANGED Viewed

@@ -4,3 +4,6 @@ httpx==0.27.0
 pydantic==2.8.2
 pydantic-settings==2.3.1
 python-multipart==0.0.6

 pydantic==2.8.2
 pydantic-settings==2.3.1
 python-multipart==0.0.6
+transformers==4.41.2
+torch==2.3.1
+numpy==1.26.4