Gradi02 committed on
Commit
566faed
·
unverified ·
2 Parent(s): b171365 27e6062

Merge pull request #2 from Tobkubos/backend-setup

Browse files
backend/app/api/routes.py CHANGED
@@ -14,7 +14,6 @@ from app.models.schemas import (
14
  from app.services.download import download_file
15
  from app.services.text_analyzer import analyze_text
16
  from app.services.image_analyzer import analyze_image
17
- from app.services.detector import get_detector
18
  from app.core.config import get_settings
19
  from app.utils.exceptions import DeepfakeDetectionError
20
 
@@ -22,6 +21,20 @@ logger = logging.getLogger(__name__)
22
 
23
  router = APIRouter()
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  @router.get(
27
  "/",
@@ -33,14 +46,13 @@ async def health_check() -> HealthResponse:
33
  settings = get_settings()
34
  logger.info("Health check endpoint accessed")
35
 
36
- available_models = ["mock"]
37
  supported_types = ["text", "image", "video", "file"]
38
 
39
  return HealthResponse(
40
  status="ok",
41
  service="Deepfake Detection Service",
42
  version=settings.APP_VERSION,
43
- available_models=available_models,
44
  supported_types=supported_types,
45
  )
46
 
@@ -58,20 +70,38 @@ async def health_check() -> HealthResponse:
58
  )
59
  async def analyze(request: AnalysisRequest) -> AnalysisResponse:
60
  settings = get_settings()
61
- detector_model = None
62
 
63
  if isinstance(request, TextAnalysisRequest):
64
- detector_model = request.model or settings.DEFAULT_DETECTOR_MODEL
65
- logger.info(f"Received text analysis request, length: {len(request.text)} chars, model: {detector_model}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  try:
68
- detector = get_detector(detector_model)
69
  except ValueError as e:
70
- logger.error(f"Invalid detector model: {str(e)}")
71
  raise HTTPException(status_code=400, detail=str(e))
72
-
73
- text_bytes = request.text.encode('utf-8')
74
- analysis_result = await detector.detect(text_bytes)
75
 
76
  logger.info(f"Text analysis completed. Result: {analysis_result}")
77
 
@@ -79,28 +109,37 @@ async def analyze(request: AnalysisRequest) -> AnalysisResponse:
79
  is_deepfake=analysis_result["is_deepfake"],
80
  confidence=analysis_result["confidence"],
81
  analysis_time=analysis_result["analysis_time"],
82
- model_used=detector_model,
83
  content_type="text",
84
  )
85
 
86
  elif isinstance(request, ImageAnalysisRequest):
87
- detector_model = request.model or settings.DEFAULT_DETECTOR_MODEL
88
- logger.info(f"Received image analysis request for URL: {request.image_url}, model: {detector_model}")
89
 
90
- try:
91
- detector = get_detector(detector_model)
92
- except ValueError as e:
93
- logger.error(f"Invalid detector model: {str(e)}")
94
- raise HTTPException(status_code=400, detail=str(e))
 
 
 
95
 
96
  try:
97
  image_bytes = await download_file(str(request.image_url))
98
  if not image_bytes:
99
  raise HTTPException(status_code=500, detail="Failed to download image")
 
 
 
 
 
 
 
100
  except DeepfakeDetectionError as e:
101
  raise HTTPException(status_code=e.status_code, detail=e.message)
102
 
103
- analysis_result = await detector.detect(image_bytes)
104
 
105
  logger.info(f"Image analysis completed. Result: {analysis_result}")
106
 
@@ -108,28 +147,37 @@ async def analyze(request: AnalysisRequest) -> AnalysisResponse:
108
  is_deepfake=analysis_result["is_deepfake"],
109
  confidence=analysis_result["confidence"],
110
  analysis_time=analysis_result["analysis_time"],
111
- model_used=detector_model,
112
  content_type="image",
113
  )
114
 
115
  elif isinstance(request, VideoAnalysisRequest):
116
- detector_model = request.model or settings.DEFAULT_DETECTOR_MODEL
117
- logger.info(f"Received video analysis request for URL: {request.video_url}, model: {detector_model}")
118
 
119
- try:
120
- detector = get_detector(detector_model)
121
- except ValueError as e:
122
- logger.error(f"Invalid detector model: {str(e)}")
123
- raise HTTPException(status_code=400, detail=str(e))
 
 
 
124
 
125
  try:
126
  video_bytes = await download_file(str(request.video_url))
127
  if not video_bytes:
128
  raise HTTPException(status_code=500, detail="Failed to download video")
 
 
 
 
 
 
 
129
  except DeepfakeDetectionError as e:
130
  raise HTTPException(status_code=e.status_code, detail=e.message)
131
 
132
- analysis_result = await detector.detect(video_bytes)
133
 
134
  logger.info(f"Video analysis completed. Result: {analysis_result}")
135
 
@@ -137,28 +185,37 @@ async def analyze(request: AnalysisRequest) -> AnalysisResponse:
137
  is_deepfake=analysis_result["is_deepfake"],
138
  confidence=analysis_result["confidence"],
139
  analysis_time=analysis_result["analysis_time"],
140
- model_used=detector_model,
141
  content_type="video",
142
  )
143
 
144
  elif isinstance(request, FileAnalysisRequest):
145
- detector_model = request.model or settings.DEFAULT_DETECTOR_MODEL
146
- logger.info(f"Received file analysis request for URL: {request.file_url}, model: {detector_model}")
147
 
148
- try:
149
- detector = get_detector(detector_model)
150
- except ValueError as e:
151
- logger.error(f"Invalid detector model: {str(e)}")
152
- raise HTTPException(status_code=400, detail=str(e))
 
 
 
153
 
154
  try:
155
  file_bytes = await download_file(str(request.file_url))
156
  if not file_bytes:
157
  raise HTTPException(status_code=500, detail="Failed to download file")
 
 
 
 
 
 
 
158
  except DeepfakeDetectionError as e:
159
  raise HTTPException(status_code=e.status_code, detail=e.message)
160
 
161
- analysis_result = await detector.detect(file_bytes)
162
 
163
  logger.info(f"File analysis completed. Result: {analysis_result}")
164
 
@@ -166,7 +223,7 @@ async def analyze(request: AnalysisRequest) -> AnalysisResponse:
166
  is_deepfake=analysis_result["is_deepfake"],
167
  confidence=analysis_result["confidence"],
168
  analysis_time=analysis_result["analysis_time"],
169
- model_used=detector_model,
170
  content_type="file",
171
  )
172
 
 
14
  from app.services.download import download_file
15
  from app.services.text_analyzer import analyze_text
16
  from app.services.image_analyzer import analyze_image
 
17
  from app.core.config import get_settings
18
  from app.utils.exceptions import DeepfakeDetectionError
19
 
 
21
 
22
  router = APIRouter()
23
 
24
+ AVAILABLE_MODELS = {
25
+ "text": ["yaya36095/xlm-roberta-text-detector"],
26
+ "image": [],
27
+ "video": [],
28
+ "file": [],
29
+ }
30
+
31
+ MAX_CONTENT_SIZES = {
32
+ "text": 5000,
33
+ "image": 100 * 1024 * 1024,
34
+ "video": 100 * 1024 * 1024,
35
+ "file": 100 * 1024 * 1024,
36
+ }
37
+
38
 
39
  @router.get(
40
  "/",
 
46
  settings = get_settings()
47
  logger.info("Health check endpoint accessed")
48
 
 
49
  supported_types = ["text", "image", "video", "file"]
50
 
51
  return HealthResponse(
52
  status="ok",
53
  service="Deepfake Detection Service",
54
  version=settings.APP_VERSION,
55
+ available_models=AVAILABLE_MODELS,
56
  supported_types=supported_types,
57
  )
58
 
 
70
  )
71
  async def analyze(request: AnalysisRequest) -> AnalysisResponse:
72
  settings = get_settings()
 
73
 
74
  if isinstance(request, TextAnalysisRequest):
75
+ content_type = "text"
76
+
77
+ if len(request.text) > MAX_CONTENT_SIZES["text"]:
78
+ raise HTTPException(
79
+ status_code=400,
80
+ detail=f"Text content exceeds maximum length of {MAX_CONTENT_SIZES['text']} characters"
81
+ )
82
+
83
+ if len(request.text) < 10:
84
+ raise HTTPException(
85
+ status_code=400,
86
+ detail="Text content must be at least 10 characters"
87
+ )
88
+
89
+ if not AVAILABLE_MODELS["text"]:
90
+ raise HTTPException(
91
+ status_code=400,
92
+ detail="No model available for text analysis"
93
+ )
94
+
95
+ model = AVAILABLE_MODELS["text"][0]
96
+ logger.info(f"Received text analysis request, length: {len(request.text)} chars, model: {model}")
97
 
98
  try:
99
+ analysis_result = await analyze_text(request.text)
100
  except ValueError as e:
 
101
  raise HTTPException(status_code=400, detail=str(e))
102
+ except Exception as e:
103
+ logger.error(f"Text analysis error: {str(e)}", exc_info=True)
104
+ raise HTTPException(status_code=500, detail="Failed to analyze text")
105
 
106
  logger.info(f"Text analysis completed. Result: {analysis_result}")
107
 
 
109
  is_deepfake=analysis_result["is_deepfake"],
110
  confidence=analysis_result["confidence"],
111
  analysis_time=analysis_result["analysis_time"],
112
+ model_used=model,
113
  content_type="text",
114
  )
115
 
116
  elif isinstance(request, ImageAnalysisRequest):
117
+ content_type = "image"
 
118
 
119
+ if not AVAILABLE_MODELS["image"]:
120
+ raise HTTPException(
121
+ status_code=400,
122
+ detail="No model available for image analysis"
123
+ )
124
+
125
+ model = AVAILABLE_MODELS["image"][0]
126
+ logger.info(f"Received image analysis request for URL: {request.image_url}, model: {model}")
127
 
128
  try:
129
  image_bytes = await download_file(str(request.image_url))
130
  if not image_bytes:
131
  raise HTTPException(status_code=500, detail="Failed to download image")
132
+
133
+ if len(image_bytes) > MAX_CONTENT_SIZES["image"]:
134
+ raise HTTPException(
135
+ status_code=400,
136
+ detail=f"Image size exceeds maximum of {MAX_CONTENT_SIZES['image']} bytes"
137
+ )
138
+
139
  except DeepfakeDetectionError as e:
140
  raise HTTPException(status_code=e.status_code, detail=e.message)
141
 
142
+ analysis_result = await analyze_image(image_bytes)
143
 
144
  logger.info(f"Image analysis completed. Result: {analysis_result}")
145
 
 
147
  is_deepfake=analysis_result["is_deepfake"],
148
  confidence=analysis_result["confidence"],
149
  analysis_time=analysis_result["analysis_time"],
150
+ model_used=model,
151
  content_type="image",
152
  )
153
 
154
  elif isinstance(request, VideoAnalysisRequest):
155
+ content_type = "video"
 
156
 
157
+ if not AVAILABLE_MODELS["video"]:
158
+ raise HTTPException(
159
+ status_code=400,
160
+ detail="No model available for video analysis"
161
+ )
162
+
163
+ model = AVAILABLE_MODELS["video"][0]
164
+ logger.info(f"Received video analysis request for URL: {request.video_url}, model: {model}")
165
 
166
  try:
167
  video_bytes = await download_file(str(request.video_url))
168
  if not video_bytes:
169
  raise HTTPException(status_code=500, detail="Failed to download video")
170
+
171
+ if len(video_bytes) > MAX_CONTENT_SIZES["video"]:
172
+ raise HTTPException(
173
+ status_code=400,
174
+ detail=f"Video size exceeds maximum of {MAX_CONTENT_SIZES['video']} bytes"
175
+ )
176
+
177
  except DeepfakeDetectionError as e:
178
  raise HTTPException(status_code=e.status_code, detail=e.message)
179
 
180
+ analysis_result = await analyze_image(video_bytes)
181
 
182
  logger.info(f"Video analysis completed. Result: {analysis_result}")
183
 
 
185
  is_deepfake=analysis_result["is_deepfake"],
186
  confidence=analysis_result["confidence"],
187
  analysis_time=analysis_result["analysis_time"],
188
+ model_used=model,
189
  content_type="video",
190
  )
191
 
192
  elif isinstance(request, FileAnalysisRequest):
193
+ content_type = "file"
 
194
 
195
+ if not AVAILABLE_MODELS["file"]:
196
+ raise HTTPException(
197
+ status_code=400,
198
+ detail="No model available for file analysis"
199
+ )
200
+
201
+ model = AVAILABLE_MODELS["file"][0]
202
+ logger.info(f"Received file analysis request for URL: {request.file_url}, model: {model}")
203
 
204
  try:
205
  file_bytes = await download_file(str(request.file_url))
206
  if not file_bytes:
207
  raise HTTPException(status_code=500, detail="Failed to download file")
208
+
209
+ if len(file_bytes) > MAX_CONTENT_SIZES["file"]:
210
+ raise HTTPException(
211
+ status_code=400,
212
+ detail=f"File size exceeds maximum of {MAX_CONTENT_SIZES['file']} bytes"
213
+ )
214
+
215
  except DeepfakeDetectionError as e:
216
  raise HTTPException(status_code=e.status_code, detail=e.message)
217
 
218
+ analysis_result = await analyze_image(file_bytes)
219
 
220
  logger.info(f"File analysis completed. Result: {analysis_result}")
221
 
 
223
  is_deepfake=analysis_result["is_deepfake"],
224
  confidence=analysis_result["confidence"],
225
  analysis_time=analysis_result["analysis_time"],
226
+ model_used=model,
227
  content_type="file",
228
  )
229
 
backend/app/models/schemas.py CHANGED
@@ -1,18 +1,16 @@
1
  from pydantic import BaseModel, HttpUrl, Field
2
- from typing import Optional, Union, Literal
3
 
4
 
5
  class TextAnalysisRequest(BaseModel):
6
  content_type: Literal["text"]
7
  text: str = Field(..., description="Text content to analyze for deepfake detection")
8
- model: Optional[str] = Field(None, description="Detector model to use")
9
 
10
  class Config:
11
  json_schema_extra = {
12
  "example": {
13
  "content_type": "text",
14
- "text": "Some text that might be AI-generated",
15
- "model": "mock"
16
  }
17
  }
18
 
@@ -20,14 +18,12 @@ class TextAnalysisRequest(BaseModel):
20
  class ImageAnalysisRequest(BaseModel):
21
  content_type: Literal["image"]
22
  image_url: HttpUrl = Field(..., description="URL of the image to analyze")
23
- model: Optional[str] = Field(None, description="Detector model to use")
24
 
25
  class Config:
26
  json_schema_extra = {
27
  "example": {
28
  "content_type": "image",
29
- "image_url": "https://example.com/image.jpg",
30
- "model": "mock"
31
  }
32
  }
33
 
@@ -35,14 +31,12 @@ class ImageAnalysisRequest(BaseModel):
35
  class VideoAnalysisRequest(BaseModel):
36
  content_type: Literal["video"]
37
  video_url: HttpUrl = Field(..., description="URL of the video to analyze")
38
- model: Optional[str] = Field(None, description="Detector model to use")
39
 
40
  class Config:
41
  json_schema_extra = {
42
  "example": {
43
  "content_type": "video",
44
- "video_url": "https://example.com/video.mp4",
45
- "model": "mock"
46
  }
47
  }
48
 
@@ -50,14 +44,12 @@ class VideoAnalysisRequest(BaseModel):
50
  class FileAnalysisRequest(BaseModel):
51
  content_type: Literal["file"]
52
  file_url: HttpUrl = Field(..., description="URL of the file to analyze")
53
- model: Optional[str] = Field(None, description="Detector model to use")
54
 
55
  class Config:
56
  json_schema_extra = {
57
  "example": {
58
  "content_type": "file",
59
- "file_url": "https://example.com/video.mp4",
60
- "model": "mock"
61
  }
62
  }
63
 
@@ -108,5 +100,5 @@ class HealthResponse(BaseModel):
108
  status: str = Field(..., description="Service status")
109
  service: str = Field(..., description="Service name")
110
  version: str = Field(..., description="Service version")
111
- available_models: list = Field(..., description="Available detector models")
112
  supported_types: list = Field(..., description="Supported content types")
 
1
  from pydantic import BaseModel, HttpUrl, Field
2
+ from typing import Union, Literal, Optional
3
 
4
 
5
  class TextAnalysisRequest(BaseModel):
6
  content_type: Literal["text"]
7
  text: str = Field(..., description="Text content to analyze for deepfake detection")
 
8
 
9
  class Config:
10
  json_schema_extra = {
11
  "example": {
12
  "content_type": "text",
13
+ "text": "Some text that might be AI-generated"
 
14
  }
15
  }
16
 
 
18
  class ImageAnalysisRequest(BaseModel):
19
  content_type: Literal["image"]
20
  image_url: HttpUrl = Field(..., description="URL of the image to analyze")
 
21
 
22
  class Config:
23
  json_schema_extra = {
24
  "example": {
25
  "content_type": "image",
26
+ "image_url": "https://example.com/image.jpg"
 
27
  }
28
  }
29
 
 
31
  class VideoAnalysisRequest(BaseModel):
32
  content_type: Literal["video"]
33
  video_url: HttpUrl = Field(..., description="URL of the video to analyze")
 
34
 
35
  class Config:
36
  json_schema_extra = {
37
  "example": {
38
  "content_type": "video",
39
+ "video_url": "https://example.com/video.mp4"
 
40
  }
41
  }
42
 
 
44
  class FileAnalysisRequest(BaseModel):
45
  content_type: Literal["file"]
46
  file_url: HttpUrl = Field(..., description="URL of the file to analyze")
 
47
 
48
  class Config:
49
  json_schema_extra = {
50
  "example": {
51
  "content_type": "file",
52
+ "file_url": "https://example.com/video.mp4"
 
53
  }
54
  }
55
 
 
100
  status: str = Field(..., description="Service status")
101
  service: str = Field(..., description="Service name")
102
  version: str = Field(..., description="Service version")
103
+ available_models: dict = Field(..., description="Available detector models per content type")
104
  supported_types: list = Field(..., description="Supported content types")
backend/app/services/detector/__init__.py CHANGED
@@ -1,37 +1 @@
1
- """Detector models for deepfake detection."""
2
 
3
- from app.services.detector.base import BaseDetector
4
- from app.services.detector.mock import MockDetector
5
-
6
- __all__ = ["BaseDetector", "MockDetector", "get_detector"]
7
-
8
-
9
- def get_detector(model_name: str = "mock") -> BaseDetector:
10
- """
11
- Factory function to get detector instance by model name.
12
-
13
- Args:
14
- model_name: Name of the detector model
15
-
16
- Returns:
17
- Instance of the requested detector
18
-
19
- Raises:
20
- ValueError: If model is not supported
21
- """
22
- detectors = {
23
- "mock": MockDetector,
24
- # Future models:
25
- # "deepseek": DeepseekDetector,
26
- # "openai": OpenAIDetector,
27
- # "huggingface": HuggingFaceDetector,
28
- }
29
-
30
- if model_name not in detectors:
31
- available = ", ".join(detectors.keys())
32
- raise ValueError(
33
- f"Detector model '{model_name}' is not supported. "
34
- f"Available models: {available}"
35
- )
36
-
37
- return detectors[model_name]()
 
 
1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/app/services/detector/base.py DELETED
@@ -1,38 +0,0 @@
1
- """Base detector class defining the interface for all detectors."""
2
-
3
- from abc import ABC, abstractmethod
4
- from typing import Dict, Any
5
-
6
-
7
- class BaseDetector(ABC):
8
- """
9
- Abstract base class for deepfake detectors.
10
-
11
- All detector implementations should inherit from this class and implement
12
- the detect() method.
13
- """
14
-
15
- def __init__(self, model_name: str):
16
- """
17
- Initialize the detector.
18
-
19
- Args:
20
- model_name: Name of the detector model
21
- """
22
- self.model_name = model_name
23
-
24
- @abstractmethod
25
- async def detect(self, file_bytes: bytes) -> Dict[str, Any]:
26
- """
27
- Detect if file is a deepfake.
28
-
29
- Args:
30
- file_bytes: The file contents as bytes
31
-
32
- Returns:
33
- Dictionary containing:
34
- - is_deepfake: Boolean indicating if file is a deepfake
35
- - confidence: Float between 0.0 and 1.0
36
- - analysis_time: Float representing processing time
37
- """
38
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/app/services/detector/mock.py DELETED
@@ -1,56 +0,0 @@
1
- """Mock detector implementation for testing and development."""
2
-
3
- import asyncio
4
- import logging
5
- import time
6
- from typing import Dict, Any
7
-
8
- from app.services.detector.base import BaseDetector
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
- class MockDetector(BaseDetector):
14
- """
15
- Mock detector for testing and development.
16
-
17
- Simulates deepfake detection without requiring actual ML models.
18
- """
19
-
20
- def __init__(self):
21
- """Initialize the mock detector."""
22
- super().__init__("mock")
23
-
24
- async def detect(self, file_bytes: bytes) -> Dict[str, Any]:
25
- """
26
- Simulate deepfake detection with a random result.
27
-
28
- Args:
29
- file_bytes: The file contents as bytes
30
-
31
- Returns:
32
- Dictionary with is_deepfake, confidence, and analysis_time
33
- """
34
- logger.info("Starting mock deepfake analysis...")
35
-
36
- start_time = time.time()
37
-
38
- # Simulate processing delay (1 to 2 seconds)
39
- delay = 1.0 + (hash(file_bytes) % 100) / 100.0
40
- await asyncio.sleep(delay)
41
-
42
- analysis_time = time.time() - start_time
43
-
44
- # Simulate ML model output (deterministic based on file content hash)
45
- file_hash = hash(file_bytes) % 100
46
- is_deepfake = file_hash > 50 # ~50% chance
47
- confidence = (file_hash % 100) / 100.0
48
-
49
- result = {
50
- "is_deepfake": is_deepfake,
51
- "confidence": round(confidence, 3),
52
- "analysis_time": round(analysis_time, 3),
53
- }
54
-
55
- logger.info(f"Mock analysis completed. Result: {result}")
56
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/app/services/image_analyzer.py CHANGED
@@ -6,21 +6,4 @@ logger = logging.getLogger(__name__)
6
 
7
 
8
  async def analyze_image(image_bytes: bytes) -> Dict[str, Any]:
9
- start_time = time.time()
10
-
11
- logger.info(f"Starting image analysis, size: {len(image_bytes)} bytes")
12
-
13
- image_hash = hash(image_bytes) % 100
14
- is_deepfake = image_hash > 50
15
- confidence = (image_hash % 100) / 100.0
16
-
17
- analysis_time = time.time() - start_time
18
-
19
- result = {
20
- "is_deepfake": is_deepfake,
21
- "confidence": round(confidence, 3),
22
- "analysis_time": round(analysis_time, 3),
23
- }
24
-
25
- logger.info(f"Image analysis completed. Result: {result}")
26
- return result
 
6
 
7
 
8
  async def analyze_image(image_bytes: bytes) -> Dict[str, Any]:
9
+ raise NotImplementedError("Image analysis models not yet configured")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/app/services/text_analyzer.py CHANGED
@@ -1,26 +1,51 @@
1
  import logging
2
  import time
3
  from typing import Dict, Any
 
4
 
5
  logger = logging.getLogger(__name__)
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  async def analyze_text(text: str) -> Dict[str, Any]:
 
 
 
 
 
 
9
  start_time = time.time()
10
 
11
  logger.info(f"Starting text analysis, length: {len(text)} chars")
12
 
13
- text_hash = hash(text) % 100
14
- is_deepfake = text_hash > 50
15
- confidence = (text_hash % 100) / 100.0
 
 
 
 
 
16
 
17
  analysis_time = time.time() - start_time
18
 
19
- result = {
20
  "is_deepfake": is_deepfake,
21
  "confidence": round(confidence, 3),
22
  "analysis_time": round(analysis_time, 3),
23
  }
24
 
25
- logger.info(f"Text analysis completed. Result: {result}")
26
- return result
 
1
  import logging
2
  import time
3
  from typing import Dict, Any
4
+ from transformers import pipeline
5
 
6
  logger = logging.getLogger(__name__)
7
 
8
+ _text_classifier = None
9
+
10
+ def _load_model():
11
+ global _text_classifier
12
+ if _text_classifier is None:
13
+ logger.info("Loading XLM-RoBERTa text detector model...")
14
+ _text_classifier = pipeline(
15
+ "text-classification",
16
+ model="yaya36095/xlm-roberta-text-detector",
17
+ device=-1
18
+ )
19
+ logger.info("Text detector model loaded successfully")
20
+ return _text_classifier
21
 
22
  async def analyze_text(text: str) -> Dict[str, Any]:
23
+ if len(text) > 5000:
24
+ raise ValueError("Text content exceeds maximum length of 5000 characters")
25
+
26
+ if len(text) < 10:
27
+ raise ValueError("Text content must be at least 10 characters")
28
+
29
  start_time = time.time()
30
 
31
  logger.info(f"Starting text analysis, length: {len(text)} chars")
32
 
33
+ classifier = _load_model()
34
+ result = classifier(text)
35
+
36
+ label = result[0]["label"]
37
+ score = result[0]["score"]
38
+
39
+ is_deepfake = label.lower() == "fake"
40
+ confidence = score
41
 
42
  analysis_time = time.time() - start_time
43
 
44
+ response = {
45
  "is_deepfake": is_deepfake,
46
  "confidence": round(confidence, 3),
47
  "analysis_time": round(analysis_time, 3),
48
  }
49
 
50
+ logger.info(f"Text analysis completed. Result: {response}")
51
+ return response
backend/requirements.txt CHANGED
@@ -4,3 +4,8 @@ httpx==0.27.0
4
  pydantic==2.8.2
5
  pydantic-settings==2.3.1
6
  python-multipart==0.0.6
 
 
 
 
 
 
4
  pydantic==2.8.2
5
  pydantic-settings==2.3.1
6
  python-multipart==0.0.6
7
+ transformers==4.41.2
8
+ torch==2.3.1
9
+ numpy==1.26.4
10
+ sentencepiece
11
+ protobuf