Gradii committed on
Commit
dcb5a1a
·
1 Parent(s): 6b15151

first text model

Browse files
backend/app/api/routes.py CHANGED
@@ -14,7 +14,6 @@ from app.models.schemas import (
14
  from app.services.download import download_file
15
  from app.services.text_analyzer import analyze_text
16
  from app.services.image_analyzer import analyze_image
17
- from app.services.detector import get_detector
18
  from app.core.config import get_settings
19
  from app.utils.exceptions import DeepfakeDetectionError
20
 
@@ -22,6 +21,20 @@ logger = logging.getLogger(__name__)
22
 
23
  router = APIRouter()
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  @router.get(
27
  "/",
@@ -33,14 +46,13 @@ async def health_check() -> HealthResponse:
33
  settings = get_settings()
34
  logger.info("Health check endpoint accessed")
35
 
36
- available_models = ["mock"]
37
  supported_types = ["text", "image", "video", "file"]
38
 
39
  return HealthResponse(
40
  status="ok",
41
  service="Deepfake Detection Service",
42
  version=settings.APP_VERSION,
43
- available_models=available_models,
44
  supported_types=supported_types,
45
  )
46
 
@@ -58,20 +70,39 @@ async def health_check() -> HealthResponse:
58
  )
59
  async def analyze(request: AnalysisRequest) -> AnalysisResponse:
60
  settings = get_settings()
61
- detector_model = None
62
 
63
  if isinstance(request, TextAnalysisRequest):
64
- detector_model = request.model or settings.DEFAULT_DETECTOR_MODEL
65
- logger.info(f"Received text analysis request, length: {len(request.text)} chars, model: {detector_model}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  try:
68
- detector = get_detector(detector_model)
69
  except ValueError as e:
70
- logger.error(f"Invalid detector model: {str(e)}")
71
  raise HTTPException(status_code=400, detail=str(e))
72
-
73
- text_bytes = request.text.encode('utf-8')
74
- analysis_result = await detector.detect(text_bytes)
75
 
76
  logger.info(f"Text analysis completed. Result: {analysis_result}")
77
 
@@ -79,28 +110,43 @@ async def analyze(request: AnalysisRequest) -> AnalysisResponse:
79
  is_deepfake=analysis_result["is_deepfake"],
80
  confidence=analysis_result["confidence"],
81
  analysis_time=analysis_result["analysis_time"],
82
- model_used=detector_model,
83
  content_type="text",
84
  )
85
 
86
  elif isinstance(request, ImageAnalysisRequest):
87
- detector_model = request.model or settings.DEFAULT_DETECTOR_MODEL
88
- logger.info(f"Received image analysis request for URL: {request.image_url}, model: {detector_model}")
89
 
90
- try:
91
- detector = get_detector(detector_model)
92
- except ValueError as e:
93
- logger.error(f"Invalid detector model: {str(e)}")
94
- raise HTTPException(status_code=400, detail=str(e))
 
 
 
 
 
 
 
 
95
 
96
  try:
97
  image_bytes = await download_file(str(request.image_url))
98
  if not image_bytes:
99
  raise HTTPException(status_code=500, detail="Failed to download image")
 
 
 
 
 
 
 
100
  except DeepfakeDetectionError as e:
101
  raise HTTPException(status_code=e.status_code, detail=e.message)
102
 
103
- analysis_result = await detector.detect(image_bytes)
104
 
105
  logger.info(f"Image analysis completed. Result: {analysis_result}")
106
 
@@ -108,28 +154,43 @@ async def analyze(request: AnalysisRequest) -> AnalysisResponse:
108
  is_deepfake=analysis_result["is_deepfake"],
109
  confidence=analysis_result["confidence"],
110
  analysis_time=analysis_result["analysis_time"],
111
- model_used=detector_model,
112
  content_type="image",
113
  )
114
 
115
  elif isinstance(request, VideoAnalysisRequest):
116
- detector_model = request.model or settings.DEFAULT_DETECTOR_MODEL
117
- logger.info(f"Received video analysis request for URL: {request.video_url}, model: {detector_model}")
118
 
119
- try:
120
- detector = get_detector(detector_model)
121
- except ValueError as e:
122
- logger.error(f"Invalid detector model: {str(e)}")
123
- raise HTTPException(status_code=400, detail=str(e))
 
 
 
 
 
 
 
 
124
 
125
  try:
126
  video_bytes = await download_file(str(request.video_url))
127
  if not video_bytes:
128
  raise HTTPException(status_code=500, detail="Failed to download video")
 
 
 
 
 
 
 
129
  except DeepfakeDetectionError as e:
130
  raise HTTPException(status_code=e.status_code, detail=e.message)
131
 
132
- analysis_result = await detector.detect(video_bytes)
133
 
134
  logger.info(f"Video analysis completed. Result: {analysis_result}")
135
 
@@ -137,28 +198,43 @@ async def analyze(request: AnalysisRequest) -> AnalysisResponse:
137
  is_deepfake=analysis_result["is_deepfake"],
138
  confidence=analysis_result["confidence"],
139
  analysis_time=analysis_result["analysis_time"],
140
- model_used=detector_model,
141
  content_type="video",
142
  )
143
 
144
  elif isinstance(request, FileAnalysisRequest):
145
- detector_model = request.model or settings.DEFAULT_DETECTOR_MODEL
146
- logger.info(f"Received file analysis request for URL: {request.file_url}, model: {detector_model}")
147
 
148
- try:
149
- detector = get_detector(detector_model)
150
- except ValueError as e:
151
- logger.error(f"Invalid detector model: {str(e)}")
152
- raise HTTPException(status_code=400, detail=str(e))
 
 
 
 
 
 
 
 
153
 
154
  try:
155
  file_bytes = await download_file(str(request.file_url))
156
  if not file_bytes:
157
  raise HTTPException(status_code=500, detail="Failed to download file")
 
 
 
 
 
 
 
158
  except DeepfakeDetectionError as e:
159
  raise HTTPException(status_code=e.status_code, detail=e.message)
160
 
161
- analysis_result = await detector.detect(file_bytes)
162
 
163
  logger.info(f"File analysis completed. Result: {analysis_result}")
164
 
@@ -166,7 +242,7 @@ async def analyze(request: AnalysisRequest) -> AnalysisResponse:
166
  is_deepfake=analysis_result["is_deepfake"],
167
  confidence=analysis_result["confidence"],
168
  analysis_time=analysis_result["analysis_time"],
169
- model_used=detector_model,
170
  content_type="file",
171
  )
172
 
 
14
  from app.services.download import download_file
15
  from app.services.text_analyzer import analyze_text
16
  from app.services.image_analyzer import analyze_image
 
17
  from app.core.config import get_settings
18
  from app.utils.exceptions import DeepfakeDetectionError
19
 
 
21
 
22
  router = APIRouter()
23
 
24
+ AVAILABLE_MODELS = {
25
+ "text": ["yaya36095/xlm-roberta-text-detector"],
26
+ "image": [],
27
+ "video": [],
28
+ "file": [],
29
+ }
30
+
31
+ MAX_CONTENT_SIZES = {
32
+ "text": 5000,
33
+ "image": 100 * 1024 * 1024,
34
+ "video": 100 * 1024 * 1024,
35
+ "file": 100 * 1024 * 1024,
36
+ }
37
+
38
 
39
  @router.get(
40
  "/",
 
46
  settings = get_settings()
47
  logger.info("Health check endpoint accessed")
48
 
 
49
  supported_types = ["text", "image", "video", "file"]
50
 
51
  return HealthResponse(
52
  status="ok",
53
  service="Deepfake Detection Service",
54
  version=settings.APP_VERSION,
55
+ available_models=AVAILABLE_MODELS,
56
  supported_types=supported_types,
57
  )
58
 
 
70
  )
71
  async def analyze(request: AnalysisRequest) -> AnalysisResponse:
72
  settings = get_settings()
 
73
 
74
  if isinstance(request, TextAnalysisRequest):
75
+ content_type = "text"
76
+
77
+ if len(request.text) > MAX_CONTENT_SIZES["text"]:
78
+ raise HTTPException(
79
+ status_code=400,
80
+ detail=f"Text content exceeds maximum length of {MAX_CONTENT_SIZES['text']} characters"
81
+ )
82
+
83
+ if len(request.text) < 10:
84
+ raise HTTPException(
85
+ status_code=400,
86
+ detail="Text content must be at least 10 characters"
87
+ )
88
+
89
+ model = request.model or "yaya36095/xlm-roberta-text-detector"
90
+
91
+ if model not in AVAILABLE_MODELS["text"]:
92
+ raise HTTPException(
93
+ status_code=400,
94
+ detail=f"Model '{model}' is not available for text analysis. Available models: {AVAILABLE_MODELS['text']}"
95
+ )
96
+
97
+ logger.info(f"Received text analysis request, length: {len(request.text)} chars, model: {model}")
98
 
99
  try:
100
+ analysis_result = await analyze_text(request.text)
101
  except ValueError as e:
 
102
  raise HTTPException(status_code=400, detail=str(e))
103
+ except Exception as e:
104
+ logger.error(f"Text analysis error: {str(e)}", exc_info=True)
105
+ raise HTTPException(status_code=500, detail="Failed to analyze text")
106
 
107
  logger.info(f"Text analysis completed. Result: {analysis_result}")
108
 
 
110
  is_deepfake=analysis_result["is_deepfake"],
111
  confidence=analysis_result["confidence"],
112
  analysis_time=analysis_result["analysis_time"],
113
+ model_used=model,
114
  content_type="text",
115
  )
116
 
117
  elif isinstance(request, ImageAnalysisRequest):
118
+ content_type = "image"
119
+ model = request.model
120
 
121
+ if not model:
122
+ raise HTTPException(
123
+ status_code=400,
124
+ detail=f"No model available for image analysis. Available models: {AVAILABLE_MODELS['image']}"
125
+ )
126
+
127
+ if model not in AVAILABLE_MODELS["image"]:
128
+ raise HTTPException(
129
+ status_code=400,
130
+ detail=f"Model '{model}' is not available for image analysis. Available models: {AVAILABLE_MODELS['image']}"
131
+ )
132
+
133
+ logger.info(f"Received image analysis request for URL: {request.image_url}, model: {model}")
134
 
135
  try:
136
  image_bytes = await download_file(str(request.image_url))
137
  if not image_bytes:
138
  raise HTTPException(status_code=500, detail="Failed to download image")
139
+
140
+ if len(image_bytes) > MAX_CONTENT_SIZES["image"]:
141
+ raise HTTPException(
142
+ status_code=400,
143
+ detail=f"Image size exceeds maximum of {MAX_CONTENT_SIZES['image']} bytes"
144
+ )
145
+
146
  except DeepfakeDetectionError as e:
147
  raise HTTPException(status_code=e.status_code, detail=e.message)
148
 
149
+ analysis_result = await analyze_image(image_bytes)
150
 
151
  logger.info(f"Image analysis completed. Result: {analysis_result}")
152
 
 
154
  is_deepfake=analysis_result["is_deepfake"],
155
  confidence=analysis_result["confidence"],
156
  analysis_time=analysis_result["analysis_time"],
157
+ model_used=model,
158
  content_type="image",
159
  )
160
 
161
  elif isinstance(request, VideoAnalysisRequest):
162
+ content_type = "video"
163
+ model = request.model
164
 
165
+ if not model:
166
+ raise HTTPException(
167
+ status_code=400,
168
+ detail=f"No model available for video analysis. Available models: {AVAILABLE_MODELS['video']}"
169
+ )
170
+
171
+ if model not in AVAILABLE_MODELS["video"]:
172
+ raise HTTPException(
173
+ status_code=400,
174
+ detail=f"Model '{model}' is not available for video analysis. Available models: {AVAILABLE_MODELS['video']}"
175
+ )
176
+
177
+ logger.info(f"Received video analysis request for URL: {request.video_url}, model: {model}")
178
 
179
  try:
180
  video_bytes = await download_file(str(request.video_url))
181
  if not video_bytes:
182
  raise HTTPException(status_code=500, detail="Failed to download video")
183
+
184
+ if len(video_bytes) > MAX_CONTENT_SIZES["video"]:
185
+ raise HTTPException(
186
+ status_code=400,
187
+ detail=f"Video size exceeds maximum of {MAX_CONTENT_SIZES['video']} bytes"
188
+ )
189
+
190
  except DeepfakeDetectionError as e:
191
  raise HTTPException(status_code=e.status_code, detail=e.message)
192
 
193
+ analysis_result = await analyze_image(video_bytes)
194
 
195
  logger.info(f"Video analysis completed. Result: {analysis_result}")
196
 
 
198
  is_deepfake=analysis_result["is_deepfake"],
199
  confidence=analysis_result["confidence"],
200
  analysis_time=analysis_result["analysis_time"],
201
+ model_used=model,
202
  content_type="video",
203
  )
204
 
205
  elif isinstance(request, FileAnalysisRequest):
206
+ content_type = "file"
207
+ model = request.model
208
 
209
+ if not model:
210
+ raise HTTPException(
211
+ status_code=400,
212
+ detail=f"No model available for file analysis. Available models: {AVAILABLE_MODELS['file']}"
213
+ )
214
+
215
+ if model not in AVAILABLE_MODELS["file"]:
216
+ raise HTTPException(
217
+ status_code=400,
218
+ detail=f"Model '{model}' is not available for file analysis. Available models: {AVAILABLE_MODELS['file']}"
219
+ )
220
+
221
+ logger.info(f"Received file analysis request for URL: {request.file_url}, model: {model}")
222
 
223
  try:
224
  file_bytes = await download_file(str(request.file_url))
225
  if not file_bytes:
226
  raise HTTPException(status_code=500, detail="Failed to download file")
227
+
228
+ if len(file_bytes) > MAX_CONTENT_SIZES["file"]:
229
+ raise HTTPException(
230
+ status_code=400,
231
+ detail=f"File size exceeds maximum of {MAX_CONTENT_SIZES['file']} bytes"
232
+ )
233
+
234
  except DeepfakeDetectionError as e:
235
  raise HTTPException(status_code=e.status_code, detail=e.message)
236
 
237
+ analysis_result = await analyze_image(file_bytes)
238
 
239
  logger.info(f"File analysis completed. Result: {analysis_result}")
240
 
 
242
  is_deepfake=analysis_result["is_deepfake"],
243
  confidence=analysis_result["confidence"],
244
  analysis_time=analysis_result["analysis_time"],
245
+ model_used=model,
246
  content_type="file",
247
  )
248
 
backend/app/models/schemas.py CHANGED
@@ -108,5 +108,5 @@ class HealthResponse(BaseModel):
108
  status: str = Field(..., description="Service status")
109
  service: str = Field(..., description="Service name")
110
  version: str = Field(..., description="Service version")
111
- available_models: list = Field(..., description="Available detector models")
112
  supported_types: list = Field(..., description="Supported content types")
 
108
  status: str = Field(..., description="Service status")
109
  service: str = Field(..., description="Service name")
110
  version: str = Field(..., description="Service version")
111
+ available_models: dict = Field(..., description="Available detector models per content type")
112
  supported_types: list = Field(..., description="Supported content types")
backend/app/services/detector/__init__.py CHANGED
@@ -1,37 +1 @@
1
- """Detector models for deepfake detection."""
2
 
3
- from app.services.detector.base import BaseDetector
4
- from app.services.detector.mock import MockDetector
5
-
6
- __all__ = ["BaseDetector", "MockDetector", "get_detector"]
7
-
8
-
9
- def get_detector(model_name: str = "mock") -> BaseDetector:
10
- """
11
- Factory function to get detector instance by model name.
12
-
13
- Args:
14
- model_name: Name of the detector model
15
-
16
- Returns:
17
- Instance of the requested detector
18
-
19
- Raises:
20
- ValueError: If model is not supported
21
- """
22
- detectors = {
23
- "mock": MockDetector,
24
- # Future models:
25
- # "deepseek": DeepseekDetector,
26
- # "openai": OpenAIDetector,
27
- # "huggingface": HuggingFaceDetector,
28
- }
29
-
30
- if model_name not in detectors:
31
- available = ", ".join(detectors.keys())
32
- raise ValueError(
33
- f"Detector model '{model_name}' is not supported. "
34
- f"Available models: {available}"
35
- )
36
-
37
- return detectors[model_name]()
 
 
1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/app/services/detector/base.py DELETED
@@ -1,38 +0,0 @@
1
- """Base detector class defining the interface for all detectors."""
2
-
3
- from abc import ABC, abstractmethod
4
- from typing import Dict, Any
5
-
6
-
7
- class BaseDetector(ABC):
8
- """
9
- Abstract base class for deepfake detectors.
10
-
11
- All detector implementations should inherit from this class and implement
12
- the detect() method.
13
- """
14
-
15
- def __init__(self, model_name: str):
16
- """
17
- Initialize the detector.
18
-
19
- Args:
20
- model_name: Name of the detector model
21
- """
22
- self.model_name = model_name
23
-
24
- @abstractmethod
25
- async def detect(self, file_bytes: bytes) -> Dict[str, Any]:
26
- """
27
- Detect if file is a deepfake.
28
-
29
- Args:
30
- file_bytes: The file contents as bytes
31
-
32
- Returns:
33
- Dictionary containing:
34
- - is_deepfake: Boolean indicating if file is a deepfake
35
- - confidence: Float between 0.0 and 1.0
36
- - analysis_time: Float representing processing time
37
- """
38
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/app/services/detector/mock.py DELETED
@@ -1,56 +0,0 @@
1
- """Mock detector implementation for testing and development."""
2
-
3
- import asyncio
4
- import logging
5
- import time
6
- from typing import Dict, Any
7
-
8
- from app.services.detector.base import BaseDetector
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
- class MockDetector(BaseDetector):
14
- """
15
- Mock detector for testing and development.
16
-
17
- Simulates deepfake detection without requiring actual ML models.
18
- """
19
-
20
- def __init__(self):
21
- """Initialize the mock detector."""
22
- super().__init__("mock")
23
-
24
- async def detect(self, file_bytes: bytes) -> Dict[str, Any]:
25
- """
26
- Simulate deepfake detection with a random result.
27
-
28
- Args:
29
- file_bytes: The file contents as bytes
30
-
31
- Returns:
32
- Dictionary with is_deepfake, confidence, and analysis_time
33
- """
34
- logger.info("Starting mock deepfake analysis...")
35
-
36
- start_time = time.time()
37
-
38
- # Simulate processing delay (1 to 2 seconds)
39
- delay = 1.0 + (hash(file_bytes) % 100) / 100.0
40
- await asyncio.sleep(delay)
41
-
42
- analysis_time = time.time() - start_time
43
-
44
- # Simulate ML model output (deterministic based on file content hash)
45
- file_hash = hash(file_bytes) % 100
46
- is_deepfake = file_hash > 50 # ~50% chance
47
- confidence = (file_hash % 100) / 100.0
48
-
49
- result = {
50
- "is_deepfake": is_deepfake,
51
- "confidence": round(confidence, 3),
52
- "analysis_time": round(analysis_time, 3),
53
- }
54
-
55
- logger.info(f"Mock analysis completed. Result: {result}")
56
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/app/services/image_analyzer.py CHANGED
@@ -6,21 +6,4 @@ logger = logging.getLogger(__name__)
6
 
7
 
8
  async def analyze_image(image_bytes: bytes) -> Dict[str, Any]:
9
- start_time = time.time()
10
-
11
- logger.info(f"Starting image analysis, size: {len(image_bytes)} bytes")
12
-
13
- image_hash = hash(image_bytes) % 100
14
- is_deepfake = image_hash > 50
15
- confidence = (image_hash % 100) / 100.0
16
-
17
- analysis_time = time.time() - start_time
18
-
19
- result = {
20
- "is_deepfake": is_deepfake,
21
- "confidence": round(confidence, 3),
22
- "analysis_time": round(analysis_time, 3),
23
- }
24
-
25
- logger.info(f"Image analysis completed. Result: {result}")
26
- return result
 
6
 
7
 
async def analyze_image(image_bytes: bytes) -> Dict[str, Any]:
    """Placeholder entry point for image analysis.

    No image model is configured, so the coroutine fails unconditionally
    instead of returning a result.

    Args:
        image_bytes: Raw image content; currently ignored.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("Image analysis models not yet configured")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/app/services/text_analyzer.py CHANGED
@@ -1,26 +1,51 @@
1
  import logging
2
  import time
3
  from typing import Dict, Any
 
4
 
5
  logger = logging.getLogger(__name__)
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  async def analyze_text(text: str) -> Dict[str, Any]:
 
 
 
 
 
 
9
  start_time = time.time()
10
 
11
  logger.info(f"Starting text analysis, length: {len(text)} chars")
12
 
13
- text_hash = hash(text) % 100
14
- is_deepfake = text_hash > 50
15
- confidence = (text_hash % 100) / 100.0
 
 
 
 
 
16
 
17
  analysis_time = time.time() - start_time
18
 
19
- result = {
20
  "is_deepfake": is_deepfake,
21
  "confidence": round(confidence, 3),
22
  "analysis_time": round(analysis_time, 3),
23
  }
24
 
25
- logger.info(f"Text analysis completed. Result: {result}")
26
- return result
 
1
  import logging
2
  import time
3
  from typing import Dict, Any
4
+ from transformers import pipeline
5
 
6
  logger = logging.getLogger(__name__)
7
 
8
# Process-wide cache for the text-classification pipeline; filled on first use.
_text_classifier = None

# Accepted input length, in characters (both bounds inclusive).
_MIN_TEXT_CHARS = 10
_MAX_TEXT_CHARS = 5000

# Token budget handed to the tokenizer. XLM-RoBERTa is documented with a
# 512-token sequence limit. NOTE(review): confirm against this checkpoint's
# tokenizer/config before changing.
_MAX_MODEL_TOKENS = 512


def _load_model():
    """Return the shared text-classification pipeline, building it on first call.

    The pipeline is stored in the module-level ``_text_classifier`` so later
    calls reuse the same object instead of constructing a new one.
    """
    global _text_classifier
    if _text_classifier is None:
        logger.info("Loading XLM-RoBERTa text detector model...")
        _text_classifier = pipeline(
            "text-classification",
            model="yaya36095/xlm-roberta-text-detector",
            device=-1,  # -1 selects the CPU in the transformers pipeline API
        )
        logger.info("Text detector model loaded successfully")
    return _text_classifier


async def analyze_text(text: str) -> Dict[str, Any]:
    """Classify ``text`` with the cached text-detector pipeline.

    Args:
        text: Text to classify; its length must be between 10 and 5000
            characters inclusive.

    Returns:
        A dict with three keys:
        ``is_deepfake`` - True when the top label equals "fake",
        compared case-insensitively;
        ``confidence`` - score of the top label rounded to 3 decimals (this is
        the confidence in the predicted label, whichever label that is);
        ``analysis_time`` - elapsed seconds rounded to 3 decimals, covering
        model lookup (including the first-call load) and inference.

    Raises:
        ValueError: If ``text`` is longer than 5000 or shorter than 10
            characters.
    """
    # Function-scope imports keep this block self-contained.
    import asyncio
    import functools

    if len(text) > _MAX_TEXT_CHARS:
        raise ValueError(
            f"Text content exceeds maximum length of {_MAX_TEXT_CHARS} characters"
        )

    if len(text) < _MIN_TEXT_CHARS:
        raise ValueError(
            f"Text content must be at least {_MIN_TEXT_CHARS} characters"
        )

    # perf_counter is monotonic, so the duration cannot be skewed by
    # wall-clock adjustments.
    start_time = time.perf_counter()

    logger.info(f"Starting text analysis, length: {len(text)} chars")

    # Look the pipeline up here, on the event-loop thread, so the lazy
    # initialisation in _load_model() is never entered concurrently.
    classifier = _load_model()

    # Inference is blocking CPU work: run it in the default executor so the
    # event loop stays responsive. Truncation keeps inputs that tokenize past
    # the model's window from failing inside the model.
    loop = asyncio.get_running_loop()
    result = await loop.run_in_executor(
        None,
        functools.partial(
            classifier,
            text,
            truncation=True,
            max_length=_MAX_MODEL_TOKENS,
        ),
    )

    top_prediction = result[0]
    label = top_prediction["label"]
    score = top_prediction["score"]

    # NOTE(review): assumes the checkpoint names its positive class "fake";
    # verify against the model's id2label mapping.
    is_deepfake = label.lower() == "fake"
    confidence = score

    analysis_time = time.perf_counter() - start_time

    response = {
        "is_deepfake": is_deepfake,
        "confidence": round(confidence, 3),
        "analysis_time": round(analysis_time, 3),
    }

    logger.info(f"Text analysis completed. Result: {response}")
    return response
backend/requirements.txt CHANGED
@@ -4,3 +4,6 @@ httpx==0.27.0
4
  pydantic==2.8.2
5
  pydantic-settings==2.3.1
6
  python-multipart==0.0.6
 
 
 
 
4
  pydantic==2.8.2
5
  pydantic-settings==2.3.1
6
  python-multipart==0.0.6
7
+ transformers==4.41.2
8
+ torch==2.3.1
9
+ numpy==1.26.4