Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import torch
|
|
| 4 |
import logging
|
| 5 |
import gc
|
| 6 |
import sys
|
|
|
|
| 7 |
from fastapi import FastAPI, HTTPException
|
| 8 |
from fastapi.middleware.cors import CORSMiddleware
|
| 9 |
from pydantic import BaseModel
|
|
@@ -11,6 +12,35 @@ from typing import Dict, List, Optional
|
|
| 11 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 12 |
from tokenizers.normalizers import Sequence, Replace, Strip
|
| 13 |
from tokenizers import Regex
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
# =====================================================
|
| 16 |
# 🔧 تكوين البيئة والإعدادات
|
|
@@ -78,93 +108,133 @@ class ModelManager:
|
|
| 78 |
"https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed12",
|
| 79 |
"https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed22"
|
| 80 |
]
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
def load_tokenizer(self):
|
| 83 |
-
"""تحميل الـ Tokenizer مع
|
| 84 |
try:
|
| 85 |
-
logger.info("📝 Loading tokenizer...")
|
| 86 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
| 87 |
-
|
| 88 |
cache_dir=CACHE_DIR,
|
| 89 |
use_fast=True,
|
| 90 |
trust_remote_code=False
|
| 91 |
)
|
|
|
|
| 92 |
|
| 93 |
-
|
|
|
|
| 94 |
try:
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
except Exception as e:
|
| 110 |
-
logger.
|
| 111 |
-
|
|
|
|
| 112 |
|
| 113 |
def load_single_model(self, model_url=None, model_path=None, model_name="Model"):
|
| 114 |
-
"""تحميل موديل واحد مع
|
|
|
|
| 115 |
try:
|
| 116 |
-
logger.info(f"🤖 Loading {model_name}...")
|
| 117 |
|
| 118 |
-
#
|
| 119 |
base_model = AutoModelForSequenceClassification.from_pretrained(
|
| 120 |
-
|
| 121 |
num_labels=41,
|
| 122 |
cache_dir=CACHE_DIR,
|
| 123 |
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
| 124 |
low_cpu_mem_usage=True,
|
| 125 |
trust_remote_code=False
|
| 126 |
)
|
|
|
|
| 127 |
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
if model_path and os.path.exists(model_path):
|
| 130 |
logger.info(f"📁 Loading from local file: {model_path}")
|
| 131 |
state_dict = torch.load(model_path, map_location=device, weights_only=True)
|
| 132 |
base_model.load_state_dict(state_dict, strict=False)
|
| 133 |
elif model_url:
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
base_model.load_state_dict(state_dict, strict=False)
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
logger.
|
| 147 |
else:
|
| 148 |
logger.info("📊 Using model with random initialization")
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
model
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
def load_models(self, max_models=2):
|
| 169 |
"""تحميل الموديلات بحد أقصى للذاكرة"""
|
| 170 |
if self.models_loaded:
|
|
@@ -173,6 +243,7 @@ class ModelManager:
|
|
| 173 |
|
| 174 |
# تحميل الـ Tokenizer أولاً
|
| 175 |
if not self.load_tokenizer():
|
|
|
|
| 176 |
return False
|
| 177 |
|
| 178 |
# تحميل الموديلات
|
|
@@ -188,13 +259,14 @@ class ModelManager:
|
|
| 188 |
if model is not None:
|
| 189 |
self.models.append(model)
|
| 190 |
|
| 191 |
-
# تحميل الموديلات من URLs
|
| 192 |
-
for i,
|
| 193 |
if len(self.models) >= max_models:
|
| 194 |
break
|
| 195 |
|
|
|
|
| 196 |
model = self.load_single_model(
|
| 197 |
-
model_url=
|
| 198 |
model_name=f"Model {len(self.models) + 1}"
|
| 199 |
)
|
| 200 |
if model is not None:
|
|
@@ -214,7 +286,7 @@ class ModelManager:
|
|
| 214 |
# التحقق من نجاح التحميل
|
| 215 |
if len(self.models) > 0:
|
| 216 |
self.models_loaded = True
|
| 217 |
-
logger.info(f"✅ Successfully loaded {len(self.models)} models")
|
| 218 |
return True
|
| 219 |
else:
|
| 220 |
logger.error("❌ No models could be loaded")
|
|
@@ -230,13 +302,14 @@ class ModelManager:
|
|
| 230 |
if not cleaned_text.strip():
|
| 231 |
raise ValueError("Empty text after cleaning")
|
| 232 |
|
| 233 |
-
# Tokenization
|
|
|
|
| 234 |
try:
|
| 235 |
inputs = self.tokenizer(
|
| 236 |
cleaned_text,
|
| 237 |
return_tensors="pt",
|
| 238 |
truncation=True,
|
| 239 |
-
max_length=
|
| 240 |
padding=True
|
| 241 |
).to(device)
|
| 242 |
except Exception as e:
|
|
@@ -297,7 +370,8 @@ class ModelManager:
|
|
| 297 |
"predicted_model": predicted_model,
|
| 298 |
"top_5_predictions": top_5_results,
|
| 299 |
"is_human": human_percentage > ai_percentage,
|
| 300 |
-
"models_used": len(all_probabilities)
|
|
|
|
| 301 |
}
|
| 302 |
|
| 303 |
# =====================================================
|
|
@@ -320,7 +394,7 @@ def split_into_paragraphs(text: str) -> List[str]:
|
|
| 320 |
app = FastAPI(
|
| 321 |
title="ModernBERT AI Text Detector",
|
| 322 |
description="كشف النصوص المكتوبة بواسطة الذكاء الاصطناعي",
|
| 323 |
-
version="2.
|
| 324 |
)
|
| 325 |
|
| 326 |
# إضافة CORS للسماح بالاستخدام من المتصفح
|
|
@@ -361,6 +435,9 @@ async def startup_event():
|
|
| 361 |
logger.info("🚀 Starting ModernBERT AI Detector...")
|
| 362 |
logger.info(f"🐍 Python version: {sys.version}")
|
| 363 |
logger.info(f"🔥 PyTorch version: {torch.__version__}")
|
|
|
|
|
|
|
|
|
|
| 364 |
logger.info("=" * 50)
|
| 365 |
|
| 366 |
# محاولة تحميل الموديلات
|
|
@@ -368,9 +445,10 @@ async def startup_event():
|
|
| 368 |
success = model_manager.load_models(max_models=max_models)
|
| 369 |
|
| 370 |
if success:
|
| 371 |
-
logger.info("✅ Application ready!")
|
| 372 |
else:
|
| 373 |
logger.error("⚠️ Failed to load models - API will return errors")
|
|
|
|
| 374 |
|
| 375 |
@app.get("/")
|
| 376 |
async def root():
|
|
@@ -379,6 +457,7 @@ async def root():
|
|
| 379 |
"message": "ModernBERT AI Text Detector API",
|
| 380 |
"status": "online" if model_manager.models_loaded else "initializing",
|
| 381 |
"models_loaded": len(model_manager.models),
|
|
|
|
| 382 |
"device": str(device),
|
| 383 |
"endpoints": {
|
| 384 |
"analyze": "/analyze",
|
|
@@ -401,6 +480,7 @@ async def health_check():
|
|
| 401 |
return {
|
| 402 |
"status": "healthy" if model_manager.models_loaded else "unhealthy",
|
| 403 |
"models_loaded": len(model_manager.models),
|
|
|
|
| 404 |
"device": str(device),
|
| 405 |
"cuda_available": torch.cuda.is_available(),
|
| 406 |
"memory_info": memory_info
|
|
@@ -430,7 +510,7 @@ async def analyze_text(data: TextInput):
|
|
| 430 |
return DetectionResult(
|
| 431 |
success=False,
|
| 432 |
code=503,
|
| 433 |
-
message="Models not available",
|
| 434 |
data={}
|
| 435 |
)
|
| 436 |
|
|
@@ -497,7 +577,8 @@ async def analyze_text(data: TextInput):
|
|
| 497 |
"input_text": text[:500] + "..." if len(text) > 500 else text,
|
| 498 |
"detected_language": "en",
|
| 499 |
"top_5_predictions": result.get("top_5_predictions", []),
|
| 500 |
-
"models_used": result.get("models_used", 1)
|
|
|
|
| 501 |
}
|
| 502 |
)
|
| 503 |
|
|
@@ -531,7 +612,8 @@ async def analyze_simple(data: SimpleTextInput):
|
|
| 531 |
"ai_score": result["ai_percentage"],
|
| 532 |
"human_score": result["human_percentage"],
|
| 533 |
"detected_model": result["predicted_model"] if result["ai_percentage"] > 50 else None,
|
| 534 |
-
"confidence": max(result["ai_percentage"], result["human_percentage"])
|
|
|
|
| 535 |
}
|
| 536 |
|
| 537 |
except HTTPException:
|
|
@@ -557,3 +639,10 @@ if __name__ == "__main__":
|
|
| 557 |
logger.info(f"📚 Documentation: http://{host}:{port}/docs")
|
| 558 |
logger.info("=" * 50)
|
| 559 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import logging
|
| 5 |
import gc
|
| 6 |
import sys
|
| 7 |
+
import pwd # Added for monkey patch
|
| 8 |
from fastapi import FastAPI, HTTPException
|
| 9 |
from fastapi.middleware.cors import CORSMiddleware
|
| 10 |
from pydantic import BaseModel
|
|
|
|
| 12 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 13 |
from tokenizers.normalizers import Sequence, Replace, Strip
|
| 14 |
from tokenizers import Regex
|
| 15 |
+
from huggingface_hub import hf_hub_download # Added for reliable HF downloads
|
| 16 |
+
|
| 17 |
+
# =====================================================
|
| 18 |
+
# 🛠️ Monkey Patch for Docker/Container UID Issue
|
| 19 |
+
# =====================================================
|
| 20 |
+
# Fix for 'getpwuid(): uid not found: 1000' in containerized environments
|
| 21 |
+
def _fallback_passwd_entry(uid_num):
    """Build a synthetic passwd record for a UID missing from the user database.

    Args:
        uid_num: Numeric user id to place in the record.

    Returns:
        pwd.struct_passwd: Record named 'dockeruser' with home '/tmp' and
        shell '/bin/sh', using the current process group id.
    """
    # struct_passwd is a struct sequence: it is built from ONE 7-item sequence
    # in (name, passwd, uid, gid, gecos, dir, shell) order. It does not accept
    # keyword arguments, and Python 3's pwd module has no `struct_pwent`.
    return pwd.struct_passwd((
        'dockeruser',
        'x',
        uid_num,
        os.getgid(),
        'Docker User',
        '/tmp',
        '/bin/sh',
    ))


def patched_getpwuid(uid_num):
    """Look up a passwd entry, tolerating a missing entry for the current UID.

    Delegates to the original ``pwd.getpwuid``. When that raises ``KeyError``
    and ``uid_num`` is the UID of the running process, a synthetic entry is
    returned instead.

    Args:
        uid_num: Numeric user id to look up.

    Returns:
        pwd.struct_passwd: The real entry, or the synthetic one.

    Raises:
        KeyError: If the UID is unknown and is not the current process UID.
    """
    try:
        return original_getpwuid(uid_num)
    except KeyError:
        if uid_num == os.getuid():
            # Create fake user entry
            return _fallback_passwd_entry(uid_num)
        raise


# Keep a handle on the real implementation, then install the wrapper.
original_getpwuid = pwd.getpwuid
pwd.getpwuid = patched_getpwuid
|
| 40 |
+
|
| 41 |
+
# Set fallback env vars to avoid user-dependent paths
|
| 42 |
+
os.environ.setdefault('HOME', '/tmp')
|
| 43 |
+
os.environ.setdefault('USER', 'dockeruser')
|
| 44 |
|
| 45 |
# =====================================================
|
| 46 |
# 🔧 تكوين البيئة والإعدادات
|
|
|
|
| 108 |
"https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed12",
|
| 109 |
"https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed22"
|
| 110 |
]
|
| 111 |
+
self.base_model_id = "answerdotai/ModernBERT-base" # Primary
|
| 112 |
+
self.fallback_model_id = "bert-base-uncased" # Fallback if ModernBERT fails
|
| 113 |
+
self.using_fallback = False
|
| 114 |
|
| 115 |
def load_tokenizer(self):
    """Load the tokenizer (with a fallback checkpoint) and extend its normalizer.

    Tries ``self.base_model_id`` first and ``self.fallback_model_id`` second;
    ``self.using_fallback`` is set to True when the second one is used. After
    loading, three extra normalization steps are appended to the backend
    tokenizer: re-joining words hyphenated across a line break, collapsing
    newlines to a single space, and stripping surrounding whitespace.

    Returns:
        bool: True once a tokenizer is stored on ``self.tokenizer`` (even if
        the custom normalizer could not be attached, which is only logged);
        False if both tokenizer loads fail.
    """
    try:
        logger.info(f"📝 Loading tokenizer from {self.base_model_id}...")
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.base_model_id,
            cache_dir=CACHE_DIR,
            use_fast=True,
            trust_remote_code=False
        )
        logger.info("✅ Primary tokenizer loaded successfully")

    except Exception as e:
        logger.warning(f"⚠️ Failed to load primary tokenizer: {e}")
        try:
            logger.info(f"🔄 Falling back to {self.fallback_model_id}...")
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.fallback_model_id,
                cache_dir=CACHE_DIR,
                use_fast=True,
                trust_remote_code=False
            )
            self.using_fallback = True
            logger.info("✅ Fallback tokenizer loaded successfully")
        except Exception as fallback_e:
            logger.error(f"❌ Failed to load fallback tokenizer: {fallback_e}")
            return False

    # Set up the text normalizer
    try:
        newline_to_space = Replace(Regex(r'\s*\n\s*'), " ")
        join_hyphen_break = Replace(Regex(r'(\w+)[--]\s*\n\s*(\w+)'), r"\1\2")
        backend = self.tokenizer.backend_tokenizer
        # The hyphen join must run before newlines are collapsed, because its
        # pattern needs the newline to still be present.
        steps = [join_hyphen_break, newline_to_space, Strip()]
        # A tokenizer may ship without a normalizer; only chain the existing
        # one when it is present so the custom steps are still installed.
        if backend.normalizer is not None:
            steps.insert(0, backend.normalizer)
        backend.normalizer = Sequence(steps)
    except Exception as e:
        logger.warning(f"⚠️ Could not set custom normalizer: {e}")

    return True
|
| 157 |
|
| 158 |
def load_single_model(self, model_url=None, model_path=None, model_name="Model"):
    """Load one classifier, falling back to a secondary base checkpoint.

    The base architecture comes from ``self.base_model_id``; if that fails,
    ``self.fallback_model_id`` is used and ``self.using_fallback`` is set.
    Fine-tuned weights are then applied from ``model_path`` (a local file) or
    from the file named by the last segment of ``model_url``. Weights are
    only applied to the primary architecture. A failure while fetching or
    applying weights is logged and the base model is kept as-is.

    Args:
        model_url: URL whose last path segment names the weight file in the
            Hugging Face repo; used when no local file is available.
        model_path: Local weight file; takes precedence when it exists.
        model_name: Label used in log messages.

    Returns:
        The model moved to ``device`` and switched to eval mode, or None when
        neither the primary nor the fallback base model could be loaded.
    """
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    # Tracked per call: self.using_fallback is sticky and is also set by the
    # tokenizer, so it cannot tell whether THIS model uses the fallback
    # architecture.
    used_fallback = False
    try:
        logger.info(f"🤖 Loading base {model_name} from {self.base_model_id}...")

        # Try the primary base model first
        base_model = AutoModelForSequenceClassification.from_pretrained(
            self.base_model_id,
            num_labels=41,
            cache_dir=CACHE_DIR,
            torch_dtype=dtype,
            low_cpu_mem_usage=True,
            trust_remote_code=False
        )
        logger.info("✅ Primary base model loaded")

    except Exception as e:
        logger.warning(f"⚠️ Failed to load primary base model: {e}")
        try:
            logger.info(f"🔄 Falling back to {self.fallback_model_id}...")
            base_model = AutoModelForSequenceClassification.from_pretrained(
                self.fallback_model_id,
                num_labels=41,
                cache_dir=CACHE_DIR,
                torch_dtype=dtype,
                low_cpu_mem_usage=True,
                trust_remote_code=False
            )
            used_fallback = True
            self.using_fallback = True
            logger.info("✅ Fallback base model loaded (note: weights may not be compatible)")
        except Exception as fallback_e:
            logger.error(f"❌ Failed to load fallback base model: {fallback_e}")
            return None

    # Apply fine-tuned weights, but only to the primary architecture
    state_dict = None
    try:
        if used_fallback:
            # Decide before any download: the fine-tuned weights target the
            # primary architecture, so fetching them here would be wasted.
            if model_path or model_url:
                logger.warning("⚠️ Skipping weight load in fallback mode (incompatible architecture)")
            else:
                logger.info("📊 Using model with random initialization")
        elif model_path and os.path.exists(model_path):
            logger.info(f"📁 Loading from local file: {model_path}")
            state_dict = torch.load(model_path, map_location=device, weights_only=True)
            base_model.load_state_dict(state_dict, strict=False)
            logger.info("✅ Weights loaded successfully")
        elif model_url:
            # Use hf_hub_download rather than torch.hub for HF repos
            logger.info(f"🌐 Downloading weights from HF repo...")
            repo_id = "mihalykiss/modernbert_2"
            filename = model_url.split('/')[-1]  # e.g. "Model_groups_3class_seed12"
            # local_dir_use_symlinks is intentionally not passed: it only
            # applies to local_dir downloads and is deprecated.
            pt_file = hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                cache_dir=CACHE_DIR
            )
            state_dict = torch.load(pt_file, map_location=device, weights_only=True)
            base_model.load_state_dict(state_dict, strict=False)
            logger.info("✅ Weights loaded successfully")
        else:
            logger.info("📊 Using model with random initialization")
    except Exception as weight_error:
        logger.warning(f"⚠️ Could not load weights: {weight_error}")
        logger.info("📊 Continuing with base model (random or pre-trained init)")

    # Move the model to the target device
    model = base_model.to(device)
    model.eval()

    # Free memory held by the raw weights
    del state_dict
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    logger.info(f"✅ {model_name} loaded successfully (fallback: {self.using_fallback})")
    return model
|
| 237 |
+
|
|
|
|
| 238 |
def load_models(self, max_models=2):
|
| 239 |
"""تحميل الموديلات بحد أقصى للذاكرة"""
|
| 240 |
if self.models_loaded:
|
|
|
|
| 243 |
|
| 244 |
# تحميل الـ Tokenizer أولاً
|
| 245 |
if not self.load_tokenizer():
|
| 246 |
+
logger.error("❌ Tokenizer load failed - cannot proceed")
|
| 247 |
return False
|
| 248 |
|
| 249 |
# تحميل الموديلات
|
|
|
|
| 259 |
if model is not None:
|
| 260 |
self.models.append(model)
|
| 261 |
|
| 262 |
+
# تحميل الموديلات من URLs (استخراج filenames)
|
| 263 |
+
for i, full_url in enumerate(self.model_urls[:max_models - len(self.models)]):
|
| 264 |
if len(self.models) >= max_models:
|
| 265 |
break
|
| 266 |
|
| 267 |
+
# استخدام full_url كما هو، لكن في load_single_model نستخرج filename
|
| 268 |
model = self.load_single_model(
|
| 269 |
+
model_url=full_url,
|
| 270 |
model_name=f"Model {len(self.models) + 1}"
|
| 271 |
)
|
| 272 |
if model is not None:
|
|
|
|
| 286 |
# التحقق من نجاح التحميل
|
| 287 |
if len(self.models) > 0:
|
| 288 |
self.models_loaded = True
|
| 289 |
+
logger.info(f"✅ Successfully loaded {len(self.models)} models (using fallback: {self.using_fallback})")
|
| 290 |
return True
|
| 291 |
else:
|
| 292 |
logger.error("❌ No models could be loaded")
|
|
|
|
| 302 |
if not cleaned_text.strip():
|
| 303 |
raise ValueError("Empty text after cleaning")
|
| 304 |
|
| 305 |
+
# Tokenization (max_length adjusted for fallback BERT if needed)
|
| 306 |
+
max_len = 512 if not self.using_fallback else 512 # BERT max is 512
|
| 307 |
try:
|
| 308 |
inputs = self.tokenizer(
|
| 309 |
cleaned_text,
|
| 310 |
return_tensors="pt",
|
| 311 |
truncation=True,
|
| 312 |
+
max_length=max_len,
|
| 313 |
padding=True
|
| 314 |
).to(device)
|
| 315 |
except Exception as e:
|
|
|
|
| 370 |
"predicted_model": predicted_model,
|
| 371 |
"top_5_predictions": top_5_results,
|
| 372 |
"is_human": human_percentage > ai_percentage,
|
| 373 |
+
"models_used": len(all_probabilities),
|
| 374 |
+
"using_fallback": self.using_fallback
|
| 375 |
}
|
| 376 |
|
| 377 |
# =====================================================
|
|
|
|
| 394 |
app = FastAPI(
|
| 395 |
title="ModernBERT AI Text Detector",
|
| 396 |
description="كشف النصوص المكتوبة بواسطة الذكاء الاصطناعي",
|
| 397 |
+
version="2.2.0" # Updated version with UID fix
|
| 398 |
)
|
| 399 |
|
| 400 |
# إضافة CORS للسماح بالاستخدام من المتصفح
|
|
|
|
| 435 |
logger.info("🚀 Starting ModernBERT AI Detector...")
|
| 436 |
logger.info(f"🐍 Python version: {sys.version}")
|
| 437 |
logger.info(f"🔥 PyTorch version: {torch.__version__}")
|
| 438 |
+
import transformers
|
| 439 |
+
logger.info(f"🔧 Transformers version: {transformers.__version__}")
|
| 440 |
+
logger.info("🛡️ UID Monkey Patch Applied (for Docker/Container)")
|
| 441 |
logger.info("=" * 50)
|
| 442 |
|
| 443 |
# محاولة تحميل الموديلات
|
|
|
|
| 445 |
success = model_manager.load_models(max_models=max_models)
|
| 446 |
|
| 447 |
if success:
|
| 448 |
+
logger.info("✅ Application ready! (Fallback mode: %s)", model_manager.using_fallback)
|
| 449 |
else:
|
| 450 |
logger.error("⚠️ Failed to load models - API will return errors")
|
| 451 |
+
logger.info("💡 Tip: Ensure 'transformers>=4.45.0' and 'huggingface_hub' are installed. Run: pip install --upgrade transformers huggingface_hub")
|
| 452 |
|
| 453 |
@app.get("/")
|
| 454 |
async def root():
|
|
|
|
| 457 |
"message": "ModernBERT AI Text Detector API",
|
| 458 |
"status": "online" if model_manager.models_loaded else "initializing",
|
| 459 |
"models_loaded": len(model_manager.models),
|
| 460 |
+
"using_fallback": model_manager.using_fallback,
|
| 461 |
"device": str(device),
|
| 462 |
"endpoints": {
|
| 463 |
"analyze": "/analyze",
|
|
|
|
| 480 |
return {
|
| 481 |
"status": "healthy" if model_manager.models_loaded else "unhealthy",
|
| 482 |
"models_loaded": len(model_manager.models),
|
| 483 |
+
"using_fallback": model_manager.using_fallback,
|
| 484 |
"device": str(device),
|
| 485 |
"cuda_available": torch.cuda.is_available(),
|
| 486 |
"memory_info": memory_info
|
|
|
|
| 510 |
return DetectionResult(
|
| 511 |
success=False,
|
| 512 |
code=503,
|
| 513 |
+
message="Models not available. Check logs for details.",
|
| 514 |
data={}
|
| 515 |
)
|
| 516 |
|
|
|
|
| 577 |
"input_text": text[:500] + "..." if len(text) > 500 else text,
|
| 578 |
"detected_language": "en",
|
| 579 |
"top_5_predictions": result.get("top_5_predictions", []),
|
| 580 |
+
"models_used": result.get("models_used", 1),
|
| 581 |
+
"using_fallback": result.get("using_fallback", False)
|
| 582 |
}
|
| 583 |
)
|
| 584 |
|
|
|
|
| 612 |
"ai_score": result["ai_percentage"],
|
| 613 |
"human_score": result["human_percentage"],
|
| 614 |
"detected_model": result["predicted_model"] if result["ai_percentage"] > 50 else None,
|
| 615 |
+
"confidence": max(result["ai_percentage"], result["human_percentage"]),
|
| 616 |
+
"using_fallback": result.get("using_fallback", False)
|
| 617 |
}
|
| 618 |
|
| 619 |
except HTTPException:
|
|
|
|
| 639 |
logger.info(f"📚 Documentation: http://{host}:{port}/docs")
|
| 640 |
logger.info("=" * 50)
|
| 641 |
|
| 642 |
+
# Build the import string from this file's own name so the target resolves
# whether the module is saved as app.py or main.py. uvicorn needs an import
# string (rather than the app object) to honour `workers`.
uvicorn.run(
    f"{os.path.splitext(os.path.basename(__file__))[0]}:app",
    host=host,
    port=port,
    workers=workers,
    reload=False  # Set to True for dev
)
|