RafzE committed
Commit 799764f · verified · 1 Parent(s): 8c2a49d

Update app.py

Files changed (1): app.py (+134 -155)

app.py CHANGED
Deletions (old side of the split diff view):

@@ -6,19 +6,23 @@ import torch
  import logging
  from typing import Optional, List
  import time
- import re

- # Set up logging
  logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)

  app = FastAPI(
      title="Detextly AI Detector API",
-     description="AI Content Detection API using RoBERTa",
-     version="2.0.0"  # Updated version
  )

- # CORS middleware
  app.add_middleware(
      CORSMiddleware,
      allow_origins=["*"],
@@ -27,7 +31,9 @@ app.add_middleware(
      allow_headers=["*"],
  )

- # Request/Response models
  class ScanRequest(BaseModel):
      text: str
      scan_type: str = "basic"
@@ -40,8 +46,10 @@ class ScanResponse(BaseModel):
      credits: Optional[dict] = None
      test_mode: bool = False

- # Load RoBERTa AI Detector model
- MODEL_NAME = "roberta-base-openai-detector"

  class AIDetector:
      def __init__(self):
@@ -49,127 +57,118 @@ class AIDetector:
          self.tokenizer = None
          self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
          logger.info(f"Using device: {self.device}")
-
      def load_model(self):
-         """Lazy load the model"""
-         if self.model is None:
-             logger.info("Loading RoBERTa AI Detector model...")
-             try:
-                 self.model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
-                 self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-                 logger.info(f"Loaded {MODEL_NAME}")
-             except Exception as e:
-                 logger.error(f"Failed to load model: {e}")
-                 raise
-
-         self.model.to(self.device)
-         self.model.eval()
-         logger.info("Model loaded successfully")
-
-     def predict(self, text: str, max_length: int = 512):
-         """Predict AI probability"""
          if self.model is None:
              self.load_model()
-
-         # Tokenize
-         inputs = self.tokenizer(
-             text,
-             return_tensors="pt",
-             truncation=True,
              max_length=max_length,
              padding=True
          )
-
-         # Move to device
-         inputs = {k: v.to(self.device) for k, v in inputs.items()}
-
-         # Predict
          with torch.no_grad():
-             outputs = self.model(**inputs)
-             probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
-             ai_probability = probabilities[0][0].item()  # Index 1 is AI
-
          return ai_probability

- # Initialize detector
  detector = AIDetector()

  def detect_chatgpt_patterns(text: str) -> float:
-     """Detect specific ChatGPT/AI assistant patterns"""
-     lower_text = text.lower()
-
-     # ChatGPT specific phrases
-     chatgpt_phrases = [
          "as an ai language model",
          "i don't have personal experiences",
          "i don't have feelings",
-         "i'm an ai assistant",
-         "based on the information provided",
-         "i cannot provide medical",
-         "i cannot provide legal",
-         "i cannot provide financial",
-         "keep in mind that",
-         "please note that",
-         "my responses are generated"
      ]
-
-     # Count ChatGPT phrases
-     chatgpt_count = sum(1 for phrase in chatgpt_phrases if phrase in lower_text)
-
-     # If ChatGPT phrases detected, increase AI probability
-     if chatgpt_count >= 1:
-         logger.info(f"ChatGPT patterns detected: {chatgpt_count}")
-         return 0.95  # Force high AI probability
-
-     return 0.0
-
- def analyze_sections_roberta(text: str, overall_score: float) -> List[dict]:
-     """Split text into sections with AI scores for highlight scan"""
      sections = []
      words = text.split()
-     section_length = 100
-
-     for i in range(0, len(words), section_length):
-         section_text = " ".join(words[i:i+section_length])
-         if len(section_text.strip()) < 50:
              continue
-
-         # Get section-specific prediction
-         section_score = detector.predict(section_text) if len(section_text) > 20 else overall_score
-
-         # Check for ChatGPT patterns in each section
-         chatgpt_adjustment = detect_chatgpt_patterns(section_text)
-         if chatgpt_adjustment > 0:
-             section_score = max(section_score, chatgpt_adjustment)
-
          sections.append({
-             "text": section_text[:150] + "..." if len(section_text) > 150 else section_text,
              "score": section_score,
-             "words": len(section_text.split()),
              "ai_probability": section_score,
-             "human_probability": 1 - section_score
          })
-
-         # Limit to 10 sections max
          if len(sections) >= 10:
              break
-
      return sections

  @app.on_event("startup")
- async def startup_event():
-     """Pre-load model on startup"""
      detector.load_model()

  @app.get("/")
  async def root():
      return {
          "status": "online",
-         "service": "Detextly AI Detector",
-         "version": "3.0.0",
          "model": MODEL_NAME,
          "device": str(detector.device),
-         "features": ["basic_scan", "highlight_scan", "chatgpt_detection"]
      }

  @app.get("/health")
@@ -177,75 +176,61 @@ async def health():
      return {"status": "healthy", "model": MODEL_NAME}

  @app.post("/api/scan", response_model=ScanResponse)
- async def scan_text(request: ScanRequest):
-     """Main scan endpoint"""
-     start_time = time.time()
-
      try:
-         if not request.text or len(request.text.strip()) < 10:
-             raise HTTPException(status_code=400, detail="Text too short")
-
-         # Limit text length for performance
-         text = request.text[:2000]
-
-         # Get prediction from RoBERTa
-         ai_probability = detector.predict(text)
-
-         # Check for ChatGPT patterns (OVERRIDE if detected)
-         chatgpt_probability = detect_chatgpt_patterns(text)
-         if chatgpt_probability > 0:
-             ai_probability = chatgpt_probability
-             logger.info(f"ChatGPT detected, overriding to {ai_probability}")
-
-         # Prepare result
          result = {
-             "overall": ai_probability,
-             "processing_time_ms": int((time.time() - start_time) * 1000),
-             "simulated": False,
-             "details": {
-                 "ai_probability": ai_probability,
-                 "human_probability": 1 - ai_probability,
-                 "model": MODEL_NAME,
-                 "confidence": "high" if ai_probability > 0.7 or ai_probability < 0.3 else "medium",
-                 "chatgpt_detected": chatgpt_probability > 0
-             }
          }
-
-         # For highlight scans, add section analysis
-         if request.scan_type == "highlight":
-             sections = analyze_sections_roberta(text, ai_probability)
              result["sections"] = sections
-             result["scan_type"] = "highlight"
              result["section_count"] = len(sections)
-             logger.info(f"Highlight scan completed: {len(sections)} sections analyzed")
          else:
-             result["scan_type"] = request.scan_type
-
-         processing_time = int((time.time() - start_time) * 1000)
-
-         # NORMAL credits (5 basic, 1 highlight daily)
-         credits = {
-             "basic": 5,
-             "highlight": 1,
-             "resetTime": "2024-12-31T23:59:59Z",
-             "test_mode": False
-         }
-
         return ScanResponse(
              success=True,
              result=result,
-             processingTime=processing_time,
-             credits=credits,
              test_mode=False
          )
-
      except Exception as e:
          logger.error(f"Scan error: {e}")
-         raise HTTPException(status_code=500, detail=f"Scan failed: {str(e)}")

  @app.get("/api/credits")
- async def get_credits(userId: str):
-     """Get user credits"""
      return {
          "basic": 5,
          "highlight": 1,
@@ -255,10 +240,4 @@ async def get_credits(userId: str):

  if __name__ == "__main__":
      import uvicorn
-     uvicorn.run(
-         app,
-         host="0.0.0.0",
-         port=7860,
-         log_level="info"
-     )
-
Additions (new side of the split diff view):

@@ -6,19 +6,23 @@ import torch
  import logging
  from typing import Optional, List
  import time

+ # -------------------------------------------------------
+ # Logging
+ # -------------------------------------------------------
  logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger("detector")

+ # -------------------------------------------------------
+ # FastAPI App
+ # -------------------------------------------------------
  app = FastAPI(
      title="Detextly AI Detector API",
+     description="AI Content Detection API using RoBERTa-Large",
+     version="3.1.0"
  )

+ # CORS
  app.add_middleware(
      CORSMiddleware,
      allow_origins=["*"],

@@ -27,7 +31,9 @@ app.add_middleware(
      allow_headers=["*"],
  )

+ # -------------------------------------------------------
+ # Request / Response Models
+ # -------------------------------------------------------
  class ScanRequest(BaseModel):
      text: str
      scan_type: str = "basic"

@@ -40,8 +46,10 @@ class ScanResponse(BaseModel):
      credits: Optional[dict] = None
      test_mode: bool = False

+ # -------------------------------------------------------
+ # Model
+ # -------------------------------------------------------
+ MODEL_NAME = "openai-community/roberta-large-openai-detector"

  class AIDetector:
      def __init__(self):

@@ -49,127 +57,118 @@ class AIDetector:
          self.tokenizer = None
          self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
          logger.info(f"Using device: {self.device}")
+
      def load_model(self):
+         if self.model is not None:
+             return
+
+         logger.info(f"Loading model: {MODEL_NAME}")
+         try:
+             self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+             self.model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
+         except Exception as e:
+             logger.error(f"Error loading {MODEL_NAME}: {e}")
+             raise
+
+         self.model.to(self.device)
+         self.model.eval()
+         logger.info("Model loaded successfully.")
+
+     def predict(self, text: str, max_length: int = 512) -> float:
+         """Return AI probability using class index 1 (correct)."""
          if self.model is None:
              self.load_model()
+
+         tokens = self.tokenizer(
+             text,
+             return_tensors="pt",
+             truncation=True,
              max_length=max_length,
              padding=True
          )
+
+         tokens = {k: v.to(self.device) for k, v in tokens.items()}
+
          with torch.no_grad():
+             outputs = self.model(**tokens)
+             probabilities = torch.softmax(outputs.logits, dim=-1)
+             # FIX: index 1 = AI-written
+             ai_probability = float(probabilities[0][1].item())
+
          return ai_probability

+ # Init detector
  detector = AIDetector()

+ # -------------------------------------------------------
+ # ChatGPT Pattern Detector
+ # -------------------------------------------------------
  def detect_chatgpt_patterns(text: str) -> float:
+     """Return 0.95 if strong GPT-patterns are detected."""
+     patterns = [
          "as an ai language model",
+         "i am an ai model",
+         "i cannot provide medical",
+         "as a language model",
+         "based on the information provided",
+         "my training data",
          "i don't have personal experiences",
          "i don't have feelings",
      ]
+
+     lower = text.lower()
+     found = any(p in lower for p in patterns)
+
+     return 0.95 if found else 0.0
+
+ # -------------------------------------------------------
+ # Highlight Scan - Split into Sections
+ # -------------------------------------------------------
+ def analyze_sections(text: str, overall_score: float) -> List[dict]:
      sections = []
      words = text.split()
+     chunk_size = 100
+
+     for i in range(0, len(words), chunk_size):
+         chunk = " ".join(words[i:i+chunk_size])
+
+         if len(chunk) < 40:
              continue
+
+         section_score = detector.predict(chunk)
+         pattern_score = detect_chatgpt_patterns(chunk)
+
+         if pattern_score > 0:
+             section_score = max(section_score, pattern_score)
+
          sections.append({
+             "text": chunk[:150] + "..." if len(chunk) > 150 else chunk,
              "score": section_score,
              "ai_probability": section_score,
+             "human_probability": 1 - section_score,
+             "words": len(chunk.split())
          })
+
          if len(sections) >= 10:
              break
+
      return sections

+ # -------------------------------------------------------
+ # Endpoints
+ # -------------------------------------------------------
  @app.on_event("startup")
+ async def startup():
      detector.load_model()

  @app.get("/")
  async def root():
      return {
          "status": "online",
          "model": MODEL_NAME,
          "device": str(detector.device),
+         "version": "3.1.0",
+         "features": ["basic_scan", "highlight_scan", "chatgpt_pattern_detection"]
      }

  @app.get("/health")

@@ -177,75 +176,61 @@ async def health():
      return {"status": "healthy", "model": MODEL_NAME}

  @app.post("/api/scan", response_model=ScanResponse)
+ async def scan_text(req: ScanRequest):
+     start = time.time()
+
      try:
+         if not req.text or len(req.text.strip()) < 10:
+             raise HTTPException(status_code=400, detail="Text too short.")
+
+         text = req.text[:3000]  # safe CPU limit
+
+         # Base prediction
+         ai_prob = detector.predict(text)
+
+         # Pattern override
+         pattern_prob = detect_chatgpt_patterns(text)
+         if pattern_prob > ai_prob:
+             ai_prob = pattern_prob
+
+         # Build base result
          result = {
+             "overall": ai_prob,
+             "human_probability": 1 - ai_prob,
+             "model": MODEL_NAME,
+             "confidence": "high" if ai_prob > 0.75 or ai_prob < 0.25 else "medium",
+             "chatgpt_detected": pattern_prob > 0
          }
+
+         # Highlight scan
+         if req.scan_type == "highlight":
+             sections = analyze_sections(text, ai_prob)
              result["sections"] = sections
              result["section_count"] = len(sections)
+             result["scan_type"] = "highlight"
          else:
+             result["scan_type"] = "basic"
+
+         # Return response
          return ScanResponse(
              success=True,
              result=result,
+             processingTime=int((time.time() - start) * 1000),
+             credits={
+                 "basic": 5,
+                 "highlight": 1,
+                 "resetTime": "2024-12-31T23:59:59Z",
+                 "test_mode": False
+             },
              test_mode=False
          )
+
      except Exception as e:
          logger.error(f"Scan error: {e}")
+         raise HTTPException(status_code=500, detail=str(e))

  @app.get("/api/credits")
+ async def credits(userId: str):
      return {
          "basic": 5,
          "highlight": 1,

@@ -255,10 +240,4 @@ async def get_credits(userId: str):

  if __name__ == "__main__":
      import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=7860)
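
Note: the core change in this commit is reading the AI probability from logit index 1 instead of index 0. The safest way to confirm that this index matches whichever checkpoint is actually deployed is to read the label mapping from the model config rather than assume it; a minimal sketch (run separately, not part of app.py, using the MODEL_NAME from the commit):

    # Check which class index the detector checkpoint treats as machine-generated text.
    # predict() should use whichever id2label entry denotes the AI/fake class.
    from transformers import AutoConfig

    MODEL_NAME = "openai-community/roberta-large-openai-detector"

    config = AutoConfig.from_pretrained(MODEL_NAME)
    print(config.id2label)  # inspect the mapping before hard-coding an index in predict()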
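For reference, the updated /api/scan endpoint can be exercised like this once the app is running; a sketch assuming a local run on port 7860, with an illustrative base URL and sample text (field names follow the ScanRequest/ScanResponse models in the diff):

    # Example client call against the updated scan endpoint (assumes localhost:7860).
    import requests

    resp = requests.post(
        "http://localhost:7860/api/scan",
        json={"text": "Paste the passage to score here...", "scan_type": "highlight"},
        timeout=60,
    )
    resp.raise_for_status()
    data = resp.json()
    print(data["result"]["overall"])           # overall AI probability
    print(data["result"].get("sections", []))  # per-chunk scores for highlight scans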