Spaces:

yagnik12
/

AI_Text_Detecter

Running

yagnik12 committed on Sep 12, 2025

Commit

44d54f5

verified ·

1 Parent(s): 95adb4e

Update ai_text_detector_valid_final.py

Files changed (1) hide show

ai_text_detector_valid_final.py CHANGED Viewed

@@ -48,11 +48,22 @@ def clean_text(text: str) -> str:
     return text.strip()
 def classify_szegedai(text: str):
     cleaned_text = clean_text(text)
     if not cleaned_text.strip():
         return {"error": "Empty text"}
     inputs = tokenizer_modernbert(cleaned_text, return_tensors="pt", truncation=True, padding=True).to(device)
     with torch.no_grad():
         logits_1 = model_1(**inputs).logits
         logits_2 = model_2(**inputs).logits
@@ -65,17 +76,14 @@ def classify_szegedai(text: str):
         human_index = 24
         for p in [probs1, probs2, probs3]:
             p[:, human_index] *= 2.0  # Boost human label
-            p = p / p.sum(dim=1, keepdim=True)
         probs = (probs1 + probs2 + probs3) / 3
     human_prob = probs[0][human_index].item() * 100
     ai_prob = 100 - human_prob
-    return {
-        "Human Probability": round(human_prob, 2),
-        "AI Probability": round(ai_prob, 2),
-    }
 # ---------------------------
 # HuggingFace other models

     return text.strip()
 def classify_szegedai(text: str):
+    """
+    ModernBERT ensemble detector with:
+    - Human label boost
+    - Short text handling (<30 words ignored)
+    """
     cleaned_text = clean_text(text)
     if not cleaned_text.strip():
         return {"error": "Empty text"}
+    word_count = len(cleaned_text.split())
+    if word_count < 30:
+        # For very short texts, skip AI classification and assume mostly human
+        return {"Human Probability": 95.0, "AI Probability": 5.0}
     inputs = tokenizer_modernbert(cleaned_text, return_tensors="pt", truncation=True, padding=True).to(device)
     with torch.no_grad():
         logits_1 = model_1(**inputs).logits
         logits_2 = model_2(**inputs).logits
         human_index = 24
         for p in [probs1, probs2, probs3]:
             p[:, human_index] *= 2.0  # Boost human label
+            p = p / p.sum(dim=1, keepdim=True)  # Re-normalize
         probs = (probs1 + probs2 + probs3) / 3
     human_prob = probs[0][human_index].item() * 100
     ai_prob = 100 - human_prob
+    return {"Human Probability": round(human_prob, 2), "AI Probability": round(ai_prob, 2)}
 # ---------------------------
 # HuggingFace other models