yagnik12 committed on
Commit
44d54f5
·
verified ·
1 Parent(s): 95adb4e

Update ai_text_detector_valid_final.py

Browse files
Files changed (1) hide show
  1. ai_text_detector_valid_final.py +13 -5
ai_text_detector_valid_final.py CHANGED
@@ -48,11 +48,22 @@ def clean_text(text: str) -> str:
48
  return text.strip()
49
 
50
  def classify_szegedai(text: str):
 
 
 
 
 
51
  cleaned_text = clean_text(text)
52
  if not cleaned_text.strip():
53
  return {"error": "Empty text"}
54
 
 
 
 
 
 
55
  inputs = tokenizer_modernbert(cleaned_text, return_tensors="pt", truncation=True, padding=True).to(device)
 
56
  with torch.no_grad():
57
  logits_1 = model_1(**inputs).logits
58
  logits_2 = model_2(**inputs).logits
@@ -65,17 +76,14 @@ def classify_szegedai(text: str):
65
  human_index = 24
66
  for p in [probs1, probs2, probs3]:
67
  p[:, human_index] *= 2.0 # Boost human label
68
- p = p / p.sum(dim=1, keepdim=True)
69
 
70
  probs = (probs1 + probs2 + probs3) / 3
71
 
72
  human_prob = probs[0][human_index].item() * 100
73
  ai_prob = 100 - human_prob
74
 
75
- return {
76
- "Human Probability": round(human_prob, 2),
77
- "AI Probability": round(ai_prob, 2),
78
- }
79
 
80
  # ---------------------------
81
  # HuggingFace other models
 
48
  return text.strip()
49
 
50
  def classify_szegedai(text: str):
51
+ """
52
+ ModernBERT ensemble detector with:
53
+ - Human label boost
54
+ - Short text handling (<30 words ignored)
55
+ """
56
  cleaned_text = clean_text(text)
57
  if not cleaned_text.strip():
58
  return {"error": "Empty text"}
59
 
60
+ word_count = len(cleaned_text.split())
61
+ if word_count < 30:
62
+ # For very short texts, skip AI classification and assume mostly human
63
+ return {"Human Probability": 95.0, "AI Probability": 5.0}
64
+
65
  inputs = tokenizer_modernbert(cleaned_text, return_tensors="pt", truncation=True, padding=True).to(device)
66
+
67
  with torch.no_grad():
68
  logits_1 = model_1(**inputs).logits
69
  logits_2 = model_2(**inputs).logits
 
76
  human_index = 24
77
  for p in [probs1, probs2, probs3]:
78
  p[:, human_index] *= 2.0 # Boost human label
79
+ p = p / p.sum(dim=1, keepdim=True) # Re-normalize
80
 
81
  probs = (probs1 + probs2 + probs3) / 3
82
 
83
  human_prob = probs[0][human_index].item() * 100
84
  ai_prob = 100 - human_prob
85
 
86
+ return {"Human Probability": round(human_prob, 2), "AI Probability": round(ai_prob, 2)}
 
 
 
87
 
88
  # ---------------------------
89
  # HuggingFace other models