Spaces:

yagnik12
/

AI_Text_Detecter_HanxiGuo_BiScope-Data

Running

App Files Files Community

yagnik12 committed on Sep 15, 2025

Commit

93b7207

verified ·

1 Parent(s): 3968acc

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -16

app.py CHANGED Viewed

@@ -3,7 +3,13 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2
 import torch
 import math
 import nltk
-nltk.download('punkt')
 from nltk.tokenize import sent_tokenize
 # -------------------------------
@@ -36,12 +42,23 @@ def sentence_score(sentence):
         inputs = tokenizer(sentence, return_tensors="pt", truncation=True, max_length=512)
         with torch.no_grad():
             logits = model(**inputs).logits
-            probs.append(torch.softmax(logits, dim=1).tolist()[0][1])
     ppl = compute_perplexity(sentence)
     ppl_score = max(0, min(1, 100/ppl))
-    # Weighted average: 70% model ensemble, 30% perplexity
     return sum(probs)/len(probs)*0.7 + ppl_score*0.3
 def analyze_text(user_text):
     sentences = sent_tokenize(user_text)
     if not sentences:
@@ -50,23 +67,12 @@ def analyze_text(user_text):
     sentence_probs = [sentence_score(s) for s in sentences]
     final_ai = sum(sentence_probs)/len(sentence_probs)
     final_human = 1 - final_ai
-    # Verdict
-    if final_ai < 0.2:
-        verdict_text = "Most likely human-written."
-    elif final_ai < 0.4:
-        verdict_text = "Possibly human-written with minimal AI assistance."
-    elif final_ai < 0.6:
-        verdict_text = "Unclear – could be human or AI-assisted."
-    elif final_ai < 0.8:
-        verdict_text = "Possibly AI-generated or human using AI assistance."
-    else:
-        verdict_text = "Likely AI-generated or heavily AI-assisted."
     return {
         "Final AI Probability": round(final_ai*100,2),
         "Final Human Probability": round(final_human*100,2),
-        "Verdict": verdict_text,
         "Sentence-level AI probabilities": [round(p*100,2) for p in sentence_probs]
     }

 import torch
 import math
 import nltk
+# Download Punkt tokenizer if not already available
+try:
+    nltk.data.find("tokenizers/punkt")
+except LookupError:
+    nltk.download("punkt")
 from nltk.tokenize import sent_tokenize
 # -------------------------------
         inputs = tokenizer(sentence, return_tensors="pt", truncation=True, max_length=512)
         with torch.no_grad():
             logits = model(**inputs).logits
+            probs.append(torch.softmax(logits, dim=1).tolist()[0][1])  # AI probability
     ppl = compute_perplexity(sentence)
     ppl_score = max(0, min(1, 100/ppl))
     return sum(probs)/len(probs)*0.7 + ppl_score*0.3
+def verdict(ai_prob):
+    """Return a human-readable verdict string for an AI-probability score.
+
+    ``ai_prob`` is compared against the cut-offs 20, 40, 60 and 80, so it is
+    expected on a 0-100 percentage scale (the caller in ``analyze_text``
+    passes ``final_ai*100``). Each cut-off is an exclusive upper bound: a
+    score of exactly 20 falls into the second band, and anything at or
+    above 80 receives the final message. No range check is performed, so
+    values below 0 land in the first band and values above 100 in the last.
+    """
+    if ai_prob < 20:
+        return "Most likely human-written."
+    elif ai_prob < 40:
+        return "Possibly human-written with minimal AI assistance."
+    elif ai_prob < 60:
+        return "Unclear – could be human or AI-assisted."
+    elif ai_prob < 80:
+        return "Possibly AI-generated or human using AI assistance."
+    else:
+        return "Likely AI-generated or heavily AI-assisted."
 def analyze_text(user_text):
     sentences = sent_tokenize(user_text)
     if not sentences:
     sentence_probs = [sentence_score(s) for s in sentences]
     final_ai = sum(sentence_probs)/len(sentence_probs)
     final_human = 1 - final_ai
+    final_verdict = verdict(final_ai*100)
     return {
         "Final AI Probability": round(final_ai*100,2),
         "Final Human Probability": round(final_human*100,2),
+        "Verdict": final_verdict,
         "Sentence-level AI probabilities": [round(p*100,2) for p in sentence_probs]
     }