yagnik12 committed on
Commit
93b7207
·
verified ·
1 Parent(s): 3968acc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -16
app.py CHANGED
@@ -3,7 +3,13 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2
3
  import torch
4
  import math
5
  import nltk
6
- nltk.download('punkt')
 
 
 
 
 
 
7
  from nltk.tokenize import sent_tokenize
8
 
9
  # -------------------------------
@@ -36,12 +42,23 @@ def sentence_score(sentence):
36
  inputs = tokenizer(sentence, return_tensors="pt", truncation=True, max_length=512)
37
  with torch.no_grad():
38
  logits = model(**inputs).logits
39
- probs.append(torch.softmax(logits, dim=1).tolist()[0][1])
40
  ppl = compute_perplexity(sentence)
41
  ppl_score = max(0, min(1, 100/ppl))
42
- # Weighted average: 70% model ensemble, 30% perplexity
43
  return sum(probs)/len(probs)*0.7 + ppl_score*0.3
44
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  def analyze_text(user_text):
46
  sentences = sent_tokenize(user_text)
47
  if not sentences:
@@ -50,23 +67,12 @@ def analyze_text(user_text):
50
  sentence_probs = [sentence_score(s) for s in sentences]
51
  final_ai = sum(sentence_probs)/len(sentence_probs)
52
  final_human = 1 - final_ai
53
-
54
- # Verdict
55
- if final_ai < 0.2:
56
- verdict_text = "Most likely human-written."
57
- elif final_ai < 0.4:
58
- verdict_text = "Possibly human-written with minimal AI assistance."
59
- elif final_ai < 0.6:
60
- verdict_text = "Unclear – could be human or AI-assisted."
61
- elif final_ai < 0.8:
62
- verdict_text = "Possibly AI-generated or human using AI assistance."
63
- else:
64
- verdict_text = "Likely AI-generated or heavily AI-assisted."
65
 
66
  return {
67
  "Final AI Probability": round(final_ai*100,2),
68
  "Final Human Probability": round(final_human*100,2),
69
- "Verdict": verdict_text,
70
  "Sentence-level AI probabilities": [round(p*100,2) for p in sentence_probs]
71
  }
72
 
 
3
  import torch
4
  import math
5
  import nltk
6
+
7
# Make sure the sentence-tokenizer data used by sent_tokenize is installed.
# Older NLTK releases load the "punkt" resource, while newer releases load
# "punkt_tab" instead, so both are checked. A resource is only downloaded
# when the local lookup fails, which avoids a network call on every start.
for _resource in ("punkt", "punkt_tab"):
    try:
        nltk.data.find(f"tokenizers/{_resource}")
    except LookupError:
        # nltk.download reports failure by returning False rather than
        # raising, so an unknown resource name on an older NLTK is harmless.
        nltk.download(_resource)
12
+
13
  from nltk.tokenize import sent_tokenize
14
 
15
  # -------------------------------
 
42
  inputs = tokenizer(sentence, return_tensors="pt", truncation=True, max_length=512)
43
  with torch.no_grad():
44
  logits = model(**inputs).logits
45
+ probs.append(torch.softmax(logits, dim=1).tolist()[0][1]) # AI probability
46
  ppl = compute_perplexity(sentence)
47
  ppl_score = max(0, min(1, 100/ppl))
 
48
  return sum(probs)/len(probs)*0.7 + ppl_score*0.3
49
 
50
def verdict(ai_prob):
    """Translate an AI-probability percentage into a verdict sentence.

    Args:
        ai_prob: AI likelihood expressed on a 0-100 percentage scale
            (the caller passes ``final_ai * 100``).

    Returns:
        One of five fixed verdict strings. Each band is 20 points wide and
        its lower bound is inclusive: a value of exactly 20 falls into the
        second band, 40 into the third, and so on. Anything at or above 80
        yields the strongest "AI-generated" verdict.
    """
    if ai_prob < 20:
        return "Most likely human-written."
    if ai_prob < 40:
        return "Possibly human-written with minimal AI assistance."
    if ai_prob < 60:
        return "Unclear – could be human or AI-assisted."
    if ai_prob < 80:
        return "Possibly AI-generated or human using AI assistance."
    return "Likely AI-generated or heavily AI-assisted."
61
+
62
  def analyze_text(user_text):
63
  sentences = sent_tokenize(user_text)
64
  if not sentences:
 
67
  sentence_probs = [sentence_score(s) for s in sentences]
68
  final_ai = sum(sentence_probs)/len(sentence_probs)
69
  final_human = 1 - final_ai
70
+ final_verdict = verdict(final_ai*100)
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  return {
73
  "Final AI Probability": round(final_ai*100,2),
74
  "Final Human Probability": round(final_human*100,2),
75
+ "Verdict": final_verdict,
76
  "Sentence-level AI probabilities": [round(p*100,2) for p in sentence_probs]
77
  }
78