yagnik12 committed on
Commit
27f9fcd
·
verified ·
1 Parent(s): ed4fe2b

Update ai_text_detector_valid_final.py

Browse files
Files changed (1) hide show
  1. ai_text_detector_valid_final.py +57 -81
ai_text_detector_valid_final.py CHANGED
@@ -2,98 +2,74 @@ import os
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  import requests
 
5
 
6
- HF_TOKEN = os.getenv("HF_TOKEN")
7
- # ------------------ Models ------------------
8
- MODELS = {
9
- "DeBERTa Detector": "distilbert-base-uncased-finetuned-sst-2-english",
10
- "MonkeyDAnh":"MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
11
- "Andreas122001":"andreas122001/roberta-academic-detector"
12
- }
13
 
14
- # ------------------ Load Local Model ------------------
15
def load_model(model_id):
    """Fetch the tokenizer/model pair for a Hugging Face checkpoint.

    Returns a (tokenizer, model) tuple ready for sequence classification.
    Downloads weights on first use; no caching is done here.
    """
    return (
        AutoTokenizer.from_pretrained(model_id),
        AutoModelForSequenceClassification.from_pretrained(model_id),
    )
19
 
20
- # ------------------ Prediction ------------------
21
def predict(text, tokenizer, model):
    """Score *text* with a local sequence-classification model.

    Returns a 1-D numpy array of class probabilities. Callers treat index 0
    as "human" and index 1 as "AI" — this label order is assumed, not
    verified against the model config (NOTE(review): confirm via id2label).
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():  # inference only — no autograd bookkeeping
        logits = model(**encoded).logits
    return torch.softmax(logits, dim=-1)[0].numpy()
27
 
28
- # ------------------ SzegedAI API ------------------
29
def szegedai_predict(text):
    """Query the SzegedAI detector Space over HTTP and return its first result.

    Raises requests.HTTPError on a non-2xx response.
    NOTE(review): this targets the legacy hf.space embed endpoint, which may
    no longer be served — confirm the Space's current API URL.
    """
    payload = {"data": [text]}
    auth = {"Authorization": f"Bearer {HF_TOKEN}"}
    response = requests.post(
        "https://hf.space/embed/SzegedAI/AI_Detector/api/predict/",
        json=payload,
        headers=auth,
    )
    response.raise_for_status()  # Raise error if request fails
    return response.json()['data'][0]
 
 
 
37
 
38
- # ------------------ Verdict ------------------
39
def verdict(ai_prob):
    """Map an averaged AI-probability percentage (0-100) to a verdict string.

    Band edges are inclusive on the lower bound: e.g. 20 falls in the
    "minimal AI assistance" band, 80 and above is "likely AI-generated".
    """
    bands = (
        (20, "Most likely human-written."),
        (40, "Possibly human-written with minimal AI assistance."),
        (60, "Unclear – could be either human or AI-assisted."),
        (80, "Possibly AI-generated, or a human using AI assistance."),
    )
    for upper, label in bands:
        if ai_prob < upper:
            return label
    return "Likely AI-generated or heavily AI-assisted."
 
 
50
 
51
- # ------------------ Detection ------------------
52
def detect_text(text):
    """Run every configured detector on *text* and aggregate a final verdict.

    Each detector contributes either a probability dict or an {"error": ...}
    entry; detectors that fail never abort the run. When at least one AI
    score was collected, a "Final Score" entry with the averaged verdict is
    appended.
    """
    results = {}
    ai_scores = []

    def _record(label, pair):
        # pair is a (human, ai) probability tuple in [0, 1]
        human_score, ai_score = pair
        results[label] = {
            "Human Probability": round(float(human_score) * 100, 2),
            "AI Probability": round(float(ai_score) * 100, 2),
        }
        ai_scores.append(ai_score * 100)

    # ----- Local Models -----
    for name, model_id in MODELS.items():
        try:
            _record(name, predict(text, *load_model(model_id)))
        except Exception as e:
            results[name] = {"error": str(e)}

    # ----- SzegedAI Detector -----
    try:
        _record("SzegedAI Detector", szegedai_predict(text))
    except Exception as e:
        results["SzegedAI Detector"] = {"error": str(e)}

    # ----- Final Score -----
    if ai_scores:
        results["Final Score"] = {"Verdict": verdict(sum(ai_scores) / len(ai_scores))}
    return results
86
 
87
- # ------------------ Main ------------------
88
if __name__ == "__main__":
    # Interactive entry point: detect first, then pretty-print per model.
    text = input("Enter text to analyze:\n")
    output = detect_text(text)
    print("\n--- Detection Results ---")
    for model, scores in output.items():
        print(f"\n[{model}]")
        if isinstance(scores, dict):
            for k, v in scores.items():
                # Numeric entries are percentages; everything else prints raw.
                suffix = "%" if isinstance(v, (int, float)) else ""
                print(f"{k}: {v}{suffix}")
 
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  import requests
5
import numpy as np

# Hugging Face Token
HF_TOKEN = os.getenv("HF_TOKEN")  # export HF_TOKEN="your_token" before running

# Headers for API — empty dict when no token is configured, so public
# endpoints still work unauthenticated.
headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

# Multiple AI text detection models (Hub repo ids consumed by run_hf_model).
# NOTE(review): "microsoft/deberta-v3-base" is a base checkpoint, not an AI-text
# detector — its classification head is randomly initialized, so its scores are
# meaningless; swap in a fine-tuned detector checkpoint.
# NOTE(review): confirm the "Andreas122001" repo-id capitalization resolves on
# the Hub (the previous revision used lowercase "andreas122001").
MODELS = {
    "DeBERTa Detector": "microsoft/deberta-v3-base",
    "MonkeyDAnh": "MonkeyDAnh/deberta-v3-base-finetuned-ai-human-detector",
    "Andreas122001": "Andreas122001/roberta-base-openai-detector"
    # SzegedAI handled separately since it's a Space
}
20
 
21
# Cache of (tokenizer, model) pairs keyed by repo id, so repeated calls do not
# re-download and re-initialize the same checkpoint on every prediction.
_MODEL_CACHE = {}


def run_hf_model(model_id, text):
    """Classify *text* with a Hugging Face sequence-classification checkpoint.

    Parameters:
        model_id: Hub repo id of the model to run.
        text: passage to score.

    Returns:
        {"Human Probability": float, "AI Probability": float} as percentages,
        or {"error": str} if loading or inference fails.

    NOTE(review): index 0 is assumed to be the "human" label and index 1 the
    "AI" label — confirm against each model's id2label config.
    """
    try:
        if model_id not in _MODEL_CACHE:
            _MODEL_CACHE[model_id] = (
                AutoTokenizer.from_pretrained(model_id),
                AutoModelForSequenceClassification.from_pretrained(model_id),
            )
        tokenizer, model = _MODEL_CACHE[model_id]
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():  # inference only
            logits = model(**inputs).logits
        probs = torch.softmax(logits, dim=-1).cpu().numpy()[0]
        return {"Human Probability": float(probs[0]*100), "AI Probability": float(probs[1]*100)}
    except Exception as e:
        # Best-effort: report the failure per model instead of aborting the run.
        return {"error": str(e)}
33
 
34
def run_szegedai(text):
    """Call the SzegedAI Space API"""
    # NOTE(review): Gradio Spaces normally serve their API from *.hf.space, not
    # from huggingface.co/spaces/... — confirm this URL answers /run/predict.
    try:
        response = requests.post(
            "https://huggingface.co/spaces/SzegedAI/AI_Detector/run/predict",
            headers=headers,  # module-level auth headers (empty without HF_TOKEN)
            json={"data": [text]},
            timeout=30
        )
        response.raise_for_status()
        result = response.json()
        # Returned as-is: the payload shape is whatever the Space emits, so the
        # caller cannot rely on probability keys being present.
        return result  # Raw result, we can format it later
    except Exception as e:
        # Network / HTTP / JSON failures are reported, never raised.
        return {"error": str(e)}
48
 
 
49
def detect_text(text):
    """Run all detectors on *text* and attach an averaged final verdict.

    Returns a dict keyed by detector name; each value is a probability dict,
    a raw Space payload, or {"error": str}. A "Final Score" entry carries the
    rule-based verdict derived from the mean AI probability.
    """
    results = {}
    # Transformers models
    for name, model_id in MODELS.items():
        results[name] = run_hf_model(model_id, text)

    # SzegedAI (Space) — may return a raw, non-dict payload
    results["SzegedAI Detector"] = run_szegedai(text)

    # Final verdict (simple rule-based): average AI probabilities from the
    # detectors that produced one. The isinstance guard matters because
    # run_szegedai returns response.json() as-is, which may be a list, string,
    # number or None — `"AI Probability" in v` on such values can raise
    # TypeError or match spuriously.
    ai_probs = [
        v["AI Probability"]
        for v in results.values()
        if isinstance(v, dict) and "AI Probability" in v
    ]
    avg_ai = np.mean(ai_probs) if ai_probs else 0
    if avg_ai > 80:
        verdict = "Likely AI-generated"
    elif avg_ai > 40:
        verdict = "Possibly human-written with AI assistance"
    else:
        verdict = "Likely human-written"
    results["Final Score"] = {"Verdict": verdict}
    return results
72
 
 
73
if __name__ == "__main__":
    # Smoke-test entry point: score a fixed sample and dump the raw report.
    sample_text = "This is a test sentence written by AI or human."
    report = detect_text(sample_text)
    print(report)