Spaces:

yagnik12
/

AI_Text_Detecter

Running

App Files Files Community

yagnik12 committed on Sep 12, 2025

Commit

27f9fcd

verified ·

1 Parent(s): ed4fe2b

Update ai_text_detector_valid_final.py

Browse files

Files changed (1) hide show

ai_text_detector_valid_final.py +57 -81

ai_text_detector_valid_final.py CHANGED Viewed

@@ -2,98 +2,74 @@ import os
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import requests
-HF_TOKEN = os.getenv("HF_TOKEN")
-# ------------------ Models ------------------
-MODELS = {
-    "DeBERTa Detector": "distilbert-base-uncased-finetuned-sst-2-english",
-    "MonkeyDAnh":"MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
-    "Andreas122001":"andreas122001/roberta-academic-detector"
-}
-# ------------------ Load Local Model ------------------
-def load_model(model_id):
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    model = AutoModelForSequenceClassification.from_pretrained(model_id)
-    return tokenizer, model
-# ------------------ Prediction ------------------
-def predict(text, tokenizer, model):
-    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
-    with torch.no_grad():
-        outputs = model(**inputs)
-        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
-    return probs[0].numpy()  # [human_prob, ai_prob]
-# ------------------ SzegedAI API ------------------
-def szegedai_predict(text):
-    url = "https://hf.space/embed/SzegedAI/AI_Detector/api/predict/"
-    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
-    data = {"data": [text]}
-    response = requests.post(url, json=data, headers=headers)
-    response.raise_for_status()  # Raise error if request fails
-    res = response.json()
-    return res['data'][0]
-# ------------------ Verdict ------------------
-def verdict(ai_prob):
-    if ai_prob < 20:
-        return "Most likely human-written."
-    elif 20 <= ai_prob < 40:
-        return "Possibly human-written with minimal AI assistance."
-    elif 40 <= ai_prob < 60:
-        return "Unclear – could be either human or AI-assisted."
-    elif 60 <= ai_prob < 80:
-        return "Possibly AI-generated, or a human using AI assistance."
-    else:  # ai_prob >= 80
-        return "Likely AI-generated or heavily AI-assisted."
-# ------------------ Detection ------------------
 def detect_text(text):
     results = {}
-    ai_scores = []
-    # ----- Local Models -----
     for name, model_id in MODELS.items():
-        try:
-            tokenizer, model = load_model(model_id)
-            human_score, ai_score = predict(text, tokenizer, model)
-            results[name] = {
-                "Human Probability": round(float(human_score) * 100, 2),
-                "AI Probability": round(float(ai_score) * 100, 2),
-            }
-            ai_scores.append(ai_score * 100)
-        except Exception as e:
-            results[name] = {"error": str(e)}
-    # ----- SzegedAI Detector -----
-    try:
-        human_score, ai_score = szegedai_predict(text)
-        results["SzegedAI Detector"] = {
-            "Human Probability": round(float(human_score) * 100, 2),
-            "AI Probability": round(float(ai_score) * 100, 2),
-        }
-        ai_scores.append(ai_score * 100)
-    except Exception as e:
-        results["SzegedAI Detector"] = {"error": str(e)}
-    # ----- Final Score -----
-    if ai_scores:
-        avg_ai = sum(ai_scores) / len(ai_scores)
-        results["Final Score"] = {"Verdict": verdict(avg_ai)}
     return results
-# ------------------ Main ------------------
 if __name__ == "__main__":
-    text = input("Enter text to analyze:\n")
-    output = detect_text(text)
-    print("\n--- Detection Results ---")
-    for model, scores in output.items():
-        print(f"\n[{model}]")
-        if isinstance(scores, dict):
-            for k, v in scores.items():
-                if isinstance(v, (int, float)):
-                    print(f"{k}: {v}%")
-                else:
-                    print(f"{k}: {v}")

 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import requests
+import numpy as np
+# Hugging Face access token, read from the environment (None when the variable is unset)
+HF_TOKEN = os.getenv("HF_TOKEN")  # export HF_TOKEN="your_token" before running
+# Request headers for the Space API call; left empty when no token is configured
+headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
+# Display name -> Hugging Face model id for the detectors loaded through transformers
+MODELS = {
+    "DeBERTa Detector": "microsoft/deberta-v3-base",  # NOTE(review): looks like the plain base checkpoint, not a detector fine-tune - confirm
+    "MonkeyDAnh": "MonkeyDAnh/deberta-v3-base-finetuned-ai-human-detector",
+    "Andreas122001": "Andreas122001/roberta-base-openai-detector"
+    # SzegedAI is not listed here: it is a Space and is queried by run_szegedai instead
+}
+def run_hf_model(model_id, text):
+    """Run HuggingFace transformer models"""
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        model = AutoModelForSequenceClassification.from_pretrained(model_id)
+        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+        with torch.no_grad():
+            logits = model(**inputs).logits
+        probs = torch.softmax(logits, dim=-1).cpu().numpy()[0]
+        return {"Human Probability": float(probs[0]*100), "AI Probability": float(probs[1]*100)}
+    except Exception as e:
+        return {"error": str(e)}
+def run_szegedai(text):
+    """Call the SzegedAI Space API"""
+    try:
+        response = requests.post(
+            "https://huggingface.co/spaces/SzegedAI/AI_Detector/run/predict",
+            headers=headers,
+            json={"data": [text]},
+            timeout=30
+        )
+        response.raise_for_status()
+        result = response.json()
+        return result  # Raw result, we can format it later
+    except Exception as e:
+        return {"error": str(e)}
 def detect_text(text):
     results = {}
+    # Transformers models
     for name, model_id in MODELS.items():
+        results[name] = run_hf_model(model_id, text)
+    # SzegedAI (Space)
+    results["SzegedAI Detector"] = run_szegedai(text)
+    # Final verdict (simple rule-based)
+    ai_probs = []
+    for v in results.values():
+        if "AI Probability" in v:
+            ai_probs.append(v["AI Probability"])
+    avg_ai = np.mean(ai_probs) if ai_probs else 0
+    if avg_ai > 80:
+        verdict = "Likely AI-generated"
+    elif avg_ai > 40:
+        verdict = "Possibly human-written with AI assistance"
+    else:
+        verdict = "Likely human-written"
+    results["Final Score"] = {"Verdict": verdict}
     return results
 if __name__ == "__main__":
+    sample = "This is a test sentence written by AI or human."
+    print(detect_text(sample))