Spaces:

yagnik12
/

AI_Text_Detecter_HanxiGuo_BiScope-Data

Running

App Files Community

yagnik12 committed on Sep 15

Commit

8bcd35e

verified ·

1 Parent(s): a43517c

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -87

app.py CHANGED Viewed

@@ -2,41 +2,19 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel
 import torch
 import math
-import nltk
-# Download NLTK punkt tokenizer if not present
-nltk.download('punkt')
-from nltk.tokenize import sent_tokenize
-# -------------------------------
-# Load Models
-# -------------------------------
-# Example models: use open-source detectors available on Hugging Face
 detector_names = [
-    "Hello-SimpleAI/chatgpt-detector-roberta",  # public model
-    "roberta-large-openai-detector"             # public model
 ]
-detector_tokenizers = []
-detector_models = []
-for name in detector_names:
-    try:
-        detector_tokenizers.append(AutoTokenizer.from_pretrained(name))
-        detector_models.append(AutoModelForSequenceClassification.from_pretrained(name))
-    except Exception as e:
-        print(f"Error loading model {name}: {e}")
-# GPT-2 for perplexity scoring
 gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
 gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
-# -------------------------------
 # Helper functions
-# -------------------------------
 def compute_perplexity(text: str) -> float:
     enc = gpt2_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
     input_ids = enc.input_ids
@@ -44,6 +22,32 @@ def compute_perplexity(text: str) -> float:
         loss = gpt2_model(input_ids, labels=input_ids).loss
     return math.exp(loss.item())
 def verdict(ai_prob):
     if ai_prob < 20:
         return "Most likely human-written."
@@ -56,70 +60,12 @@ def verdict(ai_prob):
     else:
         return "Likely AI-generated or heavily AI-assisted."
-def analyze_sentence(sentence):
-    # Detector probabilities
-    probs = []
-    for tokenizer, model in zip(detector_tokenizers, detector_models):
-        try:
-            inputs = tokenizer(sentence, return_tensors="pt", truncation=True, max_length=512)
-            with torch.no_grad():
-                logits = model(**inputs).logits
-                probs.append(torch.softmax(logits, dim=1).tolist()[0][1])  # AI probability
-        except Exception as e:
-            print(f"Error analyzing sentence with model: {e}")
-    # GPT-2 perplexity
-    ppl = compute_perplexity(sentence)
-    ppl_score = max(0, min(1, 100 / ppl))
-    # Aggregate
-    if probs:
-        final_ai = sum(probs) / len(probs) * 0.7 + ppl_score * 0.3
-    else:
-        final_ai = ppl_score  # fallback if detectors fail
-    return round(final_ai * 100, 2)
-def analyze_text(text):
-    if not text.strip():
-        return {"error": "Please enter some text to analyze."}
-    sentences = sent_tokenize(text)
-    sentence_results = []
-    total_ai = 0
-    for sent in sentences:
-        ai_prob = analyze_sentence(sent)
-        total_ai += ai_prob
-        sentence_results.append({"sentence": sent, "AI Probability (%)": ai_prob})
-    final_ai_prob = total_ai / len(sentences)
-    final_human_prob = 100 - final_ai_prob
-    final_verdict = verdict(final_ai_prob)
-    return {
-        "Sentence-level Analysis": sentence_results,
-        "Final AI Probability (%)": round(final_ai_prob, 2),
-        "Final Human Probability (%)": round(final_human_prob, 2),
-        "Verdict": final_verdict
-    }
-# -------------------------------
 # Gradio UI
-# -------------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# 🔍 Enhanced AI vs Human Text Detector (Sentence-Level)")
-    user_input = gr.Textbox(
-        label="✍️ Enter Text",
-        placeholder="Paste text here...",
-        lines=12,
-        type="text"
-    )
     run_btn = gr.Button("Run Detection")
     output = gr.JSON(label="Results")
     run_btn.click(analyze_text, inputs=user_input, outputs=output)
 if __name__ == "__main__":

 from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel
 import torch
 import math
+# Load models
 detector_names = [
+    "Hello-SimpleAI/chatgpt-detector-roberta",
+    "roberta-large-openai-detector"
 ]
+detector_tokenizers = [AutoTokenizer.from_pretrained(name) for name in detector_names]
+detector_models = [AutoModelForSequenceClassification.from_pretrained(name) for name in detector_names]
 gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
 gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
 # Helper functions
 def compute_perplexity(text: str) -> float:
     enc = gpt2_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
     input_ids = enc.input_ids
         loss = gpt2_model(input_ids, labels=input_ids).loss
     return math.exp(loss.item())
+def analyze_text(user_text: str):
+    if not user_text.strip():
+        return {"error": "Please enter some text to analyze."}
+    # Detector probabilities (one per classifier listed in detector_names)
+    probs = []
+    for tokenizer, model in zip(detector_tokenizers, detector_models):
+        inputs = tokenizer(user_text, return_tensors="pt", truncation=True, max_length=512)
+        with torch.no_grad():
+            logits = model(**inputs).logits
+            probs.append(torch.softmax(logits, dim=1).tolist()[0][1])  # AI probability
+    # GPT-2 perplexity
+    ppl = compute_perplexity(user_text)
+    ppl_score = max(0, min(1, 100 / ppl))  # normalized to [0,1]
+    # Aggregate result
+    final_ai = sum(probs) / len(probs) * 0.7 + ppl_score * 0.3
+    final_human = 1 - final_ai
+    return {
+        "Final AI Probability": round(final_ai * 100, 2),
+        "Final Human Probability": round(final_human * 100, 2),
+        "Verdict": verdict(final_ai * 100)
+    }
 def verdict(ai_prob):
     if ai_prob < 20:
         return "Most likely human-written."
     else:
         return "Likely AI-generated or heavily AI-assisted."
 # Gradio UI
 with gr.Blocks() as demo:
+    gr.Markdown("# 🔍 Enhanced AI vs Human Text Detector")
+    user_input = gr.Textbox(label="Enter Text", placeholder="Paste text here...", lines=12, type="text")
     run_btn = gr.Button("Run Detection")
     output = gr.JSON(label="Results")
     run_btn.click(analyze_text, inputs=user_input, outputs=output)
 if __name__ == "__main__":