Spaces:

yagnik12
/

AI_Text_Detecter_HanxiGuo_BiScope-Data

Running

App Files Files Community

yagnik12 committed on Sep 15

Commit

1c31b93

verified ·

1 Parent(s): 7c62b75

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -27

app.py CHANGED Viewed

@@ -2,19 +2,32 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel
 import torch
 import math
 # Load models
 detector_names = [
     "Hello-SimpleAI/chatgpt-detector-roberta",
     "roberta-large-openai-detector"
 ]
 detector_tokenizers = [AutoTokenizer.from_pretrained(name) for name in detector_names]
 detector_models = [AutoModelForSequenceClassification.from_pretrained(name) for name in detector_names]
 gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
 gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
 # Helper functions
 def compute_perplexity(text: str) -> float:
     enc = gpt2_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
     input_ids = enc.input_ids
@@ -22,50 +35,78 @@ def compute_perplexity(text: str) -> float:
         loss = gpt2_model(input_ids, labels=input_ids).loss
     return math.exp(loss.item())
-def analyze_text(user_text: str):
-    if not user_text.strip():
-        return {"error": "Please enter some text to analyze."}
-    # Model 1: ChatGPT detector
     probs = []
     for tokenizer, model in zip(detector_tokenizers, detector_models):
-        inputs = tokenizer(user_text, return_tensors="pt", truncation=True, max_length=512)
         with torch.no_grad():
             logits = model(**inputs).logits
             probs.append(torch.softmax(logits, dim=1).tolist()[0][1])  # AI probability
-    # Model 2: GPT-2 Perplexity
-    ppl = compute_perplexity(user_text)
-    ppl_score = max(0, min(1, 100 / ppl))  # normalized to [0,1]
-    # Aggregate result
     final_ai = sum(probs) / len(probs) * 0.7 + ppl_score * 0.3
-    final_human = 1 - final_ai
     return {
-        "Final AI Probability": round(final_ai * 100, 2),
-        "Final Human Probability": round(final_human * 100, 2),
-        "Verdict": verdict(final_ai * 100)
     }
-def verdict(ai_prob):
-    if ai_prob < 20:
-        return "Most likely human-written."
-    elif 20 <= ai_prob < 40:
-        return "Possibly human-written with minimal AI assistance."
-    elif 40 <= ai_prob < 60:
-        return "Unclear – could be either human or AI-assisted."
-    elif 60 <= ai_prob < 80:
-        return "Possibly AI-generated, or a human using AI assistance."
-    else:
-        return "Likely AI-generated or heavily AI-assisted."
 # Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("# 🔍 Enhanced AI vs Human Text Detector")
-    user_input = gr.Textbox(label="Enter Text", placeholder="Paste text here...", lines=12, type="text")
     run_btn = gr.Button("Run Detection")
     output = gr.JSON(label="Results")
     run_btn.click(analyze_text, inputs=user_input, outputs=output)
 if __name__ == "__main__":

 from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel
 import torch
 import math
+import nltk
+nltk.download('punkt')
+from nltk.tokenize import sent_tokenize
+# -------------------------------
 # Load models
+# -------------------------------
+# Detector models (placeholders for fine-tuned models)
 detector_names = [
     "Hello-SimpleAI/chatgpt-detector-roberta",
     "roberta-large-openai-detector"
 ]
 detector_tokenizers = [AutoTokenizer.from_pretrained(name) for name in detector_names]
 detector_models = [AutoModelForSequenceClassification.from_pretrained(name) for name in detector_names]
+# GPT-2 for perplexity scoring
 gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
 gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
+# -------------------------------
 # Helper functions
+# -------------------------------
 def compute_perplexity(text: str) -> float:
     enc = gpt2_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
     input_ids = enc.input_ids
         loss = gpt2_model(input_ids, labels=input_ids).loss
     return math.exp(loss.item())
def verdict(ai_prob):
    """Translate an AI probability (0-100) into a human-readable verdict.

    The value is checked against the upper limits 20, 40, 60 and 80 in turn;
    the first limit it falls below selects the message.  A value that is not
    below any limit (80 and above) gets the final "likely AI" message.
    """
    bands = (
        (20, "Most likely human-written."),
        (40, "Possibly human-written with minimal AI assistance."),
        (60, "Unclear – could be either human or AI-assisted."),
        (80, "Possibly AI-generated, or a human using AI assistance."),
    )
    for upper_limit, message in bands:
        if ai_prob < upper_limit:
            return message
    return "Likely AI-generated or heavily AI-assisted."
def analyze_sentence(sentence):
    """Score one sentence and return its AI probability as a percentage.

    Two signals are blended:

    * the mean of the class-1 softmax probabilities produced by the detector
      classifiers (weight 0.7), and
    * a perplexity score, ``100 / perplexity`` clamped to [0, 1]
      (weight 0.3), so a lower perplexity pushes the result towards "AI".

    When the perplexity is NaN the perplexity term is dropped and the detector
    average is returned on its own.

    Args:
        sentence: Text of a single sentence.

    Returns:
        The blended AI probability in percent, rounded to two decimals.
    """
    # Detector probabilities
    probs = []
    for tokenizer, model in zip(detector_tokenizers, detector_models):
        inputs = tokenizer(sentence, return_tensors="pt", truncation=True, max_length=512)
        with torch.no_grad():
            logits = model(**inputs).logits
            # NOTE(review): assumes class index 1 is the "AI" label for every
            # detector - confirm against each model's config.id2label.
            probs.append(torch.softmax(logits, dim=1).tolist()[0][1])
    detector_avg = sum(probs) / len(probs)

    # GPT-2 perplexity
    ppl = compute_perplexity(sentence)
    if math.isnan(ppl):
        # min(1, nan) evaluates to 1, so a NaN perplexity would otherwise be
        # counted as the maximum AI score.  Presumably this occurs when the
        # sentence is too short for the language model to yield a loss
        # (TODO confirm); rely on the detectors alone in that case.
        return round(detector_avg * 100, 2)
    ppl_score = max(0, min(1, 100 / ppl))

    # Aggregate
    final_ai = detector_avg * 0.7 + ppl_score * 0.3
    return round(final_ai * 100, 2)  # return in percentage
def analyze_text(text):
    """Run sentence-level AI detection over ``text`` and summarise the result.

    The text is split with ``sent_tokenize``; every sentence is scored by
    ``analyze_sentence`` and the final AI probability is the plain mean of
    those per-sentence percentages.

    Args:
        text: The text to analyse.  ``None``, empty and whitespace-only
            input are all rejected with an error dict.

    Returns:
        ``{"error": ...}`` when there is nothing to analyse, otherwise a dict
        with the keys ``"Sentence-level Analysis"`` (list of per-sentence
        dicts), ``"Final AI Probability (%)"``,
        ``"Final Human Probability (%)"`` and ``"Verdict"``.
    """
    # Guard against None as well as blank strings so .strip() cannot fail.
    if not text or not text.strip():
        return {"error": "Please enter some text to analyze."}

    sentences = sent_tokenize(text)
    if not sentences:
        # Nothing to average over; avoid dividing by zero below.
        return {"error": "Please enter some text to analyze."}

    sentence_results = [
        {"sentence": sent, "AI Probability (%)": analyze_sentence(sent)}
        for sent in sentences
    ]

    # Final aggregated AI probability: unweighted mean over sentences.
    total_ai = sum(result["AI Probability (%)"] for result in sentence_results)
    final_ai_prob = total_ai / len(sentence_results)
    final_human_prob = 100 - final_ai_prob

    return {
        "Sentence-level Analysis": sentence_results,
        "Final AI Probability (%)": round(final_ai_prob, 2),
        "Final Human Probability (%)": round(final_human_prob, 2),
        "Verdict": verdict(final_ai_prob),
    }
+# -------------------------------
 # Gradio UI
+# -------------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("# 🔍 Enhanced AI vs Human Text Detector (Sentence-Level)")
+    user_input = gr.Textbox(
+        label="✍️ Enter Text",
+        placeholder="Paste text here...",
+        lines=12,
+        type="text"
+    )
     run_btn = gr.Button("Run Detection")
     output = gr.JSON(label="Results")
     run_btn.click(analyze_text, inputs=user_input, outputs=output)
 if __name__ == "__main__":