Spaces:

VictorM-Coder
/

AIDetector

Running

App Files Files Community

VictorM-Coder committed on Sep 8, 2025

Commit

ceeca7d

verified ·

1 Parent(s): 54bfac3

Update app.py

Browse files

Files changed (1) hide show

app.py +162 -96

app.py CHANGED Viewed

@@ -1,102 +1,168 @@
-import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import re
-# Use GPU if available
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# One tokenizer shared across models
-tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
-# Ensemble model repos (replace with real Hugging Face repos if names differ)
-model_names = [
-    "mihalykiss/modernbert_2_seed12",
-    "mihalykiss/modernbert_2_seed22",
-    "mihalykiss/modernbert_2_seed32"
-]
-# Load models directly from Hugging Face
-models = []
-for repo in model_names:
-    m = AutoModelForSequenceClassification.from_pretrained(repo).to(device).eval()
-    models.append(m)
-# Label map
-label_mapping = {
-    0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
-    6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
-    11: 'flan_t5_base', 12: 'flan_t5_large', 13: 'flan_t5_small',
-    14: 'flan_t5_xl', 15: 'flan_t5_xxl', 16: 'gemma-7b-it', 17: 'gemma2-9b-it',
-    18: 'gpt-3.5-turbo', 19: 'gpt-35', 20: 'gpt4', 21: 'gpt4o',
-    22: 'gpt_j', 23: 'gpt_neox', 24: 'human', 25: 'llama3-70b', 26: 'llama3-8b',
-    27: 'mixtral-8x7b', 28: 'opt_1.3b', 29: 'opt_125m', 30: 'opt_13b',
-    31: 'opt_2.7b', 32: 'opt_30b', 33: 'opt_350m', 34: 'opt_6.7b',
-    35: 'opt_iml_30b', 36: 'opt_iml_max_1.3b', 37: 't0_11b', 38: 't0_3b',
-    39: 'text-davinci-002', 40: 'text-davinci-003'
-}
-# Text cleanup
-def clean_text(text: str) -> str:
-    text = re.sub(r"\s{2,}", " ", text)
-    text = re.sub(r"\s+([,.;:?!])", r"\1", text)
-    return text.strip()
-# Classification function
-def classify_text(text):
-    cleaned_text = clean_text(text)
-    if not cleaned_text:
-        return "Please paste some text."
-    sentences = re.split(r'(?<=[.!?])\s+', cleaned_text)
-    highlighted = []
-    total_ai, total_human = 0, 0
-    for sent in sentences:
-        if not sent.strip():
-            continue
-        inputs = tokenizer(sent, return_tensors="pt", truncation=True, padding=True).to(device)
-        with torch.no_grad():
-            probs_list = []
-            for m in models:
-                logits = m(**inputs).logits
-                probs_list.append(torch.softmax(logits, dim=1))
-            avg_probs = sum(probs_list) / len(probs_list)
-            probs = avg_probs[0]
-        # Human class = 24, AI = all others
-        ai_probs = probs.clone()
-        ai_probs[24] = 0
-        ai_score = ai_probs.sum().item() * 100
-        human_score = 100 - ai_score
-        total_ai += ai_score
-        total_human += human_score
-        if ai_score > 20:
-            highlighted.append(f"<span class='highlight-ai'>{sent}</span>")
-        else:
-            highlighted.append(f"<span class='highlight-human'>{sent}</span>")
-    # Global verdict
-    if total_human >= total_ai:
-        verdict = f"<br><br><b>Overall: {(total_human/(total_ai+total_human))*100:.2f}% Human</b>"
-    else:
-        verdict = f"<br><br><b>Overall: {(total_ai/(total_ai+total_human))*100:.2f}% AI</b>"
-    return " ".join(highlighted) + verdict
-# Gradio interface with styling
-iface = gr.Interface(
-    fn=classify_text,
-    inputs=gr.Textbox(lines=6, placeholder="Paste text here..."),
-    outputs="html",
-    title="AI Text Detector",
-    description="Detects AI-generated text using a ModernBERT ensemble. Sentences are highlighted:<br>"
-                "<span style='color:#FF5733;font-weight:bold;'>AI-like</span> vs "
-                "<span style='color:#4CAF50;font-weight:bold;'>Human-like</span>."
 )
-iface.launch()

+import os
+import json
+import ast
+import streamlit as st
 import torch
+import torch.nn.functional as F
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import re
+import math
+import logging
+import pandas as pd
+st.set_page_config(  # page title, icon and wide layout for the app
+    page_title="AI Article Detection by Writenix",
+    page_icon="🧠",
+    layout="wide"
+)
+st.logo(  # logo image; `link` is the URL associated with the logo
+    image="https://dejan.ai/wp-content/uploads/2024/02/dejan-300x103.png",
+    link="https://dejan.ai/",
 )
+# --- Load heuristic weights from environment secrets, with JSON→Python fallback ---
+@st.cache_resource
+def load_heuristic_weights():
+    def _load(env_key):
+        raw = os.environ[env_key]
+        try:
+            return json.loads(raw)
+        except json.JSONDecodeError:
+            return ast.literal_eval(raw)
+    ai = _load("AI_WEIGHTS_JSON")
+    og = _load("OG_WEIGHTS_JSON")
+    return ai, og
+AI_WEIGHTS, OG_WEIGHTS = load_heuristic_weights()
+SIGMOID_K = 0.5
+def tokenize(text):
+    return re.findall(r'\b[a-z]{2,}\b', text.lower())
+def classify_text_likelihood(text: str) -> float:
+    tokens = tokenize(text)
+    if not tokens:
+        return 0.5
+    ai_score = og_score = matched = 0
+    for t in tokens:
+        aw = AI_WEIGHTS.get(t, 0)
+        ow = OG_WEIGHTS.get(t, 0)
+        if aw or ow:
+            matched += 1
+            ai_score += aw
+            og_score += ow
+    if matched == 0:
+        return 0.5
+    net = ai_score - og_score
+    return 1 / (1 + math.exp(-SIGMOID_K * net))
+# --- Logging & Streamlit setup ---
+logging.basicConfig(level=logging.INFO)  # configure logging at INFO level
+logger = logging.getLogger(__name__)  # module logger; used below to record model-load failures
+st.markdown("""
+<link href="https://fonts.googleapis.com/css2?family=Roboto&display=swap" rel="stylesheet">
+<style>
+    html, body, [class*="css"] {
+        font-family: 'Roboto', sans-serif;
+    }
+</style>
+""", unsafe_allow_html=True)  # static markup only: loads the Roboto font and applies it page-wide
+@st.cache_resource
+def load_model_and_tokenizer(model_name):
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    dtype = torch.bfloat16 if (device.type=="cuda" and torch.cuda.is_bf16_supported()) else torch.float32
+    model = AutoModelForSequenceClassification.from_pretrained(model_name, torch_dtype=dtype)
+    model.to(device).eval()
+    return tokenizer, model, device
+MODEL_NAME = "dejanseo/ai-cop"  # model identifier passed to from_pretrained
+try:
+    tokenizer, model, device = load_model_and_tokenizer(MODEL_NAME)
+except Exception as e:  # top-level boundary: report any load failure in the UI and the log
+    st.error(f"Error loading model: {e}")
+    logger.error(f"Failed to load model: {e}", exc_info=True)  # exc_info=True attaches the traceback
+    st.stop()  # the rest of the script needs tokenizer/model/device
+def sent_tokenize(text):
+    return [s for s in re.split(r'(?<=[\.!?])\s+', text.strip()) if s]
+st.title("AI Article Detection")
+text = st.text_area("Enter text to classify", height=200, placeholder="Paste your text here…")
+if st.button("Classify", type="primary"):
+    if not text.strip():
+        st.warning("Please enter some text.")
+    else:
+        with st.spinner("Analyzing…"):
+            sentences = sent_tokenize(text)
+            if not sentences:
+                st.warning("No sentences detected.")
+                st.stop()
+            inputs = tokenizer(
+                sentences,
+                return_tensors="pt",
+                padding=True,
+                truncation=True,
+                max_length=model.config.max_position_embeddings
+            ).to(device)
+            with torch.no_grad():
+                logits = model(**inputs).logits
+                probs = F.softmax(logits, dim=-1).cpu()
+                preds = torch.argmax(probs, dim=-1).cpu()
+            # Create dataframe for sentences
+            sentences_data = []
+            highlighted_sentences = []
+            for i, s in enumerate(sentences):
+                p = preds[i].item()
+                conf = probs[i, p].item()
+                label = "AI" if p == 0 else "Human"
+                sentences_data.append({
+                    "sentence": s,
+                    "classification": label,
+                    "confidence": conf
+                })
+                if label == "AI":
+                    highlighted_sentences.append(f"<span style='color:red; font-weight:bold'>{s}</span>")
+                else:
+                    highlighted_sentences.append(f"<span style='color:green; font-weight:bold'>{s}</span>")
+            # Display dataframe
+            df = pd.DataFrame(sentences_data)
+            st.dataframe(
+                df,
+                column_config={
+                    "sentence": st.column_config.TextColumn("Sentence"),
+                    "classification": st.column_config.TextColumn("Classification"),
+                    "confidence": st.column_config.ProgressColumn(
+                        "Confidence",
+                        help="Model's confidence in the classification",
+                        format="%.2f",
+                        min_value=0,
+                        max_value=1,
+                    ),
+                },
+                hide_index=True,
+            )
+            # Highlighted text output
+            st.markdown("### 🔍 Highlighted Text")
+            st.markdown(" ".join(highlighted_sentences), unsafe_allow_html=True)
+            avg = torch.mean(probs, dim=0)
+            model_ai = avg[0].item()
+            heuristic_ai = classify_text_likelihood(text)
+            combined = min(model_ai + heuristic_ai, 1.0)
+            st.subheader(f"⚖️ AI Likelihood: {combined*100:.1f}%")
+            st.write(f"🤖 Model: {model_ai*100:.1f}%")
+            st.write(f"🛠️ Heuristic: {heuristic_ai*100:.1f}%")