VictorM-Coder committed on
Commit
49d2f3f
·
verified ·
1 Parent(s): 4828349

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -113
app.py CHANGED
@@ -1,125 +1,79 @@
1
- import streamlit as st
2
  import torch
3
  import torch.nn.functional as F
4
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
  import re
6
- import logging
7
  import pandas as pd
8
-
9
- st.set_page_config(
10
- page_title="AI Article Detection by Writenix",
11
- page_icon="🧠",
12
- layout="wide"
13
- )
14
-
15
- st.logo(
16
- image="https://dejan.ai/wp-content/uploads/2024/02/dejan-300x103.png",
17
- link="https://dejan.ai/",
18
- )
19
-
20
- # --- Logging & Streamlit setup ---
21
- logging.basicConfig(level=logging.INFO)
22
- logger = logging.getLogger(__name__)
23
-
24
- st.markdown("""
25
- <link href="https://fonts.googleapis.com/css2?family=Roboto&display=swap" rel="stylesheet">
26
- <style>
27
- html, body, [class*="css"] {
28
- font-family: 'Roboto', sans-serif;
29
- }
30
- </style>
31
- """, unsafe_allow_html=True)
32
-
33
- @st.cache_resource
34
- def load_model_and_tokenizer(model_name):
35
- tokenizer = AutoTokenizer.from_pretrained(model_name)
36
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
37
- dtype = torch.bfloat16 if (device.type=="cuda" and torch.cuda.is_bf16_supported()) else torch.float32
38
- model = AutoModelForSequenceClassification.from_pretrained(model_name, torch_dtype=dtype)
39
- model.to(device).eval()
40
- return tokenizer, model, device
41
 
42
  MODEL_NAME = "dejanseo/ai-cop"
43
- try:
44
- tokenizer, model, device = load_model_and_tokenizer(MODEL_NAME)
45
- except Exception as e:
46
- st.error(f"Error loading model: {e}")
47
- logger.error(f"Failed to load model: {e}", exc_info=True)
48
- st.stop()
 
49
 
50
  def sent_tokenize(text):
51
  return [s for s in re.split(r'(?<=[\.!?])\s+', text.strip()) if s]
52
 
53
- st.title("AI Article Detection")
54
-
55
- text = st.text_area("Enter text to classify", height=200, placeholder="Paste your text here…")
56
-
57
- if st.button("Classify", type="primary"):
58
  if not text.strip():
59
- st.warning("Please enter some text.")
60
- else:
61
- with st.spinner("Analyzing…"):
62
- sentences = sent_tokenize(text)
63
- if not sentences:
64
- st.warning("No sentences detected.")
65
- st.stop()
66
-
67
- inputs = tokenizer(
68
- sentences,
69
- return_tensors="pt",
70
- padding=True,
71
- truncation=True,
72
- max_length=model.config.max_position_embeddings
73
- ).to(device)
74
-
75
- with torch.no_grad():
76
- logits = model(**inputs).logits
77
- probs = F.softmax(logits, dim=-1).cpu()
78
- preds = torch.argmax(probs, dim=-1).cpu()
79
-
80
- # Create dataframe for sentences
81
- sentences_data = []
82
- highlighted_sentences = []
83
- for i, s in enumerate(sentences):
84
- p = preds[i].item()
85
- conf = probs[i, p].item()
86
- label = "AI" if p == 0 else "Human"
87
-
88
- sentences_data.append({
89
- "sentence": s,
90
- "classification": label,
91
- "confidence": conf
92
- })
93
-
94
- if label == "AI":
95
- highlighted_sentences.append(f"<span style='color:red; font-weight:bold'>{s}</span>")
96
- else:
97
- highlighted_sentences.append(f"<span style='color:green; font-weight:bold'>{s}</span>")
98
-
99
- # Display dataframe
100
- df = pd.DataFrame(sentences_data)
101
- st.dataframe(
102
- df,
103
- column_config={
104
- "sentence": st.column_config.TextColumn("Sentence"),
105
- "classification": st.column_config.TextColumn("Classification"),
106
- "confidence": st.column_config.ProgressColumn(
107
- "Confidence",
108
- help="Model's confidence in the classification",
109
- format="%.2f",
110
- min_value=0,
111
- max_value=1,
112
- ),
113
- },
114
- hide_index=True,
115
- )
116
-
117
- # Highlighted text output
118
- st.markdown("### 🔍 Highlighted Text")
119
- st.markdown(" ".join(highlighted_sentences), unsafe_allow_html=True)
120
-
121
- # Overall score (just model avg)
122
- avg = torch.mean(probs, dim=0)
123
- model_ai = avg[0].item()
124
-
125
- st.subheader(f"⚖️ AI Likelihood: {model_ai*100:.1f}%")
 
 
1
  import torch
2
  import torch.nn.functional as F
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  import re
 
5
  import pandas as pd
6
+ import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
MODEL_NAME = "dejanseo/ai-cop"

# --- Load model ---
# Choose the compute device first, then the fastest dtype it supports:
# bfloat16 on capable CUDA GPUs, plain float32 everywhere else.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda" and torch.cuda.is_bf16_supported():
    dtype = torch.bfloat16
else:
    dtype = torch.float32

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, torch_dtype=dtype)
model.to(device).eval()  # inference only — disable dropout / training behavior
16
 
17
def sent_tokenize(text):
    """Split *text* into sentences.

    A sentence boundary is any whitespace run that directly follows
    '.', '!' or '?'. Empty fragments are dropped, so blank or
    whitespace-only input yields an empty list.
    """
    fragments = re.split(r'(?<=[\.!?])\s+', text.strip())
    return [fragment for fragment in fragments if fragment]
19
 
20
# --- Inference function ---
def classify_text(text):
    """Classify each sentence of *text* as AI- or human-written.

    Returns a 3-tuple:
      - summary string with the overall AI likelihood (or a warning message),
      - HTML string with each sentence colour-coded (red = AI, green = Human),
      - pandas DataFrame with per-sentence label and confidence.
    The last two elements are None when no usable text was supplied.
    """
    import html  # stdlib; used to neutralise markup in user-supplied text

    if not text.strip():
        return "⚠️ Please enter some text.", None, None

    sentences = sent_tokenize(text)
    if not sentences:
        return "⚠️ No sentences detected.", None, None

    # Batch-encode every sentence in one pass; truncate to the model's
    # positional limit so over-long sentences cannot crash the forward pass.
    inputs = tokenizer(
        sentences,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=model.config.max_position_embeddings
    ).to(device)

    with torch.no_grad():
        logits = model(**inputs).logits
        probs = F.softmax(logits, dim=-1).cpu()
        preds = torch.argmax(probs, dim=-1).cpu()

    results = []
    highlighted_sentences = []
    for i, s in enumerate(sentences):
        p = preds[i].item()
        conf = probs[i, p].item()
        # NOTE(review): assumes class index 0 == "AI" — confirm against the
        # model's id2label config.
        label = "AI" if p == 0 else "Human"

        results.append([s, label, f"{conf:.2f}"])

        # Escape the sentence so user text containing '<', '&', quotes, etc.
        # cannot break — or inject markup into — the highlighted HTML output.
        safe = html.escape(s)
        color = "red" if label == "AI" else "green"
        highlighted_sentences.append(
            f"<span style='color:{color}; font-weight:bold'>{safe}</span>"
        )

    # Overall AI likelihood: mean of the per-sentence class-0 probabilities.
    avg = torch.mean(probs, dim=0)
    model_ai = avg[0].item() * 100

    highlighted_text = " ".join(highlighted_sentences)
    df = pd.DataFrame(results, columns=["Sentence", "Classification", "Confidence"])
    return f"⚖️ AI Likelihood: {model_ai:.1f}%", highlighted_text, df
62
+
63
# --- Gradio Interface ---
# Declarative UI: one text box in, three outputs (overall score, colour-coded
# HTML, per-sentence table), wired to classify_text via the button's click event.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 AI Article Detection by Writenix")

    with gr.Row():
        text_input = gr.Textbox(label="Enter text", lines=10, placeholder="Paste your text here…")

    classify_btn = gr.Button("Classify")

    # Output order must match classify_text's 3-tuple return:
    # (summary string, highlighted HTML, DataFrame).
    ai_score = gr.Label(label="Overall AI Likelihood")
    highlighted = gr.HTML()
    table = gr.Dataframe(headers=["Sentence", "Classification", "Confidence"], wrap=True)

    classify_btn.click(classify_text, inputs=text_input, outputs=[ai_score, highlighted, table])

# Launch only when run as a script, not when imported (e.g. by a Space runner).
if __name__ == "__main__":
    demo.launch()