VictorM-Coder committed on
Commit
57bb1ed
·
verified ·
1 Parent(s): 6f9b15a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -78
app.py CHANGED
@@ -1,89 +1,100 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
 
4
  import re
5
 
6
- # Load model
7
- MODEL = "Hello-SimpleAI/HC3"
8
- tokenizer = AutoTokenizer.from_pretrained(MODEL)
9
- model = AutoModelForSequenceClassification.from_pretrained(MODEL)
10
-
11
- def split_sentences(paragraph):
12
- """Split a paragraph into sentences."""
13
- return re.split(r'(?<=[.!?]) +', paragraph.strip())
14
-
15
- def group_sentences(sentences, size=2):
16
- """Group sentences into chunks of 2 (or remaining)."""
17
- return [" ".join(sentences[i:i+size]) for i in range(0, len(sentences), size)]
18
-
19
- def detect_ai(text):
20
- paragraphs = re.split(r"\n\s*\n", text.strip())
21
-
22
- results = []
23
- all_ai_flags = [] # store 1 = AI, 0 = Human
24
-
25
- highlighted = ""
26
- for para in paragraphs:
27
- if not para.strip():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  continue
29
-
30
- sentences = split_sentences(para)
31
- chunks = group_sentences(sentences, size=2)
32
- highlighted_para = ""
33
-
34
- for chunk in chunks:
35
- if not chunk.strip():
36
- continue
37
-
38
- inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=512)
39
- with torch.no_grad():
40
- outputs = model(**inputs)
41
- probs = torch.softmax(outputs.logits, dim=1)
42
-
43
- ai_score = float(probs[0][1]) # AI likelihood
44
- human_score = 1 - ai_score # Human likelihood
45
-
46
- # Threshold check (AI > 20% → AI)
47
- if ai_score > 0.2:
48
- label = "🔴 AI"
49
- color = "rgb(255,120,120)" # red
50
- all_ai_flags.append(1)
51
- else:
52
- label = "🟢 Human"
53
- color = "rgb(120,255,120)" # green
54
- all_ai_flags.append(0)
55
-
56
- highlighted_para += (
57
- f"<div style='background-color:{color}; padding:4px; margin-bottom:4px; border-radius:4px'>"
58
- f"<b>{label}</b> — Human {round(human_score*100,1)}% | AI {round(ai_score*100,1)}%<br>"
59
- f"{chunk}</div>"
60
- )
61
-
62
- highlighted += f"<div style='margin-bottom:12px'>{highlighted_para}</div>"
63
-
64
- # Compute overall result
65
- if all_ai_flags:
66
- ai_ratio = sum(all_ai_flags) / len(all_ai_flags)
67
- if ai_ratio == 1:
68
- overall = "🔴 Overall: 100% AI"
69
- elif ai_ratio == 0:
70
- overall = "🟢 Overall: 100% Human"
71
  else:
72
- overall = f"⚖️ Overall AI Probability: {round(ai_ratio*100,2)}%"
73
- highlighted += f"<p><b>{overall}</b></p>"
 
 
 
74
  else:
75
- overall = "No text detected"
76
 
77
- return highlighted, {"overall": overall, "chunks_checked": len(all_ai_flags)}
78
 
79
- with gr.Blocks() as demo:
80
- gr.Markdown("## 🤖 AI Detector (2-sentence chunks)")
81
- gr.Markdown("Groups of 2 sentences are checked. If AI >20%, the group is flagged as AI.")
82
- input_text = gr.Textbox(lines=12, placeholder="Paste your essay or report here...")
83
- output_html = gr.HTML()
84
- output_json = gr.JSON()
85
- run_btn = gr.Button("Detect AI")
86
 
87
- run_btn.click(detect_ai, inputs=input_text, outputs=[output_html, output_json])
 
 
 
 
 
 
 
88
 
89
- demo.launch()
 
1
  import gradio as gr
 
2
  import torch
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  import re
5
 
6
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
7
+
8
+ # Use one tokenizer across all ensemble models
9
+ tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
10
+
11
+ # Load 3 models from Hugging Face (no local .bin required)
12
+ model_names = [
13
+ "mihalykiss/modernbert_2/Model_groups_3class_seed12",
14
+ "mihalykiss/modernbert_2/Model_groups_3class_seed22",
15
+ "mihalykiss/modernbert_2/Model_groups_3class_seed32", # third ensemble variant
16
+ ]
17
+
18
+ models = []
19
+ for name in model_names:
20
+ m = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
21
+ m.load_state_dict(torch.hub.load_state_dict_from_url(
22
+ f"https://huggingface.co/{name}/resolve/main/pytorch_model.bin",
23
+ map_location=device
24
+ ))
25
+ m.to(device).eval()
26
+ models.append(m)
27
+
28
+ label_mapping = {
29
+ 0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
30
+ 6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
31
+ 11: 'flan_t5_base', 12: 'flan_t5_large', 13: 'flan_t5_small',
32
+ 14: 'flan_t5_xl', 15: 'flan_t5_xxl', 16: 'gemma-7b-it', 17: 'gemma2-9b-it',
33
+ 18: 'gpt-3.5-turbo', 19: 'gpt-35', 20: 'gpt4', 21: 'gpt4o',
34
+ 22: 'gpt_j', 23: 'gpt_neox', 24: 'human', 25: 'llama3-70b', 26: 'llama3-8b',
35
+ 27: 'mixtral-8x7b', 28: 'opt_1.3b', 29: 'opt_125m', 30: 'opt_13b',
36
+ 31: 'opt_2.7b', 32: 'opt_30b', 33: 'opt_350m', 34: 'opt_6.7b',
37
+ 35: 'opt_iml_30b', 36: 'opt_iml_max_1.3b', 37: 't0_11b', 38: 't0_3b',
38
+ 39: 'text-davinci-002', 40: 'text-davinci-003'
39
+ }
40
+
41
+ def clean_text(text: str) -> str:
42
+ text = re.sub(r"\s{2,}", " ", text)
43
+ text = re.sub(r"\s+([,.;:?!])", r"\1", text)
44
+ return text.strip()
45
+
46
+ def classify_text(text):
47
+ cleaned_text = clean_text(text)
48
+ if not cleaned_text:
49
+ return "Please paste some text."
50
+
51
+ # Split text into sentences for per-sentence highlighting
52
+ sentences = re.split(r'(?<=[.!?])\s+', cleaned_text)
53
+
54
+ highlighted = []
55
+ total_ai, total_human = 0, 0
56
+
57
+ for sent in sentences:
58
+ if not sent.strip():
59
  continue
60
+ inputs = tokenizer(sent, return_tensors="pt", truncation=True, padding=True).to(device)
61
+ with torch.no_grad():
62
+ probs_list = []
63
+ for m in models:
64
+ logits = m(**inputs).logits
65
+ probs_list.append(torch.softmax(logits, dim=1))
66
+ avg_probs = sum(probs_list) / len(probs_list)
67
+ probs = avg_probs[0]
68
+
69
+ ai_probs = probs.clone()
70
+ ai_probs[24] = 0
71
+ ai_score = ai_probs.sum().item() * 100
72
+ human_score = 100 - ai_score
73
+
74
+ total_ai += ai_score
75
+ total_human += human_score
76
+
77
+ if ai_score > 20: # highlight AI-like sentences
78
+ highlighted.append(f"<span class='highlight-ai'>{sent}</span>")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  else:
80
+ highlighted.append(f"<span class='highlight-human'>{sent}</span>")
81
+
82
+ # Global decision
83
+ if total_human >= total_ai:
84
+ verdict = f"<br><br><b>Overall: {total_human/(total_ai+total_human)*100:.2f}% Human</b>"
85
  else:
86
+ verdict = f"<br><br><b>Overall: {total_ai/(total_ai+total_human)*100:.2f}% AI</b>"
87
 
88
+ return " ".join(highlighted) + verdict
89
 
 
 
 
 
 
 
 
90
 
91
+ # Gradio UI
92
+ iface = gr.Interface(
93
+ fn=classify_text,
94
+ inputs=gr.Textbox(lines=6, placeholder="Paste text here..."),
95
+ outputs="html",
96
+ title="AI Text Detector",
97
+ description="Detects AI-generated text using ModernBERT ensemble and highlights AI-like vs Human-like sentences."
98
+ )
99
 
100
+ iface.launch()