Spaces:

SzegedAI
/

AI_Detector

Running

App Files Community

mihalykiss committed 3 days ago

Commit

c402f62

verified ·

1 Parent(s): d51afa3

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -29

app.py CHANGED Viewed

@@ -7,33 +7,27 @@ from tokenizers.normalizers import Sequence, Replace, Strip, NFKC
 from tokenizers import Regex
 import matplotlib.pyplot as plt
-# Set device to GPU if available, otherwise CPU
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-# --- Model and Tokenizer Setup ---
 model1_path = "modernbert.bin"
 model2_path = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed12"
 model3_path = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed22"
 tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
-# Load Model 1 from local path
 model_1 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
 model_1.load_state_dict(torch.load(model1_path, map_location=device))
 model_1.to(device).eval()
-# Load Model 2 from URL
 model_2 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
 model_2.load_state_dict(torch.hub.load_state_dict_from_url(model2_path, map_location=device))
 model_2.to(device).eval()
-# Load Model 3 from URL
 model_3 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
 model_3.load_state_dict(torch.hub.load_state_dict_from_url(model3_path, map_location=device))
 model_3.to(device).eval()
-# --- Label Mapping and Text Cleaning ---
 label_mapping = {
     0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
     6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
@@ -60,22 +54,17 @@ tokenizer.backend_tokenizer.normalizer = Sequence([
     newline_to_space,
     Strip()
 ])
 def classify_text(text):
     """
-    Classifies the text and generates a plot of the top 5 AI model predictions.
     Returns both the result message and the plot figure.
     """
     cleaned_text = clean_text(text)
-    # If input is empty, clear the outputs
     if not cleaned_text.strip():
         return "", None
-    # Tokenize input and move to the appropriate device
     inputs = tokenizer(cleaned_text, return_tensors="pt", truncation=True, padding=True).to(device)
-    # Perform inference with the three models
     with torch.no_grad():
         logits_1 = model_1(**inputs).logits
         logits_2 = model_2(**inputs).logits
@@ -110,34 +99,30 @@ def classify_text(text):
             f"**Identified LLM: {ai_argmax_model}**"
         )
-    ai_probs_for_plot = probabilities.clone()
-    top_5_probs, top_5_indices = torch.topk(ai_probs_for_plot, 5)
-    top_5_probs = top_5_probs.cpu().numpy()
-    top_5_labels = [label_mapping[i.item()] for i in top_5_indices]
-    fig, ax = plt.subplots(figsize=(10, 5))
-    bars = ax.barh(top_5_labels, top_5_probs, color='#4CAF50', alpha=0.8)
-    ax.set_xlabel('Probability', fontsize=12)
-    ax.set_title('Top 5 Predictions', fontsize=14, fontweight='bold')
-    ax.invert_yaxis()
-    ax.grid(axis='x', linestyle='--', alpha=0.6)
     for bar in bars:
-        width = bar.get_width()
-        label_x_pos = width + 0.01
-        ax.text(label_x_pos, bar.get_y() + bar.get_height() / 2, f'{width:.2%}', va='center')
-    ax.set_xlim(0, max(top_5_probs) * 1.18)
     plt.tight_layout()
     return result_message, fig
 title = "AI Text Detector"
 description = """

 from tokenizers import Regex
 import matplotlib.pyplot as plt
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model1_path = "modernbert.bin"
 model2_path = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed12"
 model3_path = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed22"
 tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
 model_1 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
 model_1.load_state_dict(torch.load(model1_path, map_location=device))
 model_1.to(device).eval()
 model_2 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
 model_2.load_state_dict(torch.hub.load_state_dict_from_url(model2_path, map_location=device))
 model_2.to(device).eval()
 model_3 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
 model_3.load_state_dict(torch.hub.load_state_dict_from_url(model3_path, map_location=device))
 model_3.to(device).eval()
 label_mapping = {
     0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
     6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
     newline_to_space,
     Strip()
 ])
 def classify_text(text):
     """
+    Classifies the text and generates a plot of the human vs AI probability.
     Returns both the result message and the plot figure.
     """
     cleaned_text = clean_text(text)
     if not cleaned_text.strip():
         return "", None
     inputs = tokenizer(cleaned_text, return_tensors="pt", truncation=True, padding=True).to(device)
     with torch.no_grad():
         logits_1 = model_1(**inputs).logits
         logits_2 = model_2(**inputs).logits
             f"**Identified LLM: {ai_argmax_model}**"
         )
+    # Create a plot for Human vs AI probabilities
+    fig, ax = plt.subplots(figsize=(6, 3))
+    categories = ['Human', 'AI']
+    probabilities_for_plot = [human_percentage, ai_percentage]
+    bars = ax.bar(categories, probabilities_for_plot, color=['#4CAF50', '#FF5733'], alpha=0.8)
+    ax.set_ylabel('Probability (%)', fontsize=12)
+    ax.set_title('Human vs AI Probability', fontsize=14, fontweight='bold')
+    ax.grid(axis='y', linestyle='--', alpha=0.6)
+    # Add labels to the bars
     for bar in bars:
+        height = bar.get_height()
+        ax.text(bar.get_x() + bar.get_width() / 2, height + 1, f'{height:.2f}%', ha='center')
+    ax.set_ylim(0, 100)
     plt.tight_layout()
     return result_message, fig
 title = "AI Text Detector"
 description = """