Spaces:
Running
Running
Rename ai_text_detector.py to model_classifier.py
Browse files
ai_text_detector.py → model_classifier.py
RENAMED
|
@@ -6,7 +6,6 @@ from tokenizers.normalizers import Sequence, Replace, Strip, NFKC
|
|
| 6 |
from tokenizers import Regex
|
| 7 |
import os
|
| 8 |
|
| 9 |
-
|
| 10 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 11 |
|
| 12 |
# Updated model paths with the correct model 1 location
|
|
@@ -74,7 +73,7 @@ def clean_text(text: str) -> str:
|
|
| 74 |
text = re.sub(r'\s+([,.;:?!])', r'\1', text)
|
| 75 |
return text
|
| 76 |
|
| 77 |
-
newline_to_space
|
| 78 |
join_hyphen_break = Replace(Regex(r'(\w+)[--]\s*\n\s*(\w+)'), r"\1\2")
|
| 79 |
|
| 80 |
tokenizer.backend_tokenizer.normalizer = Sequence([
|
|
@@ -85,6 +84,11 @@ tokenizer.backend_tokenizer.normalizer = Sequence([
|
|
| 85 |
])
|
| 86 |
|
| 87 |
def classify_text(text):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
cleaned_text = clean_text(text)
|
| 89 |
if not text.strip():
|
| 90 |
return "⚠️ Please enter some text to analyze"
|
|
@@ -115,9 +119,7 @@ def classify_text(text):
|
|
| 115 |
result_message = f"""
|
| 116 |
### 🟢 **Human Written**
|
| 117 |
**Confidence: {human_prob:.2f}%**
|
| 118 |
-
|
| 119 |
This text appears to be written by a human.
|
| 120 |
-
|
| 121 |
---
|
| 122 |
**Analysis Details:**
|
| 123 |
- Human probability: {human_prob:.2f}%
|
|
@@ -128,11 +130,8 @@ This text appears to be written by a human.
|
|
| 128 |
result_message = f"""
|
| 129 |
### 🔴 **AI Generated**
|
| 130 |
**Confidence: {ai_total_prob:.2f}%**
|
| 131 |
-
|
| 132 |
**Most likely source: {ai_argmax_model}**
|
| 133 |
-
|
| 134 |
This text appears to be generated by an AI model.
|
| 135 |
-
|
| 136 |
---
|
| 137 |
**Analysis Details:**
|
| 138 |
- Human probability: {human_prob:.2f}%
|
|
|
|
| 6 |
from tokenizers import Regex
|
| 7 |
import os
|
| 8 |
|
|
|
|
| 9 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 10 |
|
| 11 |
# Updated model paths with the correct model 1 location
|
|
|
|
| 73 |
text = re.sub(r'\s+([,.;:?!])', r'\1', text)
|
| 74 |
return text
|
| 75 |
|
| 76 |
+
newline_to_space = Replace(Regex(r'\s*\n\s*'), " ")
|
| 77 |
join_hyphen_break = Replace(Regex(r'(\w+)[--]\s*\n\s*(\w+)'), r"\1\2")
|
| 78 |
|
| 79 |
tokenizer.backend_tokenizer.normalizer = Sequence([
|
|
|
|
| 84 |
])
|
| 85 |
|
| 86 |
def classify_text(text):
|
| 87 |
+
"""
|
| 88 |
+
Classify text using ModernBERT ensemble
|
| 89 |
+
Author: deveshpunjabi
|
| 90 |
+
Date: 2025-01-15 07:07:03 UTC
|
| 91 |
+
"""
|
| 92 |
cleaned_text = clean_text(text)
|
| 93 |
if not text.strip():
|
| 94 |
return "⚠️ Please enter some text to analyze"
|
|
|
|
| 119 |
result_message = f"""
|
| 120 |
### 🟢 **Human Written**
|
| 121 |
**Confidence: {human_prob:.2f}%**
|
|
|
|
| 122 |
This text appears to be written by a human.
|
|
|
|
| 123 |
---
|
| 124 |
**Analysis Details:**
|
| 125 |
- Human probability: {human_prob:.2f}%
|
|
|
|
| 130 |
result_message = f"""
|
| 131 |
### 🔴 **AI Generated**
|
| 132 |
**Confidence: {ai_total_prob:.2f}%**
|
|
|
|
| 133 |
**Most likely source: {ai_argmax_model}**
|
|
|
|
| 134 |
This text appears to be generated by an AI model.
|
|
|
|
| 135 |
---
|
| 136 |
**Analysis Details:**
|
| 137 |
- Human probability: {human_prob:.2f}%
|