deveshpunjabi committed on
Commit
a6b6b2d
·
verified ·
1 Parent(s): 2f8459a

Rename ai_text_detector.py to model_classifier.py

Browse files
ai_text_detector.py → model_classifier.py RENAMED
@@ -6,7 +6,6 @@ from tokenizers.normalizers import Sequence, Replace, Strip, NFKC
6
  from tokenizers import Regex
7
  import os
8
 
9
-
10
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
11
 
12
  # Updated model paths with the correct model 1 location
@@ -74,7 +73,7 @@ def clean_text(text: str) -> str:
74
  text = re.sub(r'\s+([,.;:?!])', r'\1', text)
75
  return text
76
 
77
- newline_to_space = Replace(Regex(r'\s*\n\s*'), " ")
78
  join_hyphen_break = Replace(Regex(r'(\w+)[--]\s*\n\s*(\w+)'), r"\1\2")
79
 
80
  tokenizer.backend_tokenizer.normalizer = Sequence([
@@ -85,6 +84,11 @@ tokenizer.backend_tokenizer.normalizer = Sequence([
85
  ])
86
 
87
  def classify_text(text):
 
 
 
 
 
88
  cleaned_text = clean_text(text)
89
  if not text.strip():
90
  return "⚠️ Please enter some text to analyze"
@@ -115,9 +119,7 @@ def classify_text(text):
115
  result_message = f"""
116
  ### 🟢 **Human Written**
117
  **Confidence: {human_prob:.2f}%**
118
-
119
  This text appears to be written by a human.
120
-
121
  ---
122
  **Analysis Details:**
123
  - Human probability: {human_prob:.2f}%
@@ -128,11 +130,8 @@ This text appears to be written by a human.
128
  result_message = f"""
129
  ### 🔴 **AI Generated**
130
  **Confidence: {ai_total_prob:.2f}%**
131
-
132
  **Most likely source: {ai_argmax_model}**
133
-
134
  This text appears to be generated by an AI model.
135
-
136
  ---
137
  **Analysis Details:**
138
  - Human probability: {human_prob:.2f}%
 
6
  from tokenizers import Regex
7
  import os
8
 
 
9
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
10
 
11
  # Updated model paths with the correct model 1 location
 
73
  text = re.sub(r'\s+([,.;:?!])', r'\1', text)
74
  return text
75
 
76
+ newline_to_space = Replace(Regex(r'\s*\n\s*'), " ")
77
  join_hyphen_break = Replace(Regex(r'(\w+)[--]\s*\n\s*(\w+)'), r"\1\2")
78
 
79
  tokenizer.backend_tokenizer.normalizer = Sequence([
 
84
  ])
85
 
86
  def classify_text(text):
87
+ """
88
+ Classify text using ModernBERT ensemble
89
+ Author: deveshpunjabi
90
+ Date: 2025-01-15 07:07:03 UTC
91
+ """
92
  cleaned_text = clean_text(text)
93
  if not text.strip():
94
  return "⚠️ Please enter some text to analyze"
 
119
  result_message = f"""
120
  ### 🟢 **Human Written**
121
  **Confidence: {human_prob:.2f}%**
 
122
  This text appears to be written by a human.
 
123
  ---
124
  **Analysis Details:**
125
  - Human probability: {human_prob:.2f}%
 
130
  result_message = f"""
131
  ### 🔴 **AI Generated**
132
  **Confidence: {ai_total_prob:.2f}%**
 
133
  **Most likely source: {ai_argmax_model}**
 
134
  This text appears to be generated by an AI model.
 
135
  ---
136
  **Analysis Details:**
137
  - Human probability: {human_prob:.2f}%