KavinduHansaka committed on
Commit
75bcdb1
·
verified ·
1 Parent(s): 4d7e6ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -15
app.py CHANGED
@@ -8,6 +8,7 @@ import pandas as pd
8
  import torch
9
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
  import docx
 
11
 
12
  try:
13
  import fitz # PyMuPDF
@@ -129,7 +130,7 @@ def classify_chunks(chunks: List[str], progress=gr.Progress()) -> pd.DataFrame:
129
 
130
  df = pd.DataFrame({
131
  "Text Chunk": chunks,
132
- "AI Probability": [round(p, 4) for p in probabilities],
133
  "Prediction": [
134
  "🤖 Likely AI" if p >= AI_THRESHOLD else "🧍 Human"
135
  for p in probabilities
@@ -144,11 +145,11 @@ def classify_chunks(chunks: List[str], progress=gr.Progress()) -> pd.DataFrame:
144
 
145
  def document_summary(df: pd.DataFrame) -> pd.DataFrame:
146
  high_conf = df[df["Confidence"] == "High"]
147
- avg_score = df["AI Probability"].mean()
148
 
149
  summary = pd.DataFrame([{
150
  "Text Chunk": "📄 Document Summary",
151
- "AI Probability": round(avg_score, 4),
152
  "Prediction": "🤖 Likely AI" if len(high_conf) >= len(df) * 0.6 else "🧍 Human",
153
  "Confidence": "High" if len(high_conf) >= len(df) * 0.6 else "Medium"
154
  }])
@@ -156,6 +157,27 @@ def document_summary(df: pd.DataFrame) -> pd.DataFrame:
156
  return pd.concat([df, summary], ignore_index=True)
157
 
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  # =========================
160
  # GRADIO ENTRY FUNCTION
161
  # =========================
@@ -181,14 +203,9 @@ def run_detector(text_input: str, uploaded_files, progress=gr.Progress()):
181
 
182
  df = classify_chunks(chunks, progress)
183
  final_df = document_summary(df)
 
184
 
185
- with tempfile.NamedTemporaryFile(
186
- delete=False, suffix=".csv", mode="w", encoding="utf-8"
187
- ) as tmp:
188
- final_df.to_csv(tmp.name, index=False)
189
- output_path = tmp.name
190
-
191
- return final_df, output_path
192
 
193
 
194
  # =========================
@@ -197,8 +214,8 @@ def run_detector(text_input: str, uploaded_files, progress=gr.Progress()):
197
  with gr.Blocks(title="🧪 Offline AI Document Detector") as app:
198
  gr.Markdown("## 🧪 Offline AI Document Detector")
199
  gr.Markdown(
200
- "Analyze **PDF, DOCX, TXT, or pasted text** using an open-source AI detector. "
201
- "Optimized for **CPU-only Hugging Face Spaces**."
202
  )
203
 
204
  text_input = gr.Textbox(
@@ -213,13 +230,13 @@ with gr.Blocks(title="🧪 Offline AI Document Detector") as app:
213
  )
214
 
215
  analyze_btn = gr.Button("🔍 Analyze")
216
- output_table = gr.Dataframe(label="📊 Results")
217
- download_file = gr.File(label="⬇️ Download CSV")
218
 
219
  analyze_btn.click(
220
  fn=run_detector,
221
  inputs=[text_input, file_input],
222
- outputs=[output_table, download_file]
223
  )
224
 
225
  if __name__ == "__main__":
 
8
  import torch
9
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
  import docx
11
+ import matplotlib.pyplot as plt
12
 
13
  try:
14
  import fitz # PyMuPDF
 
130
 
131
  df = pd.DataFrame({
132
  "Text Chunk": chunks,
133
+ "AI Probability (%)": [round(p * 100, 2) for p in probabilities],
134
  "Prediction": [
135
  "🤖 Likely AI" if p >= AI_THRESHOLD else "🧍 Human"
136
  for p in probabilities
 
145
 
146
  def document_summary(df: pd.DataFrame) -> pd.DataFrame:
147
  high_conf = df[df["Confidence"] == "High"]
148
+ avg_prob = df["AI Probability (%)"].mean()
149
 
150
  summary = pd.DataFrame([{
151
  "Text Chunk": "📄 Document Summary",
152
+ "AI Probability (%)": round(avg_prob, 2),
153
  "Prediction": "🤖 Likely AI" if len(high_conf) >= len(df) * 0.6 else "🧍 Human",
154
  "Confidence": "High" if len(high_conf) >= len(df) * 0.6 else "Medium"
155
  }])
 
157
  return pd.concat([df, summary], ignore_index=True)
158
 
159
 
160
+ # =========================
161
+ # VISUALIZATION
162
+ # =========================
163
+ def generate_confidence_plot(df: pd.DataFrame) -> str:
164
+ probs = df[df["Text Chunk"] != "📄 Document Summary"]["AI Probability (%)"]
165
+
166
+ fig, ax = plt.subplots()
167
+ ax.hist(probs, bins=10)
168
+ ax.axvline(AI_THRESHOLD * 100, linestyle="--")
169
+ ax.set_title("AI Probability Distribution")
170
+ ax.set_xlabel("AI Probability (%)")
171
+ ax.set_ylabel("Number of Chunks")
172
+
173
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
174
+ fig.savefig(tmp.name, bbox_inches="tight")
175
+ plot_path = tmp.name
176
+
177
+ plt.close(fig)
178
+ return plot_path
179
+
180
+
181
  # =========================
182
  # GRADIO ENTRY FUNCTION
183
  # =========================
 
203
 
204
  df = classify_chunks(chunks, progress)
205
  final_df = document_summary(df)
206
+ plot_path = generate_confidence_plot(final_df)
207
 
208
+ return final_df, plot_path
 
 
 
 
 
 
209
 
210
 
211
  # =========================
 
214
  with gr.Blocks(title="🧪 Offline AI Document Detector") as app:
215
  gr.Markdown("## 🧪 Offline AI Document Detector")
216
  gr.Markdown(
217
+ "Detect whether text is AI-generated using an **offline, open-source model**. "
218
+ "Supports **PDF, DOCX, TXT, and pasted text**. Optimized for CPU-only environments."
219
  )
220
 
221
  text_input = gr.Textbox(
 
230
  )
231
 
232
  analyze_btn = gr.Button("🔍 Analyze")
233
+ output_table = gr.Dataframe(label="📊 Detection Results")
234
+ confidence_plot = gr.Image(label="📈 AI Probability Distribution")
235
 
236
  analyze_btn.click(
237
  fn=run_detector,
238
  inputs=[text_input, file_input],
239
+ outputs=[output_table, confidence_plot]
240
  )
241
 
242
  if __name__ == "__main__":