Spaces:

KavinduHansaka
/

Toxic_Comment_Classifier

Sleeping

App Files Files Community

KavinduHansaka committed on Jan 23

Commit

75bcdb1

verified ·

1 Parent(s): 4d7e6ac

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -15

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import pandas as pd
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import docx
 try:
     import fitz  # PyMuPDF
@@ -129,7 +130,7 @@ def classify_chunks(chunks: List[str], progress=gr.Progress()) -> pd.DataFrame:
     df = pd.DataFrame({
         "Text Chunk": chunks,
-        "AI Probability": [round(p, 4) for p in probabilities],
         "Prediction": [
             "🤖 Likely AI" if p >= AI_THRESHOLD else "🧍 Human"
             for p in probabilities
@@ -144,11 +145,11 @@ def classify_chunks(chunks: List[str], progress=gr.Progress()) -> pd.DataFrame:
 def document_summary(df: pd.DataFrame) -> pd.DataFrame:
     high_conf = df[df["Confidence"] == "High"]
-    avg_score = df["AI Probability"].mean()
     summary = pd.DataFrame([{
         "Text Chunk": "📄 Document Summary",
-        "AI Probability": round(avg_score, 4),
         "Prediction": "🤖 Likely AI" if len(high_conf) >= len(df) * 0.6 else "🧍 Human",
         "Confidence": "High" if len(high_conf) >= len(df) * 0.6 else "Medium"
     }])
@@ -156,6 +157,27 @@ def document_summary(df: pd.DataFrame) -> pd.DataFrame:
     return pd.concat([df, summary], ignore_index=True)
 # =========================
 # GRADIO ENTRY FUNCTION
 # =========================
@@ -181,14 +203,9 @@ def run_detector(text_input: str, uploaded_files, progress=gr.Progress()):
     df = classify_chunks(chunks, progress)
     final_df = document_summary(df)
-    with tempfile.NamedTemporaryFile(
-        delete=False, suffix=".csv", mode="w", encoding="utf-8"
-    ) as tmp:
-        final_df.to_csv(tmp.name, index=False)
-        output_path = tmp.name
-    return final_df, output_path
 # =========================
@@ -197,8 +214,8 @@ def run_detector(text_input: str, uploaded_files, progress=gr.Progress()):
 with gr.Blocks(title="🧪 Offline AI Document Detector") as app:
     gr.Markdown("## 🧪 Offline AI Document Detector")
     gr.Markdown(
-        "Analyze **PDF, DOCX, TXT, or pasted text** using an open-source AI detector. "
-        "Optimized for **CPU-only Hugging Face Spaces**."
     )
     text_input = gr.Textbox(
@@ -213,13 +230,13 @@ with gr.Blocks(title="🧪 Offline AI Document Detector") as app:
     )
     analyze_btn = gr.Button("🔍 Analyze")
-    output_table = gr.Dataframe(label="📊 Results")
-    download_file = gr.File(label="⬇️ Download CSV")
     analyze_btn.click(
         fn=run_detector,
         inputs=[text_input, file_input],
-        outputs=[output_table, download_file]
     )
 if __name__ == "__main__":

 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import docx
+import matplotlib.pyplot as plt
 try:
     import fitz  # PyMuPDF
     df = pd.DataFrame({
         "Text Chunk": chunks,
+        "AI Probability (%)": [round(p * 100, 2) for p in probabilities],
         "Prediction": [
             "🤖 Likely AI" if p >= AI_THRESHOLD else "🧍 Human"
             for p in probabilities
 def document_summary(df: pd.DataFrame) -> pd.DataFrame:
     high_conf = df[df["Confidence"] == "High"]
+    avg_prob = df["AI Probability (%)"].mean()
     summary = pd.DataFrame([{
         "Text Chunk": "📄 Document Summary",
+        "AI Probability (%)": round(avg_prob, 2),
         "Prediction": "🤖 Likely AI" if len(high_conf) >= len(df) * 0.6 else "🧍 Human",
         "Confidence": "High" if len(high_conf) >= len(df) * 0.6 else "Medium"
     }])
     return pd.concat([df, summary], ignore_index=True)
+# =========================
+# VISUALIZATION
+# =========================
+def generate_confidence_plot(df: pd.DataFrame) -> str:
+    probs = df[df["Text Chunk"] != "📄 Document Summary"]["AI Probability (%)"]
+    fig, ax = plt.subplots()
+    ax.hist(probs, bins=10)
+    ax.axvline(AI_THRESHOLD * 100, linestyle="--")
+    ax.set_title("AI Probability Distribution")
+    ax.set_xlabel("AI Probability (%)")
+    ax.set_ylabel("Number of Chunks")
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
+        fig.savefig(tmp.name, bbox_inches="tight")
+        plot_path = tmp.name
+    plt.close(fig)
+    return plot_path
 # =========================
 # GRADIO ENTRY FUNCTION
 # =========================
     df = classify_chunks(chunks, progress)
     final_df = document_summary(df)
+    plot_path = generate_confidence_plot(final_df)
+    return final_df, plot_path
 # =========================
 with gr.Blocks(title="🧪 Offline AI Document Detector") as app:
     gr.Markdown("## 🧪 Offline AI Document Detector")
     gr.Markdown(
+        "Detect whether text is AI-generated using an **offline, open-source model**. "
+        "Supports **PDF, DOCX, TXT, and pasted text**. Optimized for CPU-only environments."
     )
     text_input = gr.Textbox(
     )
     analyze_btn = gr.Button("🔍 Analyze")
+    output_table = gr.Dataframe(label="📊 Detection Results")
+    confidence_plot = gr.Image(label="📈 AI Probability Distribution")
     analyze_btn.click(
         fn=run_detector,
         inputs=[text_input, file_input],
+        outputs=[output_table, confidence_plot]
     )
 if __name__ == "__main__":