Spaces:
Sleeping
Sleeping
Update src/app.py
Browse files- src/app.py +190 -191
src/app.py
CHANGED
|
@@ -1,197 +1,196 @@
|
|
| 1 |
-
import
|
| 2 |
-
import
|
| 3 |
-
import
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
)
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
#
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
]
|
| 36 |
-
tau = calibrate_threshold(human_calibration_texts, calibration_proportion=0.05)
|
| 37 |
-
|
| 38 |
-
# === Sidebar ===
|
| 39 |
-
with st.sidebar:
|
| 40 |
-
st.header("Settings")
|
| 41 |
-
detection_mode = st.selectbox(
|
| 42 |
-
"Detection Mode",
|
| 43 |
-
["Standard", "Aggressive", "Conservative"],
|
| 44 |
-
help="Standard: Balanced approach, Aggressive: Higher AI detection, Conservative: Higher human detection"
|
| 45 |
-
)
|
| 46 |
-
show_details = st.checkbox("Show Detailed Analysis", value=True)
|
| 47 |
-
enable_chunking = st.checkbox("Enable Text Chunking", value=False)
|
| 48 |
|
| 49 |
-
#
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
)
|
| 59 |
-
|
| 60 |
-
with
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
if has_html_or_ai_artifacts(clean_text):
|
| 79 |
-
st.markdown("""
|
| 80 |
-
<div class="result-card">
|
| 81 |
-
<h2>🔴 AI-Generated Content Detected</h2>
|
| 82 |
-
<p>HTML tags or AI artifacts found in the text.</p>
|
| 83 |
-
</div>
|
| 84 |
-
""", unsafe_allow_html=True)
|
| 85 |
-
else:
|
| 86 |
-
# Process text
|
| 87 |
-
if enable_chunking and len(clean_text.split()) > 50:
|
| 88 |
-
chunks = smart_chunk_text(clean_text)
|
| 89 |
-
else:
|
| 90 |
-
chunks = [clean_text]
|
| 91 |
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
ai_percentage = round((ai_weighted / total_length) * 100, 1) if total_length else 0
|
| 100 |
-
human_percentage = round(100 - ai_percentage, 1)
|
| 101 |
-
|
| 102 |
-
# Apply detection mode adjustments
|
| 103 |
-
if detection_mode == "Aggressive":
|
| 104 |
-
ai_percentage = min(ai_percentage * 1.2, 100)
|
| 105 |
-
human_percentage = 100 - ai_percentage
|
| 106 |
-
elif detection_mode == "Conservative":
|
| 107 |
-
ai_percentage = ai_percentage * 0.8
|
| 108 |
-
human_percentage = 100 - ai_percentage
|
| 109 |
-
|
| 110 |
-
# Result text
|
| 111 |
-
if ai_percentage >= 70:
|
| 112 |
-
result_emoji, result_text = "🔴", "HIGH AI PROBABILITY"
|
| 113 |
-
elif ai_percentage >= 40:
|
| 114 |
-
result_emoji, result_text = "🟡", "MIXED CONTENT"
|
| 115 |
-
else:
|
| 116 |
-
result_emoji, result_text = "🟢", "LIKELY HUMAN"
|
| 117 |
-
|
| 118 |
-
# Display card
|
| 119 |
-
st.markdown(f"""
|
| 120 |
-
<div class="result-card">
|
| 121 |
-
<h2>{result_emoji} {result_text}</h2>
|
| 122 |
-
<div style="display: flex; justify-content: space-between; align-items: center;">
|
| 123 |
-
<div>
|
| 124 |
-
<h3>AI: {ai_percentage}%</h3>
|
| 125 |
-
<h3>Human: {human_percentage}%</h3>
|
| 126 |
-
</div>
|
| 127 |
-
<div style="font-size: 3rem;">{result_emoji}</div>
|
| 128 |
-
</div>
|
| 129 |
-
</div>
|
| 130 |
-
""", unsafe_allow_html=True)
|
| 131 |
-
|
| 132 |
-
# Metrics
|
| 133 |
-
col1, col2, col3 = st.columns(3)
|
| 134 |
-
with col1: st.metric("AI Probability", f"{ai_percentage}%")
|
| 135 |
-
with col2: st.metric("Human Probability", f"{human_percentage}%")
|
| 136 |
-
with col3: st.metric("Confidence", "High" if abs(ai_percentage - human_percentage) > 30 else "Medium")
|
| 137 |
-
|
| 138 |
-
st.progress(ai_percentage / 100)
|
| 139 |
-
|
| 140 |
-
# Highlighted output
|
| 141 |
-
st.subheader("📊 Text Analysis")
|
| 142 |
-
html_output = ""
|
| 143 |
-
for result in results:
|
| 144 |
-
css_class = "ai-highlight" if result["type"] == "AI" else "human-highlight"
|
| 145 |
-
html_output += f'<span class="{css_class}" title="Score: {result["score"]:.3f}">{result["text"]}</span> '
|
| 146 |
-
st.markdown(f'<div style="line-height: 2.5;">{html_output}</div>', unsafe_allow_html=True)
|
| 147 |
-
|
| 148 |
-
# Detailed analysis
|
| 149 |
-
if show_details:
|
| 150 |
-
with st.expander("🔍 Detailed Analysis Report", expanded=True):
|
| 151 |
-
tab1, tab2, tab3 = st.tabs(["Feature Analysis", "Chunk Details", "Visualization"])
|
| 152 |
-
|
| 153 |
-
with tab1:
|
| 154 |
-
st.write("**Feature Scores:** Under development.")
|
| 155 |
-
with tab2:
|
| 156 |
-
for i, result in enumerate(results):
|
| 157 |
-
st.write(f"**Chunk {i+1}:** ({result['type']} - Score: {result['score']:.3f})")
|
| 158 |
-
st.write(result["text"])
|
| 159 |
-
st.divider()
|
| 160 |
-
with tab3:
|
| 161 |
-
if len(results) > 1:
|
| 162 |
-
scores = [r["score"] for r in results]
|
| 163 |
-
fig = go.Figure()
|
| 164 |
-
fig.add_trace(go.Scatter(
|
| 165 |
-
x=list(range(1, len(scores) + 1)),
|
| 166 |
-
y=scores,
|
| 167 |
-
mode='lines+markers',
|
| 168 |
-
name='AI Probability',
|
| 169 |
-
line=dict(color='red', width=3)
|
| 170 |
-
))
|
| 171 |
-
fig.update_layout(
|
| 172 |
-
title="AI Probability Across Text Chunks",
|
| 173 |
-
xaxis_title="Chunk Number",
|
| 174 |
-
yaxis_title="AI Probability",
|
| 175 |
-
showlegend=True
|
| 176 |
-
)
|
| 177 |
-
st.plotly_chart(fig, use_container_width=True)
|
| 178 |
-
|
| 179 |
-
# Download report
|
| 180 |
-
st.download_button(
|
| 181 |
-
"📥 Download Full Report",
|
| 182 |
-
data=json.dumps({
|
| 183 |
-
"overview": {
|
| 184 |
-
"ai_percentage": ai_percentage,
|
| 185 |
-
"human_percentage": human_percentage,
|
| 186 |
-
"result": result_text,
|
| 187 |
-
"detection_mode": detection_mode
|
| 188 |
-
},
|
| 189 |
-
"detailed_results": results
|
| 190 |
-
}, indent=2),
|
| 191 |
-
file_name=f"ai_detection_report_{int(time.time())}.json",
|
| 192 |
-
mime="application/json",
|
| 193 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
-
#
|
| 196 |
-
|
| 197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
from detector import analyze_text, get_components
|
| 4 |
+
|
| 5 |
+
# Pre-load model
|
| 6 |
+
print("Starting AI Text Detector...")
|
| 7 |
+
try:
|
| 8 |
+
get_components()
|
| 9 |
+
model_status = "✅ Model loaded successfully!"
|
| 10 |
+
except Exception as e:
|
| 11 |
+
model_status = f"⚠️ Model loading issue: {str(e)}"
|
| 12 |
+
|
| 13 |
+
print(model_status)
|
| 14 |
+
|
| 15 |
+
# Custom CSS for better styling
|
| 16 |
+
css = """
|
| 17 |
+
.gradio-container {
|
| 18 |
+
max-width: 1200px !important;
|
| 19 |
+
}
|
| 20 |
+
.result-human {
|
| 21 |
+
padding: 10px;
|
| 22 |
+
border-radius: 5px;
|
| 23 |
+
background: #f0f8f0;
|
| 24 |
+
border-left: 4px solid #4CAF50;
|
| 25 |
+
}
|
| 26 |
+
.result-ai {
|
| 27 |
+
padding: 10px;
|
| 28 |
+
border-radius: 5px;
|
| 29 |
+
background: #fff0f0;
|
| 30 |
+
border-left: 4px solid #f44336;
|
| 31 |
+
}
|
| 32 |
+
.chunk-human {
|
| 33 |
+
background: #f8fff8;
|
| 34 |
+
margin: 5px 0;
|
| 35 |
+
padding: 8px;
|
| 36 |
+
border-radius: 3px;
|
| 37 |
+
border-left: 3px solid #4CAF50;
|
| 38 |
+
}
|
| 39 |
+
.chunk-ai {
|
| 40 |
+
background: #fff8f8;
|
| 41 |
+
margin: 5px 0;
|
| 42 |
+
padding: 8px;
|
| 43 |
+
border-radius: 3px;
|
| 44 |
+
border-left: 3px solid #f44336;
|
| 45 |
+
}
|
| 46 |
+
.confidence-high { color: #388E3C; }
|
| 47 |
+
.confidence-medium { color: #F57C00; }
|
| 48 |
+
.confidence-low { color: #D32F2F; }
|
| 49 |
+
"""
|
| 50 |
+
|
| 51 |
+
def analyze_text_interface(text, threshold, chunk_size):
    """Run the detector on *text* and render the result for the Gradio UI.

    Args:
        text: Raw user input from the textbox; may be ``None`` or blank.
        threshold: Detection threshold, forwarded to ``analyze_text``.
        chunk_size: Chunk size, forwarded to ``analyze_text``.

    Returns:
        A 3-tuple ``(overall_html, chunk_html, raw_data)``:
        ``overall_html`` and ``chunk_html`` are HTML fragments and
        ``raw_data`` is a JSON document with the same figures. On blank
        input, a detector-reported ``"error"`` key, or any exception, the
        first element is a "❌ ..." message and the other two are ``""``.
    """
    # Function-scope imports keep this block self-contained.
    import html
    import json

    if not text or not text.strip():
        return "❌ Please enter some text to analyze.", "", ""

    try:
        result = analyze_text(text, threshold=threshold, chunk_size=chunk_size)

        if "error" in result:
            # The message is rendered as HTML, so escape it like any other text.
            error_text = html.escape(str(result['error']), quote=False)
            return f"❌ Error: {error_text}", "", ""

        # Overall result. Every interpolated string is escaped because the
        # fragment is rendered as HTML and the values derive from user input.
        overall_type = str(result['overall_type'])
        overall_class = html.escape(overall_type.lower())
        overall_label = html.escape(overall_type, quote=False)
        artifacts_label = '✅ Yes' if result['has_artifacts'] else '❌ No'
        overall_html = f"""
        <div class="result-{overall_class}">
            <h3>Overall Result: {overall_label}</h3>
            <p><strong>Confidence:</strong> {result['overall_confidence']:.2%}</p>
            <p><strong>AI Score:</strong> {result['overall_score']:.3f}</p>
            <p><strong>AI Artifacts Detected:</strong> {artifacts_label}</p>
            <p><strong>Chunk Analysis:</strong> {result['ai_chunks']} AI / {result['human_chunks']} Human</p>
        </div>
        """

        # Chunk details: collect fragments and join once instead of
        # repeatedly concatenating onto a growing string.
        chunk_parts = ["<h3>Detailed Chunk Analysis:</h3>"]
        for index, chunk in enumerate(result['chunks'], start=1):
            confidence = chunk['confidence']
            if confidence > 0.8:
                confidence_class = "confidence-high"
            elif confidence > 0.6:
                confidence_class = "confidence-medium"
            else:
                confidence_class = "confidence-low"

            chunk_type = str(chunk['type'])
            chunk_text = chunk['text']
            # Truncate first, then escape, so an entity is never cut in half.
            preview = chunk_text[:100] + ('...' if len(chunk_text) > 100 else '')

            chunk_parts.append(f"""
            <div class="chunk-{html.escape(chunk_type.lower())}">
                <strong>Chunk {index}:</strong> {html.escape(chunk_type, quote=False)}
                <br><small>Confidence: <span class="{confidence_class}">{confidence:.2%}</span></small>
                <br><small>Text: "{html.escape(preview, quote=False)}"</small>
            </div>
            """)
        chunk_html = "".join(chunk_parts)

        # Raw data for download
        raw_data = {
            "overall_type": result['overall_type'],
            "overall_confidence": result['overall_confidence'],
            "overall_score": result['overall_score'],
            "has_artifacts": result['has_artifacts'],
            "chunk_analysis": {
                "ai_chunks": result['ai_chunks'],
                "human_chunks": result['human_chunks'],
                "total_chunks": result['total_chunks'],
            },
            "chunks": result['chunks'],
        }

        # Emit real JSON rather than a Python dict repr so the export can be
        # parsed. default=str is deliberate: a non-JSON-native value degrades
        # to text instead of failing the whole analysis.
        raw_json = json.dumps(raw_data, indent=2, ensure_ascii=False, default=str)

        return overall_html, chunk_html, raw_json

    except Exception as e:
        # UI boundary: report the failure in the page instead of raising.
        return f"❌ Analysis failed: {html.escape(str(e), quote=False)}", "", ""
|
| 105 |
+
|
| 106 |
+
# Example texts
|
| 107 |
+
examples = [
|
| 108 |
+
["This is a sample text written by a human. It contains natural variations in writing style and occasional imperfections that make it authentic."],
|
| 109 |
+
["The aforementioned textual content exhibits characteristics consistent with AI-generated material, including syntactic patterns and lexical choices commonly associated with large language models."],
|
| 110 |
+
["Hello world! This is a test. I hope this works correctly. The weather is nice today."]
|
| 111 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
+
# Create Gradio interface
|
| 114 |
+
with gr.Blocks(css=css, title="AI Text Detector") as demo:
|
| 115 |
+
gr.Markdown(
|
| 116 |
+
"""
|
| 117 |
+
# 🔍 AI Text Detector
|
| 118 |
+
*Detect AI-generated text using advanced machine learning models*
|
| 119 |
+
|
| 120 |
+
**Model Status:** {}
|
| 121 |
+
""".format(model_status)
|
| 122 |
)
|
| 123 |
+
|
| 124 |
+
with gr.Row():
|
| 125 |
+
with gr.Column():
|
| 126 |
+
text_input = gr.Textbox(
|
| 127 |
+
label="Input Text",
|
| 128 |
+
placeholder="Paste or type the text you want to analyze here...",
|
| 129 |
+
lines=8,
|
| 130 |
+
max_lines=20
|
| 131 |
+
)
|
| 132 |
+
|
| 133 |
+
with gr.Row():
|
| 134 |
+
threshold = gr.Slider(
|
| 135 |
+
minimum=0.1,
|
| 136 |
+
maximum=0.9,
|
| 137 |
+
value=0.5,
|
| 138 |
+
step=0.05,
|
| 139 |
+
label="Detection Threshold",
|
| 140 |
+
info="Higher values = more strict AI detection"
|
| 141 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
+
chunk_size = gr.Slider(
|
| 144 |
+
minimum=40,
|
| 145 |
+
maximum=200,
|
| 146 |
+
value=80,
|
| 147 |
+
step=10,
|
| 148 |
+
label="Chunk Size (tokens)",
|
| 149 |
+
info="Smaller chunks = more detailed analysis"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
)
|
| 151 |
+
|
| 152 |
+
analyze_btn = gr.Button("Analyze Text", variant="primary")
|
| 153 |
+
|
| 154 |
+
gr.Examples(
|
| 155 |
+
examples=examples,
|
| 156 |
+
inputs=text_input,
|
| 157 |
+
label="Try these examples:"
|
| 158 |
+
)
|
| 159 |
+
|
| 160 |
+
with gr.Column():
|
| 161 |
+
overall_output = gr.HTML(label="Overall Result")
|
| 162 |
+
chunk_output = gr.HTML(label="Chunk Details")
|
| 163 |
+
raw_output = gr.Textbox(
|
| 164 |
+
label="Raw Data (for download)",
|
| 165 |
+
lines=4,
|
| 166 |
+
max_lines=10
|
| 167 |
+
)
|
| 168 |
+
|
| 169 |
+
# Footer
|
| 170 |
+
gr.Markdown(
|
| 171 |
+
"""
|
| 172 |
+
---
|
| 173 |
+
**How it works:**
|
| 174 |
+
- Text is split into meaningful chunks
|
| 175 |
+
- Each chunk is analyzed by the AI detection model
|
| 176 |
+
- Results are aggregated for overall classification
|
| 177 |
+
- Built with `abhi099k/ai-text-detector-v-n4.0` model
|
| 178 |
+
|
| 179 |
+
**Note:** This tool provides probabilistic estimates and should be used as one of several indicators when evaluating text authenticity.
|
| 180 |
+
"""
|
| 181 |
+
)
|
| 182 |
+
|
| 183 |
+
# Connect the function
|
| 184 |
+
analyze_btn.click(
|
| 185 |
+
fn=analyze_text_interface,
|
| 186 |
+
inputs=[text_input, threshold, chunk_size],
|
| 187 |
+
outputs=[overall_output, chunk_output, raw_output]
|
| 188 |
+
)
|
| 189 |
|
| 190 |
+
# Launch the app
|
| 191 |
+
if __name__ == "__main__":
|
| 192 |
+
demo.launch(
|
| 193 |
+
server_name="0.0.0.0",
|
| 194 |
+
server_port=7860,
|
| 195 |
+
share=False
|
| 196 |
+
)
|