abhi099k committed on
Commit
42057d6
·
verified ·
1 Parent(s): 25121a7

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +190 -191
src/app.py CHANGED
@@ -1,197 +1,196 @@
1
- import streamlit as st
2
- import json
3
- import time
4
- import plotly.graph_objects as go
5
- from detector import (
6
- smart_chunk_text,
7
- has_html_or_ai_artifacts,
8
- calibrate_threshold,
9
- predict_chunks_with_tau,
10
- preprocess_text_for_detection,
11
- )
12
-
13
st.set_page_config(page_title="AI Text Detector Pro", layout="wide", page_icon="🔍")

# --- Professional CSS ---
# Injected once; the class names are referenced by the raw-HTML fragments
# rendered further down the page.
_PAGE_CSS = """
<style>
.main-header { font-size: 2.5rem; font-weight: 700; color: #1f77b4; text-align: center; margin-bottom: 1rem; }
.result-card { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 2rem; border-radius: 15px; color: white; margin: 1rem 0; }
.ai-highlight { background-color: #ff6b6b; padding: 4px 8px; border-radius: 4px; color: white; margin: 2px; display: inline-block; }
.human-highlight { background-color: #51cf66; padding: 4px 8px; border-radius: 4px; color: white; margin: 2px; display: inline-block; }
.metric-card { background: #f8f9fa; padding: 1rem; border-radius: 10px; border-left: 4px solid #1f77b4; margin: 0.5rem 0; }
.feature-badge { background: #e9ecef; padding: 0.3rem 0.8rem; border-radius: 20px; font-size: 0.8rem; margin: 0.2rem; display: inline-block; }
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Page title and tagline.
st.markdown('<div class="main-header">🔍 AI Text Detector Pro</div>', unsafe_allow_html=True)
st.caption("Advanced detection using ensemble methods with GPT-5 pattern recognition")
29
-
30
- # === Calibration ===
31
# === Calibration ===
# Small set of known human-written samples used to derive the decision
# threshold tau. NOTE(review): three sentences is a very small calibration
# set — presumably adequate for the detector's calibration routine, but
# worth confirming against `calibrate_threshold`'s expectations.
human_calibration_texts = [
    "This is a genuine human-written sentence for calibration purposes.",
    "Another authentic text sample composed by a human author.",
    "Calibrating detectors with real-world data improves reliability."
]
# tau is the per-chunk decision threshold consumed by predict_chunks_with_tau
# below; calibration_proportion=0.05 is forwarded verbatim to the detector.
tau = calibrate_threshold(human_calibration_texts, calibration_proportion=0.05)
37
-
38
- # === Sidebar ===
39
# === Sidebar ===
# Collects the analysis settings read later by the analyze-button handler.
with st.sidebar:
    st.header("Settings")
    detection_mode = st.selectbox(
        "Detection Mode",
        ["Standard", "Aggressive", "Conservative"],
        help="Standard: Balanced approach, Aggressive: Higher AI detection, Conservative: Higher human detection"
    )
    show_details = st.checkbox("Show Detailed Analysis", value=True)
    enable_chunking = st.checkbox("Enable Text Chunking", value=False)

# === Main Interface ===
# Left column: text input. Right column: feature summary.
col1, col2 = st.columns([2, 1])

with col1:
    text = st.text_area(
        "Enter text to analyze:",
        height=250,
        placeholder="Paste your text here...\n\nTip: For better accuracy, provide text with at least 50 words.",
        help="The detector works best with longer texts (100+ words)"
    )

with col2:
    st.info("**💡 Detection Features:**")
    # Fix: two entries were missing the "•" bullet prefix, rendering
    # inconsistently with the rest of the list.
    st.write("• GPT-5 pattern recognition")
    st.write("• Statistical analysis")
    st.write("• Sentence structure evaluation")
    st.write("• Repetition detection")
    st.write("• HTML/artifact detection")
67
-
68
# === Analysis ===
# Runs when the user clicks the analyze button: preprocess, score, and render.
if st.button("🚀 Analyze Text", type="primary", use_container_width=True):
    if not text.strip():
        st.error("⚠️ Please enter some text to analyze!")
    else:
        with st.spinner("🔄 Analyzing text with advanced detection..."):
            time.sleep(1)  # brief pause so the spinner is visible (pure UX)

            # Preprocess
            clean_text = preprocess_text_for_detection(text)

            if has_html_or_ai_artifacts(clean_text):
                # Hard short-circuit: artifacts are treated as conclusive.
                st.markdown("""
                <div class="result-card">
                    <h2>🔴 AI-Generated Content Detected</h2>
                    <p>HTML tags or AI artifacts found in the text.</p>
                </div>
                """, unsafe_allow_html=True)
            else:
                # Chunk only when enabled and the text is long enough.
                if enable_chunking and len(clean_text.split()) > 50:
                    chunks = smart_chunk_text(clean_text)
                else:
                    chunks = [clean_text]

                results = predict_chunks_with_tau(chunks, tau)

                # Length-weighted scoring: longer chunks contribute more.
                # (Fix: dropped the unused `human_weighted` accumulator.)
                total_length = sum(len(c["text"]) for c in results)
                ai_weighted = sum(len(c["text"]) * c["score"] for c in results)

                ai_percentage = round((ai_weighted / total_length) * 100, 1) if total_length else 0
                human_percentage = round(100 - ai_percentage, 1)

                # Apply detection mode adjustments.
                # (Fix: round the adjusted values — the originals were left
                # unrounded and could render with long decimal tails.)
                if detection_mode == "Aggressive":
                    ai_percentage = round(min(ai_percentage * 1.2, 100), 1)
                    human_percentage = round(100 - ai_percentage, 1)
                elif detection_mode == "Conservative":
                    ai_percentage = round(ai_percentage * 0.8, 1)
                    human_percentage = round(100 - ai_percentage, 1)

                # Result text
                if ai_percentage >= 70:
                    result_emoji, result_text = "🔴", "HIGH AI PROBABILITY"
                elif ai_percentage >= 40:
                    result_emoji, result_text = "🟡", "MIXED CONTENT"
                else:
                    result_emoji, result_text = "🟢", "LIKELY HUMAN"

                # Display card
                st.markdown(f"""
                <div class="result-card">
                    <h2>{result_emoji} {result_text}</h2>
                    <div style="display: flex; justify-content: space-between; align-items: center;">
                        <div>
                            <h3>AI: {ai_percentage}%</h3>
                            <h3>Human: {human_percentage}%</h3>
                        </div>
                        <div style="font-size: 3rem;">{result_emoji}</div>
                    </div>
                </div>
                """, unsafe_allow_html=True)

                # Metrics (note: intentionally shadows the layout columns above)
                col1, col2, col3 = st.columns(3)
                with col1: st.metric("AI Probability", f"{ai_percentage}%")
                with col2: st.metric("Human Probability", f"{human_percentage}%")
                with col3: st.metric("Confidence", "High" if abs(ai_percentage - human_percentage) > 30 else "Medium")

                st.progress(ai_percentage / 100)

                # Highlighted output: one colored span per chunk.
                st.subheader("📊 Text Analysis")
                html_output = ""
                for result in results:
                    css_class = "ai-highlight" if result["type"] == "AI" else "human-highlight"
                    html_output += f'<span class="{css_class}" title="Score: {result["score"]:.3f}">{result["text"]}</span> '
                st.markdown(f'<div style="line-height: 2.5;">{html_output}</div>', unsafe_allow_html=True)

                # Detailed analysis
                if show_details:
                    with st.expander("🔍 Detailed Analysis Report", expanded=True):
                        tab1, tab2, tab3 = st.tabs(["Feature Analysis", "Chunk Details", "Visualization"])

                        with tab1:
                            st.write("**Feature Scores:** Under development.")
                        with tab2:
                            for i, result in enumerate(results):
                                st.write(f"**Chunk {i+1}:** ({result['type']} - Score: {result['score']:.3f})")
                                st.write(result["text"])
                                st.divider()
                        with tab3:
                            # Per-chunk score trace; only meaningful with >1 chunk.
                            if len(results) > 1:
                                scores = [r["score"] for r in results]
                                fig = go.Figure()
                                fig.add_trace(go.Scatter(
                                    x=list(range(1, len(scores) + 1)),
                                    y=scores,
                                    mode='lines+markers',
                                    name='AI Probability',
                                    line=dict(color='red', width=3)
                                ))
                                fig.update_layout(
                                    title="AI Probability Across Text Chunks",
                                    xaxis_title="Chunk Number",
                                    yaxis_title="AI Probability",
                                    showlegend=True
                                )
                                st.plotly_chart(fig, use_container_width=True)

                # Download report
                st.download_button(
                    "📥 Download Full Report",
                    data=json.dumps({
                        "overview": {
                            "ai_percentage": ai_percentage,
                            "human_percentage": human_percentage,
                            "result": result_text,
                            "detection_mode": detection_mode
                        },
                        "detailed_results": results
                    }, indent=2),
                    file_name=f"ai_detection_report_{int(time.time())}.json",
                    mime="application/json",
                )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
# Footer: static branding/version caption rendered at the bottom of the page.
st.markdown("---")
st.caption("🔬 **AI Text Detector Pro** v2.0 | Enhanced with GPT-5 pattern recognition and statistical analysis")
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from detector import analyze_text, get_components
4
+
5
# Warm up the detector once at import time so the first request is fast.
# A failure is captured into `model_status` (shown in the UI header below)
# rather than crashing the app at startup.
print("Starting AI Text Detector...")
try:
    get_components()
except Exception as err:
    model_status = f"⚠️ Model loading issue: {str(err)}"
else:
    model_status = "✅ Model loaded successfully!"

print(model_status)
14
+
15
# Custom CSS for better styling. The .result-* and .chunk-* class names are
# generated dynamically in analyze_text_interface from the detector's
# type labels (lowercased), and the .confidence-* classes color the
# per-chunk confidence readout.
css = """
.gradio-container {
    max-width: 1200px !important;
}
.result-human {
    padding: 10px;
    border-radius: 5px;
    background: #f0f8f0;
    border-left: 4px solid #4CAF50;
}
.result-ai {
    padding: 10px;
    border-radius: 5px;
    background: #fff0f0;
    border-left: 4px solid #f44336;
}
.chunk-human {
    background: #f8fff8;
    margin: 5px 0;
    padding: 8px;
    border-radius: 3px;
    border-left: 3px solid #4CAF50;
}
.chunk-ai {
    background: #fff8f8;
    margin: 5px 0;
    padding: 8px;
    border-radius: 3px;
    border-left: 3px solid #f44336;
}
.confidence-high { color: #388E3C; }
.confidence-medium { color: #F57C00; }
.confidence-low { color: #D32F2F; }
"""
50
+
51
def analyze_text_interface(text, threshold, chunk_size):
    """
    Gradio callback: run the detector over *text* and build the three outputs.

    Parameters
    ----------
    text : str
        Raw user input from the textbox.
    threshold : float
        Detection threshold, forwarded to ``analyze_text``.
    chunk_size : int
        Chunk size (tokens), forwarded to ``analyze_text``.

    Returns
    -------
    tuple[str, str, str]
        (overall-result HTML, per-chunk HTML, raw JSON text). On any failure
        the first element carries an error message and the others are empty.
    """
    # Local stdlib imports keep the module's import header unchanged.
    import html as html_mod
    import json

    if not text or not text.strip():
        return "❌ Please enter some text to analyze.", "", ""

    try:
        result = analyze_text(text, threshold=threshold, chunk_size=chunk_size)

        # Detector-reported failure (as opposed to a raised exception).
        if "error" in result:
            return f"❌ Error: {result['error']}", "", ""

        # Overall result card; CSS class derived from the detector's label.
        overall_html = f"""
        <div class="result-{result['overall_type'].lower()}">
            <h3>Overall Result: {result['overall_type']}</h3>
            <p><strong>Confidence:</strong> {result['overall_confidence']:.2%}</p>
            <p><strong>AI Score:</strong> {result['overall_score']:.3f}</p>
            <p><strong>AI Artifacts Detected:</strong> {'✅ Yes' if result['has_artifacts'] else '❌ No'}</p>
            <p><strong>Chunk Analysis:</strong> {result['ai_chunks']} AI / {result['human_chunks']} Human</p>
        </div>
        """

        # Per-chunk breakdown.
        chunk_html = "<h3>Detailed Chunk Analysis:</h3>"
        for i, chunk in enumerate(result['chunks']):
            confidence_class = (
                "confidence-high" if chunk['confidence'] > 0.8
                else "confidence-medium" if chunk['confidence'] > 0.6
                else "confidence-low"
            )
            # Fix: escape user-controlled chunk text before embedding it in
            # HTML — previously raw text could inject markup into the page.
            snippet = html_mod.escape(chunk['text'][:100])
            ellipsis = '...' if len(chunk['text']) > 100 else ''
            chunk_html += f"""
            <div class="chunk-{chunk['type'].lower()}">
                <strong>Chunk {i+1}:</strong> {chunk['type']}
                <br><small>Confidence: <span class="{confidence_class}">{chunk['confidence']:.2%}</span></small>
                <br><small>Text: "{snippet}{ellipsis}"</small>
            </div>
            """

        # Raw data for download.
        raw_data = {
            "overall_type": result['overall_type'],
            "overall_confidence": result['overall_confidence'],
            "overall_score": result['overall_score'],
            "has_artifacts": result['has_artifacts'],
            "chunk_analysis": {
                "ai_chunks": result['ai_chunks'],
                "human_chunks": result['human_chunks'],
                "total_chunks": result['total_chunks']
            },
            "chunks": result['chunks']
        }

        # Fix: emit valid JSON — str(dict) produced a Python repr (single
        # quotes, True/False) that is not parseable by JSON consumers despite
        # the "Raw Data (for download)" label.
        return overall_html, chunk_html, json.dumps(raw_data, indent=2, ensure_ascii=False)

    except Exception as e:
        return f"❌ Analysis failed: {str(e)}", "", ""
105
+
106
# Example texts wired into gr.Examples below: one casual human-style sample,
# one deliberately stilted AI-style sample, and one short mixed sample.
# Each entry is a single-element list because gr.Examples feeds one input
# component (text_input).
examples = [
    ["This is a sample text written by a human. It contains natural variations in writing style and occasional imperfections that make it authentic."],
    ["The aforementioned textual content exhibits characteristics consistent with AI-generated material, including syntactic patterns and lexical choices commonly associated with large language models."],
    ["Hello world! This is a test. I hope this works correctly. The weather is nice today."]
]
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
# Create Gradio interface. Component creation order defines the layout, so
# this block is documentation-only; the wiring at the bottom connects the
# button to analyze_text_interface.
with gr.Blocks(css=css, title="AI Text Detector") as demo:
    # Page header; model_status was computed once at module import time.
    gr.Markdown(
        """
        # 🔍 AI Text Detector
        *Detect AI-generated text using advanced machine learning models*

        **Model Status:** {}
        """.format(model_status)
    )

    with gr.Row():
        # Left column: input text plus tuning controls.
        with gr.Column():
            text_input = gr.Textbox(
                label="Input Text",
                placeholder="Paste or type the text you want to analyze here...",
                lines=8,
                max_lines=20
            )

            with gr.Row():
                # Both slider values are forwarded verbatim to analyze_text.
                threshold = gr.Slider(
                    minimum=0.1,
                    maximum=0.9,
                    value=0.5,
                    step=0.05,
                    label="Detection Threshold",
                    info="Higher values = more strict AI detection"
                )

                chunk_size = gr.Slider(
                    minimum=40,
                    maximum=200,
                    value=80,
                    step=10,
                    label="Chunk Size (tokens)",
                    info="Smaller chunks = more detailed analysis"
                )

            analyze_btn = gr.Button("Analyze Text", variant="primary")

            gr.Examples(
                examples=examples,
                inputs=text_input,
                label="Try these examples:"
            )

        # Right column: outputs populated by analyze_text_interface.
        with gr.Column():
            overall_output = gr.HTML(label="Overall Result")
            chunk_output = gr.HTML(label="Chunk Details")
            raw_output = gr.Textbox(
                label="Raw Data (for download)",
                lines=4,
                max_lines=10
            )

    # Footer
    gr.Markdown(
        """
        ---
        **How it works:**
        - Text is split into meaningful chunks
        - Each chunk is analyzed by the AI detection model
        - Results are aggregated for overall classification
        - Built with `abhi099k/ai-text-detector-v-n4.0` model

        **Note:** This tool provides probabilistic estimates and should be used as one of several indicators when evaluating text authenticity.
        """
    )

    # Connect the function: three inputs, three outputs, in matching order.
    analyze_btn.click(
        fn=analyze_text_interface,
        inputs=[text_input, threshold, chunk_size],
        outputs=[overall_output, chunk_output, raw_output]
    )
189
 
190
# Launch the app only when executed as a script (not when imported).
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",  # bind all interfaces (container-friendly)
        "server_port": 7860,       # the conventional Gradio/Spaces port
        "share": False,            # no public tunnel
    }
    demo.launch(**launch_options)