abhi099k committed on
Commit
c9a6b37
·
verified ·
1 Parent(s): 4043ee8

Rename src/streamlit_app.py to src/app.py

Browse files
Files changed (2) hide show
  1. src/app.py +197 -0
  2. src/streamlit_app.py +0 -40
src/app.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Streamlit front end for "AI Text Detector Pro".
#
# Flow: calibrate a decision threshold once, collect text and settings from the
# user, run the detector over the text (optionally split into chunks), then
# render an overall verdict, per-chunk highlighting, an optional detailed
# report and a downloadable JSON summary.
import html
import json
import time

import plotly.graph_objects as go
import streamlit as st

from detector import (
    smart_chunk_text,
    has_html_or_ai_artifacts,
    calibrate_threshold,
    predict_chunks_with_tau,
    preprocess_text_for_detection,
)

st.set_page_config(page_title="AI Text Detector Pro", layout="wide", page_icon="🔍")

# --- Professional CSS ---
st.markdown("""
<style>
.main-header { font-size: 2.5rem; font-weight: 700; color: #1f77b4; text-align: center; margin-bottom: 1rem; }
.result-card { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 2rem; border-radius: 15px; color: white; margin: 1rem 0; }
.ai-highlight { background-color: #ff6b6b; padding: 4px 8px; border-radius: 4px; color: white; margin: 2px; display: inline-block; }
.human-highlight { background-color: #51cf66; padding: 4px 8px; border-radius: 4px; color: white; margin: 2px; display: inline-block; }
.metric-card { background: #f8f9fa; padding: 1rem; border-radius: 10px; border-left: 4px solid #1f77b4; margin: 0.5rem 0; }
.feature-badge { background: #e9ecef; padding: 0.3rem 0.8rem; border-radius: 20px; font-size: 0.8rem; margin: 0.2rem; display: inline-block; }
</style>
""", unsafe_allow_html=True)

st.markdown('<div class="main-header">🔍 AI Text Detector Pro</div>', unsafe_allow_html=True)
st.caption("Advanced detection using ensemble methods with GPT-5 pattern recognition")

# === Calibration ===
human_calibration_texts = [
    "This is a genuine human-written sentence for calibration purposes.",
    "Another authentic text sample composed by a human author.",
    "Calibrating detectors with real-world data improves reliability."
]


@st.cache_data(show_spinner=False)
def _calibrated_threshold():
    """Return the detector threshold calibrated on the built-in human samples.

    Streamlit re-executes this script on every widget interaction; caching
    keeps the calibration from being recomputed on each rerun.
    """
    return calibrate_threshold(human_calibration_texts, calibration_proportion=0.05)


tau = _calibrated_threshold()

# Multiplier applied to the raw AI percentage for each sidebar detection mode.
_MODE_MULTIPLIERS = {"Standard": 1.0, "Aggressive": 1.2, "Conservative": 0.8}

# HTML snippets are assembled flush-left: Markdown treats lines indented by
# four or more spaces as a code block, which would print the tags verbatim.
_ARTIFACT_CARD = (
    '<div class="result-card">'
    "<h2>🔴 AI-Generated Content Detected</h2>"
    "<p>HTML tags or AI artifacts found in the text.</p>"
    "</div>"
)


def _split_into_chunks(clean_text, enable_chunking):
    """Return the list of text pieces to score.

    Chunking is applied only when the user enabled it and the text is longer
    than 50 whitespace-separated words; otherwise the text is scored whole.
    """
    if enable_chunking and len(clean_text.split()) > 50:
        return smart_chunk_text(clean_text)
    return [clean_text]


def _ai_percentage(results, detection_mode):
    """Return the length-weighted AI percentage (0-100, one decimal place).

    Each entry of ``results`` is read as a mapping with ``"text"`` and
    ``"score"`` keys. Scores are presumably AI probabilities in [0, 1];
    verify against ``detector.predict_chunks_with_tau``.

    The mode multiplier is applied before rounding so the displayed and
    exported value never carries floating-point noise such as 39.959999.
    """
    total_length = sum(len(chunk["text"]) for chunk in results)
    if not total_length:
        return 0.0
    ai_weighted = sum(len(chunk["text"]) * chunk["score"] for chunk in results)
    adjusted = (ai_weighted / total_length) * 100 * _MODE_MULTIPLIERS.get(detection_mode, 1.0)
    # Clamp so st.progress always receives a value inside [0, 1].
    return round(min(max(adjusted, 0.0), 100.0), 1)


def _verdict(ai_percentage):
    """Map an AI percentage to an ``(emoji, label)`` pair for the result card."""
    if ai_percentage >= 70:
        return "🔴", "HIGH AI PROBABILITY"
    if ai_percentage >= 40:
        return "🟡", "MIXED CONTENT"
    return "🟢", "LIKELY HUMAN"


def _highlighted_html(results):
    """Return one colour-coded ``<span>`` per chunk, joined into a single string.

    Chunk text comes from the user and is rendered with
    ``unsafe_allow_html=True``, so it is HTML-escaped to stop stray ``<``/``&``
    characters from breaking the page or injecting markup.
    """
    spans = []
    for chunk in results:
        css_class = "ai-highlight" if chunk["type"] == "AI" else "human-highlight"
        spans.append(
            f'<span class="{css_class}" title="Score: {chunk["score"]:.3f}">'
            f'{html.escape(chunk["text"])}</span> '
        )
    return "".join(spans)


def _render_details(results):
    """Render the expandable report: feature tab, per-chunk tab and score plot."""
    with st.expander("🔍 Detailed Analysis Report", expanded=True):
        feature_tab, chunk_tab, plot_tab = st.tabs(
            ["Feature Analysis", "Chunk Details", "Visualization"]
        )

        with feature_tab:
            st.write("**Feature Scores:** Under development.")

        with chunk_tab:
            for number, chunk in enumerate(results, start=1):
                st.write(f"**Chunk {number}:** ({chunk['type']} - Score: {chunk['score']:.3f})")
                st.write(chunk["text"])
                st.divider()

        with plot_tab:
            # A line chart is only meaningful with at least two chunks.
            if len(results) > 1:
                scores = [chunk["score"] for chunk in results]
                fig = go.Figure()
                fig.add_trace(go.Scatter(
                    x=list(range(1, len(scores) + 1)),
                    y=scores,
                    mode='lines+markers',
                    name='AI Probability',
                    line=dict(color='red', width=3)
                ))
                fig.update_layout(
                    title="AI Probability Across Text Chunks",
                    xaxis_title="Chunk Number",
                    yaxis_title="AI Probability",
                    showlegend=True
                )
                st.plotly_chart(fig, use_container_width=True)


def _render_report(results, detection_mode, show_details):
    """Render the verdict card, metrics, highlighted text and download button."""
    ai_percentage = _ai_percentage(results, detection_mode)
    human_percentage = round(100 - ai_percentage, 1)
    result_emoji, result_text = _verdict(ai_percentage)

    # Display card
    st.markdown(
        '<div class="result-card">'
        f"<h2>{result_emoji} {result_text}</h2>"
        '<div style="display: flex; justify-content: space-between; align-items: center;">'
        "<div>"
        f"<h3>AI: {ai_percentage}%</h3>"
        f"<h3>Human: {human_percentage}%</h3>"
        "</div>"
        f'<div style="font-size: 3rem;">{result_emoji}</div>'
        "</div>"
        "</div>",
        unsafe_allow_html=True,
    )

    # Metrics (distinct names so the page-level col1/col2 are not rebound)
    ai_col, human_col, confidence_col = st.columns(3)
    ai_col.metric("AI Probability", f"{ai_percentage}%")
    human_col.metric("Human Probability", f"{human_percentage}%")
    confidence_col.metric(
        "Confidence",
        "High" if abs(ai_percentage - human_percentage) > 30 else "Medium",
    )

    st.progress(ai_percentage / 100)

    # Highlighted output
    st.subheader("📊 Text Analysis")
    st.markdown(
        f'<div style="line-height: 2.5;">{_highlighted_html(results)}</div>',
        unsafe_allow_html=True,
    )

    # Detailed analysis
    if show_details:
        _render_details(results)

    # Download report
    st.download_button(
        "📥 Download Full Report",
        data=json.dumps({
            "overview": {
                "ai_percentage": ai_percentage,
                "human_percentage": human_percentage,
                "result": result_text,
                "detection_mode": detection_mode
            },
            "detailed_results": results
        }, indent=2),
        file_name=f"ai_detection_report_{int(time.time())}.json",
        mime="application/json",
    )


# === Sidebar ===
with st.sidebar:
    st.header("Settings")
    detection_mode = st.selectbox(
        "Detection Mode",
        ["Standard", "Aggressive", "Conservative"],
        help="Standard: Balanced approach, Aggressive: Higher AI detection, Conservative: Higher human detection"
    )
    show_details = st.checkbox("Show Detailed Analysis", value=True)
    enable_chunking = st.checkbox("Enable Text Chunking", value=False)

# === Main Interface ===
col1, col2 = st.columns([2, 1])

with col1:
    text = st.text_area(
        "Enter text to analyze:",
        height=250,
        placeholder="Paste your text here...\n\nTip: For better accuracy, provide text with at least 50 words.",
        help="The detector works best with longer texts (100+ words)"
    )

with col2:
    st.info("**💡 Detection Features:**")
    st.write("• GPT-5 pattern recognition")
    st.write("• Statistical analysis")
    st.write("• Sentence structure evaluation")
    st.write("• Repetition detection")
    st.write("• HTML/artifact detection")

if st.button("🚀 Analyze Text", type="primary", use_container_width=True):
    if not text.strip():
        st.error("⚠️ Please enter some text to analyze!")
    else:
        # The spinner wraps the real detector work rather than an artificial
        # sleep, so it is visible exactly as long as the analysis takes.
        with st.spinner("🔄 Analyzing text with advanced detection..."):
            clean_text = preprocess_text_for_detection(text)
            has_artifacts = has_html_or_ai_artifacts(clean_text)
            if has_artifacts:
                results = []
            else:
                chunks = _split_into_chunks(clean_text, enable_chunking)
                results = predict_chunks_with_tau(chunks, tau)

        if has_artifacts:
            st.markdown(_ARTIFACT_CARD, unsafe_allow_html=True)
        else:
            _render_report(results, detection_mode, show_details)

# Footer
st.markdown("---")
st.caption("🔬 **AI Text Detector Pro** v2.0 | Enhanced with GPT-5 pattern recognition and statistical analysis")
src/streamlit_app.py DELETED
@@ -1,40 +0,0 @@
# Default Streamlit starter app: an interactive spiral scatter plot whose
# point count and number of turns are controlled by two sliders.
import altair as alt
import numpy as np
import pandas as pd
import streamlit as st

# A bare string placed after the imports is rendered on the page as Markdown
# by Streamlit's "magic" feature, so it must stay a standalone expression.
"""
# Welcome to Streamlit!

Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
forums](https://discuss.streamlit.io).

In the meantime, below is an example of what you can do with just a few lines of code:
"""

point_count = st.slider("Number of points in spiral", 1, 10000, 1100)
turn_count = st.slider("Number of turns in spiral", 1, 300, 31)

# Evenly spaced positions in [0, 1]; each one serves both as the point's
# radius and as its colour index.
position = np.linspace(0, 1, point_count)
angle = 2 * np.pi * turn_count * position

spiral = pd.DataFrame({
    "x": position * np.cos(angle),
    "y": position * np.sin(angle),
    "idx": position,
    "rand": np.random.randn(point_count),
})

# Axes and legends are hidden; colour follows "idx" and marker size "rand".
spiral_chart = (
    alt.Chart(spiral, height=700, width=700)
    .mark_point(filled=True)
    .encode(
        x=alt.X("x", axis=None),
        y=alt.Y("y", axis=None),
        color=alt.Color("idx", legend=None, scale=alt.Scale()),
        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
    )
)
st.altair_chart(spiral_chart)