Jin Zhu committed on
Commit
51bdea8
·
1 Parent(s): a8c73d1

update website

Browse files
Files changed (2) hide show
  1. requirements.txt +2 -1
  2. src/app.py +183 -28
requirements.txt CHANGED
@@ -8,4 +8,5 @@ transformers==4.55.2
8
  peft==0.17.1
9
  tqdm
10
  scikit-learn
11
- huggingface_hub
 
 
8
  peft==0.17.1
9
  tqdm
10
  scikit-learn
11
+ huggingface_hub
12
+ json
src/app.py CHANGED
@@ -105,6 +105,55 @@ def load_model(from_pretrained, base_model, cache_dir, device):
105
  model.set_criterion_fn('mean')
106
  return model
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  # -----------------
109
  # Configuration
110
  # -----------------
@@ -141,6 +190,13 @@ except Exception as e:
141
  model_loaded = False
142
  error_message = str(e)
143
 
 
 
 
 
 
 
 
144
  # -----------------
145
  # Streamlit Layout
146
  # -----------------
@@ -152,10 +208,7 @@ with col0:
152
  )
153
 
154
  st.markdown(
155
- """
156
- Click the 'Detect' button to get the probability that the text is generated by LLMs.
157
- Changing some options may improve detection performance (see [our paper](https://arxiv.org/abs/2510.01268) for details).
158
- """
159
  )
160
 
161
  # Display model loading status
@@ -171,21 +224,28 @@ col1, col2 = st.columns((1, 1))
171
 
172
  with col1:
173
  text_input = st.text_area(
174
- label="Text to be detected",
175
- placeholder="Paste your text here",
176
- help="Your text goes here. It can be long; the longer the text, the more reliable the result.",
177
  height=200,
178
  )
179
 
180
  detect_clicked = st.button("Detect", type="primary", use_container_width=True)
181
 
 
 
 
 
 
 
 
182
  with col2:
183
  statistics_ph = st.empty()
184
  statistics_ph.text_input(
185
  label="Statistics",
186
  value="",
187
  disabled=True,
188
- help="Detection statistics will appear here after clicking Detect.",
189
  )
190
 
191
  pvalue_ph = st.empty()
@@ -193,20 +253,7 @@ with col2:
193
  label="p-value",
194
  value="",
195
  disabled=True,
196
- help="p-value will appear here after clicking Detect.",
197
- )
198
-
199
- # -----------------
200
- # Options Section
201
- # -----------------
202
- st.subheader("⚙️ Detection Options")
203
- col_domain, col_extra = st.columns((2, 3))
204
- with col_domain:
205
- selected_domain = st.selectbox(
206
- label="Domain",
207
- options=DOMAINS,
208
- index=0, # Default to General
209
- help="💡 **Tip:** Select the domain that best matches your text for improved detection accuracy."
210
  )
211
 
212
  # -----------------
@@ -216,6 +263,10 @@ if detect_clicked:
216
  if not text_input.strip():
217
  st.warning("⚠️ Please enter some text before detecting.")
218
  else:
 
 
 
 
219
  # Start timing to decide whether to show progress bar
220
  start_time = time.time()
221
 
@@ -240,6 +291,15 @@ if detect_clicked:
240
  # Clear status and show results
241
  status_placeholder.empty()
242
 
 
 
 
 
 
 
 
 
 
243
  # Update score displays
244
  with col2:
245
  statistics_ph.text_input(
@@ -250,12 +310,69 @@ if detect_clicked:
250
  )
251
 
252
  pvalue_ph.text_input(
253
- label="P-value",
254
  value=f"{p_value:.6f}",
255
  disabled=True,
256
- help="P-value will appear here after clicking Detect.",
257
  )
258
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  # Show detailed results
260
  with result_placeholder:
261
  st.caption(f"⏱️ Processing time: {elapsed_time:.2f} seconds")
@@ -265,14 +382,52 @@ if detect_clicked:
265
  st.error(f"❌ Error during detection: {str(e)}")
266
  st.exception(e)
267
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  # -----------------
269
  # Footer
270
  # -----------------
271
- st.markdown("---")
272
  st.markdown(
273
  """
274
- <div style='text-align: center; color: gray;'>
275
- <small>Powered by Adaptive LLM Text Detection | For research purposes only</small>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  </div>
277
  """,
278
  unsafe_allow_html=True
 
105
  model.set_criterion_fn('mean')
106
  return model
107
 
108
+ import json
109
+ from datetime import datetime
110
+
111
+ # -----------------
112
+ # Result Feedback
113
+ # -----------------
114
def save_feedback(text, domain, statistics, p_value, feedback_type):
    """Append one user-feedback record to the JSON feedback log.

    The log is a single JSON file holding a list of records. Each call reads
    the current list, appends one entry, and rewrites the whole file.

    Args:
        text: The text that was submitted for detection.
        domain: The domain option selected for that detection run.
        statistics: Detection statistic; stored as ``float(statistics)``.
        p_value: Detection p-value; stored as ``float(p_value)``.
        feedback_type: ``'expected'`` or ``'unexpected'``.

    Returns:
        Path of the JSON file the record was written to.

    Raises:
        TypeError, ValueError: If ``statistics`` or ``p_value`` cannot be
            converted to ``float``.
        OSError: If the feedback directory or file cannot be written.
    """
    # Choose the storage location from the environment: when SPACE_ID is set
    # (presumably a hosted Space -- confirm), write under /tmp; otherwise
    # write next to the application.
    if os.environ.get('SPACE_ID'):
        feedback_dir = Path('/tmp/feedback_data')
    else:
        feedback_dir = APP_DIR / 'feedback_data'

    feedback_dir.mkdir(exist_ok=True, parents=True)
    feedback_file = feedback_dir / 'user_feedback.json'

    # Build the record to store.
    feedback_entry = {
        'timestamp': datetime.now().isoformat(),
        'text': text,
        'domain': domain,
        'statistics': float(statistics),
        'p_value': float(p_value),
        'feedback': feedback_type,
    }

    # Load the existing records. This stays best-effort (a missing,
    # unreadable, or corrupt file starts a fresh list), but only I/O and
    # decoding errors are swallowed -- not KeyboardInterrupt/SystemExit as
    # the previous bare ``except:`` did. JSONDecodeError and
    # UnicodeDecodeError are both ValueError subclasses.
    try:
        with open(feedback_file, 'r', encoding='utf-8') as f:
            feedback_data = json.load(f)
    except (OSError, ValueError):
        feedback_data = []

    # A file holding valid JSON that is not a list (e.g. ``{}``) would make
    # ``append`` fail below; treat it like a corrupt file.
    if not isinstance(feedback_data, list):
        feedback_data = []

    # Append the new record.
    feedback_data.append(feedback_entry)

    # Write the full list back to disk.
    with open(feedback_file, 'w', encoding='utf-8') as f:
        json.dump(feedback_data, f, ensure_ascii=False, indent=2)

    return feedback_file
156
+
157
  # -----------------
158
  # Configuration
159
  # -----------------
 
190
  model_loaded = False
191
  error_message = str(e)
192
 
193
+ # =========== 🆕 session_state ===========
194
+ if 'last_detection' not in st.session_state:
195
+ st.session_state.last_detection = None
196
+ if 'feedback_given' not in st.session_state:
197
+ st.session_state.feedback_given = False
198
+ # ========================================
199
+
200
  # -----------------
201
  # Streamlit Layout
202
  # -----------------
 
208
  )
209
 
210
  st.markdown(
211
+ """Pasted the text to be detected below and click the 'Detect' button to get the p-value. Use a better option may improve detection."""
 
 
 
212
  )
213
 
214
  # Display model loading status
 
224
 
225
  with col1:
226
  text_input = st.text_area(
227
+ label="",
228
+ placeholder="Paste your text to be detected here",
229
+ help="Typically, providing text with a longer content would get a more reliable result.",
230
  height=200,
231
  )
232
 
233
  detect_clicked = st.button("Detect", type="primary", use_container_width=True)
234
 
235
+ selected_domain = st.selectbox(
236
+ label="⚙️ Domain (Optional)",
237
+ options=DOMAINS,
238
+ index=0, # Default to General
239
+ help="💡 **Tip:** Select the domain that best matches your text for improving detection accuracy. Default is 'General' that means consider all domains."
240
+ )
241
+
242
  with col2:
243
  statistics_ph = st.empty()
244
  statistics_ph.text_input(
245
  label="Statistics",
246
  value="",
247
  disabled=True,
248
+ help="Statistics will appear here after clicking the Detect button.",
249
  )
250
 
251
  pvalue_ph = st.empty()
 
253
  label="p-value",
254
  value="",
255
  disabled=True,
256
+ help="p-value will appear here after clicking the Detect button.",
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  )
258
 
259
  # -----------------
 
263
  if not text_input.strip():
264
  st.warning("⚠️ Please enter some text before detecting.")
265
  else:
266
+ # ========== Reset feedback state ==========
267
+ st.session_state.feedback_given = False
268
+ # ==========================================
269
+
270
  # Start timing to decide whether to show progress bar
271
  start_time = time.time()
272
 
 
291
  # Clear status and show results
292
  status_placeholder.empty()
293
 
294
+ # ========== 🆕 Save the detection result to session_state ==========
295
+ st.session_state.last_detection = {
296
+ 'text': text_input,
297
+ 'domain': selected_domain,
298
+ 'statistics': crit,
299
+ 'p_value': p_value,
300
+ 'elapsed_time': elapsed_time
301
+ }
302
+
303
  # Update score displays
304
  with col2:
305
  statistics_ph.text_input(
 
310
  )
311
 
312
  pvalue_ph.text_input(
313
+ label="p-value",
314
  value=f"{p_value:.6f}",
315
  disabled=True,
316
+ help="p-value will appear here after clicking Detect.",
317
  )
318
+
319
+ st.info(
320
+ """
321
+ **📊 p-value:**
322
+ - **Lower p-value** (closer to 0) indicates text is **more likely AI-generated**
323
+ - **Higher p-value** (closer to 1) indicates text is **more likely human-written**
324
+ - Generally, p-value < 0.05 suggests the text may be LLM-generated
325
+ """,
326
+ icon="💡"
327
+ )
328
+
329
+ # ========== 🆕 Feedback buttons (moved here for better UX) ==========
330
+ st.markdown("**📝 Result Feedback**: Does this detection result meet your expectations?")
331
+
332
+ current_text = text_input
333
+ current_domain = selected_domain
334
+ current_statistics = crit
335
+ current_pvalue = p_value
336
+ feedback_col1, feedback_col2 = st.columns(2)
337
+
338
+ with feedback_col1:
339
+ if st.button("✅ Expected", use_container_width=True, type="secondary", key=f"expected_btn_{hash(text_input[:50])}"):
340
+ try:
341
+ feedback_file = save_feedback(
342
+ current_text,
343
+ current_domain,
344
+ current_statistics,
345
+ current_pvalue,
346
+ 'expected'
347
+ )
348
+ st.success("✅ Thank you for your feedback!")
349
+ st.caption(f"💾 Saved to: `{feedback_file.name}`")
350
+ except Exception as e:
351
+ st.error(f"Failed to save feedback: {str(e)}")
352
+ import traceback
353
+ st.code(traceback.format_exc())
354
+
355
+ with feedback_col2:
356
+ if st.button("❌ Unexpected", use_container_width=True, type="secondary", key=f"unexpected_btn_{hash(text_input[:50])}"):
357
+ try:
358
+ feedback_file = save_feedback(
359
+ current_text,
360
+ current_domain,
361
+ current_statistics,
362
+ current_pvalue,
363
+ 'unexpected'
364
+ )
365
+ st.warning("❌ Feedback recorded! This will help us improve.")
366
+ st.caption(f"💾 Saved to: `{feedback_file.name}`")
367
+ except Exception as e:
368
+ st.error(f"Failed to save feedback: {str(e)}")
369
+ import traceback
370
+ st.code(traceback.format_exc())
371
+
372
+ if st.session_state.feedback_given:
373
+ st.success("✅ Feedback submitted successfully!")
374
+ # ============================================
375
+
376
  # Show detailed results
377
  with result_placeholder:
378
  st.caption(f"⏱️ Processing time: {elapsed_time:.2f} seconds")
 
382
  st.error(f"❌ Error during detection: {str(e)}")
383
  st.exception(e)
384
 
385
+ # ========== 🆕 Citation and paper reference section ==========
386
+ # st.markdown("---")
387
+ # st.markdown(
388
+ # """
389
+ # 📄 **Citation** If you find this tool useful for you, please cite our paper: **[AdaDetectGPT: Adaptive Detection of LLM-Generated Text with Statistical Guarantees](https://arxiv.org/abs/2510.01268)**
390
+ # """
391
+ # )
392
+ # with st.expander("📋 BibTeX Citation"):
393
+ # st.code(
394
+ # """
395
+ # @inproceedings{zhou2024adadetectgpt,
396
+ # title={AdaDetectGPT: Adaptive Detection of LLM-Generated Text with Statistical Guarantees},
397
+ # author={Hongyi Zhou and Jin Zhu and Pingfan Su and Kai Ye and Ying Yang and Shakeel A O B Gavioli-Akilagun and Chengchun Shi},
398
+ # booktitle={The Thirty-Ninth Annual Conference on Neural Information Processing Systems (Accepted)},
399
+ # year={2025},
400
+ # }
401
+ # """,
402
+ # language="bibtex"
403
+ # )
404
+
405
  # -----------------
406
  # Footer
407
  # -----------------
 
408
  st.markdown(
409
  """
410
+ <style>
411
+ .footer {
412
+ position: fixed;
413
+ left: 0;
414
+ bottom: 0;
415
+ width: 100%;
416
+ background-color: white;
417
+ color: gray;
418
+ text-align: center;
419
+ padding: 10px;
420
+ border-top: 1px solid #e0e0e0;
421
+ z-index: 999;
422
+ }
423
+
424
+ /* Add padding to main content to prevent overlap with fixed footer */
425
+ .main .block-container {
426
+ padding-bottom: 60px;
427
+ }
428
+ </style>
429
+ <div class='footer'>
430
+ <small>Powered by Adaptive LLM Text Detection | For research purposes only</small>
431
  </div>
432
  """,
433
  unsafe_allow_html=True