Spaces:

stats-powered-ai
/

StatDetectLLM

Running

App Files Files Community

Jin Zhu committed on Oct 15, 2025

Commit

51bdea8

1 Parent(s): a8c73d1

Update website

Browse files

Files changed (2) hide show

requirements.txt +2 -1
src/app.py +183 -28

requirements.txt CHANGED Viewed

@@ -8,4 +8,5 @@ transformers==4.55.2
 peft==0.17.1
 tqdm
 scikit-learn
-huggingface_hub

 peft==0.17.1
 tqdm
 scikit-learn
+huggingface_hub
+# json is part of the Python standard library; no pip requirement is needed

src/app.py CHANGED Viewed

@@ -105,6 +105,55 @@ def load_model(from_pretrained, base_model, cache_dir, device):
         model.set_criterion_fn('mean')
     return model
 # -----------------
 # Configuration
 # -----------------
@@ -141,6 +190,13 @@ except Exception as e:
     model_loaded = False
     error_message = str(e)
 # -----------------
 # Streamlit Layout
 # -----------------
@@ -152,10 +208,7 @@ with col0:
     )
 st.markdown(
-    """
-Click the 'Detect' button to get the probability that the text is generated by LLMs.
-Changing some options may improve detection performance (see [our paper](https://arxiv.org/abs/2510.01268) for details).
-"""
 )
 # Display model loading status
@@ -171,21 +224,28 @@ col1, col2 = st.columns((1, 1))
 with col1:
     text_input = st.text_area(
-        label="Text to be detected",
-        placeholder="Paste your text here",
-        help="Your text goes here. It can be long; the longer the text, the more reliable the result.",
         height=200,
     )
     detect_clicked = st.button("Detect", type="primary", use_container_width=True)
 with col2:
     statistics_ph = st.empty()
     statistics_ph.text_input(
         label="Statistics",
         value="",
         disabled=True,
-        help="Detection statistics will appear here after clicking Detect.",
     )
     pvalue_ph = st.empty()
@@ -193,20 +253,7 @@ with col2:
         label="p-value",
         value="",
         disabled=True,
-        help="p-value will appear here after clicking Detect.",
-    )
-# -----------------
-# Options Section
-# -----------------
-st.subheader("⚙️ Detection Options")
-col_domain, col_extra = st.columns((2, 3))
-with col_domain:
-    selected_domain = st.selectbox(
-        label="Domain",
-        options=DOMAINS,
-        index=0,  # Default to General
-        help="💡 **Tip:** Select the domain that best matches your text for improved detection accuracy."
     )
 # -----------------
@@ -216,6 +263,10 @@ if detect_clicked:
     if not text_input.strip():
         st.warning("⚠️ Please enter some text before detecting.")
     else:
         # Start timing to decide whether to show progress bar
         start_time = time.time()
@@ -240,6 +291,15 @@ if detect_clicked:
             # Clear status and show results
             status_placeholder.empty()
             # Update score displays
             with col2:
                 statistics_ph.text_input(
@@ -250,12 +310,69 @@ if detect_clicked:
                 )
                 pvalue_ph.text_input(
-                    label="P-value",
                     value=f"{p_value:.6f}",
                     disabled=True,
-                    help="P-value will appear here after clicking Detect.",
                 )
             # Show detailed results
             with result_placeholder:
                 st.caption(f"⏱️ Processing time: {elapsed_time:.2f} seconds")
@@ -265,14 +382,52 @@ if detect_clicked:
             st.error(f"❌ Error during detection: {str(e)}")
             st.exception(e)
 # -----------------
 # Footer
 # -----------------
-st.markdown("---")
 st.markdown(
     """
-    <div style='text-align: center; color: gray;'>
-    <small>Powered by Adaptive LLM Text Detection | For research purposes only</small>
     </div>
     """,
     unsafe_allow_html=True

         model.set_criterion_fn('mean')
     return model
+import json
+from datetime import datetime
+# -----------------
+# Result Feedback
+# -----------------
+def save_feedback(text, domain, statistics, p_value, feedback_type):
+    """
+    保存用户反馈到 JSON 文件
+    feedback_type: 'expected' 或 'unexpected'
+    """
+    # 确定保存路径（根据环境选择）
+    if os.environ.get('SPACE_ID'):
+        feedback_dir = Path('/tmp/feedback_data')
+    else:
+        feedback_dir = APP_DIR / 'feedback_data'
+    feedback_dir.mkdir(exist_ok=True, parents=True)
+    feedback_file = feedback_dir / 'user_feedback.json'
+    # 准备反馈数据
+    feedback_entry = {
+        'timestamp': datetime.now().isoformat(),
+        'text': text,
+        'domain': domain,
+        'statistics': float(statistics),
+        'p_value': float(p_value),
+        'feedback': feedback_type
+    }
+    # 读取现有数据
+    if feedback_file.exists():
+        try:
+            with open(feedback_file, 'r', encoding='utf-8') as f:
+                feedback_data = json.load(f)
+        except:
+            feedback_data = []
+    else:
+        feedback_data = []
+    # 添加新反馈
+    feedback_data.append(feedback_entry)
+    # 保存到文件
+    with open(feedback_file, 'w', encoding='utf-8') as f:
+        json.dump(feedback_data, f, ensure_ascii=False, indent=2)
+    return feedback_file
 # -----------------
 # Configuration
 # -----------------
     model_loaded = False
     error_message = str(e)
+# =========== 🆕 session_state ===========
+if 'last_detection' not in st.session_state:
+    st.session_state.last_detection = None
+if 'feedback_given' not in st.session_state:
+    st.session_state.feedback_given = False
+# ========================================
 # -----------------
 # Streamlit Layout
 # -----------------
     )
 st.markdown(
+    """Pasted the text to be detected below and click the 'Detect' button to get the p-value. Use a better option may improve detection."""
 )
 # Display model loading status
 with col1:
     text_input = st.text_area(
+        label="",
+        placeholder="Paste your text to be detected here",
+        help="Typically, providing text with a longer content would get a more reliable result.",
         height=200,
     )
     detect_clicked = st.button("Detect", type="primary", use_container_width=True)
+    selected_domain = st.selectbox(
+        label="⚙️ Domain (Optional)",
+        options=DOMAINS,
+        index=0,  # Default to General
+        help="💡 **Tip:** Select the domain that best matches your text for improving detection accuracy. Default is 'General' that means consider all domains."
+    )
 with col2:
     statistics_ph = st.empty()
     statistics_ph.text_input(
         label="Statistics",
         value="",
         disabled=True,
+        help="Statistics will appear here after clicking the Detect button.",
     )
     pvalue_ph = st.empty()
         label="p-value",
         value="",
         disabled=True,
+        help="p-value will appear here after clicking the Detect button.",
     )
 # -----------------
     if not text_input.strip():
         st.warning("⚠️ Please enter some text before detecting.")
     else:
+        # ========== Reset feedback state ==========
+        st.session_state.feedback_given = False
+        # ==========================================
         # Start timing to decide whether to show progress bar
         start_time = time.time()
             # Clear status and show results
             status_placeholder.empty()
+            # ========== 🆕 Save the detection result to session_state ==========
+            st.session_state.last_detection = {
+                'text': text_input,
+                'domain': selected_domain,
+                'statistics': crit,
+                'p_value': p_value,
+                'elapsed_time': elapsed_time
+            }
             # Update score displays
             with col2:
                 statistics_ph.text_input(
                 )
                 pvalue_ph.text_input(
+                    label="p-value",
                     value=f"{p_value:.6f}",
                     disabled=True,
+                    help="p-value will appear here after clicking Detect.",
                 )
+                st.info(
+                    """
+                    **📊 p-value:**
+                    - **Lower p-value** (closer to 0) indicates text is **more likely AI-generated**
+                    - **Higher p-value** (closer to 1) indicates text is **more likely human-written**
+                    - Generally, p-value < 0.05 suggests the text may be LLM-generated
+                    """,
+                    icon="💡"
+                )
+                # Feedback buttons (moved here for better UX). NOTE(review): this section appears to be nested under `if detect_clicked:`; st.button returns True only on the rerun triggered by its own click, so clicking either feedback button presumably reruns the script with detect_clicked False and save_feedback is never reached -- confirm, and consider rendering this from st.session_state.last_detection outside the detect branch.
+                st.markdown("**📝 Result Feedback**: Does this detection result meet your expectations?")
+                current_text = text_input
+                current_domain = selected_domain
+                current_statistics = crit
+                current_pvalue = p_value
+                feedback_col1, feedback_col2 = st.columns(2)
+                with feedback_col1:
+                    if st.button("✅ Expected", use_container_width=True, type="secondary", key=f"expected_btn_{hash(text_input[:50])}"):
+                        try:
+                            feedback_file = save_feedback(
+                                current_text,
+                                current_domain,
+                                current_statistics,
+                                current_pvalue,
+                                'expected'
+                            )
+                            st.success("✅ Thank you for your feedback!")
+                            st.caption(f"💾 Saved to: `{feedback_file.name}`")
+                        except Exception as e:
+                            st.error(f"Failed to save feedback: {str(e)}")
+                            import traceback
+                            st.code(traceback.format_exc())
+                with feedback_col2:
+                    if st.button("❌ Unexpected", use_container_width=True, type="secondary", key=f"unexpected_btn_{hash(text_input[:50])}"):
+                        try:
+                            feedback_file = save_feedback(
+                                current_text,
+                                current_domain,
+                                current_statistics,
+                                current_pvalue,
+                                'unexpected'
+                            )
+                            st.warning("❌ Feedback recorded! This will help us improve.")
+                            st.caption(f"💾 Saved to: `{feedback_file.name}`")
+                        except Exception as e:
+                            st.error(f"Failed to save feedback: {str(e)}")
+                            import traceback
+                            st.code(traceback.format_exc())
+                if st.session_state.feedback_given:
+                    st.success("✅ Feedback submitted successfully!")
+                # NOTE(review): in the hunks visible here, feedback_given is only ever assigned False, so the success message above looks unreachable -- confirm against the full file.
             # Show detailed results
             with result_placeholder:
                 st.caption(f"⏱️ Processing time: {elapsed_time:.2f} seconds")
             st.error(f"❌ Error during detection: {str(e)}")
             st.exception(e)
+# ========== 🆕 Citation and paper reference section ==========
+# st.markdown("---")
+# st.markdown(
+#     """
+#      📄 **Citation** If you find this tool useful for you, please cite our paper: **[AdaDetectGPT: Adaptive Detection of LLM-Generated Text with Statistical Guarantees](https://arxiv.org/abs/2510.01268)**
+#     """
+# )
+# with st.expander("📋 BibTeX Citation"):
+#     st.code(
+#         """
+# @inproceedings{zhou2024adadetectgpt,
+#   title={AdaDetectGPT: Adaptive Detection of LLM-Generated Text with Statistical Guarantees},
+#   author={Hongyi Zhou and Jin Zhu and Pingfan Su and Kai Ye and Ying Yang and Shakeel A O B Gavioli-Akilagun and Chengchun Shi},
+#   booktitle={The Thirty-Ninth Annual Conference on Neural Information Processing Systems (Accepted)},
+#   year={2025},
+# }
+#         """,
+#         language="bibtex"
+#     )
 # -----------------
 # Footer
 # -----------------
 st.markdown(
     """
+    <style>
+    .footer {
+        position: fixed;
+        left: 0;
+        bottom: 0;
+        width: 100%;
+        background-color: white;
+        color: gray;
+        text-align: center;
+        padding: 10px;
+        border-top: 1px solid #e0e0e0;
+        z-index: 999;
+    }
+    /* Add padding to main content to prevent overlap with fixed footer */
+    .main .block-container {
+        padding-bottom: 60px;
+    }
+    </style>
+    <div class='footer'>
+        <small>Powered by Adaptive LLM Text Detection | For research purposes only</small>
     </div>
     """,
     unsafe_allow_html=True