Spaces:

stats-powered-ai
/

StatDetectLLM

Running

@@ -33,9 +33,6 @@ if os.environ.get('SPACE_ID'):
 import streamlit as st
 from FineTune.model import ComputeStat
 import time
-# 🆕 new imports
-import json
-import datetime
 # -----------------
 # Page Configuration
@@ -120,87 +117,9 @@ FEEDBACK_DATASET_ID = os.environ.get('FEEDBACK_DATASET_ID', 'mamba413/user-feedb
 feedback_manager = FeedbackManager(
     dataset_repo_id=FEEDBACK_DATASET_ID,
     hf_token=os.environ.get('HF_TOKEN'),
-    local_backup=False if os.environ.get('SPACE_ID') else True  # 保留本地备份
 )
-# 🆕 Incremental feedback saver for HF Spaces
-IS_SPACE = bool(os.environ.get('SPACE_ID'))
-@st.cache_resource
-def get_feedback_repo(dataset_repo_id: str, hf_token: str):
-    if not IS_SPACE:
-        return None
-    try:
-        from huggingface_hub import login, Repository
-        if hf_token:
-            login(token=hf_token)
-        local_dir = Path('/tmp') / ('hf_ds_' + dataset_repo_id.replace('/', '__'))
-        local_dir.mkdir(parents=True, exist_ok=True)
-        repo = Repository(
-            local_dir=str(local_dir),
-            clone_from=dataset_repo_id,
-            repo_type="dataset",
-            token=hf_token,
-        )
-        return repo
-    except Exception as e:
-        print(f"[feedback repo] init failed: {e}")
-        return None
-def save_feedback_incremental(text: str, domain: str, statistics: float, p_value: float, label: str):
-    """
-    Append a single feedback record to a date-sharded NDJSON file and push.
-    Falls back to FeedbackManager on error or non-space environments.
-    """
-    try:
-        repo = get_feedback_repo(FEEDBACK_DATASET_ID, os.environ.get('HF_TOKEN'))
-        if repo is None:
-            # Fallback (local or repo init failed)
-            return feedback_manager.save_feedback(text, domain, statistics, p_value, label)
-        # Pull latest, append, commit, push
-        try:
-            repo.git_pull(rebase=True)
-        except Exception as e:
-            print(f"[feedback repo] pull warning: {e}")
-        now = datetime.datetime.utcnow()
-        date_str = now.strftime("%Y-%m-%d")
-        payload = {
-            "timestamp": now.isoformat(timespec="seconds") + "Z",
-            "space_id": os.environ.get('SPACE_ID'),
-            "domain": domain,
-            "label": label,
-            "statistics": statistics,
-            "p_value": p_value,
-            "text": text,
-            "app_version": "adadetectgpt-app-1",  # optional tag
-        }
-        feedback_dir = Path(repo.local_dir) / "feedback"
-        feedback_dir.mkdir(parents=True, exist_ok=True)
-        file_path = feedback_dir / f"{date_str}.ndjson"
-        with open(file_path, "a", encoding="utf-8") as f:
-            f.write(json.dumps(payload, ensure_ascii=False) + "\n")
-        # Commit and push only the changed file to minimize traffic
-        repo.git_add(pattern=str(file_path))
-        try:
-            repo.git_commit(f"feedback: append {file_path.name}")
-        except Exception as e:
-            # allow empty commit errors to pass silently if nothing changed
-            print(f"[feedback repo] commit info: {e}")
-        repo.git_push()
-        return True, f"Pushed to {FEEDBACK_DATASET_ID}:{file_path.name}"
-    except Exception as e:
-        # Final fallback if anything goes wrong
-        print(f"[feedback repo] incremental save failed: {e}")
-        try:
-            return feedback_manager.save_feedback(text, domain, statistics, p_value, label)
-        except Exception as e2:
-            return False, f"Fallback failed: {e2}"
 # -----------------
 # Configuration
 # -----------------
@@ -209,7 +128,8 @@ MODEL_CONFIG = {
     'from_pretrained': './src/FineTune/ckpt/',
     'base_model': 'gemma-1b',
     'cache_dir': '../cache',
-    'device': 'cpu' if os.environ.get('SPACE_ID') else 'mps',
     # 'device': 'cuda',
 }
@@ -278,8 +198,7 @@ with col1:
         height=200,
     )
-    # Add a stable key to the Detect button
-    detect_clicked = st.button("Detect", type="primary", use_container_width=True, key="detect_btn")
     selected_domain = st.selectbox(
         label="⚙️ Domain (Optional)",
@@ -312,7 +231,9 @@ if detect_clicked:
     if not text_input.strip():
         st.warning("⚠️ Please enter some text before detecting.")
     else:
         st.session_state.feedback_given = False
         # Start timing to decide whether to show progress bar
         start_time = time.time()
@@ -347,8 +268,84 @@ if detect_clicked:
                 'elapsed_time': elapsed_time
             }
-            # NOTE: Do not render results and feedback here; they are rendered below
-            # based on st.session_state.last_detection so buttons persist across reruns.
             # Show detailed results
             with result_placeholder:
@@ -359,101 +356,6 @@ if detect_clicked:
             st.error(f"❌ Error during detection: {str(e)}")
             st.exception(e)
-# ================= Result & Feedback rendering (persistent across reruns) =================
-if st.session_state.last_detection:
-    data = st.session_state.last_detection
-    with col2:
-        # Update score displays
-        statistics_ph.text_input(
-            label="Statistics",
-            value=f"{data['statistics']:.6f}",
-            disabled=True,
-            help="Detection statistics will appear here after clicking Detect.",
-        )
-        pvalue_ph.text_input(
-            label="p-value",
-            value=f"{data['p_value']:.6f}",
-            disabled=True,
-            help="p-value will appear here after clicking Detect.",
-        )
-        st.info(
-            """
-            **📊 p-value:**
-            - **Lower p-value** (closer to 0) indicates text is **more likely AI-generated**
-            - **Higher p-value** (closer to 1) indicates text is **more likely human-written**
-            - Generally, p-value < 0.05 suggests the text may be LLM-generated
-            """,
-            icon="💡"
-        )
-        st.markdown("**📝 Result Feedback**: Does this detection result meet your expectations?")
-        current_text = data['text']
-        current_domain = data['domain']
-        current_statistics = data['statistics']
-        current_pvalue = data['p_value']
-        feedback_col1, feedback_col2 = st.columns(2)
-        with feedback_col1:
-            # Add a stable, unique key so click state is captured on rerun
-            expected_click = st.button(
-                "✅ Expected",
-                use_container_width=True,
-                type="secondary",
-                key=f"expected_btn_{hash(current_text[:50])}"
-            )
-            print("--------------------------------------------------")
-            print(f"Expected button clicked: {expected_click}")
-            if expected_click and not st.session_state.feedback_given:
-                try:
-                    # 🆕 use incremental saver (auto-fallbacks when needed)
-                    success, message = save_feedback_incremental(
-                        current_text,
-                        current_domain,
-                        current_statistics,
-                        current_pvalue,
-                        'expected'
-                    )
-                    if success:
-                        st.success("✅ Thanks for your positive feedback!")
-                        st.session_state.feedback_given = True
-                    else:
-                        st.error(f"Failed to save feedback: {message}")
-                except Exception as e:
-                    st.error(f"Failed to save feedback: {str(e)}")
-                    import traceback
-                    st.code(traceback.format_exc())
-        with feedback_col2:
-            unexpected_click = st.button(
-                "❌ Unexpected",
-                use_container_width=True,
-                type="secondary",
-                key=f"unexpected_btn_{hash(current_text[:50])}"
-            )
-            if unexpected_click and not st.session_state.feedback_given:
-                try:
-                    # 🆕 use incremental saver (auto-fallbacks when needed)
-                    success, message = save_feedback_incremental(
-                        current_text,
-                        current_domain,
-                        current_statistics,
-                        current_pvalue,
-                        'unexpected'
-                    )
-                    if success:
-                        st.warning("Feedback recorded! This will help us improve.")
-                        st.session_state.feedback_given = True
-                    else:
-                        st.error(f"Failed to save feedback: {message}")
-                except Exception as e:
-                    st.error(f"Failed to save feedback: {str(e)}")
-                    import traceback
-                    st.code(traceback.format_exc())
 # ========== 🆕 Citation and paper reference section ==========
 # st.markdown("---")
 # st.markdown(

 import streamlit as st
 from FineTune.model import ComputeStat
 import time
 # -----------------
 # Page Configuration
 feedback_manager = FeedbackManager(
     dataset_repo_id=FEEDBACK_DATASET_ID,
     hf_token=os.environ.get('HF_TOKEN'),
+    local_backup=True  # 保留本地备份
 )
 # -----------------
 # Configuration
 # -----------------
     'from_pretrained': './src/FineTune/ckpt/',
     'base_model': 'gemma-1b',
     'cache_dir': '../cache',
+    # 'device': 'mps',
+    'device': 'cpu',
     # 'device': 'cuda',
 }
         height=200,
     )
+    detect_clicked = st.button("Detect", type="primary", use_container_width=True)
     selected_domain = st.selectbox(
         label="⚙️ Domain (Optional)",
     if not text_input.strip():
         st.warning("⚠️ Please enter some text before detecting.")
     else:
+        # ========== Reset feedback state ==========
         st.session_state.feedback_given = False
+        # ==========================================
         # Start timing to decide whether to show progress bar
         start_time = time.time()
                 'elapsed_time': elapsed_time
             }
+            # Update score displays
+            with col2:
+                statistics_ph.text_input(
+                    label="Statistics",
+                    value=f"{crit:.6f}",
+                    disabled=True,
+                    help="Detection statistics will appear here after clicking Detect.",
+                )
+                pvalue_ph.text_input(
+                    label="p-value",
+                    value=f"{p_value:.6f}",
+                    disabled=True,
+                    help="p-value will appear here after clicking Detect.",
+                )
+                st.info(
+                    """
+                    **📊 p-value:**
+                    - **Lower p-value** (closer to 0) indicates text is **more likely AI-generated**
+                    - **Higher p-value** (closer to 1) indicates text is **more likely human-written**
+                    - Generally, p-value < 0.05 suggests the text may be LLM-generated
+                    """,
+                    icon="💡"
+                )
+                # ========== 🆕 Feedback buttons (moved here for better UX) ==========
+                st.markdown("**📝 Result Feedback**: Does this detection result meet your expectations?")
+                current_text = text_input
+                current_domain = selected_domain
+                current_statistics = crit
+                current_pvalue = p_value
+                feedback_col1, feedback_col2 = st.columns(2)
+                with feedback_col1:
+                    if st.button("✅ Expected", use_container_width=True, type="secondary", key=f"expected_btn_{hash(text_input[:50])}"):
+                        try:
+                            success, message = feedback_manager.save_feedback(
+                                current_text,
+                                current_domain,
+                                current_statistics,
+                                current_pvalue,
+                                'expected'
+                            )
+                            if success:
+                                st.success("✅ Thank you for your feedback!")
+                                st.caption(f"💾 {message}")
+                            else:
+                                st.error(f"Failed to save feedback: {message}")
+                        except Exception as e:
+                            st.error(f"Failed to save feedback: {str(e)}")
+                            import traceback
+                            st.code(traceback.format_exc())
+                with feedback_col2:
+                    if st.button("❌ Unexpected", use_container_width=True, type="secondary", key=f"unexpected_btn_{hash(text_input[:50])}"):
+                        try:
+                            success, message = feedback_manager.save_feedback(
+                                current_text,
+                                current_domain,
+                                current_statistics,
+                                current_pvalue,
+                                'unexpected'
+                            )
+                            if success:
+                                st.warning("❌ Feedback recorded! This will help us improve.")
+                                st.caption(f"💾 {message}")
+                            else:
+                                st.error(f"Failed to save feedback: {message}")
+                        except Exception as e:
+                            st.error(f"Failed to save feedback: {str(e)}")
+                            import traceback
+                            st.code(traceback.format_exc())
+                if st.session_state.feedback_given:
+                    st.success("✅ Feedback submitted successfully!")
+                # ============================================
             # Show detailed results
             with result_placeholder:
             st.error(f"❌ Error during detection: {str(e)}")
             st.exception(e)
 # ========== 🆕 Citation and paper reference section ==========
 # st.markdown("---")
 # st.markdown(

src/feedback.py CHANGED Viewed

@@ -6,60 +6,6 @@ from huggingface_hub import HfApi, upload_file, hf_hub_download
 from typing import Optional
 import pandas as pd
-def save_feedback_incremental(text: str, domain: str, statistics: float, p_value: float, label: str):
-    """
-    Append a single feedback record to a date-sharded NDJSON file and push.
-    Falls back to FeedbackManager on error or non-space environments.
-    """
-    try:
-        repo = get_feedback_repo(FEEDBACK_DATASET_ID, os.environ.get('HF_TOKEN'))
-        if repo is None:
-            # Fallback (local or repo init failed)
-            return feedback_manager.save_feedback(text, domain, statistics, p_value, label)
-        # Pull latest, append, commit, push
-        try:
-            repo.git_pull(rebase=True)
-        except Exception as e:
-            print(f"[feedback repo] pull warning: {e}")
-        now = datetime.datetime.utcnow()
-        date_str = now.strftime("%Y-%m-%d")
-        payload = {
-            "timestamp": now.isoformat(timespec="seconds") + "Z",
-            "space_id": os.environ.get('SPACE_ID'),
-            "domain": domain,
-            "label": label,
-            "statistics": statistics,
-            "p_value": p_value,
-            "text": text,
-            "app_version": "adadetectgpt-app-1",  # optional tag
-        }
-        feedback_dir = Path(repo.local_dir) / "feedback"
-        feedback_dir.mkdir(parents=True, exist_ok=True)
-        file_path = feedback_dir / f"{date_str}.ndjson"
-        with open(file_path, "a", encoding="utf-8") as f:
-            f.write(json.dumps(payload, ensure_ascii=False) + "\n")
-        # Commit and push only the changed file to minimize traffic
-        repo.git_add(pattern=str(file_path))
-        try:
-            repo.git_commit(f"feedback: append {file_path.name}")
-        except Exception as e:
-            # allow empty commit errors to pass silently if nothing changed
-            print(f"[feedback repo] commit info: {e}")
-        repo.git_push()
-        return True, f"Pushed to {FEEDBACK_DATASET_ID}:{file_path.name}"
-    except Exception as e:
-        # Final fallback if anything goes wrong
-        print(f"[feedback repo] incremental save failed: {e}")
-        try:
-            return feedback_manager.save_feedback(text, domain, statistics, p_value, label)
-        except Exception as e2:
-            return False, f"Fallback failed: {e2}"
 class FeedbackManager:
     """管理用户反馈，支持保存到 Hugging Face 私有数据集"""
@@ -85,6 +31,7 @@ class FeedbackManager:
         if self.dataset_repo_id and self.hf_token:
             self.api = HfApi(token=self.hf_token)
             # 确保数据集存在
         else:
             self.api = None
             print("⚠️ No HF dataset configured. Will only save locally.")
@@ -97,7 +44,58 @@ class FeedbackManager:
         self.local_dir.mkdir(exist_ok=True, parents=True)
         self.local_file = self.local_dir / 'user_feedback.json'
     def _load_existing_data(self) -> list:
         """从 HF 数据集加载现有数据"""
         existing_data = []

 from typing import Optional
 import pandas as pd
 class FeedbackManager:
     """管理用户反馈，支持保存到 Hugging Face 私有数据集"""
         if self.dataset_repo_id and self.hf_token:
             self.api = HfApi(token=self.hf_token)
             # 确保数据集存在
+            self._ensure_dataset_exists()
         else:
             self.api = None
             print("⚠️ No HF dataset configured. Will only save locally.")
         self.local_dir.mkdir(exist_ok=True, parents=True)
         self.local_file = self.local_dir / 'user_feedback.json'
+    def _ensure_dataset_exists(self):
+        """确保 HF 数据集存在，如果不存在则创建"""
+        try:
+            from huggingface_hub import create_repo
+            # 尝试创建数据集仓库（如果已存在会抛出异常）
+            try:
+                create_repo(
+                    repo_id=self.dataset_repo_id,
+                    token=self.hf_token,
+                    private=True,
+                    repo_type="dataset"
+                )
+                print(f"✅ Created new private dataset: {self.dataset_repo_id}")
+                # 创建初始的 README.md
+                readme_content = f"""---
+license: mit
+---
+# AdaDetectGPT User Feedback Dataset
+This dataset contains user feedback from the AdaDetectGPT detection system.
+## Data Format
+Each entry contains:
+- `timestamp`: When the feedback was submitted
+- `text`: The text that was analyzed
+- `domain`: The domain selected for analysis
+- `statistics`: The computed statistics value
+- `p_value`: The p-value from the detection
+- `feedback`: User feedback (expected/unexpected)
+"""
+                readme_file = self.local_dir / 'README.md'
+                readme_file.write_text(readme_content)
+                upload_file(
+                    path_or_fileobj=str(readme_file),
+                    path_in_repo="README.md",
+                    repo_id=self.dataset_repo_id,
+                    repo_type="dataset",
+                    token=self.hf_token
+                )
+            except Exception as e:
+                if "already exists" not in str(e):
+                    print(f"⚠️ Dataset check: {e}")
+        except Exception as e:
+            print(f"⚠️ Could not verify dataset: {e}")
     def _load_existing_data(self) -> list:
         """从 HF 数据集加载现有数据"""
         existing_data = []