Spaces:

intersteller2887
/

Turing-test-web

Sleeping

App Files Community

intersteller2887 committed on Jul 10, 2025

Commit

d78e376

verified ·

1 Parent(s): fe34f00

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -107

app.py CHANGED Viewed

@@ -6,12 +6,10 @@ import random
 import shutil
 import time
 import collections
-from filelock import FileLock
-from datasets import load_dataset, Audio
 from huggingface_hub import HfApi
 dataset = load_dataset("intersteller2887/Turing-test-dataset", split="train")
-dataset = dataset.cast_column("audio", Audio(decode=False))
 target_audio_dir = "/home/user/app/audio"
 os.makedirs(target_audio_dir, exist_ok=True)
@@ -94,20 +92,29 @@ DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
 def load_or_initialize_count_json(audio_paths):
     if os.path.exists(COUNT_JSON_PATH):
         with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
-            count_data = json.load(f)
     else:
-        count_data = {}
     updated = False
-    # Count initialize
     for path in audio_paths:
         filename = os.path.basename(path)
         if filename not in count_data:
-            count_data[filename] = 0
             updated = True
     if updated or not os.path.exists(COUNT_JSON_PATH):
         with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
             json.dump(count_data, f, indent=4, ensure_ascii=False)
     return count_data
@@ -115,96 +122,25 @@ def load_or_initialize_count_json(audio_paths):
 def append_cache_buster(audio_path):
     return f"{audio_path}?t={int(time.time() * 1000)}"
-# k for number of questions per test
-"""def sample_audio_paths(audio_paths, count_data, k=5, max_count=1):
     eligible_paths = [p for p in audio_paths if count_data.get(os.path.basename(p), 0) < max_count]
     if len(eligible_paths) < k:
         raise ValueError(f"可用音频数量不足（只剩 {len(eligible_paths)} 条 count<{max_count} 的音频），无法抽取 {k} 条")
-    random.shuffle(eligible_paths)
-    selected = random.sample(eligible_paths, k)
     for path in selected:
         filename = os.path.basename(path)
-        count_data[filename] += 1
     with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
         json.dump(count_data, f, indent=4, ensure_ascii=False)
-    return selected, count_data"""
-import threading
-LOCK_TEST_LOG = "/home/user/app/lock_test_log.json"
-def test_file_locking(n=5):
-    """
-    测试 FileLock 是否生效，通过并发写入一个共享 JSON 文件观察是否冲突。
-    """
-    lock_path = COUNT_JSON_PATH + ".lock"
-    log_lock = FileLock(lock_path)
-    def worker(thread_id):
-        with log_lock:
-            try:
-                # 读取当前日志内容
-                if os.path.exists(LOCK_TEST_LOG):
-                    with open(LOCK_TEST_LOG, "r", encoding="utf-8") as f:
-                        log_data = json.load(f)
-                else:
-                    log_data = []
-                # 添加一条记录
-                log_data.append({
-                    "thread": thread_id,
-                    "timestamp": time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()),
-                    "note": f"Thread {thread_id} acquired lock and wrote this entry."
-                })
-                # 写入日志文件
-                with open(LOCK_TEST_LOG, "w", encoding="utf-8") as f:
-                    json.dump(log_data, f, ensure_ascii=False, indent=2)
-                print(f"✅ Thread {thread_id} wrote to log.")
-            except Exception as e:
-                print(f"❌ Thread {thread_id} failed: {e}")
-    # 启动多个线程并发写入
-    threads = []
-    for i in range(n):
-        t = threading.Thread(target=worker, args=(i,))
-        threads.append(t)
-        t.start()
-    for t in threads:
-        t.join()
-    print(f"\n✅ 测试完成。日志记录保存在: {LOCK_TEST_LOG}")
-def sample_audio_paths(audio_paths, count_data, k=100, max_count=1):
-    lock_path = COUNT_JSON_PATH + ".lock"
-    with FileLock(lock_path):  # 加锁，确保只有一个用户操作
-        with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
-            count_data = json.load(f, object_pairs_hook=collections.OrderedDict)
-        eligible_paths = [p for p in audio_paths if count_data.get(os.path.basename(p), 0) < max_count]
-        if len(eligible_paths) < k:
-            raise ValueError(f"可用音频数量不足（只剩 {len(eligible_paths)} 条 count<{max_count} 的音频），无法抽取 {k} 条")
-        random.seed(int(time.time()))
-        selected = random.sample(eligible_paths, k)
-        # 更新计数
-        for path in selected:
-            filename = os.path.basename(path)
-            count_data[filename] = count_data.get(filename, 0) + 1
-        with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
-            json.dump(count_data, f, indent=4, ensure_ascii=False)
     return selected, count_data
 count_data = load_or_initialize_count_json(all_data_audio_paths)
@@ -405,8 +341,6 @@ def save_all_results_to_file(all_results, user_data, count_data=None):
     username = user_data.get("username", "user")
     timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
     submission_filename = f"submissions_{username}_{timestamp}.json"
-    lock_log_path = "/home/user/app/lock_test_log.json"
-    lock_log_repo_path = f"logs/lock_test_log_{username}_{timestamp}.json"
     final_data_package = {
         "user_info": user_data,
@@ -448,19 +382,6 @@ def save_all_results_to_file(all_results, user_data, count_data=None):
             )
             print("count.json 上传成功")
-        if os.path.exists(lock_log_path):
-            api.upload_file(
-                path_or_fileobj=lock_log_path,
-                path_in_repo=lock_log_repo_path,
-                repo_id=repo_id,
-                repo_type="dataset",
-                token=hf_token,
-                commit_message=f"Upload FileLock test log from {username}"
-            )
-            print(f"✅ FileLock 测试日志上传成功: {lock_log_repo_path}")
-        else:
-            print("ℹ️ 未发现 lock_test_log.json，跳过上传。")
     except Exception as e:
         print(f"上传出错: {e}")
@@ -515,10 +436,9 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
         start_btn = gr.Button("开始挑战", variant="primary")
     with info_page:
-        gr.Markdown("## 请提供一些基本信息")
         username_input = gr.Textbox(label="用户名", placeholder="请输入你的昵称")
         age_input = gr.Radio(["18岁以下", "18-25岁", "26-35岁", "36-50岁", "50岁以上"], label="年龄")
         gender_input = gr.Radio(["男", "女", "其他"], label="性别")
         education_input = gr.Radio(["高中及以下", "本科", "硕士", "博士", "其他（请注明）"], label="学历")
@@ -609,10 +529,9 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
     start_btn.click(fn=start_challenge, outputs=[welcome_page, info_page])
-    for comp in [username_input, age_input, gender_input, education_input, education_other_input, ai_experience_input]:
         comp.change(
             fn=check_info_complete,
-            # inputs=[age_input, gender_input, education_input, education_other_input, ai_experience_input],
             inputs=[username_input, age_input, gender_input, education_input, education_other_input, ai_experience_input],
             outputs=submit_info_btn
         )
@@ -681,6 +600,5 @@ if __name__ == "__main__":
         for audio_file in set(all_files):
             if not os.path.exists(audio_file):
                 print(f"⚠️ Warning: Audio file not found: {audio_file}")
-    test_file_locking(n=5)
     demo.launch(debug=True)

 import shutil
 import time
 import collections
+from datasets import load_dataset
 from huggingface_hub import HfApi
 dataset = load_dataset("intersteller2887/Turing-test-dataset", split="train")
 target_audio_dir = "/home/user/app/audio"
 os.makedirs(target_audio_dir, exist_ok=True)
 def load_or_initialize_count_json(audio_paths):
     if os.path.exists(COUNT_JSON_PATH):
         with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
+            # 使用 object_pairs_hook 保持原始顺序
+            count_data = json.load(f, object_pairs_hook=collections.OrderedDict)
     else:
+        count_data = collections.OrderedDict()
     updated = False
+    # 获取所有样例音频文件名
+    sample_audio_files = {os.path.basename(d["audio"]) for d in DIMENSIONS_DATA}
     for path in audio_paths:
         filename = os.path.basename(path)
         if filename not in count_data:
+            # 如果是样例音频，直接设置为最大值
+            if filename in sample_audio_files:
+                count_data[filename] = 999  # 设置为很大的值，确保不会被选中
+            else:
+                count_data[filename] = 0
             updated = True
     if updated or not os.path.exists(COUNT_JSON_PATH):
         with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
+            # 确保写入时也保持顺序
             json.dump(count_data, f, indent=4, ensure_ascii=False)
     return count_data
 def append_cache_buster(audio_path):
     return f"{audio_path}?t={int(time.time() * 1000)}"
+def sample_audio_paths(audio_paths, count_data, k=5, max_count=1):
     eligible_paths = [p for p in audio_paths if count_data.get(os.path.basename(p), 0) < max_count]
     if len(eligible_paths) < k:
         raise ValueError(f"可用音频数量不足（只剩 {len(eligible_paths)} 条 count<{max_count} 的音频），无法抽取 {k} 条")
+    eligible_paths_copy = eligible_paths.copy()
+    random.seed(int(time.time()))
+    selected = random.sample(eligible_paths_copy, k)
     for path in selected:
         filename = os.path.basename(path)
+        count_data[filename] = count_data.get(filename, 0) + 1
     with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
         json.dump(count_data, f, indent=4, ensure_ascii=False)
     return selected, count_data
 count_data = load_or_initialize_count_json(all_data_audio_paths)
     username = user_data.get("username", "user")
     timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
     submission_filename = f"submissions_{username}_{timestamp}.json"
     final_data_package = {
         "user_info": user_data,
             )
             print("count.json 上传成功")
     except Exception as e:
         print(f"上传出错: {e}")
         start_btn = gr.Button("开始挑战", variant="primary")
     with info_page:
         username_input = gr.Textbox(label="用户名", placeholder="请输入你的昵称")
+        gr.Markdown("## 请提供一些基本信息")
+        username_input = gr.Textbox(label="用户名", placeholder="请输入一个昵称或代号")
         age_input = gr.Radio(["18岁以下", "18-25岁", "26-35岁", "36-50岁", "50岁以上"], label="年龄")
         gender_input = gr.Radio(["男", "女", "其他"], label="性别")
         education_input = gr.Radio(["高中及以下", "本科", "硕士", "博士", "其他（请注明）"], label="学历")
     start_btn.click(fn=start_challenge, outputs=[welcome_page, info_page])
+    for comp in [age_input, gender_input, education_input, education_other_input, ai_experience_input]:
         comp.change(
             fn=check_info_complete,
             inputs=[username_input, age_input, gender_input, education_input, education_other_input, ai_experience_input],
             outputs=submit_info_btn
         )
         for audio_file in set(all_files):
             if not os.path.exists(audio_file):
                 print(f"⚠️ Warning: Audio file not found: {audio_file}")
     demo.launch(debug=True)