Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,12 +6,10 @@ import random
|
|
| 6 |
import shutil
|
| 7 |
import time
|
| 8 |
import collections
|
| 9 |
-
from
|
| 10 |
-
from datasets import load_dataset, Audio
|
| 11 |
from huggingface_hub import HfApi
|
| 12 |
|
| 13 |
dataset = load_dataset("intersteller2887/Turing-test-dataset", split="train")
|
| 14 |
-
dataset = dataset.cast_column("audio", Audio(decode=False))
|
| 15 |
|
| 16 |
target_audio_dir = "/home/user/app/audio"
|
| 17 |
os.makedirs(target_audio_dir, exist_ok=True)
|
|
@@ -94,20 +92,29 @@ DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
|
|
| 94 |
def load_or_initialize_count_json(audio_paths):
|
| 95 |
if os.path.exists(COUNT_JSON_PATH):
|
| 96 |
with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
|
| 97 |
-
|
|
|
|
| 98 |
else:
|
| 99 |
-
count_data =
|
| 100 |
|
| 101 |
updated = False
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
| 103 |
for path in audio_paths:
|
| 104 |
filename = os.path.basename(path)
|
| 105 |
if filename not in count_data:
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
updated = True
|
| 108 |
|
| 109 |
if updated or not os.path.exists(COUNT_JSON_PATH):
|
| 110 |
with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
|
|
|
|
| 111 |
json.dump(count_data, f, indent=4, ensure_ascii=False)
|
| 112 |
|
| 113 |
return count_data
|
|
@@ -115,96 +122,25 @@ def load_or_initialize_count_json(audio_paths):
|
|
| 115 |
def append_cache_buster(audio_path):
    """Return ``audio_path`` with a millisecond-timestamp ``t`` query parameter appended.

    The timestamp changes on every call, so the returned string differs from
    earlier results for the same path.

    Args:
        audio_path: Path or URL string to decorate.

    Returns:
        ``audio_path`` followed by ``?t=<ms>``, or by ``&t=<ms>`` when the
        input already contains a ``?`` (so the result never holds two query
        introducers).
    """
    # A second "?" would be read as part of the previous parameter's value,
    # so chain onto an existing query string with "&" instead.
    separator = "&" if "?" in audio_path else "?"
    return f"{audio_path}{separator}t={int(time.time() * 1000)}"
|
| 117 |
|
| 118 |
-
|
| 119 |
-
"""def sample_audio_paths(audio_paths, count_data, k=5, max_count=1):
|
| 120 |
eligible_paths = [p for p in audio_paths if count_data.get(os.path.basename(p), 0) < max_count]
|
| 121 |
|
| 122 |
if len(eligible_paths) < k:
|
| 123 |
raise ValueError(f"可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")
|
| 124 |
|
| 125 |
-
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
for path in selected:
|
| 129 |
filename = os.path.basename(path)
|
| 130 |
-
count_data[filename]
|
| 131 |
|
| 132 |
with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
|
| 133 |
json.dump(count_data, f, indent=4, ensure_ascii=False)
|
| 134 |
|
| 135 |
-
return selected, count_data"""
|
| 136 |
-
|
| 137 |
-
import threading
|
| 138 |
-
|
| 139 |
-
LOCK_TEST_LOG = "/home/user/app/lock_test_log.json"
|
| 140 |
-
|
| 141 |
-
def test_file_locking(n=5):
    """Check FileLock by having several threads append to one shared JSON log.

    Starts ``n`` threads. Each one enters the lock at
    ``COUNT_JSON_PATH + ".lock"``, loads ``LOCK_TEST_LOG`` (or starts from an
    empty list when the file does not exist), appends one record and writes
    the list back. Per-thread success or failure is printed; nothing is
    returned.

    Args:
        n: Number of writer threads to start.
    """
    shared_lock = FileLock(COUNT_JSON_PATH + ".lock")

    def worker(thread_id):
        with shared_lock:
            try:
                # Load whatever has been logged so far.
                entries = []
                if os.path.exists(LOCK_TEST_LOG):
                    with open(LOCK_TEST_LOG, "r", encoding="utf-8") as src:
                        entries = json.load(src)

                # Add this thread's record.
                record = {
                    "thread": thread_id,
                    "timestamp": time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()),
                    "note": f"Thread {thread_id} acquired lock and wrote this entry.",
                }
                entries.append(record)

                # Write the full log back out.
                with open(LOCK_TEST_LOG, "w", encoding="utf-8") as dst:
                    json.dump(entries, dst, ensure_ascii=False, indent=2)

                print(f"✅ Thread {thread_id} wrote to log.")
            except Exception as exc:
                # Any failure is reported per thread and does not propagate.
                print(f"❌ Thread {thread_id} failed: {exc}")

    # Launch the writers concurrently, then wait for all of them to finish.
    workers = []
    for idx in range(n):
        thread = threading.Thread(target=worker, args=(idx,))
        workers.append(thread)
        thread.start()

    for thread in workers:
        thread.join()

    print(f"\n✅ 测试完成。日志记录保存在: {LOCK_TEST_LOG}")
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
def sample_audio_paths(audio_paths, count_data, k=100, max_count=1):
    """Randomly pick ``k`` under-used audio paths and record the picks on disk.

    The read / select / update / write cycle runs inside a FileLock on
    ``COUNT_JSON_PATH + ".lock"``. The ``count_data`` argument is not read:
    counts are re-loaded from ``COUNT_JSON_PATH`` inside the lock and that
    freshly loaded mapping is what gets updated and returned.

    Args:
        audio_paths: Candidate audio file paths; counts are keyed by basename.
        count_data: Ignored on input (kept for interface compatibility).
        k: Number of paths to draw.
        max_count: A path is eligible only while its count is below this.

    Returns:
        ``(selected, count_data)``: the chosen paths and the updated counts.

    Raises:
        ValueError: Fewer than ``k`` eligible paths remain.
    """
    lock_path = COUNT_JSON_PATH + ".lock"
    with FileLock(lock_path):  # serialize the whole read-modify-write cycle
        with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
            count_data = json.load(f, object_pairs_hook=collections.OrderedDict)

        eligible_paths = [
            p for p in audio_paths
            if count_data.get(os.path.basename(p), 0) < max_count
        ]
        if len(eligible_paths) < k:
            raise ValueError(f"可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")

        # The global RNG is deliberately NOT reseeded here: seeding it with
        # int(time.time()) made draws predictable and identical for calls
        # landing in the same second, and reset RNG state for every other
        # user of the random module.
        selected = random.sample(eligible_paths, k)

        # Record one more use for every selected file.
        for path in selected:
            filename = os.path.basename(path)
            count_data[filename] = count_data.get(filename, 0) + 1

        with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
            json.dump(count_data, f, indent=4, ensure_ascii=False)

    return selected, count_data
|
| 209 |
|
| 210 |
count_data = load_or_initialize_count_json(all_data_audio_paths)
|
|
@@ -405,8 +341,6 @@ def save_all_results_to_file(all_results, user_data, count_data=None):
|
|
| 405 |
username = user_data.get("username", "user")
|
| 406 |
timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
|
| 407 |
submission_filename = f"submissions_{username}_{timestamp}.json"
|
| 408 |
-
lock_log_path = "/home/user/app/lock_test_log.json"
|
| 409 |
-
lock_log_repo_path = f"logs/lock_test_log_{username}_{timestamp}.json"
|
| 410 |
|
| 411 |
final_data_package = {
|
| 412 |
"user_info": user_data,
|
|
@@ -448,19 +382,6 @@ def save_all_results_to_file(all_results, user_data, count_data=None):
|
|
| 448 |
)
|
| 449 |
print("count.json 上传成功")
|
| 450 |
|
| 451 |
-
if os.path.exists(lock_log_path):
|
| 452 |
-
api.upload_file(
|
| 453 |
-
path_or_fileobj=lock_log_path,
|
| 454 |
-
path_in_repo=lock_log_repo_path,
|
| 455 |
-
repo_id=repo_id,
|
| 456 |
-
repo_type="dataset",
|
| 457 |
-
token=hf_token,
|
| 458 |
-
commit_message=f"Upload FileLock test log from {username}"
|
| 459 |
-
)
|
| 460 |
-
print(f"✅ FileLock 测试日志上传成功: {lock_log_repo_path}")
|
| 461 |
-
else:
|
| 462 |
-
print("ℹ️ 未发现 lock_test_log.json,跳过上传。")
|
| 463 |
-
|
| 464 |
except Exception as e:
|
| 465 |
print(f"上传出错: {e}")
|
| 466 |
|
|
@@ -515,10 +436,9 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
|
|
| 515 |
start_btn = gr.Button("开始挑战", variant="primary")
|
| 516 |
|
| 517 |
with info_page:
|
| 518 |
-
gr.Markdown("## 请提供一些基本信息")
|
| 519 |
-
|
| 520 |
username_input = gr.Textbox(label="用户名", placeholder="请输入你的昵称")
|
| 521 |
-
|
|
|
|
| 522 |
age_input = gr.Radio(["18岁以下", "18-25岁", "26-35岁", "36-50岁", "50岁以上"], label="年龄")
|
| 523 |
gender_input = gr.Radio(["男", "女", "其他"], label="性别")
|
| 524 |
education_input = gr.Radio(["高中及以下", "本科", "硕士", "博士", "其他(请注明)"], label="学历")
|
|
@@ -609,10 +529,9 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
|
|
| 609 |
|
| 610 |
start_btn.click(fn=start_challenge, outputs=[welcome_page, info_page])
|
| 611 |
|
| 612 |
-
for comp in [
|
| 613 |
comp.change(
|
| 614 |
fn=check_info_complete,
|
| 615 |
-
# inputs=[age_input, gender_input, education_input, education_other_input, ai_experience_input],
|
| 616 |
inputs=[username_input, age_input, gender_input, education_input, education_other_input, ai_experience_input],
|
| 617 |
outputs=submit_info_btn
|
| 618 |
)
|
|
@@ -681,6 +600,5 @@ if __name__ == "__main__":
|
|
| 681 |
for audio_file in set(all_files):
|
| 682 |
if not os.path.exists(audio_file):
|
| 683 |
print(f"⚠️ Warning: Audio file not found: {audio_file}")
|
| 684 |
-
|
| 685 |
-
test_file_locking(n=5)
|
| 686 |
demo.launch(debug=True)
|
|
|
|
| 6 |
import shutil
|
| 7 |
import time
|
| 8 |
import collections
|
| 9 |
+
from datasets import load_dataset
|
|
|
|
| 10 |
from huggingface_hub import HfApi
|
| 11 |
|
| 12 |
dataset = load_dataset("intersteller2887/Turing-test-dataset", split="train")
|
|
|
|
| 13 |
|
| 14 |
target_audio_dir = "/home/user/app/audio"
|
| 15 |
os.makedirs(target_audio_dir, exist_ok=True)
|
|
|
|
| 92 |
def load_or_initialize_count_json(audio_paths):
    """Load the per-file usage counts, adding entries for any new audio files.

    Counts are read from ``COUNT_JSON_PATH`` when that file exists; otherwise
    an empty ordered mapping is used. Every basename in ``audio_paths`` that
    has no entry yet gets one: 999 if it is the basename of an ``"audio"``
    value in ``DIMENSIONS_DATA`` (a sample clip), else 0. The file is
    (re)written when entries were added or when it does not exist.

    Args:
        audio_paths: Iterable of audio file paths; only basenames are used.

    Returns:
        The ordered mapping of basename to count.
    """
    if os.path.exists(COUNT_JSON_PATH):
        with open(COUNT_JSON_PATH, "r", encoding="utf-8") as src:
            # object_pairs_hook keeps the key order found in the file.
            count_data = json.load(src, object_pairs_hook=collections.OrderedDict)
    else:
        count_data = collections.OrderedDict()

    # Basenames of all sample clips.
    sample_audio_files = {os.path.basename(entry["audio"]) for entry in DIMENSIONS_DATA}

    dirty = False
    for name in map(os.path.basename, audio_paths):
        if name in count_data:
            continue
        # Sample clips start at a very large count so they are never picked;
        # everything else starts unused.
        count_data[name] = 999 if name in sample_audio_files else 0
        dirty = True

    if dirty or not os.path.exists(COUNT_JSON_PATH):
        with open(COUNT_JSON_PATH, "w", encoding="utf-8") as dst:
            # Dumping the ordered mapping preserves key order on disk too.
            json.dump(count_data, dst, indent=4, ensure_ascii=False)

    return count_data
|
|
|
|
| 122 |
def append_cache_buster(audio_path):
    """Return ``audio_path`` with a millisecond-timestamp ``t`` query parameter appended.

    The timestamp changes on every call, so the returned string differs from
    earlier results for the same path.

    Args:
        audio_path: Path or URL string to decorate.

    Returns:
        ``audio_path`` followed by ``?t=<ms>``, or by ``&t=<ms>`` when the
        input already contains a ``?`` (so the result never holds two query
        introducers).
    """
    # A second "?" would be read as part of the previous parameter's value,
    # so chain onto an existing query string with "&" instead.
    separator = "&" if "?" in audio_path else "?"
    return f"{audio_path}{separator}t={int(time.time() * 1000)}"
|
| 124 |
|
| 125 |
+
def sample_audio_paths(audio_paths, count_data, k=5, max_count=1):
    """Randomly pick ``k`` under-used audio paths and record the picks.

    A path is eligible while the count stored under its basename in
    ``count_data`` (0 if absent) is below ``max_count``. The counts of the
    chosen files are incremented in place on the caller's ``count_data`` and
    the whole mapping is written to ``COUNT_JSON_PATH``.

    Args:
        audio_paths: Candidate audio file paths; counts are keyed by basename.
        count_data: Mutable mapping of basename to use count; updated in place.
        k: Number of paths to draw.
        max_count: A path is eligible only while its count is below this.

    Returns:
        ``(selected, count_data)``: the chosen paths and the same, updated
        mapping that was passed in.

    Raises:
        ValueError: Fewer than ``k`` eligible paths remain.
    """
    eligible_paths = [
        p for p in audio_paths
        if count_data.get(os.path.basename(p), 0) < max_count
    ]

    if len(eligible_paths) < k:
        raise ValueError(f"可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")

    # The global RNG is deliberately NOT reseeded here: seeding it with
    # int(time.time()) made draws predictable and identical for calls landing
    # in the same second, and reset RNG state for every other user of the
    # random module. random.sample does not modify its input, so no defensive
    # copy of the list is needed either.
    selected = random.sample(eligible_paths, k)

    # Record one more use for every selected file.
    for path in selected:
        filename = os.path.basename(path)
        count_data[filename] = count_data.get(filename, 0) + 1

    # NOTE(review): no lock is taken around this update-and-write; confirm
    # whether callers serialize access to COUNT_JSON_PATH.
    with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
        json.dump(count_data, f, indent=4, ensure_ascii=False)

    return selected, count_data
|
| 145 |
|
| 146 |
count_data = load_or_initialize_count_json(all_data_audio_paths)
|
|
|
|
| 341 |
username = user_data.get("username", "user")
|
| 342 |
timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
|
| 343 |
submission_filename = f"submissions_{username}_{timestamp}.json"
|
|
|
|
|
|
|
| 344 |
|
| 345 |
final_data_package = {
|
| 346 |
"user_info": user_data,
|
|
|
|
| 382 |
)
|
| 383 |
print("count.json 上传成功")
|
| 384 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
except Exception as e:
|
| 386 |
print(f"上传出错: {e}")
|
| 387 |
|
|
|
|
| 436 |
start_btn = gr.Button("开始挑战", variant="primary")
|
| 437 |
|
| 438 |
with info_page:
|
|
|
|
|
|
|
| 439 |
username_input = gr.Textbox(label="用户名", placeholder="请输入你的昵称")
|
| 440 |
+
gr.Markdown("## 请提供一些基本信息")
|
| 441 |
+
username_input = gr.Textbox(label="用户名", placeholder="请输入一个昵称或代号")
|
| 442 |
age_input = gr.Radio(["18岁以下", "18-25岁", "26-35岁", "36-50岁", "50岁以上"], label="年龄")
|
| 443 |
gender_input = gr.Radio(["男", "女", "其他"], label="性别")
|
| 444 |
education_input = gr.Radio(["高中及以下", "本科", "硕士", "博士", "其他(请注明)"], label="学历")
|
|
|
|
| 529 |
|
| 530 |
start_btn.click(fn=start_challenge, outputs=[welcome_page, info_page])
|
| 531 |
|
| 532 |
+
for comp in [age_input, gender_input, education_input, education_other_input, ai_experience_input]:
|
| 533 |
comp.change(
|
| 534 |
fn=check_info_complete,
|
|
|
|
| 535 |
inputs=[username_input, age_input, gender_input, education_input, education_other_input, ai_experience_input],
|
| 536 |
outputs=submit_info_btn
|
| 537 |
)
|
|
|
|
| 600 |
for audio_file in set(all_files):
|
| 601 |
if not os.path.exists(audio_file):
|
| 602 |
print(f"⚠️ Warning: Audio file not found: {audio_file}")
|
| 603 |
+
|
|
|
|
| 604 |
demo.launch(debug=True)
|