Emilyxml committed on
Commit
6cc8dbe
·
verified ·
1 Parent(s): f544cf4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -49
app.py CHANGED
@@ -6,7 +6,7 @@ import csv
6
  from datetime import datetime
7
  from pathlib import Path
8
  from PIL import Image
9
- from huggingface_hub import snapshot_download, HfApi # 引入 HfApi 用于直接上传
10
 
11
  # --- 1. 配置区域 ---
12
  DATASET_REPO_ID = "Emilyxml/moveit"
@@ -15,10 +15,8 @@ LOG_FOLDER = Path("logs")
15
  LOG_FOLDER.mkdir(parents=True, exist_ok=True)
16
  TOKEN = os.environ.get("HF_TOKEN")
17
 
18
- # 初始化 API 工具
19
- api = HfApi(token=TOKEN)
20
-
21
  # --- 2. 自动下载数据 ---
 
22
  if not os.path.exists(DATA_FOLDER) or not os.listdir(DATA_FOLDER):
23
  try:
24
  print("🚀 正在从 Dataset 下载数据...")
@@ -33,10 +31,18 @@ if not os.path.exists(DATA_FOLDER) or not os.listdir(DATA_FOLDER):
33
  except Exception as e:
34
  print(f"⚠️ 下载失败: {e}")
35
 
36
- # --- (注意:这里删除了 CommitScheduler) ---
37
- # 我们不再需要 Scheduler,改用手动上传
38
-
39
- # --- 3. 数据加载 ---
 
 
 
 
 
 
 
 
40
  def load_data():
41
  groups = {}
42
  if not os.path.exists(DATA_FOLDER):
@@ -75,38 +81,50 @@ def load_data():
75
 
76
  ALL_GROUPS, ALL_GROUP_IDS = load_data()
77
 
78
- # --- 图片优化函数 ---
79
- def optimize_image(image_path, max_width=800):
80
- if not image_path: return None
 
 
 
 
81
  try:
82
  img = Image.open(image_path)
 
 
 
 
83
  if img.width > max_width:
84
  ratio = max_width / img.width
85
  new_height = int(img.height * ratio)
 
86
  img = img.resize((max_width, new_height), Image.LANCZOS)
87
  return img
88
  except Exception as e:
89
- print(f"Error: {e}")
90
  return None
91
 
92
- # --- 4. 核心逻辑 ---
93
 
94
  def get_next_question(user_state):
95
  idx = user_state["index"]
96
 
 
97
  if idx >= len(ALL_GROUP_IDS):
98
  return (
99
  gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),
100
  gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),
101
  gr.update(value="## 🎉 测试结束!感谢您的参与。", visible=True),
102
- user_state, []
103
  )
104
 
105
  group_id = ALL_GROUP_IDS[idx]
106
  group_data = ALL_GROUPS[group_id]
107
 
108
- origin_img = optimize_image(group_data["origin"], max_width=600)
 
109
 
 
110
  candidates = group_data["candidates"].copy()
111
  random.shuffle(candidates)
112
 
@@ -116,7 +134,8 @@ def get_next_question(user_state):
116
 
117
  for i, path in enumerate(candidates):
118
  label = f"Option {chr(65+i)}"
119
- optimized_img = optimize_image(path, max_width=600)
 
120
  gallery_items.append((optimized_img, label))
121
  choices.append(label)
122
  candidates_info.append({"label": label, "path": path})
@@ -156,43 +175,33 @@ def save_and_next(user_state, candidates_info, selected_options, is_none=False):
156
  break
157
  method_str = "; ".join(selected_methods)
158
 
159
- # --- 关键修改:保存并立即上传 ---
160
  user_filename = f"user_{user_state['user_id']}.csv"
161
  user_file_path = LOG_FOLDER / user_filename
162
 
163
- # 1. 写入本地 CSV
164
- file_exists = user_file_path.exists()
165
- with open(user_file_path, "a", newline="", encoding="utf-8") as f:
166
- writer = csv.writer(f)
167
- if not file_exists:
168
- writer.writerow(["user_id", "timestamp", "group_id", "choices", "methods"])
169
- writer.writerow([
170
- user_state["user_id"],
171
- datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
172
- group_id,
173
- choice_str,
174
- method_str
175
- ])
176
-
177
- # 2. 立即上传到 Dataset (同步操作)
178
- try:
179
- print(f"Uploading {user_filename} to dataset...")
180
- api.upload_file(
181
- path_or_fileobj=user_file_path,
182
- path_in_repo=f"logs/{user_filename}", # 在 Dataset 中创建 logs 文件夹
183
- repo_id=DATASET_REPO_ID,
184
- repo_type="dataset"
185
- )
186
- print("Upload success!")
187
- except Exception as e:
188
- print(f"⚠️ Upload failed: {e}")
189
- # 如果是 Token 权限问题,这里会在 Space Logs 里报错
190
 
191
  user_state["index"] += 1
192
  return get_next_question(user_state)
193
 
194
- # --- 5. 界面构建 ---
195
  with gr.Blocks(title="User Study") as demo:
 
196
  state_user = gr.State(lambda: {"user_id": str(uuid.uuid4())[:8], "index": 0})
197
  state_candidates_info = gr.State([])
198
 
@@ -201,6 +210,7 @@ with gr.Blocks(title="User Study") as demo:
201
 
202
  with gr.Row():
203
  with gr.Column(scale=1):
 
204
  img_origin = gr.Image(label="Reference (参考原图)", interactive=False, height=400, format="jpeg")
205
 
206
  with gr.Column(scale=2):
@@ -221,7 +231,4 @@ with gr.Blocks(title="User Study") as demo:
221
 
222
  btn_submit.click(fn=lambda s, c, o: save_and_next(s, c, o, is_none=False), inputs=[state_user, state_candidates_info, checkbox_options], outputs=[img_origin, gallery_candidates, checkbox_options, md_instruction, btn_submit, btn_none, md_end, state_user, state_candidates_info])
223
 
224
- btn_none.click(fn=lambda s, c, o: save_and_next(s, c, o, is_none=True), inputs=[state_user, state_candidates_info, checkbox_options], outputs=[img_origin, gallery_candidates, checkbox_options, md_instruction, btn_submit, btn_none, md_end, state_user, state_candidates_info])
225
-
226
- if __name__ == "__main__":
227
- demo.launch()
 
6
  from datetime import datetime
7
  from pathlib import Path
8
  from PIL import Image
9
+ from huggingface_hub import CommitScheduler, snapshot_download
10
 
11
  # --- 1. 配置区域 ---
12
  DATASET_REPO_ID = "Emilyxml/moveit"
 
15
  LOG_FOLDER.mkdir(parents=True, exist_ok=True)
16
  TOKEN = os.environ.get("HF_TOKEN")
17
 
 
 
 
18
  # --- 2. 自动下载数据 ---
19
+ # 只有本地为空时才下载,避免每次重启都浪费时间
20
  if not os.path.exists(DATA_FOLDER) or not os.listdir(DATA_FOLDER):
21
  try:
22
  print("🚀 正在从 Dataset 下载数据...")
 
31
  except Exception as e:
32
  print(f"⚠️ 下载失败: {e}")
33
 
34
+ # --- 3. 恢复后台同步 (解决点击卡顿的关键) ---
35
+ # 使用 Scheduler,提交操作不需要等待网络上传,速度最快
36
+ scheduler = CommitScheduler(
37
+ repo_id=DATASET_REPO_ID,
38
+ repo_type="dataset",
39
+ folder_path=LOG_FOLDER,
40
+ path_in_repo="logs",
41
+ every=1, # sync every 1 minute (`every` is a time interval in minutes; there is no count-based trigger)
42
+ token=TOKEN
43
+ )
44
+
45
+ # --- 4. 数据加载 ---
46
  def load_data():
47
  groups = {}
48
  if not os.path.exists(DATA_FOLDER):
 
81
 
82
  ALL_GROUPS, ALL_GROUP_IDS = load_data()
83
 
84
+ # --- NEW: 更激进的图片优化 (解决加载慢的关键) ---
85
+ def optimize_image(image_path, max_width=500):
86
+ """
87
+ 调整大小至 500px,对于 User Study 的缩略图查看完全足够。
88
+ """
89
+ if not image_path:
90
+ return None
91
  try:
92
  img = Image.open(image_path)
93
+ # 转换为 RGB 防止 PNG 透明通道在 JPEG 转换时报错
94
+ if img.mode in ("RGBA", "P"):
95
+ img = img.convert("RGB")
96
+
97
  if img.width > max_width:
98
  ratio = max_width / img.width
99
  new_height = int(img.height * ratio)
100
+ # 使用 LANCZOS 算法保证缩放质量
101
  img = img.resize((max_width, new_height), Image.LANCZOS)
102
  return img
103
  except Exception as e:
104
+ print(f"Error loading image {image_path}: {e}")
105
  return None
106
 
107
+ # --- 5. 核心逻辑 ---
108
 
109
  def get_next_question(user_state):
110
  idx = user_state["index"]
111
 
112
+ # 结束逻辑
113
  if idx >= len(ALL_GROUP_IDS):
114
  return (
115
  gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),
116
  gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),
117
  gr.update(value="## 🎉 测试结束!感谢您的参与。", visible=True),
118
+ user_state, []
119
  )
120
 
121
  group_id = ALL_GROUP_IDS[idx]
122
  group_data = ALL_GROUPS[group_id]
123
 
124
+ # 1. 优化原图
125
+ origin_img = optimize_image(group_data["origin"], max_width=500)
126
 
127
+ # 2. 优化候选图
128
  candidates = group_data["candidates"].copy()
129
  random.shuffle(candidates)
130
 
 
134
 
135
  for i, path in enumerate(candidates):
136
  label = f"Option {chr(65+i)}"
137
+ # 优化每张候选图
138
+ optimized_img = optimize_image(path, max_width=500)
139
  gallery_items.append((optimized_img, label))
140
  choices.append(label)
141
  candidates_info.append({"label": label, "path": path})
 
175
  break
176
  method_str = "; ".join(selected_methods)
177
 
178
+ # --- 极速保存:只写本地文件,不等待网络上传 ---
179
  user_filename = f"user_{user_state['user_id']}.csv"
180
  user_file_path = LOG_FOLDER / user_filename
181
 
182
+ # 使用 Scheduler 提供的锁来保证多线程安全
183
+ with scheduler.lock:
184
+ file_exists = user_file_path.exists()
185
+ with open(user_file_path, "a", newline="", encoding="utf-8") as f:
186
+ writer = csv.writer(f)
187
+ if not file_exists:
188
+ writer.writerow(["user_id", "timestamp", "group_id", "choices", "methods"])
189
+ writer.writerow([
190
+ user_state["user_id"],
191
+ datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
192
+ group_id,
193
+ choice_str,
194
+ method_str
195
+ ])
196
+
197
+ print(f"✅ Local Saved: {group_id} (Upload will happen in background)")
 
 
 
 
 
 
 
 
 
 
 
198
 
199
  user_state["index"] += 1
200
  return get_next_question(user_state)
201
 
202
+ # --- 6. 界面构建 ---
203
  with gr.Blocks(title="User Study") as demo:
204
+
205
  state_user = gr.State(lambda: {"user_id": str(uuid.uuid4())[:8], "index": 0})
206
  state_candidates_info = gr.State([])
207
 
 
210
 
211
  with gr.Row():
212
  with gr.Column(scale=1):
213
+ # 强制 JPEG 格式,quality 默认 90,显示速度快
214
  img_origin = gr.Image(label="Reference (参考原图)", interactive=False, height=400, format="jpeg")
215
 
216
  with gr.Column(scale=2):
 
231
 
232
  btn_submit.click(fn=lambda s, c, o: save_and_next(s, c, o, is_none=False), inputs=[state_user, state_candidates_info, checkbox_options], outputs=[img_origin, gallery_candidates, checkbox_options, md_instruction, btn_submit, btn_none, md_end, state_user, state_candidates_info])
233
 
234
+ btn_none.click(fn=lambda s, c, o: save_and_next(s, c, o, is_none=True), inputs=[state_user, state_candidates_info, checkbox