Toya0421 committed on
Commit
243ef17
·
verified ·
1 Parent(s): 0176aba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -104
app.py CHANGED
@@ -29,14 +29,10 @@ REWRITE_CONCURRENCY = int(os.getenv("REWRITE_CONCURRENCY", "3")) # 5,6人想定
29
  _rewrite_sem = threading.Semaphore(REWRITE_CONCURRENCY)
30
 
31
  # ======================================================
32
- # (①) ログ:毎アクションはFiles(/data/log.csv)へ追記のみ(csv 1行追記)
33
- # + 5分ごとに「新規ログがある時だけ」dataset.push_to_hub(dirtyフラグ)
34
  # ======================================================
35
  _log_lock = threading.Lock()
36
- PUSH_INTERVAL_SEC = int(os.getenv("PUSH_INTERVAL_SEC", "600")) # 5分
37
-
38
- # ★追加:dirtyフラグ(ログが増えたか)
39
- _log_dirty = False
40
 
41
  # ★CSVの列順を固定(headerもこの順で出す)
42
  LOG_COLUMNS = [
@@ -50,90 +46,51 @@ LOG_COLUMNS = [
50
  "page_text",
51
  ]
52
 
 
 
 
 
 
 
 
53
  def save_log(entry):
54
  """
55
- 毎アクション:log.csv に1行追記(軽量)
56
  """
57
- global _log_dirty
58
-
59
- dirpath = os.path.dirname(LOG_FILE)
60
- if dirpath:
61
- os.makedirs(dirpath, exist_ok=True)
62
-
63
- # 列順を固定して欠損は空に
64
  row = {k: entry.get(k, "") for k in LOG_COLUMNS}
65
-
66
  with _log_lock:
67
- file_exists = os.path.exists(LOG_FILE)
68
 
69
- with open(LOG_FILE, "a", encoding="utf-8", newline="") as f:
70
- writer = csv.DictWriter(f, fieldnames=LOG_COLUMNS)
71
- if not file_exists:
72
- writer.writeheader()
73
- writer.writerow(row)
74
 
75
- _log_dirty = True # ★追加:新規ログが追加された
76
-
77
- def _push_logs_to_hub_once():
78
  """
79
- 5分ごと:/data/log.csv を読み込み、parquet化して Dataset として Hub にpush
80
- ただし dirty のときだけ実行
81
  """
82
- global _log_dirty
 
83
 
84
- if not HF_TOKEN:
85
- print("[WARN] HF_TOKEN is not set. Skip push.")
86
- return
87
- if not os.path.exists(LOG_FILE):
88
- return
 
 
89
 
90
- # ★dirtyでないなら何もしない
91
- with _log_lock:
92
- if not _log_dirty:
93
- return
94
- # dirtyを先に落としておく(push失敗時は戻す)
95
- _log_dirty = False
96
-
97
- try:
98
- # CSV追記と競合しないようロックして読む
99
- with _log_lock:
100
- all_logs = pd.read_csv(LOG_FILE)
101
-
102
- tmp_dir = tempfile.mkdtemp()
103
- tmp_path = os.path.join(tmp_dir, "data.parquet")
104
- all_logs.to_parquet(tmp_path)
105
-
106
- dataset = Dataset.from_parquet(tmp_path)
107
- dataset.push_to_hub(DATASET_REPO, token=HF_TOKEN)
108
-
109
- except Exception as e:
110
- # push失敗時は dirty を戻して次回再試行
111
- with _log_lock:
112
- _log_dirty = True
113
- raise e
114
-
115
- def _start_periodic_push_thread():
116
  """
117
- アプリ起動時に1回だけ呼ぶ。以後はdaemonスレッドで5分ごとにpush。
118
  """
119
- def _worker():
120
- while True:
121
- time.sleep(PUSH_INTERVAL_SEC)
122
- try:
123
- _push_logs_to_hub_once()
124
- except Exception as e:
125
- # push失敗しても全体を落とさない
126
- print(f"[WARN] periodic push failed: {e}")
127
-
128
- t = threading.Thread(target=_worker, daemon=True)
129
- t.start()
130
 
131
- # 起動(1回だけ)
132
- _start_periodic_push_thread()
133
 
134
  # ======================================================
135
  # 新しい教材管理:passages フォルダからランダム選択
136
  # ※ used_passages は session_state に保持(グローバル禁止)
 
137
  # ======================================================
138
 
139
  def load_passage_file(text_id):
@@ -158,11 +115,18 @@ def get_title_from_excel(text_id):
158
  return None
159
  return str(title)
160
 
161
- def get_new_passage_random(used_passages_set):
162
  """
163
  passages フォルダからランダムに教材を選び(pg◯.txt)、
164
  passage_information.xlsx の Text# の flesch_score を original_level として返す。
 
 
 
 
165
  """
 
 
 
166
  files = glob.glob("passages/pg*.txt")
167
  if not files:
168
  return None, None, None, None, used_passages_set
@@ -174,10 +138,28 @@ def get_new_passage_random(used_passages_set):
174
  if num.isdigit():
175
  all_ids.append(int(num))
176
 
177
- available = [pid for pid in all_ids if pid not in used_passages_set]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  if not available:
179
  used_passages_set = set()
180
- available = list(all_ids)
181
 
182
  text_id = random.choice(available)
183
  used_passages_set.add(text_id)
@@ -241,7 +223,6 @@ def extract_main_body(text: str) -> str:
241
  kept.append(ln)
242
 
243
  # 先頭の「短いタイトル行」っぽいものを数行だけ落とす(本文が始まるまで)
244
- # 例:短い1行、ピリオド無し、単語数少ない、など
245
  def is_title_like(s: str) -> bool:
246
  t = s.strip()
247
  if not t:
@@ -252,7 +233,6 @@ def extract_main_body(text: str) -> str:
252
  return False
253
 
254
  i = 0
255
- # 空行・タイトルらしい行を最大3行までスキップ
256
  skipped = 0
257
  while i < len(kept) and skipped < 3 and is_title_like(kept[i]):
258
  i += 1
@@ -260,11 +240,9 @@ def extract_main_body(text: str) -> str:
260
 
261
  body_lines = kept[i:]
262
 
263
- # 末尾の空行を整理
264
  while body_lines and body_lines[-1].strip() == "":
265
  body_lines.pop()
266
 
267
- # 連続空行を最大1つに圧縮(章見出しの区切りは残る)
268
  out = []
269
  blank = False
270
  for ln in body_lines:
@@ -305,7 +283,6 @@ excluding the title, author name, source information, chapter number, annotation
305
  {text}
306
  """
307
 
308
- # (③) rewrite API 同時実行制限
309
  with _rewrite_sem:
310
  resp = client.chat.completions.create(
311
  model="google/gemini-2.5-flash",
@@ -316,11 +293,6 @@ excluding the title, author name, source information, chapter number, annotation
316
  return resp.choices[0].message.content.strip()
317
 
318
  def split_pages(text, max_words=300):
319
- """
320
- 文単位でページを分割する。
321
- - 文の途中でページを分割しない
322
- - max_words の上限を超えないようにする
323
- """
324
  sentences = re.split(r'(?<=[.!?])\s+', text.strip())
325
  pages = []
326
  current_page = []
@@ -352,7 +324,6 @@ def start_test(student_id, level_input, group_input, session_state):
352
  action = "start_pushed"
353
  now = (datetime.utcnow() + timedelta(hours=9)).isoformat()
354
 
355
- # student_id 未入力でも「押した」ログは残す(元意図に近い)
356
  if not student_id or str(student_id).strip() == "":
357
  entry = {
358
  "user_id": None,
@@ -366,7 +337,6 @@ def start_test(student_id, level_input, group_input, session_state):
366
  }
367
  save_log(entry)
368
 
369
- # ★修正:出力個数を「UIのoutputs(13個)」に揃える
370
  return (
371
  "", # title_display
372
  "", # text_display
@@ -378,7 +348,7 @@ def start_test(student_id, level_input, group_input, session_state):
378
  "", # hidden_orig_lev
379
  None, # hidden_assigned_lev(level)
380
  gr.update(interactive=False, visible=False), # prev_btn
381
- gr.update(interactive=False, visible=True), # next_btn(元の意図踏襲)
382
  gr.update(interactive=False, visible=False), # finish_btn
383
  session_state
384
  )
@@ -387,7 +357,6 @@ def start_test(student_id, level_input, group_input, session_state):
387
  level = int(level_input)
388
  group = int(group_input)
389
 
390
- # startでリセット(元コード踏襲)
391
  used_passages_set = set()
392
 
393
  entry = {
@@ -402,7 +371,8 @@ def start_test(student_id, level_input, group_input, session_state):
402
  }
403
  save_log(entry)
404
 
405
- pid, text, orig_lev, title, used_passages_set = get_new_passage_random(used_passages_set)
 
406
  if text is None:
407
  return (
408
  "", # title_display
@@ -420,11 +390,10 @@ def start_test(student_id, level_input, group_input, session_state):
420
  session_state
421
  )
422
 
423
- # ★追加:グループ分岐
424
  if group == 1:
425
- processed = extract_main_body(text) # 書き換えなし、本文抽出のみ
426
  else:
427
- processed = rewrite_level(text, level) # 今のプロンプトで書き換え
428
 
429
  pages = split_pages(processed)
430
  total = len(pages)
@@ -453,7 +422,6 @@ def start_test(student_id, level_input, group_input, session_state):
453
  }
454
  save_log(entry2)
455
 
456
- # session_state 更新(★groupを保持)
457
  session_state = {
458
  "user_id": user_id,
459
  "level": level,
@@ -605,7 +573,7 @@ def prev_page(pages_json, current_page, total_pages, pid, orig_lev, session_stat
605
  def finish_or_retire(pages_json, current_page, pid, orig_lev, action, session_state):
606
  user_id = session_state.get("user_id")
607
  level = session_state.get("level")
608
- group = session_state.get("group") # ★追加:グループ保持(未設定なら2扱い)
609
  used_passages_set = set(session_state.get("used_passages", []))
610
 
611
  pages = json.loads(pages_json)
@@ -623,7 +591,8 @@ def finish_or_retire(pages_json, current_page, pid, orig_lev, action, session_st
623
  }
624
  save_log(entry)
625
 
626
- new_pid, new_text, new_orig_lev, title, used_passages_set = get_new_passage_random(used_passages_set)
 
627
  if new_text is None:
628
  return (
629
  "", "教材がありません", "", json.dumps([]), 0, "",
@@ -634,7 +603,6 @@ def finish_or_retire(pages_json, current_page, pid, orig_lev, action, session_st
634
  session_state
635
  )
636
 
637
- # ★追加:グループ分岐
638
  if group == 1:
639
  processed = extract_main_body(new_text)
640
  else:
@@ -690,6 +658,7 @@ def finish_or_retire(pages_json, current_page, pid, orig_lev, action, session_st
690
 
691
  # ======================================================
692
  # UI(タイトル表示を追加。それ以外は変更しない)
 
693
  # ======================================================
694
  custom_css = """
695
  /* ===============================
@@ -734,7 +703,6 @@ custom_css = """
734
  background-color: #1e1e1e !important;
735
  color: #e6e6e6 !important;
736
  }
737
- /* 教材の背景は黒すぎると読みにくいのでやや明るめのチャコール */
738
  .reading-area {
739
  background-color: #2a2a2a !important;
740
  color: #f2f2f2 !important;
@@ -745,7 +713,6 @@ custom_css = """
745
  color: #f0f0f0 !important;
746
  border: 1px solid #555 !important;
747
  }
748
- /* ボタンを見やすく */
749
  button {
750
  background-color: #3a3a3a !important;
751
  color: #f0f0f0 !important;
@@ -764,12 +731,10 @@ custom_css = """
764
  with gr.Blocks(css=custom_css) as demo:
765
  gr.Markdown("# 📚 Reading Exercise")
766
 
767
- # セッションごとの状態(グローバル禁止)
768
  session_state = gr.State({"user_id": None, "level": None, "group": 2, "used_passages": []})
769
 
770
  student_id_input = gr.Textbox(label="学生番号(必須)")
771
 
772
- # ★追加:グループ選択(1:本文抽出のみ, 2:書き換え)
773
  group_input = gr.Radio(
774
  choices=[("Group 1", 1), ("Group 2", 2)],
775
  label="実験グループを選択",
@@ -784,7 +749,6 @@ with gr.Blocks(css=custom_css) as demo:
784
 
785
  start_btn = gr.Button("スタート")
786
 
787
- # ★追加:タイトル表示(教材の上)
788
  title_display = gr.Markdown("**Title:** ", elem_classes=["title-card"])
789
 
790
  text_display = gr.Textbox(
@@ -885,7 +849,17 @@ with gr.Blocks(css=custom_css) as demo:
885
  ]
886
  )
887
 
888
- # Gradio側のキューもON(HF環境差分で壊れにくい設定に寄せる)
889
- demo.queue(max_size=64)
 
 
 
890
 
 
 
 
 
 
 
 
891
  demo.launch()
 
29
  _rewrite_sem = threading.Semaphore(REWRITE_CONCURRENCY)
30
 
31
  # ======================================================
32
+ # (①) ログ:DatasetにもFilesにも保存しない
33
+ # メモリ上に保持し、パスワード付きでCSVダウンロード
34
  # ======================================================
35
  _log_lock = threading.Lock()
 
 
 
 
36
 
37
  # ★CSVの列順を固定(headerもこの順で出す)
38
  LOG_COLUMNS = [
 
46
  "page_text",
47
  ]
48
 
49
+ # ★追加:メモリログ(Filesに保存しない)
50
+ _LOG_ROWS: list[dict] = []
51
+
52
+ # ★追加:ダウンロード用パスワード(環境変数推奨)
53
+ LOG_DOWNLOAD_PASSWORD = os.getenv("LOG_DOWNLOAD_PASSWORD", "0421")
54
+
55
+
56
  def save_log(entry):
57
  """
58
+ 毎アクション:メモリに追記のみ(FilesにもDatasetにも保存しない)
59
  """
 
 
 
 
 
 
 
60
  row = {k: entry.get(k, "") for k in LOG_COLUMNS}
 
61
  with _log_lock:
62
+ _LOG_ROWS.append(row)
63
 
 
 
 
 
 
64
 
65
+ def export_logs_to_csv_file() -> str:
 
 
66
  """
67
+ 現在のメモリログを一時CSVにしてパスを返す
 
68
  """
69
+ with _log_lock:
70
+ rows = list(_LOG_ROWS)
71
 
72
+ tmp_dir = tempfile.mkdtemp()
73
+ path = os.path.join(tmp_dir, "log.csv")
74
+ with open(path, "w", encoding="utf-8", newline="") as f:
75
+ w = csv.DictWriter(f, fieldnames=LOG_COLUMNS)
76
+ w.writeheader()
77
+ w.writerows(rows)
78
+ return path
79
 
80
+
81
+ def download_log_csv(password: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  """
83
+ パスワードが一致した場合のみCSVを生成して返す
84
  """
85
+ if (password or "").strip() != LOG_DOWNLOAD_PASSWORD:
86
+ raise gr.Error("パスワードが違います。")
87
+ return export_logs_to_csv_file()
 
 
 
 
 
 
 
 
88
 
 
 
89
 
90
  # ======================================================
91
  # 新しい教材管理:passages フォルダからランダム選択
92
  # ※ used_passages は session_state に保持(グローバル禁止)
93
+ # ★変更:target level よりスコアが低い教材から選ぶ(excelのflesch_score)
94
  # ======================================================
95
 
96
  def load_passage_file(text_id):
 
115
  return None
116
  return str(title)
117
 
118
+ def get_new_passage_random(used_passages_set, target_level):
119
  """
120
  passages フォルダからランダムに教材を選び(pg◯.txt)、
121
  passage_information.xlsx の Text# の flesch_score を original_level として返す。
122
+
123
+ ★変更点:
124
+ - ユーザーの target_level に対応する目標FREよりも低い(=難しい)教材のみから選ぶ
125
+ ※ flesch_score は passage_information.xlsx から取得
126
  """
127
+ level_to_flesch = {1: 90, 2: 75, 3: 65, 4: 55, 5: 40}
128
+ target_flesch = float(level_to_flesch[int(target_level)])
129
+
130
  files = glob.glob("passages/pg*.txt")
131
  if not files:
132
  return None, None, None, None, used_passages_set
 
138
  if num.isdigit():
139
  all_ids.append(int(num))
140
 
141
+ # ★追加:excelのflesch_scoreでフィルタ(target_fleschより低いものだけ)
142
+ # passage_information.xlsx に該当行がないものは除外(スコア不明だと条件判定できないため)
143
+ eligible_ids = []
144
+ for pid in all_ids:
145
+ row = passage_info_df[passage_info_df["Text#"] == pid]
146
+ if len(row) == 0:
147
+ continue
148
+ fs = row.iloc[0].get("flesch_score", None)
149
+ try:
150
+ fs = float(fs)
151
+ except Exception:
152
+ continue
153
+ if fs < target_flesch:
154
+ eligible_ids.append(pid)
155
+
156
+ if not eligible_ids:
157
+ return None, None, None, None, used_passages_set
158
+
159
+ available = [pid for pid in eligible_ids if pid not in used_passages_set]
160
  if not available:
161
  used_passages_set = set()
162
+ available = list(eligible_ids)
163
 
164
  text_id = random.choice(available)
165
  used_passages_set.add(text_id)
 
223
  kept.append(ln)
224
 
225
  # 先頭の「短いタイトル行」っぽいものを数行だけ落とす(本文が始まるまで)
 
226
  def is_title_like(s: str) -> bool:
227
  t = s.strip()
228
  if not t:
 
233
  return False
234
 
235
  i = 0
 
236
  skipped = 0
237
  while i < len(kept) and skipped < 3 and is_title_like(kept[i]):
238
  i += 1
 
240
 
241
  body_lines = kept[i:]
242
 
 
243
  while body_lines and body_lines[-1].strip() == "":
244
  body_lines.pop()
245
 
 
246
  out = []
247
  blank = False
248
  for ln in body_lines:
 
283
  {text}
284
  """
285
 
 
286
  with _rewrite_sem:
287
  resp = client.chat.completions.create(
288
  model="google/gemini-2.5-flash",
 
293
  return resp.choices[0].message.content.strip()
294
 
295
  def split_pages(text, max_words=300):
 
 
 
 
 
296
  sentences = re.split(r'(?<=[.!?])\s+', text.strip())
297
  pages = []
298
  current_page = []
 
324
  action = "start_pushed"
325
  now = (datetime.utcnow() + timedelta(hours=9)).isoformat()
326
 
 
327
  if not student_id or str(student_id).strip() == "":
328
  entry = {
329
  "user_id": None,
 
337
  }
338
  save_log(entry)
339
 
 
340
  return (
341
  "", # title_display
342
  "", # text_display
 
348
  "", # hidden_orig_lev
349
  None, # hidden_assigned_lev(level)
350
  gr.update(interactive=False, visible=False), # prev_btn
351
+ gr.update(interactive=False, visible=True), # next_btn
352
  gr.update(interactive=False, visible=False), # finish_btn
353
  session_state
354
  )
 
357
  level = int(level_input)
358
  group = int(group_input)
359
 
 
360
  used_passages_set = set()
361
 
362
  entry = {
 
371
  }
372
  save_log(entry)
373
 
374
+ # ★変更:target level を渡して「難しい教材のみ」から選ぶ
375
+ pid, text, orig_lev, title, used_passages_set = get_new_passage_random(used_passages_set, level)
376
  if text is None:
377
  return (
378
  "", # title_display
 
390
  session_state
391
  )
392
 
 
393
  if group == 1:
394
+ processed = extract_main_body(text)
395
  else:
396
+ processed = rewrite_level(text, level)
397
 
398
  pages = split_pages(processed)
399
  total = len(pages)
 
422
  }
423
  save_log(entry2)
424
 
 
425
  session_state = {
426
  "user_id": user_id,
427
  "level": level,
 
573
  def finish_or_retire(pages_json, current_page, pid, orig_lev, action, session_state):
574
  user_id = session_state.get("user_id")
575
  level = session_state.get("level")
576
+ group = session_state.get("group")
577
  used_passages_set = set(session_state.get("used_passages", []))
578
 
579
  pages = json.loads(pages_json)
 
591
  }
592
  save_log(entry)
593
 
594
+ # ★変更:target level を渡して「難しい教材のみ」から選ぶ
595
+ new_pid, new_text, new_orig_lev, title, used_passages_set = get_new_passage_random(used_passages_set, level)
596
  if new_text is None:
597
  return (
598
  "", "教材がありません", "", json.dumps([]), 0, "",
 
603
  session_state
604
  )
605
 
 
606
  if group == 1:
607
  processed = extract_main_body(new_text)
608
  else:
 
658
 
659
  # ======================================================
660
  # UI(タイトル表示を追加。それ以外は変更しない)
661
+ # ★追加:パスワード付きログCSVダウンロード
662
  # ======================================================
663
  custom_css = """
664
  /* ===============================
 
703
  background-color: #1e1e1e !important;
704
  color: #e6e6e6 !important;
705
  }
 
706
  .reading-area {
707
  background-color: #2a2a2a !important;
708
  color: #f2f2f2 !important;
 
713
  color: #f0f0f0 !important;
714
  border: 1px solid #555 !important;
715
  }
 
716
  button {
717
  background-color: #3a3a3a !important;
718
  color: #f0f0f0 !important;
 
731
  with gr.Blocks(css=custom_css) as demo:
732
  gr.Markdown("# 📚 Reading Exercise")
733
 
 
734
  session_state = gr.State({"user_id": None, "level": None, "group": 2, "used_passages": []})
735
 
736
  student_id_input = gr.Textbox(label="学生番号(必須)")
737
 
 
738
  group_input = gr.Radio(
739
  choices=[("Group 1", 1), ("Group 2", 2)],
740
  label="実験グループを選択",
 
749
 
750
  start_btn = gr.Button("スタート")
751
 
 
752
  title_display = gr.Markdown("**Title:** ", elem_classes=["title-card"])
753
 
754
  text_display = gr.Textbox(
 
849
  ]
850
  )
851
 
852
+ # ★追加:ログCSVダウンロード(パスワード必須)
853
+ gr.Markdown("## 🔐 管理者用:ログCSVダウンロード(パスワード必須)")
854
+ admin_password = gr.Textbox(label="Password", type="password")
855
+ download_btn = gr.Button("ログCSVを生成してダウンロード")
856
+ download_file = gr.File(label="Download log.csv")
857
 
858
+ download_btn.click(
859
+ fn=download_log_csv,
860
+ inputs=[admin_password],
861
+ outputs=[download_file]
862
+ )
863
+
864
+ demo.queue(max_size=64)
865
  demo.launch()