Toya0421 committed on
Commit
61756fa
·
verified ·
1 Parent(s): a82115b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -11
app.py CHANGED
@@ -60,23 +60,33 @@ def list_passage_files_sorted(passages_dir: str) -> list[tuple[int, str]]:
60
  items.sort(key=lambda x: x[0])
61
  return items
62
 
 
63
  def load_text(path: str) -> str:
64
  with open(path, "r", encoding="utf-8") as f:
65
  return f.read()
66
 
 
67
  # =========================
68
  # ★追加:プロンプト選択
69
  # =========================
70
  PROMPT_MODES = {
71
  "既存プロンプト": "legacy",
72
- "AIプロンプトセット(FRE制御)": "final_v1",
73
  }
74
 
 
 
 
 
 
 
 
 
75
  def build_prompt(text: str, target_level: int, prompt_mode: str) -> str:
76
  """
77
  prompt_mode:
78
  - "legacy": 元の1本プロンプト(現状踏襲)
79
- - "final_v1": あなたの最終プロンプトセットを1つに落とし込み
80
  """
81
  level_to_flesch = {1: 90, 2: 75, 3: 65, 4: 55, 5: 40}
82
  target_flesch = level_to_flesch[int(target_level)]
@@ -94,7 +104,6 @@ excluding the title, author name, source information, chapter number, annotation
94
  """
95
 
96
  # ---- final_v1: あなたの「最終プロンプトセット」をコードに落とし込み ----
97
- # Level番号(1..5) -> 学習者レベル名
98
  level_name = {
99
  1: "Level 90",
100
  2: "Level 75",
@@ -146,7 +155,6 @@ excluding the title, author name, source information, chapter number, annotation
146
 
147
  constraints_block = "\n".join(level_constraints)
148
 
149
- # 「Target FRE は補助。Constraintsが主」前提の骨格をそのままテンプレ化
150
  return f"""Rewrite the following text for learners at {level_name}.
151
 
152
  Requirements:
@@ -168,6 +176,7 @@ Text:
168
  >>>
169
  """
170
 
 
171
  # =========================
172
  # 書き換え(プロンプト切替)
173
  # =========================
@@ -196,22 +205,26 @@ def rewrite_level(text: str, target_level: int, prompt_mode: str) -> str:
196
 
197
  raise RuntimeError(f"Rewrite failed after retries: {last_error}")
198
 
 
199
  # =========================
200
  # 指標(FRE + 単語数)
201
  # =========================
202
  def count_words_english(text: str) -> int:
203
  return len(_word_re.findall(text))
204
 
 
205
  def compute_metrics(text: str) -> tuple[float, int]:
206
  fre = float(textstat.flesch_reading_ease(text))
207
  wc = count_words_english(text)
208
  return fre, wc
209
 
 
210
  # =========================
211
  # CSV追記
212
  # =========================
213
  _csv_lock = threading.Lock()
214
 
 
215
  def append_csv_row(row: dict):
216
  fieldnames = ["timestamp_jst", "Text#", "target_level", "flesch_reading_ease", "word_count", "rewritten_text"]
217
  with _csv_lock:
@@ -222,20 +235,24 @@ def append_csv_row(row: dict):
222
  w.writeheader()
223
  w.writerow({k: row.get(k, "") for k in fieldnames})
224
 
 
225
  # =========================
226
- # txt追記
227
  # =========================
228
  _txt_lock = threading.Lock()
229
 
 
230
  def append_rewrite_txt(
231
  text_id: int,
232
  target_level: int,
 
233
  fre: float,
234
  word_count: int,
235
  rewritten_text: str,
236
  ):
237
  ts = (datetime.utcnow() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")
238
- path = os.path.join(REWRITE_DIR, f"Text_{text_id}_{target_level}.txt")
 
239
 
240
  block = (
241
  f"# Text {text_id}\n"
@@ -253,6 +270,7 @@ def append_rewrite_txt(
253
  with open(path, "a", encoding="utf-8") as f:
254
  f.write(block)
255
 
 
256
  # =========================
257
  # 停止フラグ
258
  # =========================
@@ -261,10 +279,12 @@ def set_stop(flag: bool):
261
  with _stop_flag_lock:
262
  _stop_flag = flag
263
 
 
264
  def get_stop() -> bool:
265
  with _stop_flag_lock:
266
  return _stop_flag
267
 
 
268
  # =========================
269
  # UIロジック
270
  # =========================
@@ -272,6 +292,7 @@ def init_state():
272
  files = list_passage_files_sorted(PASSAGES_DIR)
273
  return {"files": files, "idx": 0}
274
 
 
275
  def start(level: int, prompt_mode: str):
276
  set_stop(False)
277
  st = init_state()
@@ -282,6 +303,7 @@ def start(level: int, prompt_mode: str):
282
  msg = f"準備完了: {total}件。次に処理するのは #Text {st['files'][0][0]} です。"
283
  return st, msg, "", "", "", None, None
284
 
 
285
  def run_one(level: int, prompt_mode: str, state: dict):
286
  set_stop(False)
287
 
@@ -312,6 +334,7 @@ def run_one(level: int, prompt_mode: str, state: dict):
312
  append_rewrite_txt(
313
  text_id=text_id,
314
  target_level=level,
 
315
  fre=fre,
316
  word_count=wc,
317
  rewritten_text=rewritten,
@@ -319,6 +342,7 @@ def run_one(level: int, prompt_mode: str, state: dict):
319
 
320
  state["idx"] = idx + 1
321
 
 
322
  header = (
323
  f"#Text {text_id}\n"
324
  f"Target Level: {level}\n"
@@ -326,11 +350,12 @@ def run_one(level: int, prompt_mode: str, state: dict):
326
  f"Flesch Reading Ease: {fre:.2f}\n"
327
  f"Word Count: {wc}\n"
328
  f"Saved CSV: {CSV_PATH}\n"
329
- f"Saved TXT: {os.path.join(REWRITE_DIR, f'Text_{text_id}_{level}.txt')}"
330
  )
331
  progress = f"{state['idx']} / {total}"
332
  return state, "1件処理しました。", header, progress, rewritten, None, None
333
 
 
334
  def run_all(level: int, prompt_mode: str, state: dict):
335
  set_stop(False)
336
 
@@ -369,11 +394,13 @@ def run_all(level: int, prompt_mode: str, state: dict):
369
  append_rewrite_txt(
370
  text_id=text_id,
371
  target_level=level,
 
372
  fre=fre,
373
  word_count=wc,
374
  rewritten_text=rewritten,
375
  )
376
 
 
377
  last_header = (
378
  f"#Text {text_id}\n"
379
  f"Target Level: {level}\n"
@@ -381,7 +408,7 @@ def run_all(level: int, prompt_mode: str, state: dict):
381
  f"Flesch Reading Ease: {fre:.2f}\n"
382
  f"Word Count: {wc}\n"
383
  f"Saved CSV: {CSV_PATH}\n"
384
- f"Saved TXT: {os.path.join(REWRITE_DIR, f'Text_{text_id}_{level}.txt')}"
385
  )
386
  last_text = rewritten
387
 
@@ -390,16 +417,19 @@ def run_all(level: int, prompt_mode: str, state: dict):
390
 
391
  return state, "全件処理が完了しました。", last_header, f"{idx} / {total}", last_text, None, None
392
 
 
393
  def stop():
394
  set_stop(True)
395
  return "停止要求を受け付けました(処理中の1件が終わったタイミングで止まります)。"
396
 
 
397
  def reset_csv():
398
  with _csv_lock:
399
  if os.path.exists(CSV_PATH):
400
  os.remove(CSV_PATH)
401
  return f"CSVを削除しました: {CSV_PATH}"
402
 
 
403
  def reset_rewrite_folder():
404
  removed = 0
405
  with _txt_lock:
@@ -411,13 +441,16 @@ def reset_rewrite_folder():
411
  pass
412
  return f"rewrite_passages の Text_*.txt を削除しました({removed}件): {REWRITE_DIR}"
413
 
 
414
  # =========================
415
  # ★追加:ダウンロード機能
416
  # =========================
417
  def list_generated_txt_files() -> list[str]:
418
- files = sorted(glob.glob(os.path.join(REWRITE_DIR, "Text_*_*.txt")))
 
419
  return [os.path.basename(f) for f in files]
420
 
 
421
  def build_single_txt_path(selected_name: str) -> str:
422
  """
423
  選択されたtxtをダウンロード用に返す(gr.Fileに渡す)
@@ -427,6 +460,7 @@ def build_single_txt_path(selected_name: str) -> str:
427
  raise FileNotFoundError(f"Not found: {path}")
428
  return path
429
 
 
430
  def build_zip_of_txts(mode: str, n_last: int) -> str:
431
  """
432
  mode:
@@ -461,15 +495,18 @@ def build_zip_of_txts(mode: str, n_last: int) -> str:
461
 
462
  return zip_path
463
 
 
464
  def download_csv() -> str:
465
  if not os.path.exists(CSV_PATH):
466
  raise FileNotFoundError("rewrite_scores.csv is not created yet.")
467
  return CSV_PATH
468
 
 
469
  def refresh_txt_dropdown() -> gr.Dropdown:
470
  names = list_generated_txt_files()
471
  return gr.Dropdown(choices=names, value=(names[-1] if names else None))
472
 
 
473
  # =========================
474
  # Gradio UI(Spaces向け)
475
  # =========================
@@ -506,7 +543,10 @@ with gr.Blocks() as demo:
506
 
507
  with gr.Row():
508
  refresh_btn = gr.Button("txt一覧を更新")
509
- txt_dropdown = gr.Dropdown(choices=list_generated_txt_files(), label="生成済み txt を選択(1個ダウンロード)")
 
 
 
510
  download_one_btn = gr.Button("選択したtxtをダウンロード")
511
  download_one_file = gr.File(label="Download (single txt)")
512
 
@@ -529,7 +569,6 @@ with gr.Blocks() as demo:
529
  gr.Markdown(f"📝 TXTフォルダ: `{REWRITE_DIR}`")
530
 
531
  # ---- 既存ボタン(prompt_mode を追加して渡すだけ)----
532
- # UI値(日本語ラベル) -> 内部キー("legacy"/"final_v1")に変換
533
  def _pm_key(pm_label: str) -> str:
534
  return PROMPT_MODES.get(pm_label, "legacy")
535
 
 
60
  items.sort(key=lambda x: x[0])
61
  return items
62
 
63
+
64
  def load_text(path: str) -> str:
65
  with open(path, "r", encoding="utf-8") as f:
66
  return f.read()
67
 
68
+
69
  # =========================
70
  # ★追加:プロンプト選択
71
  # =========================
72
  PROMPT_MODES = {
73
  "既存プロンプト": "legacy",
74
+ "最終プロンプトセット(FRE制御)": "final_v1",
75
  }
76
 
77
+
78
def prompt_tag_from_mode(prompt_mode: str) -> str:
    """Return a short, filename-safe tag for *prompt_mode*.

    The tag is meant to make download file names distinguishable by prompt:
    ``"legacy"`` maps to ``"legacy"``; every other value maps to ``"finalv1"``.
    """
    if prompt_mode == "legacy":
        return "legacy"
    return "finalv1"
83
+
84
+
85
  def build_prompt(text: str, target_level: int, prompt_mode: str) -> str:
86
  """
87
  prompt_mode:
88
  - "legacy": 元の1本プロンプト(現状踏襲)
89
+ - "final_v1": 最終プロンプトセット(Constraints重視)
90
  """
91
  level_to_flesch = {1: 90, 2: 75, 3: 65, 4: 55, 5: 40}
92
  target_flesch = level_to_flesch[int(target_level)]
 
104
  """
105
 
106
  # ---- final_v1: あなたの「最終プロンプトセット」をコードに落とし込み ----
 
107
  level_name = {
108
  1: "Level 90",
109
  2: "Level 75",
 
155
 
156
  constraints_block = "\n".join(level_constraints)
157
 
 
158
  return f"""Rewrite the following text for learners at {level_name}.
159
 
160
  Requirements:
 
176
  >>>
177
  """
178
 
179
+
180
  # =========================
181
  # 書き換え(プロンプト切替)
182
  # =========================
 
205
 
206
  raise RuntimeError(f"Rewrite failed after retries: {last_error}")
207
 
208
+
209
  # =========================
210
  # 指標(FRE + 単語数)
211
  # =========================
212
  def count_words_english(text: str) -> int:
213
  return len(_word_re.findall(text))
214
 
215
+
216
  def compute_metrics(text: str) -> tuple[float, int]:
217
  fre = float(textstat.flesch_reading_ease(text))
218
  wc = count_words_english(text)
219
  return fre, wc
220
 
221
+
222
  # =========================
223
  # CSV追記
224
  # =========================
225
  _csv_lock = threading.Lock()
226
 
227
+
228
  def append_csv_row(row: dict):
229
  fieldnames = ["timestamp_jst", "Text#", "target_level", "flesch_reading_ease", "word_count", "rewritten_text"]
230
  with _csv_lock:
 
235
  w.writeheader()
236
  w.writerow({k: row.get(k, "") for k in fieldnames})
237
 
238
+
239
  # =========================
240
+ # txt追記(★ファイル名に prompt_tag を付ける)
241
  # =========================
242
  _txt_lock = threading.Lock()
243
 
244
+
245
  def append_rewrite_txt(
246
  text_id: int,
247
  target_level: int,
248
+ prompt_mode: str, # ★追加
249
  fre: float,
250
  word_count: int,
251
  rewritten_text: str,
252
  ):
253
  ts = (datetime.utcnow() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")
254
+ tag = prompt_tag_from_mode(prompt_mode)
255
+ path = os.path.join(REWRITE_DIR, f"Text_{text_id}_{target_level}_{tag}.txt")
256
 
257
  block = (
258
  f"# Text {text_id}\n"
 
270
  with open(path, "a", encoding="utf-8") as f:
271
  f.write(block)
272
 
273
+
274
  # =========================
275
  # 停止フラグ
276
  # =========================
 
279
  with _stop_flag_lock:
280
  _stop_flag = flag
281
 
282
+
283
  def get_stop() -> bool:
284
  with _stop_flag_lock:
285
  return _stop_flag
286
 
287
+
288
  # =========================
289
  # UIロジック
290
  # =========================
 
292
  files = list_passage_files_sorted(PASSAGES_DIR)
293
  return {"files": files, "idx": 0}
294
 
295
+
296
  def start(level: int, prompt_mode: str):
297
  set_stop(False)
298
  st = init_state()
 
303
  msg = f"準備完了: {total}件。次に処理するのは #Text {st['files'][0][0]} です。"
304
  return st, msg, "", "", "", None, None
305
 
306
+
307
  def run_one(level: int, prompt_mode: str, state: dict):
308
  set_stop(False)
309
 
 
334
  append_rewrite_txt(
335
  text_id=text_id,
336
  target_level=level,
337
+ prompt_mode=prompt_mode, # ★追加
338
  fre=fre,
339
  word_count=wc,
340
  rewritten_text=rewritten,
 
342
 
343
  state["idx"] = idx + 1
344
 
345
+ tag = prompt_tag_from_mode(prompt_mode)
346
  header = (
347
  f"#Text {text_id}\n"
348
  f"Target Level: {level}\n"
 
350
  f"Flesch Reading Ease: {fre:.2f}\n"
351
  f"Word Count: {wc}\n"
352
  f"Saved CSV: {CSV_PATH}\n"
353
+ f"Saved TXT: {os.path.join(REWRITE_DIR, f'Text_{text_id}_{level}_{tag}.txt')}"
354
  )
355
  progress = f"{state['idx']} / {total}"
356
  return state, "1件処理しました。", header, progress, rewritten, None, None
357
 
358
+
359
  def run_all(level: int, prompt_mode: str, state: dict):
360
  set_stop(False)
361
 
 
394
  append_rewrite_txt(
395
  text_id=text_id,
396
  target_level=level,
397
+ prompt_mode=prompt_mode, # ★追加
398
  fre=fre,
399
  word_count=wc,
400
  rewritten_text=rewritten,
401
  )
402
 
403
+ tag = prompt_tag_from_mode(prompt_mode)
404
  last_header = (
405
  f"#Text {text_id}\n"
406
  f"Target Level: {level}\n"
 
408
  f"Flesch Reading Ease: {fre:.2f}\n"
409
  f"Word Count: {wc}\n"
410
  f"Saved CSV: {CSV_PATH}\n"
411
+ f"Saved TXT: {os.path.join(REWRITE_DIR, f'Text_{text_id}_{level}_{tag}.txt')}"
412
  )
413
  last_text = rewritten
414
 
 
417
 
418
  return state, "全件処理が完了しました。", last_header, f"{idx} / {total}", last_text, None, None
419
 
420
+
421
  def stop():
422
  set_stop(True)
423
  return "停止要求を受け付けました(処理中の1件が終わったタイミングで止まります)。"
424
 
425
+
426
  def reset_csv():
427
  with _csv_lock:
428
  if os.path.exists(CSV_PATH):
429
  os.remove(CSV_PATH)
430
  return f"CSVを削除しました: {CSV_PATH}"
431
 
432
+
433
  def reset_rewrite_folder():
434
  removed = 0
435
  with _txt_lock:
 
441
  pass
442
  return f"rewrite_passages の Text_*.txt を削除しました({removed}件): {REWRITE_DIR}"
443
 
444
+
445
  # =========================
446
  # ★追加:ダウンロード機能
447
  # =========================
448
  def list_generated_txt_files() -> list[str]:
449
+ # ★ファイル名が Text_{id}_{level}_{tag}.txt になったのでパターン更新
450
+ files = sorted(glob.glob(os.path.join(REWRITE_DIR, "Text_*_*_*.txt")))
451
  return [os.path.basename(f) for f in files]
452
 
453
+
454
  def build_single_txt_path(selected_name: str) -> str:
455
  """
456
  選択されたtxtをダウンロード用に返す(gr.Fileに渡す)
 
460
  raise FileNotFoundError(f"Not found: {path}")
461
  return path
462
 
463
+
464
  def build_zip_of_txts(mode: str, n_last: int) -> str:
465
  """
466
  mode:
 
495
 
496
  return zip_path
497
 
498
+
499
  def download_csv() -> str:
500
  if not os.path.exists(CSV_PATH):
501
  raise FileNotFoundError("rewrite_scores.csv is not created yet.")
502
  return CSV_PATH
503
 
504
+
505
  def refresh_txt_dropdown() -> gr.Dropdown:
506
  names = list_generated_txt_files()
507
  return gr.Dropdown(choices=names, value=(names[-1] if names else None))
508
 
509
+
510
  # =========================
511
  # Gradio UI(Spaces向け)
512
  # =========================
 
543
 
544
  with gr.Row():
545
  refresh_btn = gr.Button("txt一覧を更新")
546
+ txt_dropdown = gr.Dropdown(
547
+ choices=list_generated_txt_files(),
548
+ label="生成済み txt を選択(1個ダウンロード)"
549
+ )
550
  download_one_btn = gr.Button("選択したtxtをダウンロード")
551
  download_one_file = gr.File(label="Download (single txt)")
552
 
 
569
  gr.Markdown(f"📝 TXTフォルダ: `{REWRITE_DIR}`")
570
 
571
  # ---- 既存ボタン(prompt_mode を追加して渡すだけ)----
 
572
  def _pm_key(pm_label: str) -> str:
573
  return PROMPT_MODES.get(pm_label, "legacy")
574