Toya0421 commited on
Commit
09f51e0
·
verified ·
1 Parent(s): 47b3874

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -207
app.py CHANGED
@@ -4,6 +4,7 @@ import glob
4
  import csv
5
  import threading
6
  import shutil
 
7
  from datetime import datetime, timedelta
8
  from typing import Optional
9
 
@@ -11,6 +12,9 @@ import gradio as gr
11
  import textstat
12
  from openai import OpenAI
13
 
 
 
 
14
  # =========================
15
  # 設定(元コード踏襲)
16
  # =========================
@@ -33,6 +37,9 @@ os.makedirs(REWRITE_DIR, exist_ok=True)
33
 
34
  PASSAGES_DIR = os.getenv("PASSAGES_DIR", "passages")
35
 
 
 
 
36
  if not API_KEY:
37
  raise RuntimeError("API_KEY is not set (env: API_KEY)")
38
 
@@ -44,147 +51,77 @@ _rewrite_sem = threading.Semaphore(REWRITE_CONCURRENCY)
44
  _stop_flag_lock = threading.Lock()
45
  _stop_flag = False
46
 
 
47
  # =========================
48
- # passages の列挙
49
  # =========================
50
- def list_passage_files_sorted(passages_dir: str) -> list[tuple[int, str]]:
51
- pattern = os.path.join(passages_dir, "pg*.txt")
52
- files = glob.glob(pattern)
53
-
54
- items: list[tuple[int, str]] = []
55
- for fp in files:
56
- name = os.path.basename(fp)
57
- m = re.match(r"pg(\d+)\.txt$", name)
58
- if m:
59
- items.append((int(m.group(1)), fp))
60
- items.sort(key=lambda x: x[0])
61
- return items
62
-
63
-
64
  def load_text(path: str) -> str:
65
  with open(path, "r", encoding="utf-8") as f:
66
  return f.read()
67
 
68
 
 
 
 
 
 
 
 
 
 
 
69
  # =========================
70
- # ★追加:プロンプト選択
71
  # =========================
72
- PROMPT_MODES = {
73
- "既存プロンプト": "legacy",
74
- "AIプロンプトセット(FRE制御)": "final_v1",
75
- }
76
 
 
77
 
78
- def prompt_tag_from_mode(prompt_mode: str) -> str:
79
- """
80
- ダウンロードファイル名で判別できるように、ファイル名安全な短いタグにする
81
- """
82
- return "legacy" if prompt_mode == "legacy" else "finalv1"
 
83
 
 
84
 
85
- def build_prompt(text: str, target_level: int, prompt_mode: str) -> str:
86
- """
87
- prompt_mode:
88
- - "legacy": 元の1本プロンプト(現状踏襲)
89
- - "final_v1": 最終プロンプトセット(Constraints重視)
90
- """
91
- level_to_flesch = {1: 90, 2: 75, 3: 65, 4: 55, 5: 40}
92
- target_flesch = level_to_flesch[int(target_level)]
93
 
94
- if prompt_mode == "legacy":
95
- return f"""
96
- Rewrite the following text to achieve a Flesch Readability Score of {target_flesch}.
 
97
 
 
 
 
 
 
 
 
 
 
98
  Requirements:
99
  - Extract only the main text, excluding titles, author names, source information, chapter numbers, annotations, footers, etc.
100
  - Clearly separate sections such as chapters by inserting blank lines between them.
101
  - Maintain the original meaning faithfully; do not add or remove important information.
102
- - Rewrite into clear, modern, and natural English suitable for learners at the target level.
103
- Avoid archaic expressions, inverted syntax, and literary expressions typical of old picture books.
104
- Use simple, modern sentence structures and word order.
105
  - Output only the rewritten text. Do not include explanatory notes or other additional text.
106
  {text}
107
- """
108
-
109
- # ---- final_v1: あなたの「最終プロンプトセット」をコードに落とし込み ----
110
- level_name = {
111
- 1: "Level 90",
112
- 2: "Level 75",
113
- 3: "Level 65",
114
- 4: "Level 55",
115
- 5: "Level 40",
116
- }[int(target_level)]
117
-
118
- level_constraints = {
119
- 5: [
120
- "- Average sentence length: 18–25 words",
121
- "- Use abstract nouns (e.g., system, process, structure, responsibility)",
122
- "- Allow multi-syllable words",
123
- "- Use complex sentences with relative clauses",
124
- "- Prefer formal, academic tone",
125
- "- Avoid short sentences",
126
- ],
127
- 4: [
128
- "- Average sentence length: 14–18 words",
129
- "- Mix abstract and concrete vocabulary",
130
- "- Avoid very short sentences",
131
- "- Limit technical jargon",
132
- "- Use clear logical flow",
133
- ],
134
- 3: [
135
- "- Average sentence length: 10–14 words",
136
- "- Use common vocabulary",
137
- "- One main idea per sentence",
138
- "- Prefer active voice",
139
- "- Avoid abstract nominalizations",
140
- "- Split long sentences into shorter independent sentences",
141
- ],
142
- 2: [
143
- "- Average sentence length: 7–10 words",
144
- "- Use everyday vocabulary only",
145
- "- Avoid complex connectors (however, therefore, although)",
146
- "- Use short, simple sentences",
147
- "- Repeat key ideas using different simple wording",
148
- ],
149
- 1: [
150
- "- Average sentence length: 4–6 words",
151
- "- Use only basic vocabulary",
152
- "- One action or fact per sentence",
153
- "- Avoid abstract words completely",
154
- "- Avoid explanations and reasoning",
155
- "- Prefer many short declarative sentences",
156
- ],
157
- }[int(target_level)]
158
-
159
- constraints_block = "\n".join(level_constraints)
160
-
161
- return f"""Rewrite the following text for learners at {level_name}.
162
-
163
- Requirements:
164
- - Target Flesch Reading Ease: around {target_flesch} (±3)
165
- - Keep the original meaning.
166
- - Do not add new information.
167
- - Output only the rewritten text.
168
-
169
- Constraints:
170
- {constraints_block}
171
-
172
- Additional handling (same as current system):
173
- - Extract only the portions of the text that should be read as the main body, excluding title/author/source/chapter numbers/annotations/footers.
174
- - When outputting, make sure sections divided by chapters are clearly distinguishable by leaving a blank line between them.
175
-
176
- Text:
177
- <<<
178
- {text}
179
- >>>
180
- """
181
 
182
 
183
  # =========================
184
- # 書き換え(プロンプト切替)
185
  # =========================
186
- def rewrite_level(text: str, target_level: int, prompt_mode: str) -> str:
187
- prompt = build_prompt(text=text, target_level=target_level, prompt_mode=prompt_mode)
188
 
189
  max_tokens_candidates = [3000, 2000, 1500, 1000]
190
  last_error = None
@@ -223,7 +160,7 @@ def compute_metrics(text: str) -> tuple[float, int]:
223
 
224
 
225
  # =========================
226
- # CSV追記
227
  # =========================
228
  _csv_lock = threading.Lock()
229
 
@@ -240,7 +177,7 @@ def append_csv_row(row: dict):
240
 
241
 
242
  # =========================
243
- # txt追記(★ファイル名 prompt_tag を付ける)
244
  # =========================
245
  _txt_lock = threading.Lock()
246
 
@@ -248,19 +185,21 @@ _txt_lock = threading.Lock()
248
  def append_rewrite_txt(
249
  text_id: int,
250
  target_level: int,
251
- prompt_mode: str, # ★追加
252
- fre: float,
253
  word_count: int,
254
  rewritten_text: str,
255
  ):
256
  ts = (datetime.utcnow() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")
257
- tag = prompt_tag_from_mode(prompt_mode)
258
- path = os.path.join(REWRITE_DIR, f"Text_{text_id}_{target_level}_{tag}.txt")
 
259
 
260
  block = (
261
  f"# Text {text_id}\n"
262
  f"Target Level: {target_level}\n"
263
- f"Flesch Reading Ease: {fre:.2f}\n"
 
264
  f"Word Count: {word_count}\n"
265
  f"Timestamp (JST): {ts}\n"
266
  f"Model: {MODEL}\n\n"
@@ -292,44 +231,85 @@ def get_stop() -> bool:
292
  # UIロジック
293
  # =========================
294
  def init_state():
295
- files = list_passage_files_sorted(PASSAGES_DIR)
296
- return {"files": files, "idx": 0}
 
 
 
 
 
 
 
 
 
297
 
298
 
299
- def start(level: int, prompt_mode: str):
300
  set_stop(False)
301
  st = init_state()
302
- total = len(st["files"])
303
- if total == 0:
304
- return st, "passages/pg*.txt が見つかりません", "", "", "", None, None
305
 
306
- msg = f"準備完了: {total}件。次に処理するのは #Text {st['files'][0][0]} です。"
307
- return st, msg, "", "", "", None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
 
 
 
309
 
310
- def run_one(level: int, prompt_mode: str, state: dict):
 
311
  set_stop(False)
312
 
313
- files = state.get("files", [])
314
- idx = int(state.get("idx", 0))
315
- total = len(files)
 
 
 
316
 
317
- if idx >= total:
318
- return state, "全て処理済みです。", "", "", "", None, None
319
 
320
- text_id, path = files[idx]
321
  original = load_text(path)
322
 
323
- rewritten = rewrite_level(original, target_level=level, prompt_mode=prompt_mode)
324
- fre, wc = compute_metrics(rewritten)
325
 
 
326
  ts = (datetime.utcnow() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")
327
 
328
  append_csv_row({
329
  "timestamp_jst": ts,
330
  "Text#": text_id,
331
  "target_level": level,
332
- "flesch_reading_ease": f"{fre:.2f}",
333
  "word_count": wc,
334
  "rewritten_text": rewritten
335
  })
@@ -337,59 +317,65 @@ def run_one(level: int, prompt_mode: str, state: dict):
337
  append_rewrite_txt(
338
  text_id=text_id,
339
  target_level=level,
340
- prompt_mode=prompt_mode, # ★追加
341
- fre=fre,
342
  word_count=wc,
343
  rewritten_text=rewritten,
344
  )
345
 
346
- state["idx"] = idx + 1
 
 
347
 
348
- tag = prompt_tag_from_mode(prompt_mode)
349
  header = (
350
  f"#Text {text_id}\n"
351
  f"Target Level: {level}\n"
352
- f"Prompt Mode: {prompt_mode}\n"
353
- f"Flesch Reading Ease: {fre:.2f}\n"
354
  f"Word Count: {wc}\n"
355
  f"Saved CSV: {CSV_PATH}\n"
356
- f"Saved TXT: {os.path.join(REWRITE_DIR, f'Text_{text_id}_{level}_{tag}.txt')}"
357
  )
358
- progress = f"{state['idx']} / {total}"
 
359
  return state, "1件処理しました。", header, progress, rewritten, None, None
360
 
361
 
362
- def run_all(level: int, prompt_mode: str, state: dict):
363
  set_stop(False)
364
 
365
- files = state.get("files", [])
366
- idx = int(state.get("idx", 0))
367
- total = len(files)
368
 
369
- if idx >= total:
370
- return state, "全て処理済みです。", "", f"{idx} / {total}", "", None, None
371
 
372
  last_header = ""
373
  last_text = ""
374
 
375
- while idx < total:
376
  if get_stop():
377
- state["idx"] = idx
378
- return state, "停止しました。", last_header, f"{idx} / {total}", last_text, None, None
 
379
 
380
- text_id, path = files[idx]
 
381
  original = load_text(path)
382
 
383
- rewritten = rewrite_level(original, target_level=level, prompt_mode=prompt_mode)
384
- fre, wc = compute_metrics(rewritten)
385
 
 
386
  ts = (datetime.utcnow() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")
387
 
388
  append_csv_row({
389
  "timestamp_jst": ts,
390
  "Text#": text_id,
391
  "target_level": level,
392
- "flesch_reading_ease": f"{fre:.2f}",
393
  "word_count": wc,
394
  "rewritten_text": rewritten
395
  })
@@ -397,28 +383,29 @@ def run_all(level: int, prompt_mode: str, state: dict):
397
  append_rewrite_txt(
398
  text_id=text_id,
399
  target_level=level,
400
- prompt_mode=prompt_mode, # ★追加
401
- fre=fre,
402
  word_count=wc,
403
  rewritten_text=rewritten,
404
  )
405
 
406
- tag = prompt_tag_from_mode(prompt_mode)
 
 
 
 
407
  last_header = (
408
  f"#Text {text_id}\n"
409
  f"Target Level: {level}\n"
410
- f"Prompt Mode: {prompt_mode}\n"
411
- f"Flesch Reading Ease: {fre:.2f}\n"
412
  f"Word Count: {wc}\n"
413
  f"Saved CSV: {CSV_PATH}\n"
414
- f"Saved TXT: {os.path.join(REWRITE_DIR, f'Text_{text_id}_{level}_{tag}.txt')}"
415
  )
416
  last_text = rewritten
417
 
418
- idx += 1
419
- state["idx"] = idx
420
-
421
- return state, "全件処理が完了しました。", last_header, f"{idx} / {total}", last_text, None, None
422
 
423
 
424
  def stop():
@@ -446,18 +433,14 @@ def reset_rewrite_folder():
446
 
447
 
448
  # =========================
449
- # ★追加:ダウンロード機能
450
  # =========================
451
  def list_generated_txt_files() -> list[str]:
452
- # ★ファイル名が Text_{id}_{level}_{tag}.txt になったのでパターン更新
453
- files = sorted(glob.glob(os.path.join(REWRITE_DIR, "Text_*_*_*.txt")))
454
  return [os.path.basename(f) for f in files]
455
 
456
 
457
  def build_single_txt_path(selected_name: str) -> str:
458
- """
459
- 選択されたtxtをダウンロード用に返す(gr.Fileに渡す)
460
- """
461
  path = os.path.join(REWRITE_DIR, selected_name)
462
  if not os.path.exists(path):
463
  raise FileNotFoundError(f"Not found: {path}")
@@ -465,11 +448,6 @@ def build_single_txt_path(selected_name: str) -> str:
465
 
466
 
467
  def build_zip_of_txts(mode: str, n_last: int) -> str:
468
- """
469
- mode:
470
- - 'all' : 全txt
471
- - 'last_n' : 最新N個
472
- """
473
  files = sorted(glob.glob(os.path.join(REWRITE_DIR, "Text_*.txt")), key=os.path.getmtime)
474
  if not files:
475
  raise FileNotFoundError("No generated txt files yet.")
@@ -477,12 +455,10 @@ def build_zip_of_txts(mode: str, n_last: int) -> str:
477
  if mode == "last_n":
478
  files = files[-max(1, int(n_last)):]
479
 
480
- # ZIPは毎回作り直す
481
  zip_path = os.path.join(OUT_DIR, "rewrite_passages.zip")
482
  if os.path.exists(zip_path):
483
  os.remove(zip_path)
484
 
485
- # 一時ディレクトリに集めてからzip(shutil.make_archiveはディレクトリ単位)
486
  tmp_dir = os.path.join(OUT_DIR, "_zip_tmp")
487
  if os.path.exists(tmp_dir):
488
  shutil.rmtree(tmp_dir)
@@ -492,8 +468,6 @@ def build_zip_of_txts(mode: str, n_last: int) -> str:
492
  shutil.copy(fp, os.path.join(tmp_dir, os.path.basename(fp)))
493
 
494
  shutil.make_archive(os.path.join(OUT_DIR, "rewrite_passages"), "zip", tmp_dir)
495
-
496
- # 後片付け
497
  shutil.rmtree(tmp_dir, ignore_errors=True)
498
 
499
  return zip_path
@@ -518,21 +492,15 @@ with gr.Blocks() as demo:
518
 
519
  state = gr.State(init_state())
520
 
521
- # ★追加:プロンプト選択UI
522
- prompt_mode_ui = gr.Dropdown(
523
- choices=list(PROMPT_MODES.keys()),
524
- value="既存(シンプル)",
525
- label="Prompt Mode(試すプロンプトを選択)"
526
- )
527
-
528
  level = gr.Dropdown(choices=[1, 2, 3, 4, 5], value=1, label="Target Level (1..5)")
529
  status = gr.Textbox(label="Status", interactive=False)
530
- header = gr.Textbox(label="Result Header (#Text / FRE / Words)", lines=7, interactive=False)
531
  progress = gr.Textbox(label="Progress", interactive=False)
532
  output_text = gr.Textbox(label="Rewritten Text", lines=18, interactive=False)
533
 
534
  with gr.Row():
535
- start_btn = gr.Button("開始(ファイル読み込み)")
536
  one_btn = gr.Button("次へ(1件処理)")
537
  all_btn = gr.Button("全件実行(残りを処理)")
538
  stop_btn = gr.Button("停止")
@@ -567,27 +535,24 @@ with gr.Blocks() as demo:
567
  download_csv_btn = gr.Button("CSV(rewrite_scores.csv)をダウンロード")
568
  download_csv_file = gr.File(label="Download (csv)")
569
 
570
- # 既存の表示(参考)
571
  gr.Markdown(f"📄 CSVパス: `{CSV_PATH}`")
572
  gr.Markdown(f"📝 TXTフォルダ: `{REWRITE_DIR}`")
 
573
 
574
- # ---- 既存ボタン(prompt_mode を追加て渡すだけ)----
575
- def _pm_key(pm_label: str) -> str:
576
- return PROMPT_MODES.get(pm_label, "legacy")
577
-
578
  start_btn.click(
579
- fn=lambda lvl, pm: start(lvl, _pm_key(pm)),
580
- inputs=[level, prompt_mode_ui],
581
  outputs=[state, status, header, progress, output_text, download_one_file, download_zip_file]
582
  )
583
  one_btn.click(
584
- fn=lambda lvl, pm, st: run_one(lvl, _pm_key(pm), st),
585
- inputs=[level, prompt_mode_ui, state],
586
  outputs=[state, status, header, progress, output_text, download_one_file, download_zip_file]
587
  )
588
  all_btn.click(
589
- fn=lambda lvl, pm, st: run_all(lvl, _pm_key(pm), st),
590
- inputs=[level, prompt_mode_ui, state],
591
  outputs=[state, status, header, progress, output_text, download_one_file, download_zip_file]
592
  )
593
 
 
4
  import csv
5
  import threading
6
  import shutil
7
+ import random
8
  from datetime import datetime, timedelta
9
  from typing import Optional
10
 
 
12
  import textstat
13
  from openai import OpenAI
14
 
15
+ # ★追加:xlsx読込
16
+ import pandas as pd
17
+
18
  # =========================
19
  # 設定(元コード踏襲)
20
  # =========================
 
37
 
38
  PASSAGES_DIR = os.getenv("PASSAGES_DIR", "passages")
39
 
40
+ # ★追加:Excel
41
+ PASSAGE_INFO_XLSX = os.getenv("PASSAGE_INFO_XLSX", "passage_information.xlsx")
42
+
43
  if not API_KEY:
44
  raise RuntimeError("API_KEY is not set (env: API_KEY)")
45
 
 
51
  _stop_flag_lock = threading.Lock()
52
  _stop_flag = False
53
 
54
+
55
  # =========================
56
+ # ユーティリティ
57
  # =========================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  def load_text(path: str) -> str:
59
  with open(path, "r", encoding="utf-8") as f:
60
  return f.read()
61
 
62
 
63
+ def target_flesch_from_level(level: int) -> int:
64
+ level_to_flesch = {1: 90, 2: 75, 3: 65, 4: 55, 5: 40}
65
+ return level_to_flesch[int(level)]
66
+
67
+
68
+ def passage_path_from_text_id(text_id: int, passages_dir: str) -> str:
69
+ # 元コードの pg{ID}.txt に合わせる
70
+ return os.path.join(passages_dir, f"pg{text_id}.txt")
71
+
72
+
73
  # =========================
74
+ # Excel読み込み(Text# と flesch_score)
75
  # =========================
76
+ def load_passage_info(xlsx_path: str) -> pd.DataFrame:
77
+ if not os.path.exists(xlsx_path):
78
+ raise FileNotFoundError(f"Not found: {xlsx_path}")
 
79
 
80
+ df = pd.read_excel(xlsx_path)
81
 
82
+ # 必須列チェック(スペースや大小の揺れを許容)
83
+ cols = {c.strip(): c for c in df.columns}
84
+ if "Text#" not in cols or "flesch_score" not in cols:
85
+ raise ValueError(
86
+ "passage_information.xlsx must contain columns: 'Text#' and 'flesch_score'"
87
+ )
88
 
89
+ df = df.rename(columns={cols["Text#"]: "Text#", cols["flesch_score"]: "flesch_score"})
90
 
91
+ # 型整形
92
+ df["Text#"] = pd.to_numeric(df["Text#"], errors="coerce").astype("Int64")
93
+ df["flesch_score"] = pd.to_numeric(df["flesch_score"], errors="coerce")
 
 
 
 
 
94
 
95
+ df = df.dropna(subset=["Text#", "flesch_score"]).copy()
96
+ df["Text#"] = df["Text#"].astype(int)
97
+ df["flesch_score"] = df["flesch_score"].astype(float)
98
+ return df
99
 
100
+
101
+ # =========================
102
+ # プロンプト(★1本固定:元の1つ目だけ)
103
+ # =========================
104
+ def build_prompt(text: str, target_level: int) -> str:
105
+ target_flesch = target_flesch_from_level(target_level)
106
+
107
+ return f"""
108
+ Rewrite the following text to achieve a Flesch Readability Score of {target_flesch}.
109
  Requirements:
110
  - Extract only the main text, excluding titles, author names, source information, chapter numbers, annotations, footers, etc.
111
  - Clearly separate sections such as chapters by inserting blank lines between them.
112
  - Maintain the original meaning faithfully; do not add or remove important information.
113
+ - Rewrite the text in clear, modern English suitable for learners, consistently eliminating archaic, outdated, or literary expressions and unnatural syntax common in older texts, regardless of the target level.
114
+ - Key points to keep in mind when lowering reading comprehension difficulty include keeping sentences short, using familiar vocabulary, prioritizing simple sentence structures, avoiding jargon or providing clear explanations when used, and minimizing figurative language, idioms, and other expressions where meaning cannot be directly inferred from the words themselves.
 
115
  - Output only the rewritten text. Do not include explanatory notes or other additional text.
116
  {text}
117
+ """.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
 
120
  # =========================
121
+ # 書き換え
122
  # =========================
123
+ def rewrite_level(text: str, target_level: int) -> str:
124
+ prompt = build_prompt(text=text, target_level=target_level)
125
 
126
  max_tokens_candidates = [3000, 2000, 1500, 1000]
127
  last_error = None
 
160
 
161
 
162
  # =========================
163
+ # CSV追記(元コード踏襲:列は増やさない)
164
  # =========================
165
  _csv_lock = threading.Lock()
166
 
 
177
 
178
 
179
  # =========================
180
+ # txt追記(★ファイル名を Text# と Target level が分かる形に)
181
  # =========================
182
  _txt_lock = threading.Lock()
183
 
 
185
  def append_rewrite_txt(
186
  text_id: int,
187
  target_level: int,
188
+ original_fre: float, # ★追加:元教材スコア
189
+ rewritten_fre: float,
190
  word_count: int,
191
  rewritten_text: str,
192
  ):
193
  ts = (datetime.utcnow() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")
194
+
195
+ # ★要件:#Text Target level がわかるファイル名
196
+ path = os.path.join(REWRITE_DIR, f"Text_{text_id}_Level_{target_level}.txt")
197
 
198
  block = (
199
  f"# Text {text_id}\n"
200
  f"Target Level: {target_level}\n"
201
+ f"Original Flesch (from passage_information.xlsx): {original_fre:.2f}\n"
202
+ f"Rewritten Flesch Reading Ease: {rewritten_fre:.2f}\n"
203
  f"Word Count: {word_count}\n"
204
  f"Timestamp (JST): {ts}\n"
205
  f"Model: {MODEL}\n\n"
 
231
  # UIロジック
232
  # =========================
233
  def init_state():
234
+ # ★Excelから Text#/flesch_score 読み込み
235
+ df = load_passage_info(PASSAGE_INFO_XLSX)
236
+
237
+ # stateに保持(元コードの構造を壊さない範囲で追加)
238
+ return {
239
+ "df": df,
240
+ "eligible_ids": [], # start()でlevelに応じてセット
241
+ "orig_score_map": {}, # text_id -> original fre
242
+ "done": 0,
243
+ "total": 0,
244
+ }
245
 
246
 
247
+ def start(level: int):
248
  set_stop(False)
249
  st = init_state()
 
 
 
250
 
251
+ target_flesch = target_flesch_from_level(level)
252
+
253
+ df = st["df"]
254
+ # ★要件:Targetよりスコアが低い=難しいもの
255
+ eligible = df[df["flesch_score"] < float(target_flesch)].copy()
256
+
257
+ # passages/pg{Text#}.txt が存在するものだけに絞る(無いと失敗するため)
258
+ ids = []
259
+ score_map = {}
260
+ for _, r in eligible.iterrows():
261
+ tid = int(r["Text#"])
262
+ fp = passage_path_from_text_id(tid, PASSAGES_DIR)
263
+ if os.path.exists(fp):
264
+ ids.append(tid)
265
+ score_map[tid] = float(r["flesch_score"])
266
+
267
+ if not ids:
268
+ msg = (
269
+ f"条件に合う教材がありません。\n"
270
+ f"- Target FRE: {target_flesch}\n"
271
+ f"- 条件: flesch_score < {target_flesch}\n"
272
+ f"- かつ {PASSAGES_DIR}/pg{{Text#}}.txt が存在"
273
+ )
274
+ return st, msg, "", "", "", None, None
275
+
276
+ random.shuffle(ids)
277
+ st["eligible_ids"] = ids
278
+ st["orig_score_map"] = score_map
279
+ st["done"] = 0
280
+ st["total"] = len(ids)
281
 
282
+ msg = f"準備完了: 条件に合う {st['total']} 件からランダムに処理します(Target FRE={target_flesch})。"
283
+ return st, msg, "", f"0 / {st['total']}", "", None, None
284
 
285
+
286
+ def run_one(level: int, state: dict):
287
  set_stop(False)
288
 
289
+ ids = state.get("eligible_ids", [])
290
+ done = int(state.get("done", 0))
291
+ total = int(state.get("total", 0))
292
+
293
+ if not ids:
294
+ return state, "全て処理済みです。", "", f"{done} / {total}", "", None, None
295
 
296
+ # ★ランダム(ただし未処理リストから先頭を取る:startでshuffle済み)
297
+ text_id = ids.pop(0)
298
 
299
+ path = passage_path_from_text_id(text_id, PASSAGES_DIR)
300
  original = load_text(path)
301
 
302
+ rewritten = rewrite_level(original, target_level=level)
303
+ rewritten_fre, wc = compute_metrics(rewritten)
304
 
305
+ original_fre = float(state.get("orig_score_map", {}).get(text_id, float("nan")))
306
  ts = (datetime.utcnow() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")
307
 
308
  append_csv_row({
309
  "timestamp_jst": ts,
310
  "Text#": text_id,
311
  "target_level": level,
312
+ "flesch_reading_ease": f"{rewritten_fre:.2f}",
313
  "word_count": wc,
314
  "rewritten_text": rewritten
315
  })
 
317
  append_rewrite_txt(
318
  text_id=text_id,
319
  target_level=level,
320
+ original_fre=original_fre,
321
+ rewritten_fre=rewritten_fre,
322
  word_count=wc,
323
  rewritten_text=rewritten,
324
  )
325
 
326
+ done += 1
327
+ state["done"] = done
328
+ state["eligible_ids"] = ids
329
 
330
+ saved_txt = os.path.join(REWRITE_DIR, f"Text_{text_id}_Level_{level}.txt")
331
  header = (
332
  f"#Text {text_id}\n"
333
  f"Target Level: {level}\n"
334
+ f"Original Flesch (xlsx): {original_fre:.2f}\n"
335
+ f"Rewritten Flesch Reading Ease: {rewritten_fre:.2f}\n"
336
  f"Word Count: {wc}\n"
337
  f"Saved CSV: {CSV_PATH}\n"
338
+ f"Saved TXT: {saved_txt}"
339
  )
340
+
341
+ progress = f"{done} / {total}"
342
  return state, "1件処理しました。", header, progress, rewritten, None, None
343
 
344
 
345
+ def run_all(level: int, state: dict):
346
  set_stop(False)
347
 
348
+ ids = state.get("eligible_ids", [])
349
+ done = int(state.get("done", 0))
350
+ total = int(state.get("total", 0))
351
 
352
+ if not ids:
353
+ return state, "全て処理済みです。", "", f"{done} / {total}", "", None, None
354
 
355
  last_header = ""
356
  last_text = ""
357
 
358
+ while ids:
359
  if get_stop():
360
+ state["eligible_ids"] = ids
361
+ state["done"] = done
362
+ return state, "停止しました。", last_header, f"{done} / {total}", last_text, None, None
363
 
364
+ text_id = ids.pop(0)
365
+ path = passage_path_from_text_id(text_id, PASSAGES_DIR)
366
  original = load_text(path)
367
 
368
+ rewritten = rewrite_level(original, target_level=level)
369
+ rewritten_fre, wc = compute_metrics(rewritten)
370
 
371
+ original_fre = float(state.get("orig_score_map", {}).get(text_id, float("nan")))
372
  ts = (datetime.utcnow() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")
373
 
374
  append_csv_row({
375
  "timestamp_jst": ts,
376
  "Text#": text_id,
377
  "target_level": level,
378
+ "flesch_reading_ease": f"{rewritten_fre:.2f}",
379
  "word_count": wc,
380
  "rewritten_text": rewritten
381
  })
 
383
  append_rewrite_txt(
384
  text_id=text_id,
385
  target_level=level,
386
+ original_fre=original_fre,
387
+ rewritten_fre=rewritten_fre,
388
  word_count=wc,
389
  rewritten_text=rewritten,
390
  )
391
 
392
+ done += 1
393
+ state["done"] = done
394
+ state["eligible_ids"] = ids
395
+
396
+ saved_txt = os.path.join(REWRITE_DIR, f"Text_{text_id}_Level_{level}.txt")
397
  last_header = (
398
  f"#Text {text_id}\n"
399
  f"Target Level: {level}\n"
400
+ f"Original Flesch (xlsx): {original_fre:.2f}\n"
401
+ f"Rewritten Flesch Reading Ease: {rewritten_fre:.2f}\n"
402
  f"Word Count: {wc}\n"
403
  f"Saved CSV: {CSV_PATH}\n"
404
+ f"Saved TXT: {saved_txt}"
405
  )
406
  last_text = rewritten
407
 
408
+ return state, "全件処理が完了しました。", last_header, f"{done} / {total}", last_text, None, None
 
 
 
409
 
410
 
411
  def stop():
 
433
 
434
 
435
  # =========================
436
+ # ダウンロード機能(ファイル名パターン更新)
437
  # =========================
438
  def list_generated_txt_files() -> list[str]:
439
+ files = sorted(glob.glob(os.path.join(REWRITE_DIR, "Text_*_Level_*.txt")))
 
440
  return [os.path.basename(f) for f in files]
441
 
442
 
443
  def build_single_txt_path(selected_name: str) -> str:
 
 
 
444
  path = os.path.join(REWRITE_DIR, selected_name)
445
  if not os.path.exists(path):
446
  raise FileNotFoundError(f"Not found: {path}")
 
448
 
449
 
450
  def build_zip_of_txts(mode: str, n_last: int) -> str:
 
 
 
 
 
451
  files = sorted(glob.glob(os.path.join(REWRITE_DIR, "Text_*.txt")), key=os.path.getmtime)
452
  if not files:
453
  raise FileNotFoundError("No generated txt files yet.")
 
455
  if mode == "last_n":
456
  files = files[-max(1, int(n_last)):]
457
 
 
458
  zip_path = os.path.join(OUT_DIR, "rewrite_passages.zip")
459
  if os.path.exists(zip_path):
460
  os.remove(zip_path)
461
 
 
462
  tmp_dir = os.path.join(OUT_DIR, "_zip_tmp")
463
  if os.path.exists(tmp_dir):
464
  shutil.rmtree(tmp_dir)
 
468
  shutil.copy(fp, os.path.join(tmp_dir, os.path.basename(fp)))
469
 
470
  shutil.make_archive(os.path.join(OUT_DIR, "rewrite_passages"), "zip", tmp_dir)
 
 
471
  shutil.rmtree(tmp_dir, ignore_errors=True)
472
 
473
  return zip_path
 
492
 
493
  state = gr.State(init_state())
494
 
495
+ # ★変更:プロンプト選択UIを削除(1本固定)
 
 
 
 
 
 
496
  level = gr.Dropdown(choices=[1, 2, 3, 4, 5], value=1, label="Target Level (1..5)")
497
  status = gr.Textbox(label="Status", interactive=False)
498
+ header = gr.Textbox(label="Result Header (#Text / FRE / Words)", lines=8, interactive=False)
499
  progress = gr.Textbox(label="Progress", interactive=False)
500
  output_text = gr.Textbox(label="Rewritten Text", lines=18, interactive=False)
501
 
502
  with gr.Row():
503
+ start_btn = gr.Button("開始(Excel読み込み+候補抽出)")
504
  one_btn = gr.Button("次へ(1件処理)")
505
  all_btn = gr.Button("全件実行(残りを処理)")
506
  stop_btn = gr.Button("停止")
 
535
  download_csv_btn = gr.Button("CSV(rewrite_scores.csv)をダウンロード")
536
  download_csv_file = gr.File(label="Download (csv)")
537
 
 
538
  gr.Markdown(f"📄 CSVパス: `{CSV_PATH}`")
539
  gr.Markdown(f"📝 TXTフォルダ: `{REWRITE_DIR}`")
540
+ gr.Markdown(f"📘 Excel: `{PASSAGE_INFO_XLSX}` / passages dir: `{PASSAGES_DIR}`")
541
 
542
+ # ---- 既存ボタン(prompt_mode なし)----
 
 
 
543
  start_btn.click(
544
+ fn=start,
545
+ inputs=[level],
546
  outputs=[state, status, header, progress, output_text, download_one_file, download_zip_file]
547
  )
548
  one_btn.click(
549
+ fn=run_one,
550
+ inputs=[level, state],
551
  outputs=[state, status, header, progress, output_text, download_one_file, download_zip_file]
552
  )
553
  all_btn.click(
554
+ fn=run_all,
555
+ inputs=[level, state],
556
  outputs=[state, status, header, progress, output_text, download_one_file, download_zip_file]
557
  )
558