Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -65,13 +65,24 @@ def load_text(path: str) -> str:
|
|
| 65 |
return f.read()
|
| 66 |
|
| 67 |
# =========================
|
| 68 |
-
#
|
| 69 |
# =========================
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
level_to_flesch = {1: 90, 2: 75, 3: 65, 4: 55, 5: 40}
|
| 72 |
target_flesch = level_to_flesch[int(target_level)]
|
| 73 |
|
| 74 |
-
|
|
|
|
| 75 |
Rewrite the following passage so it fits about {target_flesch} Flesch Reading Ease Score
|
| 76 |
- Extract only the portions of the text that should be read as the main body,
|
| 77 |
excluding the title, author name, source information, chapter number, annotations, and footers.
|
|
@@ -82,6 +93,87 @@ excluding the title, author name, source information, chapter number, annotation
|
|
| 82 |
{text}
|
| 83 |
"""
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
max_tokens_candidates = [3000, 2000, 1500, 1000]
|
| 86 |
last_error = None
|
| 87 |
|
|
@@ -180,7 +272,7 @@ def init_state():
|
|
| 180 |
files = list_passage_files_sorted(PASSAGES_DIR)
|
| 181 |
return {"files": files, "idx": 0}
|
| 182 |
|
| 183 |
-
def start(level: int):
|
| 184 |
set_stop(False)
|
| 185 |
st = init_state()
|
| 186 |
total = len(st["files"])
|
|
@@ -190,7 +282,7 @@ def start(level: int):
|
|
| 190 |
msg = f"準備完了: {total}件。次に処理するのは #Text {st['files'][0][0]} です。"
|
| 191 |
return st, msg, "", "", "", None, None
|
| 192 |
|
| 193 |
-
def run_one(level: int, state: dict):
|
| 194 |
set_stop(False)
|
| 195 |
|
| 196 |
files = state.get("files", [])
|
|
@@ -203,7 +295,7 @@ def run_one(level: int, state: dict):
|
|
| 203 |
text_id, path = files[idx]
|
| 204 |
original = load_text(path)
|
| 205 |
|
| 206 |
-
rewritten = rewrite_level(original, target_level=level)
|
| 207 |
fre, wc = compute_metrics(rewritten)
|
| 208 |
|
| 209 |
ts = (datetime.utcnow() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")
|
|
@@ -230,6 +322,7 @@ def run_one(level: int, state: dict):
|
|
| 230 |
header = (
|
| 231 |
f"#Text {text_id}\n"
|
| 232 |
f"Target Level: {level}\n"
|
|
|
|
| 233 |
f"Flesch Reading Ease: {fre:.2f}\n"
|
| 234 |
f"Word Count: {wc}\n"
|
| 235 |
f"Saved CSV: {CSV_PATH}\n"
|
|
@@ -238,7 +331,7 @@ def run_one(level: int, state: dict):
|
|
| 238 |
progress = f"{state['idx']} / {total}"
|
| 239 |
return state, "1件処理しました。", header, progress, rewritten, None, None
|
| 240 |
|
| 241 |
-
def run_all(level: int, state: dict):
|
| 242 |
set_stop(False)
|
| 243 |
|
| 244 |
files = state.get("files", [])
|
|
@@ -259,7 +352,7 @@ def run_all(level: int, state: dict):
|
|
| 259 |
text_id, path = files[idx]
|
| 260 |
original = load_text(path)
|
| 261 |
|
| 262 |
-
rewritten = rewrite_level(original, target_level=level)
|
| 263 |
fre, wc = compute_metrics(rewritten)
|
| 264 |
|
| 265 |
ts = (datetime.utcnow() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")
|
|
@@ -284,6 +377,7 @@ def run_all(level: int, state: dict):
|
|
| 284 |
last_header = (
|
| 285 |
f"#Text {text_id}\n"
|
| 286 |
f"Target Level: {level}\n"
|
|
|
|
| 287 |
f"Flesch Reading Ease: {fre:.2f}\n"
|
| 288 |
f"Word Count: {wc}\n"
|
| 289 |
f"Saved CSV: {CSV_PATH}\n"
|
|
@@ -384,9 +478,16 @@ with gr.Blocks() as demo:
|
|
| 384 |
|
| 385 |
state = gr.State(init_state())
|
| 386 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 387 |
level = gr.Dropdown(choices=[1, 2, 3, 4, 5], value=1, label="Target Level (1..5)")
|
| 388 |
status = gr.Textbox(label="Status", interactive=False)
|
| 389 |
-
header = gr.Textbox(label="Result Header (#Text / FRE / Words)", lines=
|
| 390 |
progress = gr.Textbox(label="Progress", interactive=False)
|
| 391 |
output_text = gr.Textbox(label="Rewritten Text", lines=18, interactive=False)
|
| 392 |
|
|
@@ -427,10 +528,27 @@ with gr.Blocks() as demo:
|
|
| 427 |
gr.Markdown(f"📄 CSVパス: `{CSV_PATH}`")
|
| 428 |
gr.Markdown(f"📝 TXTフォルダ: `{REWRITE_DIR}`")
|
| 429 |
|
| 430 |
-
# ---- 既存ボタン ----
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
stop_btn.click(fn=stop, inputs=[], outputs=[status])
|
| 435 |
reset_btn.click(fn=reset_csv, inputs=[], outputs=[status])
|
| 436 |
reset_txt_btn.click(fn=reset_rewrite_folder, inputs=[], outputs=[status])
|
|
|
|
| 65 |
return f.read()
|
| 66 |
|
| 67 |
# =========================
|
| 68 |
+
# ★追加:プロンプト選択
|
| 69 |
# =========================
|
| 70 |
+
# Prompt selection table: maps the label shown in the UI dropdown to the
# internal prompt-mode key ("legacy" or "final_v1").
PROMPT_MODES = {
    "既存(シンプル)": "legacy",
    "最終プロンプトセット(FRE制御)": "final_v1",
}
|
| 74 |
+
|
| 75 |
+
def build_prompt(text: str, target_level: int, prompt_mode: str) -> str:
|
| 76 |
+
"""
|
| 77 |
+
prompt_mode:
|
| 78 |
+
- "legacy": 元の1本プロンプト(現状踏襲)
|
| 79 |
+
- "final_v1": あなたの最終プロンプトセットを1つに落とし込み
|
| 80 |
+
"""
|
| 81 |
level_to_flesch = {1: 90, 2: 75, 3: 65, 4: 55, 5: 40}
|
| 82 |
target_flesch = level_to_flesch[int(target_level)]
|
| 83 |
|
| 84 |
+
if prompt_mode == "legacy":
|
| 85 |
+
return f"""
|
| 86 |
Rewrite the following passage so it fits about {target_flesch} Flesch Reading Ease Score
|
| 87 |
- Extract only the portions of the text that should be read as the main body,
|
| 88 |
excluding the title, author name, source information, chapter number, annotations, and footers.
|
|
|
|
| 93 |
{text}
|
| 94 |
"""
|
| 95 |
|
| 96 |
+
# ---- final_v1: あなたの「最終プロンプトセット」をコードに落とし込み ----
|
| 97 |
+
# Level番号(1..5) -> 学習者レベル名
|
| 98 |
+
level_name = {
|
| 99 |
+
1: "Level 90",
|
| 100 |
+
2: "Level 75",
|
| 101 |
+
3: "Level 65",
|
| 102 |
+
4: "Level 55",
|
| 103 |
+
5: "Level 40",
|
| 104 |
+
}[int(target_level)]
|
| 105 |
+
|
| 106 |
+
level_constraints = {
|
| 107 |
+
5: [
|
| 108 |
+
"- Average sentence length: 18–25 words",
|
| 109 |
+
"- Use abstract nouns (e.g., system, process, structure, responsibility)",
|
| 110 |
+
"- Allow multi-syllable words",
|
| 111 |
+
"- Use complex sentences with relative clauses",
|
| 112 |
+
"- Prefer formal, academic tone",
|
| 113 |
+
"- Avoid short sentences",
|
| 114 |
+
],
|
| 115 |
+
4: [
|
| 116 |
+
"- Average sentence length: 14–18 words",
|
| 117 |
+
"- Mix abstract and concrete vocabulary",
|
| 118 |
+
"- Avoid very short sentences",
|
| 119 |
+
"- Limit technical jargon",
|
| 120 |
+
"- Use clear logical flow",
|
| 121 |
+
],
|
| 122 |
+
3: [
|
| 123 |
+
"- Average sentence length: 10–14 words",
|
| 124 |
+
"- Use common vocabulary",
|
| 125 |
+
"- One main idea per sentence",
|
| 126 |
+
"- Prefer active voice",
|
| 127 |
+
"- Avoid abstract nominalizations",
|
| 128 |
+
"- Split long sentences into shorter independent sentences",
|
| 129 |
+
],
|
| 130 |
+
2: [
|
| 131 |
+
"- Average sentence length: 7–10 words",
|
| 132 |
+
"- Use everyday vocabulary only",
|
| 133 |
+
"- Avoid complex connectors (however, therefore, although)",
|
| 134 |
+
"- Use short, simple sentences",
|
| 135 |
+
"- Repeat key ideas using different simple wording",
|
| 136 |
+
],
|
| 137 |
+
1: [
|
| 138 |
+
"- Average sentence length: 4–6 words",
|
| 139 |
+
"- Use only basic vocabulary",
|
| 140 |
+
"- One action or fact per sentence",
|
| 141 |
+
"- Avoid abstract words completely",
|
| 142 |
+
"- Avoid explanations and reasoning",
|
| 143 |
+
"- Prefer many short declarative sentences",
|
| 144 |
+
],
|
| 145 |
+
}[int(target_level)]
|
| 146 |
+
|
| 147 |
+
constraints_block = "\n".join(level_constraints)
|
| 148 |
+
|
| 149 |
+
# 「Target FRE は補助。Constraintsが主」前提の骨格をそのままテンプレ化
|
| 150 |
+
return f"""Rewrite the following text for learners at {level_name}.
|
| 151 |
+
|
| 152 |
+
Requirements:
|
| 153 |
+
- Target Flesch Reading Ease: around {target_flesch} (±3)
|
| 154 |
+
- Keep the original meaning.
|
| 155 |
+
- Do not add new information.
|
| 156 |
+
- Output only the rewritten text.
|
| 157 |
+
|
| 158 |
+
Constraints:
|
| 159 |
+
{constraints_block}
|
| 160 |
+
|
| 161 |
+
Additional handling (same as current system):
|
| 162 |
+
- Extract only the portions of the text that should be read as the main body, excluding title/author/source/chapter numbers/annotations/footers.
|
| 163 |
+
- When outputting, make sure sections divided by chapters are clearly distinguishable by leaving a blank line between them.
|
| 164 |
+
|
| 165 |
+
Text:
|
| 166 |
+
<<<
|
| 167 |
+
{text}
|
| 168 |
+
>>>
|
| 169 |
+
"""
|
| 170 |
+
|
| 171 |
+
# =========================
|
| 172 |
+
# 書き換え(プロンプト切替)
|
| 173 |
+
# =========================
|
| 174 |
+
def rewrite_level(text: str, target_level: int, prompt_mode: str) -> str:
|
| 175 |
+
prompt = build_prompt(text=text, target_level=target_level, prompt_mode=prompt_mode)
|
| 176 |
+
|
| 177 |
max_tokens_candidates = [3000, 2000, 1500, 1000]
|
| 178 |
last_error = None
|
| 179 |
|
|
|
|
| 272 |
files = list_passage_files_sorted(PASSAGES_DIR)
|
| 273 |
return {"files": files, "idx": 0}
|
| 274 |
|
| 275 |
+
def start(level: int, prompt_mode: str):
|
| 276 |
set_stop(False)
|
| 277 |
st = init_state()
|
| 278 |
total = len(st["files"])
|
|
|
|
| 282 |
msg = f"準備完了: {total}件。次に処理するのは #Text {st['files'][0][0]} です。"
|
| 283 |
return st, msg, "", "", "", None, None
|
| 284 |
|
| 285 |
+
def run_one(level: int, prompt_mode: str, state: dict):
|
| 286 |
set_stop(False)
|
| 287 |
|
| 288 |
files = state.get("files", [])
|
|
|
|
| 295 |
text_id, path = files[idx]
|
| 296 |
original = load_text(path)
|
| 297 |
|
| 298 |
+
rewritten = rewrite_level(original, target_level=level, prompt_mode=prompt_mode)
|
| 299 |
fre, wc = compute_metrics(rewritten)
|
| 300 |
|
| 301 |
ts = (datetime.utcnow() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")
|
|
|
|
| 322 |
header = (
|
| 323 |
f"#Text {text_id}\n"
|
| 324 |
f"Target Level: {level}\n"
|
| 325 |
+
f"Prompt Mode: {prompt_mode}\n"
|
| 326 |
f"Flesch Reading Ease: {fre:.2f}\n"
|
| 327 |
f"Word Count: {wc}\n"
|
| 328 |
f"Saved CSV: {CSV_PATH}\n"
|
|
|
|
| 331 |
progress = f"{state['idx']} / {total}"
|
| 332 |
return state, "1件処理しました。", header, progress, rewritten, None, None
|
| 333 |
|
| 334 |
+
def run_all(level: int, prompt_mode: str, state: dict):
|
| 335 |
set_stop(False)
|
| 336 |
|
| 337 |
files = state.get("files", [])
|
|
|
|
| 352 |
text_id, path = files[idx]
|
| 353 |
original = load_text(path)
|
| 354 |
|
| 355 |
+
rewritten = rewrite_level(original, target_level=level, prompt_mode=prompt_mode)
|
| 356 |
fre, wc = compute_metrics(rewritten)
|
| 357 |
|
| 358 |
ts = (datetime.utcnow() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")
|
|
|
|
| 377 |
last_header = (
|
| 378 |
f"#Text {text_id}\n"
|
| 379 |
f"Target Level: {level}\n"
|
| 380 |
+
f"Prompt Mode: {prompt_mode}\n"
|
| 381 |
f"Flesch Reading Ease: {fre:.2f}\n"
|
| 382 |
f"Word Count: {wc}\n"
|
| 383 |
f"Saved CSV: {CSV_PATH}\n"
|
|
|
|
| 478 |
|
| 479 |
state = gr.State(init_state())
|
| 480 |
|
| 481 |
+
# ★追加:プロンプト選択UI
|
| 482 |
+
# Dropdown offering every label defined in PROMPT_MODES.
prompt_mode_ui = gr.Dropdown(
    label="Prompt Mode(試すプロンプトを選択)",
    value="既存(シンプル)",
    choices=[*PROMPT_MODES],
)
|
| 487 |
+
|
| 488 |
level = gr.Dropdown(choices=[1, 2, 3, 4, 5], value=1, label="Target Level (1..5)")
|
| 489 |
status = gr.Textbox(label="Status", interactive=False)
|
| 490 |
+
header = gr.Textbox(label="Result Header (#Text / FRE / Words)", lines=7, interactive=False)
|
| 491 |
progress = gr.Textbox(label="Progress", interactive=False)
|
| 492 |
output_text = gr.Textbox(label="Rewritten Text", lines=18, interactive=False)
|
| 493 |
|
|
|
|
| 528 |
gr.Markdown(f"📄 CSVパス: `{CSV_PATH}`")
|
| 529 |
gr.Markdown(f"📝 TXTフォルダ: `{REWRITE_DIR}`")
|
| 530 |
|
| 531 |
+
# ---- 既存ボタン(prompt_mode を追加して渡すだけ)----
|
| 532 |
+
# UI値(日本語ラベル) -> 内部キー("legacy"/"final_v1")に変換
|
| 533 |
+
def _pm_key(pm_label: str) -> str:
    """Translate a dropdown label into its internal prompt-mode key.

    Labels that are not present in ``PROMPT_MODES`` resolve to ``"legacy"``.
    """
    try:
        return PROMPT_MODES[pm_label]
    except KeyError:
        return "legacy"
|
| 535 |
+
|
| 536 |
+
# Components refreshed by each of the three action buttons.
_result_components = (
    state,
    status,
    header,
    progress,
    output_text,
    download_one_file,
    download_zip_file,
)


def _on_start(lvl, pm):
    """Resolve the dropdown label to its internal key, then call start()."""
    return start(lvl, _pm_key(pm))


def _on_run_one(lvl, pm, st):
    """Resolve the dropdown label to its internal key, then call run_one()."""
    return run_one(lvl, _pm_key(pm), st)


def _on_run_all(lvl, pm, st):
    """Resolve the dropdown label to its internal key, then call run_all()."""
    return run_all(lvl, _pm_key(pm), st)


start_btn.click(
    fn=_on_start,
    inputs=[level, prompt_mode_ui],
    outputs=list(_result_components),
)
one_btn.click(
    fn=_on_run_one,
    inputs=[level, prompt_mode_ui, state],
    outputs=list(_result_components),
)
all_btn.click(
    fn=_on_run_all,
    inputs=[level, prompt_mode_ui, state],
    outputs=list(_result_components),
)
|
| 551 |
+
|
| 552 |
stop_btn.click(fn=stop, inputs=[], outputs=[status])
|
| 553 |
reset_btn.click(fn=reset_csv, inputs=[], outputs=[status])
|
| 554 |
reset_txt_btn.click(fn=reset_rewrite_folder, inputs=[], outputs=[status])
|