Toya0421 committed on
Commit
41f70dc
·
verified ·
1 Parent(s): 3fa7ad3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -29
app.py CHANGED
@@ -4,6 +4,7 @@ import glob
4
  import csv
5
  import threading
6
  from datetime import datetime, timedelta
 
7
 
8
  import gradio as gr
9
  import textstat
@@ -16,12 +17,18 @@ API_KEY = os.getenv("API_KEY")
16
  BASE_URL = os.getenv("BASE_URL", "https://openrouter.ai/api/v1")
17
  MODEL = os.getenv("MODEL", "google/gemini-2.5-flash")
18
 
19
- # Hugging Face Spaces 永続ストレージ(推奨)
20
- # Spaces の Persistent Storage を有効化している前提(/data が使える)
21
  OUT_DIR = os.getenv("OUT_DIR", "/data")
22
  os.makedirs(OUT_DIR, exist_ok=True)
 
 
23
  CSV_PATH = os.path.join(OUT_DIR, "rewrite_scores.csv")
24
 
 
 
 
 
 
25
  PASSAGES_DIR = os.getenv("PASSAGES_DIR", "passages")
26
 
27
  if not API_KEY:
@@ -36,7 +43,6 @@ _rewrite_sem = threading.Semaphore(REWRITE_CONCURRENCY)
36
  _stop_flag_lock = threading.Lock()
37
  _stop_flag = False
38
 
39
-
40
  # =========================
41
  # passages の列挙
42
  # =========================
@@ -44,7 +50,7 @@ def list_passage_files_sorted(passages_dir: str) -> list[tuple[int, str]]:
44
  pattern = os.path.join(passages_dir, "pg*.txt")
45
  files = glob.glob(pattern)
46
 
47
- items = []
48
  for fp in files:
49
  name = os.path.basename(fp)
50
  m = re.match(r"pg(\d+)\.txt$", name)
@@ -53,12 +59,10 @@ def list_passage_files_sorted(passages_dir: str) -> list[tuple[int, str]]:
53
  items.sort(key=lambda x: x[0])
54
  return items
55
 
56
-
57
  def load_text(path: str) -> str:
58
  with open(path, "r", encoding="utf-8") as f:
59
  return f.read()
60
 
61
-
62
  # =========================
63
  # 書き換え(プロンプト同一)
64
  # =========================
@@ -86,7 +90,6 @@ excluding the title, author name, source information, chapter number, annotation
86
  )
87
  return resp.choices[0].message.content.strip()
88
 
89
-
90
  # =========================
91
  # 指標(FRE + 単語数)
92
  # =========================
@@ -100,16 +103,13 @@ def compute_metrics(text: str) -> tuple[float, int]:
100
  wc = count_words_english(text)
101
  return fre, wc
102
 
103
-
104
  # =========================
105
  # CSV追記(軽量・永続)
106
  # =========================
107
  _csv_lock = threading.Lock()
108
 
109
  def append_csv_row(row: dict):
110
- """
111
- /data/rewrite_scores.csv に1行追記(ヘッダ無ければ作成)
112
- """
113
  fieldnames = ["timestamp_jst", "Text#", "target_level", "flesch_reading_ease", "word_count", "rewritten_text"]
114
  with _csv_lock:
115
  exists = os.path.exists(CSV_PATH)
@@ -119,6 +119,41 @@ def append_csv_row(row: dict):
119
  w.writeheader()
120
  w.writerow({k: row.get(k, "") for k in fieldnames})
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
  # =========================
124
  # 停止フラグ
@@ -132,16 +167,12 @@ def get_stop() -> bool:
132
  with _stop_flag_lock:
133
  return _stop_flag
134
 
135
-
136
  # =========================
137
  # UIロジック
138
  # =========================
139
  def init_state():
140
  files = list_passage_files_sorted(PASSAGES_DIR)
141
- return {
142
- "files": files, # [(text_id, path), ...]
143
- "idx": 0, # 次に処理する位置
144
- }
145
 
146
  def start(level: int):
147
  set_stop(False)
@@ -172,6 +203,7 @@ def run_one(level: int, state: dict):
172
  # JSTタイムスタンプ(+9)
173
  ts = (datetime.utcnow() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")
174
 
 
175
  append_csv_row({
176
  "timestamp_jst": ts,
177
  "Text#": text_id,
@@ -181,16 +213,30 @@ def run_one(level: int, state: dict):
181
  "rewritten_text": rewritten
182
  })
183
 
 
 
 
 
 
 
 
 
 
184
  state["idx"] = idx + 1
185
 
186
- header = f"#Text {text_id}\nTarget Level: {level}\nFlesch Reading Ease: {fre:.2f}\nWord Count: {wc}\nSaved: {CSV_PATH}"
 
 
 
 
 
 
 
187
  progress = f"{state['idx']} / {total}"
188
  return state, "1件処理しました。", header, progress, rewritten, gr.update(visible=True)
189
 
190
  def run_all(level: int, state: dict):
191
- """
192
- 全件(または残り)を順次処理。途中で「停止」ボタンで止められる。
193
- """
194
  set_stop(False)
195
 
196
  files = state.get("files", [])
@@ -225,7 +271,23 @@ def run_all(level: int, state: dict):
225
  "rewritten_text": rewritten
226
  })
227
 
228
- last_header = f"#Text {text_id}\nTarget Level: {level}\nFlesch Reading Ease: {fre:.2f}\nWord Count: {wc}\nSaved: {CSV_PATH}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  last_text = rewritten
230
 
231
  idx += 1
@@ -243,18 +305,31 @@ def reset_csv():
243
  os.remove(CSV_PATH)
244
  return f"CSVを削除しました: {CSV_PATH}"
245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
 
247
  # =========================
248
  # Gradio UI(Spaces向け)
249
  # =========================
250
  with gr.Blocks() as demo:
251
- gr.Markdown("# 🔁 Passage Rewrite + FRE Scoring (HF Spaces)")
252
 
253
  state = gr.State(init_state())
254
 
255
  level = gr.Dropdown(choices=[1, 2, 3, 4, 5], value=1, label="Target Level (1..5)")
256
  status = gr.Textbox(label="Status", interactive=False)
257
- header = gr.Textbox(label="Result Header (#Text / FRE / Words)", lines=5, interactive=False)
258
  progress = gr.Textbox(label="Progress", interactive=False)
259
  output_text = gr.Textbox(label="Rewritten Text", lines=18, interactive=False)
260
 
@@ -263,18 +338,20 @@ with gr.Blocks() as demo:
263
  one_btn = gr.Button("次へ(1件処理)")
264
  all_btn = gr.Button("全件実行(残りを処理)")
265
  stop_btn = gr.Button("停止")
 
266
  with gr.Row():
267
  reset_btn = gr.Button("CSVリセット(削除)")
268
- csv_hint = gr.Markdown(f"📄 CSV保存先: `{CSV_PATH}`SpacesのFilesに出ます)")
269
 
270
- # stop通知を見せる用
271
- stop_note = gr.Markdown(visible=False)
272
 
273
- start_btn.click(fn=start, inputs=[level], outputs=[state, status, header, progress, output_text, stop_note])
274
- one_btn.click(fn=run_one, inputs=[level, state], outputs=[state, status, header, progress, output_text, stop_note])
275
- all_btn.click(fn=run_all, inputs=[level, state], outputs=[state, status, header, progress, output_text, stop_note])
276
  stop_btn.click(fn=stop, inputs=[], outputs=[status])
277
  reset_btn.click(fn=reset_csv, inputs=[], outputs=[status])
 
278
 
279
  demo.queue(max_size=64)
280
  demo.launch()
 
4
  import csv
5
  import threading
6
  from datetime import datetime, timedelta
7
+ from typing import Optional
8
 
9
  import gradio as gr
10
  import textstat
 
17
  BASE_URL = os.getenv("BASE_URL", "https://openrouter.ai/api/v1")
18
  MODEL = os.getenv("MODEL", "google/gemini-2.5-flash")
19
 
20
+ # Hugging Face Spaces 永続ストレージ(Persistent Storage 有効なら /data が永続)
 
21
  OUT_DIR = os.getenv("OUT_DIR", "/data")
22
  os.makedirs(OUT_DIR, exist_ok=True)
23
+
24
+ # スコアCSV(任意だが便利)
25
  CSV_PATH = os.path.join(OUT_DIR, "rewrite_scores.csv")
26
 
27
+ # ★追加:書き換え本文をtxtで蓄積するフォルダ(要件:rewrite_passages)
28
+ # 既定は /data/rewrite_passages(永続に残る)
29
+ REWRITE_DIR = os.getenv("REWRITE_DIR", os.path.join(OUT_DIR, "rewrite_passages"))
30
+ os.makedirs(REWRITE_DIR, exist_ok=True)
31
+
32
  PASSAGES_DIR = os.getenv("PASSAGES_DIR", "passages")
33
 
34
  if not API_KEY:
 
43
  _stop_flag_lock = threading.Lock()
44
  _stop_flag = False
45
 
 
46
  # =========================
47
  # passages の列挙
48
  # =========================
 
50
  pattern = os.path.join(passages_dir, "pg*.txt")
51
  files = glob.glob(pattern)
52
 
53
+ items: list[tuple[int, str]] = []
54
  for fp in files:
55
  name = os.path.basename(fp)
56
  m = re.match(r"pg(\d+)\.txt$", name)
 
59
  items.sort(key=lambda x: x[0])
60
  return items
61
 
 
62
  def load_text(path: str) -> str:
63
  with open(path, "r", encoding="utf-8") as f:
64
  return f.read()
65
 
 
66
  # =========================
67
  # 書き換え(プロンプト同一)
68
  # =========================
 
90
  )
91
  return resp.choices[0].message.content.strip()
92
 
 
93
  # =========================
94
  # 指標(FRE + 単語数)
95
  # =========================
 
103
  wc = count_words_english(text)
104
  return fre, wc
105
 
 
106
  # =========================
107
  # CSV追記(軽量・永続)
108
  # =========================
109
  _csv_lock = threading.Lock()
110
 
111
  def append_csv_row(row: dict):
112
+ """rewrite_scores.csv に1行追記(ヘッダ無ければ作成)"""
 
 
113
  fieldnames = ["timestamp_jst", "Text#", "target_level", "flesch_reading_ease", "word_count", "rewritten_text"]
114
  with _csv_lock:
115
  exists = os.path.exists(CSV_PATH)
 
119
  w.writeheader()
120
  w.writerow({k: row.get(k, "") for k in fieldnames})
121
 
122
+ # =========================
123
+ # ★追加:rewrite_passages に txt 追記
124
+ # =========================
125
+ _txt_lock = threading.Lock()
126
+
127
+ def append_rewrite_txt(
128
+ text_id: int,
129
+ target_level: int,
130
+ fre: float,
131
+ word_count: int,
132
+ rewritten_text: str,
133
+ ):
134
+ """
135
+ rewrite_passages/Text_{id}.txt に結果を追記(同じText#の再実行も追記で残す)
136
+ """
137
+ ts = (datetime.utcnow() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")
138
+ path = os.path.join(REWRITE_DIR, f"Text_{text_id}.txt")
139
+
140
+ block = (
141
+ f"# Text {text_id}\n"
142
+ f"Target Level: {target_level}\n"
143
+ f"Flesch Reading Ease: {fre:.2f}\n"
144
+ f"Word Count: {word_count}\n"
145
+ f"Timestamp (JST): {ts}\n"
146
+ f"Model: {MODEL}\n"
147
+ f"\n"
148
+ f"---- Rewritten Text ----\n"
149
+ f"{rewritten_text}\n"
150
+ f"\n"
151
+ f"{'=' * 80}\n"
152
+ )
153
+
154
+ with _txt_lock:
155
+ with open(path, "a", encoding="utf-8") as f:
156
+ f.write(block)
157
 
158
  # =========================
159
  # 停止フラグ
 
167
  with _stop_flag_lock:
168
  return _stop_flag
169
 
 
170
  # =========================
171
  # UIロジック
172
  # =========================
173
  def init_state():
174
  files = list_passage_files_sorted(PASSAGES_DIR)
175
+ return {"files": files, "idx": 0}
 
 
 
176
 
177
  def start(level: int):
178
  set_stop(False)
 
203
  # JSTタイムスタンプ(+9)
204
  ts = (datetime.utcnow() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")
205
 
206
+ # CSV追記
207
  append_csv_row({
208
  "timestamp_jst": ts,
209
  "Text#": text_id,
 
213
  "rewritten_text": rewritten
214
  })
215
 
216
+ # ★txt追記
217
+ append_rewrite_txt(
218
+ text_id=text_id,
219
+ target_level=level,
220
+ fre=fre,
221
+ word_count=wc,
222
+ rewritten_text=rewritten,
223
+ )
224
+
225
  state["idx"] = idx + 1
226
 
227
+ header = (
228
+ f"#Text {text_id}\n"
229
+ f"Target Level: {level}\n"
230
+ f"Flesch Reading Ease: {fre:.2f}\n"
231
+ f"Word Count: {wc}\n"
232
+ f"Saved CSV: {CSV_PATH}\n"
233
+ f"Saved TXT: {os.path.join(REWRITE_DIR, f'Text_{text_id}.txt')}"
234
+ )
235
  progress = f"{state['idx']} / {total}"
236
  return state, "1件処理しました。", header, progress, rewritten, gr.update(visible=True)
237
 
238
  def run_all(level: int, state: dict):
239
+ """全件(または残り)を順次処理。途中で「停止」ボタンで止められる。"""
 
 
240
  set_stop(False)
241
 
242
  files = state.get("files", [])
 
271
  "rewritten_text": rewritten
272
  })
273
 
274
+ # ★txt追記
275
+ append_rewrite_txt(
276
+ text_id=text_id,
277
+ target_level=level,
278
+ fre=fre,
279
+ word_count=wc,
280
+ rewritten_text=rewritten,
281
+ )
282
+
283
+ last_header = (
284
+ f"#Text {text_id}\n"
285
+ f"Target Level: {level}\n"
286
+ f"Flesch Reading Ease: {fre:.2f}\n"
287
+ f"Word Count: {wc}\n"
288
+ f"Saved CSV: {CSV_PATH}\n"
289
+ f"Saved TXT: {os.path.join(REWRITE_DIR, f'Text_{text_id}.txt')}"
290
+ )
291
  last_text = rewritten
292
 
293
  idx += 1
 
305
  os.remove(CSV_PATH)
306
  return f"CSVを削除しました: {CSV_PATH}"
307
 
308
+ def reset_rewrite_folder():
309
+ """
310
+ rewrite_passages を全消しは危険なので、ここでは「中の txt を削除」する実装。
311
+ """
312
+ removed = 0
313
+ with _txt_lock:
314
+ for fp in glob.glob(os.path.join(REWRITE_DIR, "Text_*.txt")):
315
+ try:
316
+ os.remove(fp)
317
+ removed += 1
318
+ except Exception:
319
+ pass
320
+ return f"rewrite_passages の Text_*.txt を削除しました({removed}件): {REWRITE_DIR}"
321
 
322
  # =========================
323
  # Gradio UI(Spaces向け)
324
  # =========================
325
  with gr.Blocks() as demo:
326
+ gr.Markdown("# 🔁 Passage Rewrite + FRE/Word Scoring (HF Spaces)")
327
 
328
  state = gr.State(init_state())
329
 
330
  level = gr.Dropdown(choices=[1, 2, 3, 4, 5], value=1, label="Target Level (1..5)")
331
  status = gr.Textbox(label="Status", interactive=False)
332
+ header = gr.Textbox(label="Result Header (#Text / FRE / Words)", lines=6, interactive=False)
333
  progress = gr.Textbox(label="Progress", interactive=False)
334
  output_text = gr.Textbox(label="Rewritten Text", lines=18, interactive=False)
335
 
 
338
  one_btn = gr.Button("次へ(1件処理)")
339
  all_btn = gr.Button("全件実行(残りを処理)")
340
  stop_btn = gr.Button("停止")
341
+
342
  with gr.Row():
343
  reset_btn = gr.Button("CSVリセット(削除)")
344
+ reset_txt_btn = gr.Button("rewrite_passagesリセットText_*.txt削除)")
345
 
346
+ gr.Markdown(f"📄 CSV保存先: `{CSV_PATH}`(SpacesのFilesに出ます)")
347
+ gr.Markdown(f"📝 TXT保存先: `{REWRITE_DIR}`(SpacesのFilesに出ます)")
348
 
349
+ start_btn.click(fn=start, inputs=[level], outputs=[state, status, header, progress, output_text])
350
+ one_btn.click(fn=run_one, inputs=[level, state], outputs=[state, status, header, progress, output_text])
351
+ all_btn.click(fn=run_all, inputs=[level, state], outputs=[state, status, header, progress, output_text])
352
  stop_btn.click(fn=stop, inputs=[], outputs=[status])
353
  reset_btn.click(fn=reset_csv, inputs=[], outputs=[status])
354
+ reset_txt_btn.click(fn=reset_rewrite_folder, inputs=[], outputs=[status])
355
 
356
  demo.queue(max_size=64)
357
  demo.launch()