Toya0421 commited on
Commit
2ceb3a6
·
verified ·
1 Parent(s): 9e8ba73

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +411 -388
app.py CHANGED
@@ -1,200 +1,100 @@
1
- import os
2
- import re
3
- import json
4
- import glob
5
- import time
6
- import random
7
- import sqlite3
8
- import threading
9
- import tempfile
10
- import inspect
11
- from datetime import datetime, timedelta
12
-
13
  import gradio as gr
14
- import pandas as pd
15
  from openai import OpenAI
16
  from datasets import Dataset
 
 
 
17
 
18
- # =========================
19
- # Config
20
- # =========================
21
  API_KEY = os.getenv("API_KEY")
22
- BASE_URL = os.getenv("BASE_URL", "https://openrouter.ai/api/v1")
23
- HF_TOKEN = os.getenv("HF_TOKEN") # pushするなら必要
24
- DATASET_REPO = os.getenv("DATASET_REPO", "Toya0421/reading_exercise_logging")
25
-
26
- # pushを有効にするか(負荷テスト中は 0 推奨)
27
- ENABLE_HF_PUSH = os.getenv("ENABLE_HF_PUSH", "0") == "1"
28
-
29
- # push間隔(秒)
30
- PUSH_INTERVAL_SEC = int(os.getenv("PUSH_INTERVAL_SEC", "300")) # 5分
31
- # push時に出すparquet一時ファイル名
32
- PARQUET_NAME = "data.parquet"
33
-
34
- LOG_DB = "reading_logs.sqlite"
35
- CACHE_DIR = "rewrite_cache"
36
- os.makedirs(CACHE_DIR, exist_ok=True)
37
 
38
  client = OpenAI(base_url=BASE_URL, api_key=API_KEY)
39
 
40
- # passage_information.xlsx 読み込み
41
  passage_info_df = pd.read_excel("passage_information.xlsx")
42
 
43
- # =========================
44
- # SQLite (WAL) for logs
45
- # =========================
46
- _db_lock = threading.Lock()
47
- _push_lock = threading.Lock()
48
-
49
- def init_db():
50
- with _db_lock:
51
- conn = sqlite3.connect(LOG_DB, check_same_thread=False)
52
- conn.execute("PRAGMA journal_mode=WAL;")
53
- conn.execute("""
54
- CREATE TABLE IF NOT EXISTS logs (
55
- id INTEGER PRIMARY KEY AUTOINCREMENT,
56
- user_id TEXT,
57
- assigned_level INTEGER,
58
- passage_id INTEGER,
59
- original_level REAL,
60
- action_time TEXT,
61
- action_type TEXT,
62
- page_text TEXT
63
- );
64
- """)
65
- conn.commit()
66
- conn.close()
67
-
68
- init_db()
69
-
70
- def now_jst_iso():
71
- return (datetime.utcnow() + timedelta(hours=9)).isoformat()
72
-
73
- def save_log(entry: dict):
74
- # 1イベント=1INSERT(軽量)
75
- with _db_lock:
76
- conn = sqlite3.connect(LOG_DB, check_same_thread=False)
77
- conn.execute("PRAGMA journal_mode=WAL;")
78
- conn.execute("""
79
- INSERT INTO logs(user_id, assigned_level, passage_id, original_level, action_time, action_type, page_text)
80
- VALUES (?, ?, ?, ?, ?, ?, ?)
81
- """, (
82
- entry.get("user_id"),
83
- entry.get("assigned_level"),
84
- entry.get("passage_id"),
85
- entry.get("original_level"),
86
- entry.get("action_time"),
87
- entry.get("action_type"),
88
- entry.get("page_text"),
89
- ))
90
- conn.commit()
91
- conn.close()
92
-
93
- def export_sqlite_to_parquet(parquet_path: str):
94
- # DB全体を読み出してparquet化(pushは低頻度でOK)
95
- with _db_lock:
96
- conn = sqlite3.connect(LOG_DB, check_same_thread=False)
97
- df = pd.read_sql_query("SELECT * FROM logs ORDER BY id ASC", conn)
98
- conn.close()
99
- df.to_parquet(parquet_path, index=False)
100
-
101
- def push_to_hub_if_enabled():
102
- if not ENABLE_HF_PUSH:
103
- return
104
- if not HF_TOKEN:
105
- print("[WARN] ENABLE_HF_PUSH=1 ですが HF_TOKEN がありません。pushをスキップします。")
106
- return
107
-
108
- # pushが重なると壊れるのでロック
109
- if not _push_lock.acquire(blocking=False):
110
- return
111
- try:
112
- tmp_dir = tempfile.mkdtemp()
113
- parquet_path = os.path.join(tmp_dir, PARQUET_NAME)
114
- export_sqlite_to_parquet(parquet_path)
115
- dataset = Dataset.from_parquet(parquet_path)
116
- dataset.push_to_hub(DATASET_REPO, token=HF_TOKEN)
117
- print(f"[INFO] Pushed logs to hub: {DATASET_REPO} ({len(dataset)} rows)")
118
- except Exception as e:
119
- print(f"[ERROR] push_to_hub failed: {e}")
120
- finally:
121
- _push_lock.release()
122
-
123
- def start_periodic_pusher():
124
- # Spacesはプロセスが1つとは限らないので、push頻度は低め推奨
125
- if not ENABLE_HF_PUSH:
126
- return
127
-
128
- def loop():
129
- while True:
130
- time.sleep(PUSH_INTERVAL_SEC)
131
- push_to_hub_if_enabled()
132
-
133
- th = threading.Thread(target=loop, daemon=True)
134
- th.start()
135
-
136
- start_periodic_pusher()
137
-
138
- # =========================
139
- # Passages
140
- # =========================
141
- def list_passage_ids():
142
- files = glob.glob("passages/pg*.txt")
143
- ids = []
144
- for f in files:
145
- name = os.path.basename(f)
146
- num = name.replace("pg", "").replace(".txt", "")
147
- if num.isdigit():
148
- ids.append(int(num))
149
- return sorted(ids)
150
 
151
- ALL_PASSAGE_IDS = list_passage_ids()
152
 
153
- def load_passage_file(text_id: int):
 
 
 
 
 
 
 
154
  path = f"passages/pg{text_id}.txt"
155
  if not os.path.exists(path):
156
  return None
157
  with open(path, "r", encoding="utf-8") as f:
158
  return f.read()
159
 
160
- def get_new_passage_random(used_passages_list):
161
- # used_passages_list: list[int]
162
- used = set(used_passages_list or [])
163
- if not ALL_PASSAGE_IDS:
164
- return None, None, None, []
 
165
 
166
- available = [pid for pid in ALL_PASSAGE_IDS if pid not in used]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  if not available:
168
- used.clear()
169
- available = list(ALL_PASSAGE_IDS)
170
 
171
- pid = random.choice(available)
172
- used.add(pid)
 
173
 
174
- text = load_passage_file(pid)
 
175
  if text is None:
176
- return None, None, None, list(used)
177
-
178
- row = passage_info_df[passage_info_df["Text#"] == pid]
179
- orig_level = None if len(row) == 0 else row.iloc[0]["flesch_score"]
180
- return pid, text, orig_level, list(used)
181
-
182
- # =========================
183
- # Rewrite (cached)
184
- # =========================
185
- def rewrite_cache_path(passage_id: int, level: int):
186
- return os.path.join(CACHE_DIR, f"pg{passage_id}_lv{level}.json")
187
-
188
- def rewrite_level(text: str, target_level: int, passage_id: int):
189
- cpath = rewrite_cache_path(passage_id, target_level)
190
- if os.path.exists(cpath):
191
- try:
192
- with open(cpath, "r", encoding="utf-8") as f:
193
- return json.load(f)["rewritten"]
194
- except Exception:
195
- pass
196
-
197
- level_to_flesch = {1: 90, 2: 70, 3: 55, 4: 40, 5: 25}
 
 
 
198
  target_flesch = level_to_flesch[int(target_level)]
199
 
200
  prompt = f"""
@@ -202,9 +102,9 @@ Rewrite the following passage so it fits about {target_flesch} Flesch Reading Ea
202
  - Extract only the portions of the text that should be read as the main body,
203
  excluding the title, author name, source information, chapter number, annotations, and footers.
204
  - When outputting, make sure sections divided by chapters, etc., are clearly distinguishable by leaving a blank line between them.
205
- - Preserve the original meaning faithfully.
206
- - Do not add new information or remove essential information.
207
- - Output only the rewritten passage. Do not include explanations.
208
  {text}
209
  """
210
 
@@ -212,93 +112,135 @@ excluding the title, author name, source information, chapter number, annotation
212
  model="google/gemini-2.5-flash",
213
  messages=[{"role": "user", "content": prompt}],
214
  temperature=0.4,
215
- max_tokens=5000,
216
  )
217
- rewritten = resp.choices[0].message.content.strip()
218
 
219
- try:
220
- with open(cpath, "w", encoding="utf-8") as f:
221
- json.dump({"rewritten": rewritten}, f, ensure_ascii=False)
222
- except Exception:
223
- pass
224
 
225
- return rewritten
226
 
227
  def split_pages(text, max_words=300):
 
 
 
 
 
 
228
  sentences = re.split(r'(?<=[.!?])\s+', text.strip())
229
- pages, current, wc = [], [], 0
230
- for s in sentences:
231
- w = s.split()
232
- if wc + len(w) > max_words and current:
233
- pages.append(" ".join(current))
234
- current, wc = [s], len(w)
 
 
 
 
 
 
 
 
235
  else:
236
- current.append(s)
237
- wc += len(w)
238
- if current:
239
- pages.append(" ".join(current))
 
 
 
240
  return pages or [text]
241
 
242
- # =========================
243
- # Gradio handlers (stateful per user)
244
- # state = {"user_id": str|None, "level": int|None, "used_passages": list[int]}
245
- # =========================
246
- def start_test(student_id, level_input, state):
247
- if state is None:
248
- state = {"user_id": None, "level": None, "used_passages": []}
249
 
250
- if not student_id or str(student_id).strip() == "":
251
- return (
252
- "", "", "[]", 0, 0, "", "", "",
253
- gr.update(interactive=False, visible=False),
254
- gr.update(interactive=False, visible=False),
255
- gr.update(interactive=False, visible=False),
256
- state
257
- )
 
 
 
 
 
 
 
 
 
258
 
259
- state["user_id"] = str(student_id).strip()
260
- state["level"] = int(level_input)
261
- state["used_passages"] = []
262
 
263
- save_log({
264
- "user_id": state["user_id"],
265
- "assigned_level": state["level"],
 
 
 
 
 
 
 
 
 
 
 
266
  "passage_id": None,
267
  "original_level": None,
268
- "action_time": now_jst_iso(),
269
- "action_type": "start_pushed",
270
  "page_text": None
271
- })
 
272
 
273
- pid, text, orig_lev, used_list = get_new_passage_random(state["used_passages"])
274
- state["used_passages"] = used_list
 
 
 
 
 
 
275
 
 
 
 
 
276
  if text is None:
277
  return (
278
- "教材が見つかりません", "", "[]", 0, 0, "", "", "",
279
- gr.update(interactive=False, visible=False),
280
  gr.update(interactive=False, visible=False),
281
  gr.update(interactive=False, visible=False),
282
- state
283
  )
284
 
285
- rewritten = rewrite_level(text, state["level"], pid)
286
  pages = split_pages(rewritten)
287
  total = len(pages)
288
 
289
- prev_upd = gr.update(interactive=False, visible=False)
290
- next_upd = gr.update(interactive=(total > 1), visible=(total > 1))
291
- finish_upd = gr.update(interactive=(total == 1), visible=(total == 1))
292
-
293
- save_log({
294
- "user_id": state["user_id"],
295
- "assigned_level": state["level"],
 
 
 
 
 
 
 
 
296
  "passage_id": pid,
297
  "original_level": orig_lev,
298
- "action_time": now_jst_iso(),
299
- "action_type": "page_displayed_1",
300
  "page_text": pages[0]
301
- })
 
302
 
303
  return (
304
  pages[0],
@@ -306,147 +248,176 @@ def start_test(student_id, level_input, state):
306
  json.dumps(pages, ensure_ascii=False),
307
  0,
308
  total,
309
- str(pid),
310
- "" if orig_lev is None else str(orig_lev),
311
- str(state["level"]),
312
- prev_upd, next_upd, finish_upd,
313
- state
 
314
  )
315
 
316
- def next_page(pages_json, current_page, total_pages, pid, orig_lev, state):
317
- pages = json.loads(pages_json) if pages_json else []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  if not pages:
319
- return ("", "", "[]", 0,
320
  gr.update(interactive=False, visible=False),
321
  gr.update(interactive=False, visible=False),
322
- gr.update(interactive=False, visible=False),
323
- state)
324
-
325
- save_log({
326
- "user_id": state.get("user_id"),
327
- "assigned_level": state.get("level"),
328
- "passage_id": int(pid),
329
- "original_level": float(orig_lev) if orig_lev not in ("", None) else None,
330
- "action_time": now_jst_iso(),
331
- "action_type": "next_pushed",
332
- "page_text": None
333
- })
334
 
335
- total_pages = int(total_pages)
336
- current_page = int(current_page)
337
  new_page = min(current_page + 1, total_pages - 1)
338
 
339
- save_log({
340
- "user_id": state.get("user_id"),
341
- "assigned_level": state.get("level"),
342
- "passage_id": int(pid),
343
- "original_level": float(orig_lev) if orig_lev not in ("", None) else None,
344
- "action_time": now_jst_iso(),
 
345
  "action_type": f"page_displayed_{new_page+1}",
346
  "page_text": pages[new_page]
347
- })
 
348
 
349
- prev_upd = gr.update(interactive=(new_page > 0), visible=(new_page > 0))
350
- next_visible = (new_page < total_pages - 1)
351
- next_upd = gr.update(interactive=next_visible, visible=next_visible)
352
- finish_upd = gr.update(interactive=(not next_visible), visible=(not next_visible))
 
 
 
 
 
 
353
 
354
  return (
355
  pages[new_page],
356
  f"{new_page+1} / {total_pages}",
357
- json.dumps(pages, ensure_ascii=False),
358
  new_page,
359
- prev_upd, next_upd, finish_upd,
360
- state
 
361
  )
362
 
363
- def prev_page(pages_json, current_page, total_pages, pid, orig_lev, state):
364
- pages = json.loads(pages_json) if pages_json else []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  if not pages:
366
- return ("", "", "[]", 0,
367
- gr.update(interactive=False, visible=False),
368
  gr.update(interactive=False, visible=False),
369
  gr.update(interactive=False, visible=False),
370
- state)
371
-
372
- save_log({
373
- "user_id": state.get("user_id"),
374
- "assigned_level": state.get("level"),
375
- "passage_id": int(pid),
376
- "original_level": float(orig_lev) if orig_lev not in ("", None) else None,
377
- "action_time": now_jst_iso(),
378
- "action_type": "prev_pushed",
379
- "page_text": None
380
- })
381
 
382
- total_pages = int(total_pages)
383
- current_page = int(current_page)
384
  new_page = max(current_page - 1, 0)
385
 
386
- save_log({
387
- "user_id": state.get("user_id"),
388
- "assigned_level": state.get("level"),
389
- "passage_id": int(pid),
390
- "original_level": float(orig_lev) if orig_lev not in ("", None) else None,
391
- "action_time": now_jst_iso(),
392
- "action_type": f"page_displayed_{new_page+1}",
393
- "page_text": pages[new_page]
394
- })
395
-
396
  prev_upd = gr.update(interactive=(new_page > 0), visible=(new_page > 0))
397
  next_visible = (new_page < total_pages - 1)
398
  next_upd = gr.update(interactive=next_visible, visible=next_visible)
399
  finish_upd = gr.update(interactive=(not next_visible), visible=(not next_visible))
400
 
 
 
 
 
 
 
 
 
 
 
 
 
401
  return (
402
  pages[new_page],
403
  f"{new_page+1} / {total_pages}",
404
- json.dumps(pages, ensure_ascii=False),
405
  new_page,
406
- prev_upd, next_upd, finish_upd,
407
- state
 
408
  )
409
 
410
- def finish_or_retire(pages_json, current_page, pid, orig_lev, action, state):
411
- save_log({
412
- "user_id": state.get("user_id"),
413
- "assigned_level": state.get("level"),
414
- "passage_id": int(pid),
415
- "original_level": float(orig_lev) if orig_lev not in ("", None) else None,
416
- "action_time": now_jst_iso(),
 
 
 
 
417
  "action_type": action,
418
  "page_text": None
419
- })
420
-
421
- new_pid, new_text, new_orig_lev, used_list = get_new_passage_random(state.get("used_passages", []))
422
- state["used_passages"] = used_list
423
 
 
424
  if new_text is None:
425
  return (
426
- "教材がありません", "", "[]", 0, 0, "", "", "",
427
- gr.update(interactive=False, visible=False),
428
  gr.update(interactive=False, visible=False),
429
  gr.update(interactive=False, visible=False),
430
- state
431
  )
432
 
433
- rewritten = rewrite_level(new_text, state["level"], new_pid)
434
  new_pages = split_pages(rewritten)
435
  total = len(new_pages)
436
 
437
- prev_upd = gr.update(interactive=False, visible=False)
438
- next_upd = gr.update(interactive=(total > 1), visible=(total > 1))
439
- finish_upd = gr.update(interactive=(total == 1), visible=(total == 1))
440
-
441
- save_log({
442
- "user_id": state.get("user_id"),
443
- "assigned_level": state.get("level"),
 
 
 
 
 
 
444
  "passage_id": new_pid,
445
  "original_level": new_orig_lev,
446
- "action_time": now_jst_iso(),
447
  "action_type": "page_displayed_1",
448
  "page_text": new_pages[0]
449
- })
 
450
 
451
  return (
452
  new_pages[0],
@@ -454,67 +425,107 @@ def finish_or_retire(pages_json, current_page, pid, orig_lev, action, state):
454
  json.dumps(new_pages, ensure_ascii=False),
455
  0,
456
  total,
457
- str(new_pid),
458
- "" if new_orig_lev is None else str(new_orig_lev),
459
- str(state["level"]),
460
- prev_upd, next_upd, finish_upd,
461
- state
 
462
  )
463
 
464
- # =========================
465
- # Gradio queue/launch (version-safe)
466
- # =========================
467
- def safe_queue(blocks: gr.Blocks, concurrency: int = 8, max_size: int = 64):
468
- sig = inspect.signature(blocks.queue)
469
- kwargs = {}
470
- # Gradioのバージョン差を吸収
471
- if "default_concurrency_limit" in sig.parameters:
472
- kwargs["default_concurrency_limit"] = concurrency
473
- elif "concurrency_limit" in sig.parameters:
474
- kwargs["concurrency_limit"] = concurrency
475
- elif "concurrency_count" in sig.parameters:
476
- kwargs["concurrency_count"] = concurrency
477
- if "max_size" in sig.parameters:
478
- kwargs["max_size"] = max_size
479
- return blocks.queue(**kwargs)
480
-
481
- def safe_launch(blocks: gr.Blocks):
482
- sig = inspect.signature(blocks.launch)
483
- kwargs = {}
484
- # Spacesだと server_name/server_port は不要なことが多いが、あっても問題ない
485
- if "server_name" in sig.parameters:
486
- kwargs["server_name"] = "0.0.0.0"
487
- if "server_port" in sig.parameters and os.getenv("PORT"):
488
- kwargs["server_port"] = int(os.getenv("PORT"))
489
- # max_threadsがある版だけ付ける
490
- if "max_threads" in sig.parameters:
491
- kwargs["max_threads"] = 16
492
- return blocks.launch(**kwargs)
493
-
494
- # =========================
495
- # UI
496
- # =========================
497
  custom_css = """
 
 
 
498
  .big-text {
499
  font-size: 22px !important;
500
  line-height: 1.8 !important;
501
  font-family: "Noto Sans", sans-serif !important;
502
  }
 
503
  .reading-area {
504
  padding: 20px !important;
505
  border-radius: 12px !important;
506
  border: 1px solid #ccc !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
507
  }
508
  """
509
 
 
510
  with gr.Blocks(css=custom_css) as demo:
511
  gr.Markdown("# 📚 Reading Exercise")
512
 
513
  student_id_input = gr.Textbox(label="学生番号(必須)")
514
- level_input = gr.Dropdown(choices=[1,2,3,4,5], label="Reading Level", value=3)
 
 
 
 
 
515
  start_btn = gr.Button("スタート")
516
 
517
- text_display = gr.Textbox(label="教材", lines=18, interactive=False, elem_classes=["big-text", "reading-area"])
 
 
 
 
 
518
  page_display = gr.Textbox(label="進行状況", lines=1, interactive=False)
519
 
520
  hidden_pages = gr.Textbox(visible=False)
@@ -531,59 +542,71 @@ with gr.Blocks(css=custom_css) as demo:
531
 
532
  retire_btn = gr.Button("リタイア")
533
 
534
- state = gr.State({"user_id": None, "level": None, "used_passages": []})
535
-
536
  start_btn.click(
537
  fn=start_test,
538
- inputs=[student_id_input, level_input, state],
539
  outputs=[
540
  text_display, page_display,
541
  hidden_pages, hidden_page_index,
542
  hidden_total_pages, hidden_passage_id,
543
  hidden_orig_lev, hidden_assigned_lev,
544
- prev_btn, next_btn, finish_btn,
545
- state
546
- ],
547
  )
548
 
549
  next_btn.click(
550
  fn=next_page,
551
- inputs=[hidden_pages, hidden_page_index, hidden_total_pages, hidden_passage_id, hidden_orig_lev, state],
552
- outputs=[text_display, page_display, hidden_pages, hidden_page_index, prev_btn, next_btn, finish_btn, state],
 
 
 
 
 
 
 
 
553
  )
554
 
555
  prev_btn.click(
556
  fn=prev_page,
557
- inputs=[hidden_pages, hidden_page_index, hidden_total_pages, hidden_passage_id, hidden_orig_lev, state],
558
- outputs=[text_display, page_display, hidden_pages, hidden_page_index, prev_btn, next_btn, finish_btn, state],
 
 
 
 
 
 
 
 
559
  )
560
 
561
  finish_btn.click(
562
- fn=lambda p, i, pid, o, st: finish_or_retire(p, i, pid, o, "finished", st),
563
- inputs=[hidden_pages, hidden_page_index, hidden_passage_id, hidden_orig_lev, state],
564
  outputs=[
565
  text_display, page_display,
566
  hidden_pages, hidden_page_index,
567
  hidden_total_pages, hidden_passage_id,
568
  hidden_orig_lev, hidden_assigned_lev,
569
- prev_btn, next_btn, finish_btn,
570
- state
571
- ],
572
  )
573
 
574
  retire_btn.click(
575
- fn=lambda p, i, pid, o, st: finish_or_retire(p, i, pid, o, "retire", st),
576
- inputs=[hidden_pages, hidden_page_index, hidden_passage_id, hidden_orig_lev, state],
 
 
 
577
  outputs=[
578
  text_display, page_display,
579
  hidden_pages, hidden_page_index,
580
  hidden_total_pages, hidden_passage_id,
581
  hidden_orig_lev, hidden_assigned_lev,
582
- prev_btn, next_btn, finish_btn,
583
- state
584
- ],
585
  )
586
 
587
- # queueはバージョン差があるので安全ラッパ
588
- safe_queue(demo, concurrency=8, max_size=64)
589
- safe_launch(demo)
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
2
  from openai import OpenAI
3
  from datasets import Dataset
4
+ from datetime import datetime, timedelta
5
+ import pandas as pd
6
+ import time, os, random, tempfile, json, glob
7
 
8
+ # --- API / HF 設定 ---
 
 
9
  API_KEY = os.getenv("API_KEY")
10
+ BASE_URL = "https://openrouter.ai/api/v1"
11
+ HF_TOKEN = os.getenv("HF_TOKEN")
12
+ DATASET_REPO = "Toya0421/reading_exercise_logging"
13
+ LOG_FILE = "reading_logs.csv"
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  client = OpenAI(base_url=BASE_URL, api_key=API_KEY)
16
 
17
+ # --- passage_information.xlsx 読み込み (Text# と flesch_score 使用) ---
18
  passage_info_df = pd.read_excel("passage_information.xlsx")
19
 
20
+ # --- 状態変数 ---
21
+ used_passages = set()
22
+ current_user_id = None
23
+ current_level = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
 
25
 
26
+ # ======================================================
27
+ # 新しい教材管理:passages フォルダからランダム選択
28
+ # ======================================================
29
+
30
+ def load_passage_file(text_id):
31
+ """
32
+ passages/pg{text_id}.txt を読み込み、内容を返す。
33
+ """
34
  path = f"passages/pg{text_id}.txt"
35
  if not os.path.exists(path):
36
  return None
37
  with open(path, "r", encoding="utf-8") as f:
38
  return f.read()
39
 
40
+ def get_new_passage_random():
41
+ """
42
+ passages フォルダからランダムに教材を選び(pg◯.txt)、
43
+ passage_information.xlsx Text# の flesch_score を original_level として返す。
44
+ """
45
+ global used_passages
46
 
47
+ # --- pg*.txt を取得 ---
48
+ files = glob.glob("passages/pg*.txt")
49
+ if not files:
50
+ return None, None, None
51
+
52
+ # --- ファイル名から Text# (整数) を抽出 ---
53
+ all_ids = []
54
+ for f in files:
55
+ name = os.path.basename(f)
56
+ num = name.replace("pg", "").replace(".txt", "")
57
+ if num.isdigit():
58
+ all_ids.append(int(num))
59
+
60
+ # --- 未使用の ID を優先 ---
61
+ available = [pid for pid in all_ids if pid not in used_passages]
62
  if not available:
63
+ used_passages.clear()
64
+ available = list(all_ids)
65
 
66
+ # --- ランダムに選択 ---
67
+ text_id = random.choice(available)
68
+ used_passages.add(text_id)
69
 
70
+ # --- テキスト読み込み ---
71
+ text = load_passage_file(text_id)
72
  if text is None:
73
+ return None, None, None
74
+
75
+ # --- Excel から original_level (flesch_score) を取得 ---
76
+ row = passage_info_df[passage_info_df["Text#"] == text_id]
77
+ if len(row) == 0:
78
+ orig_level = None
79
+ else:
80
+ orig_level = row.iloc[0]["flesch_score"]
81
+
82
+ return text_id, text, orig_level
83
+
84
+
85
+ # ======================================================
86
+ # Rewrite
87
+ # ======================================================
88
+
89
+ def rewrite_level(text, target_level):
90
+ level_to_flesch = {
91
+ 1: 90,
92
+ 2: 70,
93
+ 3: 55,
94
+ 4: 40,
95
+ 5: 25
96
+ }
97
+
98
  target_flesch = level_to_flesch[int(target_level)]
99
 
100
  prompt = f"""
 
102
  - Extract only the portions of the text that should be read as the main body,
103
  excluding the title, author name, source information, chapter number, annotations, and footers.
104
  - When outputting, make sure sections divided by chapters, etc., are clearly distinguishable by leaving a blank line between them.
105
+ - Preserve the original meaning faithfully.
106
+ - Do not add new information or remove essential information.
107
+ - Output only the rewritten passage. Do not include explanations.
108
  {text}
109
  """
110
 
 
112
  model="google/gemini-2.5-flash",
113
  messages=[{"role": "user", "content": prompt}],
114
  temperature=0.4,
115
+ max_tokens=5000
116
  )
117
+ return resp.choices[0].message.content.strip()
118
 
 
 
 
 
 
119
 
120
+ import re
121
 
122
  def split_pages(text, max_words=300):
123
+ """
124
+ 文単位でページを分割する。
125
+ - 文の途中でページを分割しない
126
+ - max_words の上限を超えないようにする
127
+ """
128
+ # 文に分割(. ? ! のあとに改行やスペースが続くパターン)
129
  sentences = re.split(r'(?<=[.!?])\s+', text.strip())
130
+ pages = []
131
+ current_page = []
132
+ current_word_count = 0
133
+
134
+ for sentence in sentences:
135
+ words = sentence.split()
136
+ sentence_len = len(words)
137
+
138
+ # 次の文を追加すると max_words を超える場合 → 新しいページを作る
139
+ if current_word_count + sentence_len > max_words:
140
+ if current_page:
141
+ pages.append(" ".join(current_page))
142
+ current_page = [sentence]
143
+ current_word_count = sentence_len
144
  else:
145
+ current_page.append(sentence)
146
+ current_word_count += sentence_len
147
+
148
+ # 最後のページを追加
149
+ if current_page:
150
+ pages.append(" ".join(current_page))
151
+
152
  return pages or [text]
153
 
 
 
 
 
 
 
 
154
 
155
+ # ======================================================
156
+ # Save Log
157
+ # ======================================================
158
+
159
+ def save_log(entry):
160
+ df = pd.DataFrame([entry])
161
+ if os.path.exists(LOG_FILE):
162
+ df.to_csv(LOG_FILE, mode="a", index=False, header=False)
163
+ else:
164
+ df.to_csv(LOG_FILE, index=False)
165
+
166
+ all_logs = pd.read_csv(LOG_FILE)
167
+ tmp_dir = tempfile.mkdtemp()
168
+ tmp_path = os.path.join(tmp_dir, "data.parquet")
169
+ all_logs.to_parquet(tmp_path)
170
+ dataset = Dataset.from_parquet(tmp_path)
171
+ dataset.push_to_hub(DATASET_REPO, token=HF_TOKEN)
172
 
 
 
 
173
 
174
+ # ======================================================
175
+ # Start
176
+ # ======================================================
177
+
178
+ def start_test(student_id, level_input):
179
+ global current_user_id, current_level, used_passages
180
+ used_passages = set()
181
+
182
+ action = "start_pushed"
183
+ now = (datetime.utcnow() + timedelta(hours=9)).isoformat()
184
+
185
+ entry = {
186
+ "user_id": student_id,
187
+ "assigned_level": current_level,
188
  "passage_id": None,
189
  "original_level": None,
190
+ "action_time": now,
191
+ "action_type": action,
192
  "page_text": None
193
+ }
194
+ save_log(entry)
195
 
196
+ if not student_id or str(student_id).strip() == "":
197
+ return (
198
+ "", "", json.dumps([]), 0, "",
199
+ 0, "", None, None,
200
+ gr.update(interactive=False, visible=False),
201
+ gr.update(interactive=False, visible=True),
202
+ gr.update(interactive=False, visible=False)
203
+ )
204
 
205
+ current_user_id = str(student_id).strip()
206
+ current_level = int(level_input)
207
+
208
+ pid, text, orig_lev = get_new_passage_random()
209
  if text is None:
210
  return (
211
+ "教材が見つかりません", "", json.dumps([]), 0, "",
212
+ 0, "", None, None,
213
  gr.update(interactive=False, visible=False),
214
  gr.update(interactive=False, visible=False),
215
+ gr.update(interactive=False, visible=False)
216
  )
217
 
218
+ rewritten = rewrite_level(text, current_level)
219
  pages = split_pages(rewritten)
220
  total = len(pages)
221
 
222
+ if total == 1:
223
+ prev_upd = gr.update(interactive=False, visible=False)
224
+ next_upd = gr.update(interactive=False, visible=False)
225
+ finish_upd = gr.update(interactive=True, visible=True)
226
+ else:
227
+ prev_upd = gr.update(interactive=False, visible=False)
228
+ next_upd = gr.update(interactive=True, visible=True)
229
+ finish_upd = gr.update(interactive=False, visible=False)
230
+
231
+ page_num = 1
232
+ now = (datetime.utcnow() + timedelta(hours=9)).isoformat()
233
+
234
+ entry = {
235
+ "user_id": current_user_id,
236
+ "assigned_level": current_level,
237
  "passage_id": pid,
238
  "original_level": orig_lev,
239
+ "action_time": now,
240
+ "action_type": f"page_displayed_{page_num}",
241
  "page_text": pages[0]
242
+ }
243
+ save_log(entry)
244
 
245
  return (
246
  pages[0],
 
248
  json.dumps(pages, ensure_ascii=False),
249
  0,
250
  total,
251
+ pid,
252
+ orig_lev,
253
+ current_level,
254
+ prev_upd,
255
+ next_upd,
256
+ finish_upd
257
  )
258
 
259
+
260
+ # ======================================================
261
+ # Next / Prev / Finish(以下は元コードのまま)
262
+ # ======================================================
263
+
264
+ def next_page(pages_json, current_page, total_pages, pid, orig_lev):
265
+ now = (datetime.utcnow() + timedelta(hours=9)).isoformat()
266
+
267
+ entry = {
268
+ "user_id": current_user_id,
269
+ "assigned_level": current_level,
270
+ "passage_id": pid,
271
+ "original_level": orig_lev,
272
+ "action_time": now,
273
+ "action_type": "next_pushed",
274
+ "page_text": None
275
+ }
276
+ save_log(entry)
277
+
278
+ pages = json.loads(pages_json)
279
  if not pages:
280
+ return ("", "", json.dumps([]), 0,
281
  gr.update(interactive=False, visible=False),
282
  gr.update(interactive=False, visible=False),
283
+ gr.update(interactive=False, visible=False))
 
 
 
 
 
 
 
 
 
 
 
284
 
 
 
285
  new_page = min(current_page + 1, total_pages - 1)
286
 
287
+ now2 = (datetime.utcnow() + timedelta(hours=9)).isoformat()
288
+ entry2 = {
289
+ "user_id": current_user_id,
290
+ "assigned_level": current_level,
291
+ "passage_id": pid,
292
+ "original_level": orig_lev,
293
+ "action_time": now2,
294
  "action_type": f"page_displayed_{new_page+1}",
295
  "page_text": pages[new_page]
296
+ }
297
+ save_log(entry2)
298
 
299
+ if new_page == total_pages - 1:
300
+ return (
301
+ pages[new_page],
302
+ f"{new_page+1} / {total_pages}",
303
+ json.dumps(pages),
304
+ new_page,
305
+ gr.update(interactive=True, visible=True),
306
+ gr.update(interactive=False, visible=False),
307
+ gr.update(interactive=True, visible=True)
308
+ )
309
 
310
  return (
311
  pages[new_page],
312
  f"{new_page+1} / {total_pages}",
313
+ json.dumps(pages),
314
  new_page,
315
+ gr.update(interactive=(new_page > 0), visible=(new_page > 0)),
316
+ gr.update(interactive=True, visible=True),
317
+ gr.update(interactive=False, visible=False)
318
  )
319
 
320
+
321
+ def prev_page(pages_json, current_page, total_pages, pid, orig_lev):
322
+ now = (datetime.utcnow() + timedelta(hours=9)).isoformat()
323
+
324
+ entry = {
325
+ "user_id": current_user_id,
326
+ "assigned_level": current_level,
327
+ "passage_id": pid,
328
+ "original_level": orig_lev,
329
+ "action_time": now,
330
+ "action_type": "prev_pushed",
331
+ "page_text": None
332
+ }
333
+ save_log(entry)
334
+
335
+ pages = json.loads(pages_json)
336
  if not pages:
337
+ return ("", "", json.dumps([]), 0,
 
338
  gr.update(interactive=False, visible=False),
339
  gr.update(interactive=False, visible=False),
340
+ gr.update(interactive=False, visible=False))
 
 
 
 
 
 
 
 
 
 
341
 
 
 
342
  new_page = max(current_page - 1, 0)
343
 
 
 
 
 
 
 
 
 
 
 
344
  prev_upd = gr.update(interactive=(new_page > 0), visible=(new_page > 0))
345
  next_visible = (new_page < total_pages - 1)
346
  next_upd = gr.update(interactive=next_visible, visible=next_visible)
347
  finish_upd = gr.update(interactive=(not next_visible), visible=(not next_visible))
348
 
349
+ now2 = (datetime.utcnow() + timedelta(hours=9)).isoformat()
350
+ entry2 = {
351
+ "user_id": current_user_id,
352
+ "assigned_level": current_level,
353
+ "passage_id": pid,
354
+ "original_level": orig_lev,
355
+ "action_time": now2,
356
+ "action_type": f"page_displayed_{new_page+1}",
357
+ "page_text": pages[new_page]
358
+ }
359
+ save_log(entry2)
360
+
361
  return (
362
  pages[new_page],
363
  f"{new_page+1} / {total_pages}",
364
+ json.dumps(pages),
365
  new_page,
366
+ prev_upd,
367
+ next_upd,
368
+ finish_upd
369
  )
370
 
371
+
372
def finish_or_retire(pages_json, current_page, pid, orig_lev, action):
    """Log the end of the current passage and serve the next one.

    Writes one log entry for ``action`` on the passage being left, picks a
    new passage with ``get_new_passage_random()``, rewrites it for the
    module-level ``current_level``, splits it into pages, logs the display
    of page 1 and returns the values for the eleven bound UI outputs.

    Args:
        pages_json: JSON-encoded pages of the passage being left. Kept for
            interface compatibility; not needed to build the next passage.
        current_page: Index of the page shown when the button was pressed.
            Kept for interface compatibility; unused.
        pid: Identifier of the passage being left (logged as-is).
        orig_lev: Original level of the passage being left (logged as-is).
        action: Value stored in the log's ``action_type`` field; the click
            handlers pass ``"finished"`` or ``"retire"``.

    Returns:
        An 11-tuple, in output order: page text, progress label, JSON page
        list, page index, total pages, passage id, original level, assigned
        level, and the updates for the prev / next / finish buttons.
    """
    # Timestamps are UTC shifted by +9 hours, as in the other handlers.
    now = (datetime.utcnow() + timedelta(hours=9)).isoformat()
    save_log({
        "user_id": current_user_id,
        "assigned_level": current_level,
        "passage_id": pid,
        "original_level": orig_lev,
        "action_time": now,
        "action_type": action,
        "page_text": None,
    })

    new_pid, new_text, new_orig_lev = get_new_passage_random()
    if new_text is None:
        # Exactly eleven values, one per bound output. The previous version
        # carried a stray "" after the page index, which made the tuple
        # twelve long and shifted every later value by one slot.
        return (
            "教材がありません", "", json.dumps([]), 0,
            0, "", None, None,
            gr.update(interactive=False, visible=False),
            gr.update(interactive=False, visible=False),
            gr.update(interactive=False, visible=False),
        )

    rewritten = rewrite_level(new_text, current_level)
    new_pages = split_pages(rewritten)
    total = len(new_pages)

    # A fresh passage always opens on page 1, so "previous" is hidden.
    # A single-page passage offers "finish" directly; otherwise "next".
    single_page = (total == 1)
    prev_upd = gr.update(interactive=False, visible=False)
    next_upd = gr.update(interactive=not single_page, visible=not single_page)
    finish_upd = gr.update(interactive=single_page, visible=single_page)

    now2 = (datetime.utcnow() + timedelta(hours=9)).isoformat()
    save_log({
        "user_id": current_user_id,
        "assigned_level": current_level,
        "passage_id": new_pid,
        "original_level": new_orig_lev,
        "action_time": now2,
        "action_type": "page_displayed_1",
        "page_text": new_pages[0],
    })

    return (
        new_pages[0],
        # NOTE(review): progress label reconstructed to follow the
        # "<page> / <total>" format used by prev_page -- confirm.
        f"1 / {total}",
        json.dumps(new_pages, ensure_ascii=False),
        0,
        total,
        new_pid,
        new_orig_lev,
        current_level,
        prev_upd,
        next_upd,
        finish_upd,
    )
435
 
436
+
437
+ # ======================================================
438
+ # UI
439
+ # ======================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
440
# Stylesheet passed to gr.Blocks(css=...). It sets the shared typography and
# box styling for the reading area, then overrides colours separately for
# light and dark mode via the prefers-color-scheme media queries.
custom_css = """
/* ===============================
共通(両モード)
=============================== */
.big-text {
font-size: 22px !important;
line-height: 1.8 !important;
font-family: "Noto Sans", sans-serif !important;
}
/* 教材表示ボックス */
.reading-area {
padding: 20px !important;
border-radius: 12px !important;
border: 1px solid #ccc !important;
transition: background-color 0.2s ease, color 0.2s ease;
}
/* ===============================
ライトモード
=============================== */
@media (prefers-color-scheme: light) {
body, .gradio-container {
background-color: #ffffff !important;
color: #222 !important;
}
.reading-area {
background-color: #fafafa !important;
color: #222 !important;
border-color: #ddd !important;
}
textarea, input, .gr-textbox textarea {
background-color: #ffffff !important;
color: #222 !important;
border: 1px solid #ccc !important;
}
}
/* ===============================
ダークモード
=============================== */
@media (prefers-color-scheme: dark) {
body, .gradio-container {
background-color: #1e1e1e !important;
color: #e6e6e6 !important;
}
/* 教材の背景は黒すぎると読みにくいのでやや明るめのチャコール */
.reading-area {
background-color: #2a2a2a !important;
color: #f2f2f2 !important;
border-color: #444 !important;
}
textarea, input, .gr-textbox textarea {
background-color: #2c2c2c !important;
color: #f0f0f0 !important;
border: 1px solid #555 !important;
}
/* ボタンを見やすく */
button {
background-color: #3a3a3a !important;
color: #f0f0f0 !important;
border: 1px solid #555 !important;
}
button:hover {
background-color: #4a4a4a !important;
}
.gr-panel, .gr-box, .gr-group {
background-color: #272727 !important;
border-color: #444 !important;
}
}
"""
509
 
510
+
511
  with gr.Blocks(css=custom_css) as demo:
512
  gr.Markdown("# 📚 Reading Exercise")
513
 
514
  student_id_input = gr.Textbox(label="学生番号(必須)")
515
+ level_input = gr.Dropdown(
516
+ choices=[1,2,3,4,5],
517
+ label="あなたの Reading Level(Level Testの結果を選択)",
518
+ value=3
519
+ )
520
+
521
  start_btn = gr.Button("スタート")
522
 
523
+ text_display = gr.Textbox(
524
+ label="教材",
525
+ lines=18,
526
+ interactive=False,
527
+ elem_classes=["big-text", "reading-area"]
528
+ )
529
  page_display = gr.Textbox(label="進行状況", lines=1, interactive=False)
530
 
531
  hidden_pages = gr.Textbox(visible=False)
 
542
 
543
  retire_btn = gr.Button("リタイア")
544
 
 
 
545
  start_btn.click(
546
  fn=start_test,
547
+ inputs=[student_id_input, level_input],
548
  outputs=[
549
  text_display, page_display,
550
  hidden_pages, hidden_page_index,
551
  hidden_total_pages, hidden_passage_id,
552
  hidden_orig_lev, hidden_assigned_lev,
553
+ prev_btn, next_btn, finish_btn
554
+ ]
 
555
  )
556
 
557
  next_btn.click(
558
  fn=next_page,
559
+ inputs=[
560
+ hidden_pages, hidden_page_index,
561
+ hidden_total_pages, hidden_passage_id,
562
+ hidden_orig_lev
563
+ ],
564
+ outputs=[
565
+ text_display, page_display,
566
+ hidden_pages, hidden_page_index,
567
+ prev_btn, next_btn, finish_btn
568
+ ]
569
  )
570
 
571
  prev_btn.click(
572
  fn=prev_page,
573
+ inputs=[
574
+ hidden_pages, hidden_page_index,
575
+ hidden_total_pages, hidden_passage_id,
576
+ hidden_orig_lev
577
+ ],
578
+ outputs=[
579
+ text_display, page_display,
580
+ hidden_pages, hidden_page_index,
581
+ prev_btn, next_btn, finish_btn
582
+ ]
583
  )
584
 
585
  finish_btn.click(
586
+ fn=lambda p, i, pid, o: finish_or_retire(p, i, pid, o, "finished"),
587
+ inputs=[hidden_pages, hidden_page_index, hidden_passage_id, hidden_orig_lev],
588
  outputs=[
589
  text_display, page_display,
590
  hidden_pages, hidden_page_index,
591
  hidden_total_pages, hidden_passage_id,
592
  hidden_orig_lev, hidden_assigned_lev,
593
+ prev_btn, next_btn, finish_btn
594
+ ]
 
595
  )
596
 
597
  retire_btn.click(
598
+ fn=lambda p, i, pid, o: finish_or_retire(p, i, pid, o, "retire"),
599
+ inputs=[
600
+ hidden_pages, hidden_page_index,
601
+ hidden_passage_id, hidden_orig_lev
602
+ ],
603
  outputs=[
604
  text_display, page_display,
605
  hidden_pages, hidden_page_index,
606
  hidden_total_pages, hidden_passage_id,
607
  hidden_orig_lev, hidden_assigned_lev,
608
+ prev_btn, next_btn, finish_btn
609
+ ]
 
610
  )
611
 
612
+ demo.launch()