Toya0421 committed on
Commit
f9ce81f
·
verified ·
1 Parent(s): c92e8e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +266 -153
app.py CHANGED
@@ -1,206 +1,319 @@
1
  import gradio as gr
2
- import pandas as pd
3
- import datetime
4
- import json
5
- import uuid
6
- import os
7
- from datasets import load_dataset, Dataset
8
  from openai import OpenAI
 
 
 
 
9
 
 
10
  API_KEY = os.getenv("API_KEY")
 
11
  HF_TOKEN = os.getenv("HF_TOKEN")
12
- HF_DATASET = "Toya0421/reading_exercise_logging"
 
13
 
14
- client = OpenAI(api_key=API_KEY)
15
 
16
- # passage.csv 読み込み
17
- if os.path.exists("passage.csv") and os.path.getsize("passage.csv") > 0:
18
- passages_df = pd.read_csv("passage.csv")
19
- else:
20
- passages_df = pd.DataFrame(columns=["passage_id","genre","text","original_lexile_score"])
 
 
 
 
21
 
22
- # ページ分割(1ページ=約120ワード)
23
- def split_into_pages(text, words_per_page=120):
24
- words = text.split()
25
- pages = [" ".join(words[i:i+words_per_page]) for i in range(0, len(words), words_per_page)]
26
- return pages
 
27
 
28
- # 書き換え
29
  def rewrite_to_lexile(text, target_lexile):
30
  prompt = f"""
31
- You are an expert at leveling reading materials based on Lexile measures.
32
-
33
- Rewrite the following English passage so that the estimated Lexile score is close to {target_lexile}.
34
- - Keep original meaning.
35
- - Keep length similar.
36
- - Use vocabulary and structure appropriate for the target Lexile.
37
-
38
- Return ONLY the rewritten passage. Do not add explanations.
39
 
40
  {text}
41
  """
42
  resp = client.chat.completions.create(
43
- model="gpt-5",
44
  messages=[{"role": "user", "content": prompt}],
45
- temperature=0.3,
 
46
  )
47
  return resp.choices[0].message.content.strip()
48
 
49
- # Hugging Faceにログ保存
50
- def save_log_to_hf(row):
51
- try:
52
- dataset = load_dataset(HF_DATASET, split="train")
53
- dataset = dataset.add_item(row)
54
- dataset.push_to_hub(HF_DATASET)
55
- except:
56
- dataset = Dataset.from_list([row])
57
- dataset.push_to_hub(HF_DATASET)
58
-
59
- # ローカルにも追記
60
- def save_local_csv(row):
61
- df = pd.DataFrame([row])
62
- if os.path.exists("full_log.csv"):
63
- df.to_csv("full_log.csv", mode="a", header=False, index=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  else:
65
- df.to_csv("full_log.csv", index=False)
66
-
67
- # ✅ 状態管理
68
- session_state = {
69
- "user_id": None,
70
- "genre": None,
71
- "lexile": None,
72
- "passage_id": None,
73
- "original_lexile": None,
74
- "pages": [],
75
- "current_page": 0,
76
- "actions": [],
77
- "start_time": None,
78
- }
79
-
80
- # テスト開始
81
  def start_test(student_id, genre, lexile):
82
- if not student_id:
83
- return "学生番号を入力してください", "", "", gr.update(interactive=False)
 
84
 
85
- session_state["user_id"] = student_id
86
- session_state["genre"] = genre
87
- session_state["lexile"] = int(lexile)
 
 
88
 
89
- row = passages_df[passages_df["genre"] == genre].sample(1).iloc[0]
90
- pid = row["passage_id"]
91
- orig_text = row["text"]
92
- orig_lex = row["original_lexile_score"]
93
 
94
- rewritten = rewrite_to_lexile(orig_text, session_state["lexile"])
95
- pages = split_into_pages(rewritten)
 
96
 
97
- session_state.update({
98
- "passage_id": pid,
99
- "original_lexile": orig_lex,
100
- "pages": pages,
101
- "current_page": 0,
102
- "start_time": datetime.datetime.now().isoformat(),
103
- "actions": ["first_page_displayed"]
104
- })
105
 
106
  return (
107
  pages[0],
108
- f"1 / {len(pages)} ページ",
109
- "",
110
- gr.update(interactive=True)
 
 
 
 
 
 
 
111
  )
112
 
113
- def next_page():
114
- if session_state["current_page"] < len(session_state["pages"]) - 1:
115
- session_state["current_page"] += 1
116
- session_state["actions"].append("next_page")
117
- page = session_state["current_page"]
118
- return session_state["pages"][page], f"{page+1} / {len(session_state['pages'])} ページ"
119
-
120
- def prev_page():
121
- if session_state["current_page"] > 0:
122
- session_state["current_page"] -= 1
123
- session_state["actions"].append("prev_page")
124
- page = session_state["current_page"]
125
- return session_state["pages"][page], f"{page+1} / {len(session_state['pages'])} ページ"
126
-
127
- # ✅ 読み終えた or リタイア → 次の教材へ
128
- def finish(action):
129
- finished = datetime.datetime.now().isoformat()
130
-
131
- # ✅ ログ1行を保存
132
- log_row = {
133
- "user_id": session_state["user_id"],
134
- "genre": session_state["genre"],
135
- "lexile": session_state["lexile"],
136
- "passage_id": session_state["passage_id"],
137
- "original_lexile": session_state["original_lexile"],
138
- "start_time": session_state["start_time"],
139
- "finished_time": finished,
140
- "user_action": json.dumps(session_state["actions"], ensure_ascii=False),
141
- }
142
 
143
- save_log_to_hf(log_row)
144
- save_local_csv(log_row)
 
 
 
 
 
 
 
 
 
 
145
 
146
- # 次の教材選択
147
- if action == "finished":
148
- next_df = passages_df[passages_df["genre"] == session_state["genre"]]
149
- else: # retire
150
- next_df = passages_df[passages_df["genre"] != session_state["genre"]]
151
 
152
- if len(next_df) == 0:
153
- return "教材がありません。終了です。", ""
 
 
 
 
 
 
 
 
 
 
 
154
 
155
- row = next_df.sample(1).iloc[0]
156
- pid = row["passage_id"]
157
- orig_text = row["text"]
158
- orig_lex = row["original_lexile_score"]
 
159
 
160
- rewritten = rewrite_to_lexile(orig_text, session_state["lexile"])
161
- pages = split_into_pages(rewritten)
162
 
163
- session_state.update({
164
- "passage_id": pid,
165
- "original_lexile": orig_lex,
166
- "pages": pages,
167
- "current_page": 0,
168
- "start_time": datetime.datetime.now().isoformat(),
169
- "actions": ["first_page_displayed"]
170
- })
171
 
172
- return pages[0], f"1 / {len(pages)} ページ"
 
173
 
174
- # ✅ Gradio UI
175
- genres = [
176
- "Literature","Science&Technology","History","Social Science&Society",
177
- "Arts&Culture","Religion&Philosophy","Lifestyle&Hobbies",
178
- "Health&Medicine","Education&Reference"
179
- ]
 
 
 
 
 
 
 
 
 
 
 
180
 
181
  with gr.Blocks() as demo:
182
- gr.Markdown("## Lexile Reading Exercise")
183
 
184
- student_id = gr.Textbox(label="学生番号(必須)")
185
- lexile = gr.Number(label="受検者のLexile")
186
- genre_select = gr.Dropdown(choices=genres, label="ジャンルを1つ選択")
187
 
188
  start_btn = gr.Button("スタート")
189
 
190
- text_display = gr.Textbox(label="教材", lines=15)
191
- page_display = gr.Textbox(label="ページ")
192
- debug_out = gr.Textbox(label="デバッグ用(任意)")
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
  prev_btn = gr.Button("前へ")
195
  next_btn = gr.Button("次へ")
196
  finish_btn = gr.Button("読み終えた")
197
  retire_btn = gr.Button("リタイア")
198
 
199
- start_btn.click(start_test, [student_id, genre_select, lexile], [text_display, page_display, debug_out, finish_btn])
200
- next_btn.click(next_page, [], [text_display, page_display])
201
- prev_btn.click(prev_page, [], [text_display, page_display])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
 
203
- finish_btn.click(lambda: finish("finished"), [], [text_display, page_display])
204
- retire_btn.click(lambda: finish("retire"), [], [text_display, page_display])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  demo.launch()
 
1
  import gradio as gr
 
 
 
 
 
 
2
  from openai import OpenAI
3
+ from datasets import Dataset
4
+ from datetime import datetime, timedelta
5
+ import pandas as pd
6
+ import time, os, random, uuid, tempfile, json
7
 
8
# --- API settings ---
# Credentials are read from the environment; os.getenv returns None when unset.
API_KEY = os.getenv("API_KEY")
BASE_URL = "https://openrouter.ai/api/v1"
HF_TOKEN = os.getenv("HF_TOKEN")
DATASET_REPO = "Toya0421/reading_exercise_logging"
LOG_FILE = "reading_logs.csv"

client = OpenAI(base_url=BASE_URL, api_key=API_KEY)

# --- Load passage.csv ---
# (columns: passage_id, genre, text, original_lexile_score)
# A missing or zero-byte file must not crash the app at import time: fall back
# to an empty frame with the expected columns so the handlers can report
# "no passages" instead.
if os.path.exists("passage.csv") and os.path.getsize("passage.csv") > 0:
    passages_df = pd.read_csv("passage.csv")
else:
    passages_df = pd.DataFrame(
        columns=["passage_id", "genre", "text", "original_lexile_score"]
    )

genres = [
    "Literature", "Science&Technology", "History", "Social Science&Society",
    "Arts&Culture", "Religion&Philosophy", "Lifestyle&Hobbies",
    "Health&Medicine", "Education&Reference",
]

# --- State ---
# NOTE(review): these are module-level globals, so they are shared by every
# browser session served by this process — confirm single-user use is intended.
used_passages = set()    # passage_ids already served
current_user_id = None   # student id entered on the start form
current_genre = None     # genre chosen on the start form
current_lexile = None    # target Lexile as an int
action_log = []          # page-navigation events (next/prev) for the current passage
33
 
34
# --- Rewriting ---
def rewrite_to_lexile(text, target_lexile):
    """Ask the chat model to rewrite *text* at roughly *target_lexile*.

    Args:
        text: The original passage.
        target_lexile: The Lexile level interpolated into the prompt.

    Returns:
        The rewritten passage with surrounding whitespace stripped. If the
        model returns no choices, or empty/``None`` content, the original
        text (stripped) is returned so callers always have something to page.
    """
    prompt = f"""
Rewrite the following passage so it fits about {target_lexile} Lexile.
- Keep original meaning and length
- Avoid figurative language
- Use simple syntax
- Output only the rewritten passage

{text}
"""
    resp = client.chat.completions.create(
        model="google/gemma-3-27b-it:free",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.4,
        max_tokens=1000,
    )

    # message.content may be None; calling .strip() on it would raise.
    content = resp.choices[0].message.content if resp.choices else None
    rewritten = (content or "").strip()
    return rewritten if rewritten else text.strip()
52
 
53
# --- Page splitting ---
def split_pages(text, words=120):
    """Split *text* into pages of at most *words* whitespace-separated words.

    Args:
        text: The passage to paginate.
        words: Maximum number of words per page; must be at least 1.

    Returns:
        A non-empty list of page strings. Text with no words yields a single
        empty page, so callers can always index ``pages[0]``.

    Raises:
        ValueError: If *words* is less than 1.
    """
    if words < 1:
        raise ValueError("words must be a positive integer")

    tokens = text.split()
    if not tokens:
        return [""]

    return [
        " ".join(tokens[start:start + words])
        for start in range(0, len(tokens), words)
    ]
57
+
58
# --- Fetch the next passage (same genre or a different genre) ---
def get_new_passage(same_genre=True):
    """Pick a random passage that has not been served yet.

    Args:
        same_genre: When true, choose among passages whose genre equals
            ``current_genre``; otherwise choose among all other genres.

    Returns:
        A ``(passage_id, text, original_lexile_score)`` tuple, or
        ``(None, None, None)`` when the selected pool has no passages.
    """
    # No ``global`` statement needed: used_passages is only mutated in place.
    genre_matches = passages_df["genre"] == current_genre
    pool = passages_df[genre_matches] if same_genre else passages_df[~genre_matches]

    if pool.empty:
        return None, None, None

    pool_ids = list(pool["passage_id"])
    available = [pid for pid in pool_ids if pid not in used_passages]

    if not available:
        # Pool exhausted: forget only this pool's ids so history for passages
        # outside the pool is kept.
        used_passages.difference_update(pool_ids)
        available = pool_ids

    pid = random.choice(available)
    row = pool[pool["passage_id"] == pid].iloc[0]
    used_passages.add(pid)

    return pid, row["text"], row["original_lexile_score"]
82
+
83
# --- Log persistence ---
def save_log(entry):
    """Append one log row to the local CSV and push the full log to the Hub.

    Args:
        entry: A dict representing one log row (column name -> value).

    Side effects:
        Appends to ``LOG_FILE`` and uploads the whole file's contents to
        ``DATASET_REPO`` using ``HF_TOKEN``.
    """
    row = pd.DataFrame([entry])

    # Write the header when the file is new *or* exists but is empty;
    # otherwise the first data row would later be read back as the header.
    needs_header = (
        not os.path.exists(LOG_FILE) or os.path.getsize(LOG_FILE) == 0
    )
    row.to_csv(LOG_FILE, mode="a", index=False, header=needs_header)

    # Push the complete log. Building the Dataset straight from the DataFrame
    # avoids the parquet temp directory that was created on every call and
    # never removed.
    all_logs = pd.read_csv(LOG_FILE)
    dataset = Dataset.from_pandas(all_logs, preserve_index=False)
    dataset.push_to_hub(DATASET_REPO, token=HF_TOKEN)
101
+
102
+ # =========================
103
+ # Gradio動作
104
+ # =========================
105
+
106
# --- Start ---
def start_test(student_id, genre, lexile):
    """Begin a reading session and return the first page plus hidden state.

    Args:
        student_id: Text from the student-number field; required.
        genre: The selected genre.
        lexile: The value of the Lexile number field (``None`` when empty).

    Returns:
        An 11-tuple matching the start button's outputs: page text, progress
        label, pages as JSON, current page index, feedback message,
        visibility flag, start time, page count, passage id, original Lexile,
        assigned Lexile. On invalid input or when no passage is available,
        the feedback slot carries the message and the rest are blank.
    """
    global current_user_id, current_genre, current_lexile, used_passages, action_log
    used_passages = set()
    action_log = []

    def _abort(message):
        # Blank value for every output, with the message in the feedback slot.
        return ("", "", "", 0, message, False, "", 0, "", "", 0)

    if not student_id or not student_id.strip():
        return _abort("⚠️ 学生番号を入力してください")

    # An empty Number field arrives as None; int(None) would raise TypeError.
    if lexile is None:
        return _abort("⚠️ Lexileを入力してください")

    current_user_id = student_id.strip()
    current_genre = genre
    current_lexile = int(lexile)

    pid, text, orig_lex = get_new_passage(same_genre=True)
    if text is None:
        return _abort("教材がありません")

    rewritten = rewrite_to_lexile(text, current_lexile)
    # Guarantee at least one page so pages[0] cannot raise IndexError.
    pages = split_pages(rewritten) or [""]

    # Naive timestamp shifted to UTC+9.
    start_time = (datetime.utcnow() + timedelta(hours=9)).isoformat()

    return (
        pages[0],
        f"1 / {len(pages)}",
        json.dumps(pages, ensure_ascii=False),
        0,       # current_page
        "",      # feedback
        True,    # visibility flag on
        start_time,
        len(pages),
        pid,
        orig_lex,
        current_lexile,
    )
144
 
145
# --- Next page ---
def next_page(pages_json, current_page):
    """Advance one page, if possible, and return the updated view state.

    Args:
        pages_json: JSON-encoded list of page strings (may be empty/None
            before a session has started).
        current_page: Zero-based index of the page currently shown; may be
            None or a float when it comes from a Number component.

    Returns:
        ``(page_text, progress_label, pages_json, page_index)``. When there
        are no pages, blank text/label and index 0 are returned.
    """
    pages = json.loads(pages_json) if pages_json else []
    if not pages:
        # Button clicked before a session started: nothing to show.
        return "", "", pages_json or "", 0

    # Normalise the index: tolerate None/float and keep it within bounds.
    last = len(pages) - 1
    index = max(0, min(int(current_page or 0), last))

    if index < last:
        index += 1
        # Only real page turns are logged (timestamp shifted to UTC+9).
        action_log.append({
            "action": "next",
            "time": (datetime.utcnow() + timedelta(hours=9)).isoformat(),
        })

    return (
        pages[index],
        f"{index + 1} / {len(pages)}",
        json.dumps(pages, ensure_ascii=False),
        index,
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
# --- Previous page ---
def prev_page(pages_json, current_page):
    """Go back one page, if possible, and return the updated view state.

    Args:
        pages_json: JSON-encoded list of page strings (may be empty/None
            before a session has started).
        current_page: Zero-based index of the page currently shown; may be
            None or a float when it comes from a Number component.

    Returns:
        ``(page_text, progress_label, pages_json, page_index)``. When there
        are no pages, blank text/label and index 0 are returned.
    """
    pages = json.loads(pages_json) if pages_json else []
    if not pages:
        # Button clicked before a session started: nothing to show.
        return "", "", pages_json or "", 0

    # Normalise the index: tolerate None/float and keep it within bounds.
    index = max(0, min(int(current_page or 0), len(pages) - 1))

    if index > 0:
        index -= 1
        # Only real page turns are logged (timestamp shifted to UTC+9).
        action_log.append({
            "action": "prev",
            "time": (datetime.utcnow() + timedelta(hours=9)).isoformat(),
        })

    return (
        pages[index],
        f"{index + 1} / {len(pages)}",
        json.dumps(pages, ensure_ascii=False),
        index,
    )
170
 
171
# --- Finished (next passage from the same genre) or retire (different genre) ---
def finish_or_retire(pages_json, current_page, pid, orig_lex, start_time, action):
    """Log the outcome for the current passage and load the next one.

    Args:
        pages_json: JSON-encoded pages of the current passage (unused here;
            kept so the click handlers' inputs stay unchanged).
        current_page: Current page index (unused here; kept for the same
            reason).
        pid: Id of the passage being finished or abandoned.
        orig_lex: Original Lexile score of that passage.
        start_time: Timestamp recorded when that passage was first shown.
        action: ``"finished"`` selects the next passage from the same genre;
            any other value selects one from a different genre.

    Returns:
        An 11-tuple matching the finish/retire buttons' outputs: page text,
        progress label, pages as JSON, page index, feedback, visibility
        flag, start time, page count, passage id, original Lexile, assigned
        Lexile.
    """
    # No passage is active (button pressed before start, or after the
    # passages ran out): do not write a meaningless log row.
    if pid in (None, "") or not start_time:
        return (
            "", "", "", 0,
            "⚠️ 先にスタートを押してください", False, "", 0, "", "", 0,
        )

    now = (datetime.utcnow() + timedelta(hours=9)).isoformat()

    # Persist the log row for the passage that just ended.
    entry = {
        "user_id": current_user_id,
        "genre": current_genre,
        "lexile_assigned": current_lexile,
        "passage_id": pid,
        "original_lexile": orig_lex,
        "start_time": start_time,
        "finished_time": now,
        "actions": json.dumps(action_log, ensure_ascii=False),
        "result": action,
    }
    save_log(entry)

    # Fetch the next passage.
    new_pid, new_text, new_orig_lex = get_new_passage(
        same_genre=(action == "finished")
    )

    if new_text is None:
        return "教材がありません", "", "", 0, "", False, "", 0, "", "", 0

    rewritten = rewrite_to_lexile(new_text, current_lexile)
    # Guarantee at least one page so new_pages[0] cannot raise IndexError.
    new_pages = split_pages(rewritten) or [""]
    new_start = (datetime.utcnow() + timedelta(hours=9)).isoformat()

    # The navigation log belongs to one passage; start fresh for the next.
    action_log.clear()

    return (
        new_pages[0],
        f"1 / {len(new_pages)}",
        json.dumps(new_pages, ensure_ascii=False),
        0,
        "",
        True,
        new_start,
        len(new_pages),
        new_pid,
        new_orig_lex,
        current_lexile,
    )
219
+
220
# =========================
# UI construction
# =========================

with gr.Blocks() as demo:
    gr.Markdown("# 📚 Reading Exercise")

    student_id_input = gr.Textbox(label="学生番号", placeholder="例: B123456")
    lexile_input = gr.Number(label="Lexile(例: 900)")
    genre_input = gr.Dropdown(choices=genres, label="ジャンル")

    start_btn = gr.Button("スタート")

    # Generous display area for the passage.
    text_display = gr.Textbox(label="教材", lines=15, interactive=False)
    page_display = gr.Textbox(label="進行状況", lines=1, interactive=False)

    # Hidden components that carry per-passage state between callbacks.
    hidden_pages = gr.Textbox(visible=False)
    hidden_page_index = gr.Number(visible=False)
    hidden_start_time = gr.Textbox(visible=False)
    hidden_total_pages = gr.Number(visible=False)
    hidden_passage_id = gr.Textbox(visible=False)
    hidden_orig_lex = gr.Textbox(visible=False)
    hidden_assigned_lex = gr.Textbox(visible=False)

    visible_state = gr.State(False)

    feedback = gr.Textbox(label="メッセージ", interactive=False)

    prev_btn = gr.Button("前へ")
    next_btn = gr.Button("次へ")
    finish_btn = gr.Button("読み終えた")
    retire_btn = gr.Button("リタイア")

    # start_test and finish_or_retire both return the same 11 values, in this
    # order; share one list so the three wirings cannot drift apart.
    session_outputs = [
        text_display, page_display,
        hidden_pages, hidden_page_index,
        feedback, visible_state,
        hidden_start_time, hidden_total_pages,
        hidden_passage_id, hidden_orig_lex, hidden_assigned_lex,
    ]
    paging_io = [hidden_pages, hidden_page_index]
    finish_inputs = [
        hidden_pages, hidden_page_index,
        hidden_passage_id, hidden_orig_lex, hidden_start_time,
    ]

    # --- start ---
    start_btn.click(
        fn=start_test,
        inputs=[student_id_input, genre_input, lexile_input],
        outputs=session_outputs,
    )

    # --- next ---
    next_btn.click(
        fn=next_page,
        inputs=paging_io,
        outputs=[text_display, page_display, hidden_pages, hidden_page_index],
    )

    # --- prev ---
    prev_btn.click(
        fn=prev_page,
        inputs=paging_io,
        outputs=[text_display, page_display, hidden_pages, hidden_page_index],
    )

    # --- finished reading ---
    finish_btn.click(
        fn=lambda p, i, pid, o, st: finish_or_retire(p, i, pid, o, st, "finished"),
        inputs=finish_inputs,
        outputs=session_outputs,
    )

    # --- retire ---
    retire_btn.click(
        fn=lambda p, i, pid, o, st: finish_or_retire(p, i, pid, o, st, "retire"),
        inputs=finish_inputs,
        outputs=session_outputs,
    )

    # Show/hide the reading area.
    def toggle(show):
        """Return visibility updates for the passage, progress and paging buttons."""
        v = bool(show)
        return (
            gr.update(visible=v), gr.update(visible=v),
            gr.update(visible=v), gr.update(visible=v),
        )

    # The feedback box is deliberately not toggled: error messages are
    # returned together with visible_state=False, and hiding the box would
    # hide the message as well.
    visible_state.change(
        fn=toggle,
        inputs=visible_state,
        outputs=[text_display, page_display, prev_btn, next_btn],
    )

demo.launch()