yashm committed on
Commit
5be6f35
·
verified ·
1 Parent(s): 0c46cfb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +519 -228
app.py CHANGED
@@ -1,61 +1,81 @@
 
1
  import random
2
- import pandas as pd
 
 
 
3
  import gradio as gr
 
4
  from datasets import load_dataset
5
 
6
  DATASET_REPO = "yashm/bioinformatics-qa-dataset"
7
  RANDOM_SEED = 42
 
8
  random.seed(RANDOM_SEED)
9
 
10
 
11
- def load_data():
 
12
  ds = load_dataset(DATASET_REPO)
13
 
14
  frames = []
15
  for split_name in ds.keys():
16
- split_df = ds[split_name].to_pandas().copy()
17
- split_df["split"] = split_name
18
- frames.append(split_df)
19
 
20
  df = pd.concat(frames, ignore_index=True)
21
 
22
  required = ["id", "topic", "question", "answer"]
23
  missing = [c for c in required if c not in df.columns]
24
  if missing:
25
- raise ValueError(f"Missing required columns in dataset: {missing}")
26
 
27
  df = df[["id", "topic", "question", "answer", "split"]].copy()
28
- for col in ["topic", "question", "answer"]:
29
  df[col] = df[col].astype(str).str.strip()
30
 
31
  df = df.dropna(subset=["topic", "question", "answer"])
32
  df = df[(df["question"] != "") & (df["answer"] != "")]
 
33
  df = df.reset_index(drop=True)
34
 
35
  return df
36
 
37
 
38
- df = load_data()
39
- ALL_TOPICS = ["All"] + sorted(df["topic"].unique().tolist())
40
- GLOBAL_UNIQUE_ANSWERS = df["answer"].dropna().unique().tolist()
41
 
42
 
43
- def stats_text(correct, total, streak, best_streak):
44
- acc = (correct / total * 100.0) if total > 0 else 0.0
 
 
45
  return (
46
- f"Score: {correct}/{total} | "
47
- f"Accuracy: {acc:.1f}% | "
48
- f"Streak: {streak} | "
49
- f"Best Streak: {best_streak}"
50
  )
51
 
52
 
53
- def filter_df(topic, keyword):
54
- out = df.copy()
 
 
 
 
 
 
 
 
55
 
56
- if topic and topic != "All":
57
  out = out[out["topic"] == topic]
58
 
 
 
 
59
  if keyword and keyword.strip():
60
  q = keyword.strip().lower()
61
  out = out[
@@ -64,105 +84,267 @@ def filter_df(topic, keyword):
64
  | out["answer"].str.lower().str.contains(q, na=False)
65
  ]
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  return out.reset_index(drop=True)
68
 
69
 
70
- def explore(topic, keyword, limit):
71
- out = filter_df(topic, keyword)
72
- return out.head(int(limit))
 
 
 
 
 
 
 
 
 
 
 
 
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
- def get_random_example(topic, keyword):
76
- out = filter_df(topic, keyword)
77
- if out.empty:
78
- return "No matching rows found.", "", "", ""
79
- row = out.sample(1).iloc[0]
80
- return row["topic"], row["question"], row["answer"], row["split"]
81
 
82
 
83
- def generate_question(topic):
84
- pool = df if topic == "All" else df[df["topic"] == topic]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  if pool.empty:
86
  return (
87
- "No questions available for this topic.",
88
  gr.update(choices=[], value=None),
89
  "",
90
  "",
 
 
91
  )
92
 
93
- row = pool.sample(1).iloc[0]
94
- q_topic = row["topic"]
95
- q_text = row["question"]
96
- correct = row["answer"]
97
-
98
- same_topic_answers = (
99
- df[df["topic"] == q_topic]["answer"]
100
- .dropna()
101
- .astype(str)
102
- .str.strip()
103
- .unique()
104
- .tolist()
105
- )
106
- same_topic_answers = [a for a in same_topic_answers if a and a != correct]
107
 
108
- distractors = []
109
- if len(same_topic_answers) >= 3:
110
- distractors = random.sample(same_topic_answers, 3)
 
111
  else:
112
- distractors.extend(same_topic_answers)
113
- need = 3 - len(distractors)
114
- fallback_pool = [
115
- a for a in GLOBAL_UNIQUE_ANSWERS
116
- if a != correct and a not in distractors
117
- ]
118
- if len(fallback_pool) < need:
119
- return (
120
- "Not enough unique answers to build 4 choices.",
121
- gr.update(choices=[], value=None),
122
- "",
123
- "",
124
- )
125
- distractors.extend(random.sample(fallback_pool, need))
 
 
 
 
 
 
 
126
 
127
- choices = distractors + [correct]
128
- random.shuffle(choices)
 
129
 
130
- shown = f"Topic: {q_topic}\n\nQuestion: {q_text}"
131
- return shown, gr.update(choices=choices, value=None), correct, q_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
 
134
  def submit_and_next(
135
- selected_answer,
136
- current_correct_answer,
137
- current_question,
138
- topic_filter,
139
- correct_count,
140
- total_count,
141
- streak,
142
- best_streak,
 
 
143
  ):
144
- if not current_correct_answer or not current_question:
145
  return (
146
- "Click 'Start Quiz' to begin.",
147
- stats_text(correct_count, total_count, streak, best_streak),
148
  gr.update(),
149
  gr.update(),
150
- current_correct_answer,
151
- current_question,
 
 
 
 
152
  correct_count,
153
  total_count,
154
  streak,
155
  best_streak,
156
  )
157
 
158
- if not selected_answer:
159
  return (
160
- "Please select one option.",
161
- stats_text(correct_count, total_count, streak, best_streak),
162
  gr.update(),
163
  gr.update(),
164
- current_correct_answer,
165
- current_question,
 
 
 
 
166
  correct_count,
167
  total_count,
168
  streak,
@@ -170,32 +352,37 @@ def submit_and_next(
170
  )
171
 
172
  total_count += 1
173
- if selected_answer == current_correct_answer:
174
  correct_count += 1
175
  streak += 1
176
  best_streak = max(best_streak, streak)
177
  result = (
178
  "Correct.\n\n"
179
- f"Your answer: {selected_answer}\n\n"
180
- f"Reference answer: {current_correct_answer}"
181
  )
182
  else:
183
  streak = 0
184
  result = (
185
  "Incorrect.\n\n"
186
- f"Your answer: {selected_answer}\n\n"
187
- f"Correct answer: {current_correct_answer}"
188
  )
189
 
190
- next_q, next_choices, next_correct, next_question = generate_question(topic_filter)
 
 
191
 
192
  return (
193
  result,
194
- stats_text(correct_count, total_count, streak, best_streak),
195
  next_q,
196
  next_choices,
197
  next_correct,
198
- next_question,
 
 
 
 
199
  correct_count,
200
  total_count,
201
  streak,
@@ -203,159 +390,263 @@ def submit_and_next(
203
  )
204
 
205
 
206
- def start_quiz(topic_filter, correct_count, total_count, streak, best_streak):
207
- q, choices, correct, question = generate_question(topic_filter)
208
  return (
209
- q,
210
- choices,
211
- correct,
212
- question,
213
- stats_text(correct_count, total_count, streak, best_streak),
214
- "Quiz started. Pick an option and click Submit.",
215
  )
216
 
217
 
218
- def reset_score():
219
- correct_count = 0
220
- total_count = 0
221
- streak = 0
222
- best_streak = 0
223
- return (
224
- correct_count,
225
- total_count,
226
- streak,
227
- best_streak,
228
- stats_text(correct_count, total_count, streak, best_streak),
229
- "Score reset. Click Start Quiz.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  )
231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
 
233
- with gr.Blocks(title="Bioinformatics QA Quiz Demo", theme=gr.themes.Soft()) as demo:
234
- gr.Markdown(
235
- """
236
- # Bioinformatics QA Demo
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
- Interactive demo built from the public dataset:
239
- **yashm/bioinformatics-qa-dataset**
 
 
 
240
 
241
- This app is for learning and research purposes only.
242
- Use with caution. Validate information before high-stakes use.
243
- """
244
- )
 
245
 
246
- with gr.Tab("Explore Dataset"):
247
- with gr.Row():
248
- ex_topic = gr.Dropdown(choices=ALL_TOPICS, value="All", label="Topic")
249
- ex_keyword = gr.Textbox(label="Keyword", placeholder="Search topic, question, or answer")
250
- ex_limit = gr.Slider(minimum=5, maximum=100, value=15, step=5, label="Rows")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
 
252
- with gr.Row():
253
- ex_search_btn = gr.Button("Search")
254
- ex_random_btn = gr.Button("Random Example")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
 
256
- ex_table = gr.Dataframe(
257
- headers=["id", "topic", "question", "answer", "split"],
258
- label="Matching Rows",
259
- wrap=True,
260
- )
 
 
 
 
 
 
 
261
 
262
- gr.Markdown("### Random Example")
263
- ex_r_topic = gr.Textbox(label="Topic", interactive=False)
264
- ex_r_question = gr.Textbox(label="Question", lines=4, interactive=False)
265
- ex_r_answer = gr.Textbox(label="Answer", lines=6, interactive=False)
266
- ex_r_split = gr.Textbox(label="Split", interactive=False)
267
 
268
- ex_search_btn.click(
269
- fn=explore,
270
- inputs=[ex_topic, ex_keyword, ex_limit],
271
- outputs=[ex_table],
272
- )
273
 
274
- ex_random_btn.click(
275
- fn=get_random_example,
276
- inputs=[ex_topic, ex_keyword],
277
- outputs=[ex_r_topic, ex_r_question, ex_r_answer, ex_r_split],
278
- )
279
 
280
- with gr.Tab("Quiz Yourself"):
281
- with gr.Row():
282
- quiz_topic = gr.Dropdown(choices=ALL_TOPICS, value="All", label="Topic Filter")
283
- start_btn = gr.Button("Start Quiz")
284
- reset_btn = gr.Button("Reset Score")
285
-
286
- quiz_stats = gr.Markdown(value=stats_text(0, 0, 0, 0))
287
- quiz_status = gr.Textbox(label="Status", interactive=False)
288
-
289
- quiz_question = gr.Textbox(label="Question", lines=5, interactive=False)
290
- quiz_choices = gr.Radio(choices=[], label="Choose one answer")
291
- submit_btn = gr.Button("Submit (auto next question)")
292
-
293
- quiz_result = gr.Textbox(label="Last Result", lines=6, interactive=False)
294
-
295
- correct_state = gr.State("")
296
- question_state = gr.State("")
297
-
298
- correct_count_state = gr.State(0)
299
- total_count_state = gr.State(0)
300
- streak_state = gr.State(0)
301
- best_streak_state = gr.State(0)
302
-
303
- start_btn.click(
304
- fn=start_quiz,
305
- inputs=[
306
- quiz_topic,
307
- correct_count_state,
308
- total_count_state,
309
- streak_state,
310
- best_streak_state,
311
- ],
312
- outputs=[
313
- quiz_question,
314
- quiz_choices,
315
- correct_state,
316
- question_state,
317
- quiz_stats,
318
- quiz_status,
319
- ],
320
- )
321
 
322
- submit_btn.click(
323
- fn=submit_and_next,
324
- inputs=[
325
- quiz_choices,
326
- correct_state,
327
- question_state,
328
- quiz_topic,
329
- correct_count_state,
330
- total_count_state,
331
- streak_state,
332
- best_streak_state,
333
- ],
334
- outputs=[
335
- quiz_result,
336
- quiz_stats,
337
- quiz_question,
338
- quiz_choices,
339
- correct_state,
340
- question_state,
341
- correct_count_state,
342
- total_count_state,
343
- streak_state,
344
- best_streak_state,
345
- ],
346
- )
347
 
348
- reset_btn.click(
349
- fn=reset_score,
350
- inputs=[],
351
- outputs=[
352
- correct_count_state,
353
- total_count_state,
354
- streak_state,
355
- best_streak_state,
356
- quiz_stats,
357
- quiz_status,
358
- ],
359
- )
360
 
361
  demo.launch()
 
1
+ import os
2
  import random
3
+ import tempfile
4
+ from functools import lru_cache
5
+ from typing import Dict, List, Tuple
6
+
7
  import gradio as gr
8
+ import pandas as pd
9
  from datasets import load_dataset
10
 
11
  DATASET_REPO = "yashm/bioinformatics-qa-dataset"
12
  RANDOM_SEED = 42
13
+
14
  random.seed(RANDOM_SEED)
15
 
16
 
17
+ @lru_cache(maxsize=1)
18
+ def load_data() -> pd.DataFrame:
19
  ds = load_dataset(DATASET_REPO)
20
 
21
  frames = []
22
  for split_name in ds.keys():
23
+ part = ds[split_name].to_pandas().copy()
24
+ part["split"] = split_name
25
+ frames.append(part)
26
 
27
  df = pd.concat(frames, ignore_index=True)
28
 
29
  required = ["id", "topic", "question", "answer"]
30
  missing = [c for c in required if c not in df.columns]
31
  if missing:
32
+ raise ValueError(f"Dataset is missing required columns: {missing}")
33
 
34
  df = df[["id", "topic", "question", "answer", "split"]].copy()
35
+ for col in ["topic", "question", "answer", "split"]:
36
  df[col] = df[col].astype(str).str.strip()
37
 
38
  df = df.dropna(subset=["topic", "question", "answer"])
39
  df = df[(df["question"] != "") & (df["answer"] != "")]
40
+ df["answer_len"] = df["answer"].str.len()
41
  df = df.reset_index(drop=True)
42
 
43
  return df
44
 
45
 
46
+ DF = load_data()
47
+ ALL_TOPICS = sorted(DF["topic"].unique().tolist())
48
+ ALL_SPLITS = sorted(DF["split"].unique().tolist())
49
 
50
 
51
+ def compute_stats(df: pd.DataFrame) -> str:
52
+ total_rows = len(df)
53
+ total_topics = df["topic"].nunique() if total_rows else 0
54
+ avg_answer_len = float(df["answer_len"].mean()) if total_rows else 0.0
55
  return (
56
+ f"Total rows: {total_rows} | "
57
+ f"Unique topics: {total_topics} | "
58
+ f"Average answer length: {avg_answer_len:.1f} chars"
 
59
  )
60
 
61
 
62
+ def apply_filters(
63
+ topic: str,
64
+ split: str,
65
+ keyword: str,
66
+ min_len: int,
67
+ max_len: int,
68
+ sort_by: str,
69
+ sort_dir: str
70
+ ) -> pd.DataFrame:
71
+ out = DF.copy()
72
 
73
+ if topic != "All":
74
  out = out[out["topic"] == topic]
75
 
76
+ if split != "All":
77
+ out = out[out["split"] == split]
78
+
79
  if keyword and keyword.strip():
80
  q = keyword.strip().lower()
81
  out = out[
 
84
  | out["answer"].str.lower().str.contains(q, na=False)
85
  ]
86
 
87
+ out = out[(out["answer_len"] >= int(min_len)) & (out["answer_len"] <= int(max_len))]
88
+
89
+ col_map = {
90
+ "ID": "id",
91
+ "Topic": "topic",
92
+ "Question length": "question",
93
+ "Answer length": "answer_len",
94
+ "Split": "split",
95
+ }
96
+ sort_col = col_map.get(sort_by, "id")
97
+ ascending = sort_dir == "Ascending"
98
+ out = out.sort_values(by=sort_col, ascending=ascending, kind="stable")
99
+
100
  return out.reset_index(drop=True)
101
 
102
 
103
+ def run_explore(
104
+ topic: str,
105
+ split: str,
106
+ keyword: str,
107
+ min_len: int,
108
+ max_len: int,
109
+ sort_by: str,
110
+ sort_dir: str,
111
+ page_size: int,
112
+ page_number: int
113
+ ):
114
+ filtered = apply_filters(topic, split, keyword, min_len, max_len, sort_by, sort_dir)
115
+ total = len(filtered)
116
+ pages = max(1, (total + page_size - 1) // page_size)
117
+ page_number = min(max(1, page_number), pages)
118
 
119
+ start = (page_number - 1) * page_size
120
+ end = min(start + page_size, total)
121
+
122
+ page_df = filtered.iloc[start:end].copy()
123
+ table_df = page_df[["id", "topic", "question", "answer", "split", "answer_len"]]
124
+
125
+ summary = (
126
+ f"{compute_stats(filtered)}\n"
127
+ f"Showing rows {start + 1} to {end if total else 0} of {total} | "
128
+ f"Page {page_number} of {pages}"
129
+ )
130
+
131
+ max_row_slider = max(1, len(page_df))
132
+ return (
133
+ summary,
134
+ table_df,
135
+ page_df.to_json(orient="records"),
136
+ gr.update(maximum=pages, value=page_number),
137
+ gr.update(maximum=max_row_slider, value=1),
138
+ )
139
+
140
+
141
+ def show_row_detail(page_df_json: str, row_idx_1based: int):
142
+ if not page_df_json:
143
+ return "No data loaded for this page.", "", "", "", ""
144
+
145
+ page_df = pd.read_json(page_df_json)
146
+ if page_df.empty:
147
+ return "No rows in this page.", "", "", "", ""
148
+
149
+ idx = int(row_idx_1based) - 1
150
+ idx = max(0, min(idx, len(page_df) - 1))
151
+ row = page_df.iloc[idx]
152
+
153
+ header = f"Record {idx + 1} on current page"
154
+ return (
155
+ header,
156
+ str(row["topic"]),
157
+ str(row["question"]),
158
+ str(row["answer"]),
159
+ f"Split: {row['split']} | ID: {row['id']} | Answer length: {row['answer_len']}",
160
+ )
161
+
162
+
163
+ def export_filtered_csv(
164
+ topic: str,
165
+ split: str,
166
+ keyword: str,
167
+ min_len: int,
168
+ max_len: int,
169
+ sort_by: str,
170
+ sort_dir: str
171
+ ):
172
+ filtered = apply_filters(topic, split, keyword, min_len, max_len, sort_by, sort_dir)
173
+ export_df = filtered[["id", "topic", "question", "answer", "split"]].copy()
174
 
175
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
176
+ export_df.to_csv(tmp.name, index=False)
177
+ return tmp.name
 
 
 
178
 
179
 
180
+ def related_examples(question_text: str, topic: str, k: int = 3) -> str:
181
+ subset = DF[DF["topic"] == topic].copy()
182
+ if subset.empty:
183
+ return "No related examples found."
184
+
185
+ q_words = set(str(question_text).lower().split())
186
+ if not q_words:
187
+ return "No related examples found."
188
+
189
+ def overlap_score(text: str) -> int:
190
+ return len(q_words.intersection(set(str(text).lower().split())))
191
+
192
+ subset["score"] = subset["question"].apply(overlap_score)
193
+ subset = subset.sort_values(by=["score", "id"], ascending=[False, True])
194
+ subset = subset[subset["question"] != question_text].head(k)
195
+
196
+ if subset.empty:
197
+ return "No related examples found."
198
+
199
+ lines = []
200
+ for _, r in subset.iterrows():
201
+ lines.append(f"- {r['question']}")
202
+ return "\n".join(lines)
203
+
204
+
205
+ def score_text(correct: int, total: int, streak: int, best_streak: int) -> str:
206
+ acc = (100.0 * correct / total) if total > 0 else 0.0
207
+ return (
208
+ f"Score: {correct}/{total} | "
209
+ f"Accuracy: {acc:.1f}% | "
210
+ f"Streak: {streak} | "
211
+ f"Best streak: {best_streak}"
212
+ )
213
+
214
+
215
+ def generate_question(topic_filter: str, difficulty: str):
216
+ pool = DF.copy()
217
+ if topic_filter != "All":
218
+ pool = pool[pool["topic"] == topic_filter]
219
+
220
  if pool.empty:
221
  return (
222
+ "No questions available for this filter.",
223
  gr.update(choices=[], value=None),
224
  "",
225
  "",
226
+ "",
227
+ "",
228
  )
229
 
230
+ row = pool.sample(1, random_state=random.randint(0, 10_000_000)).iloc[0]
231
+ topic = str(row["topic"])
232
+ question = str(row["question"])
233
+ correct = str(row["answer"])
234
+
235
+ same_topic = DF[(DF["topic"] == topic) & (DF["answer"] != correct)].copy()
236
+ global_pool = DF[DF["answer"] != correct].copy()
 
 
 
 
 
 
 
237
 
238
+ if difficulty == "Easy":
239
+ candidate = global_pool
240
+ elif difficulty == "Medium":
241
+ candidate = same_topic if len(same_topic["answer"].unique()) >= 3 else global_pool
242
  else:
243
+ target_len = len(correct)
244
+ hard_pool = same_topic.copy()
245
+ hard_pool["len_gap"] = (hard_pool["answer"].str.len() - target_len).abs()
246
+ hard_pool = hard_pool.sort_values(by=["len_gap", "id"])
247
+ if len(hard_pool["answer"].unique()) >= 3:
248
+ candidate = hard_pool
249
+ elif len(same_topic["answer"].unique()) >= 3:
250
+ candidate = same_topic
251
+ else:
252
+ candidate = global_pool
253
+
254
+ distractor_answers = candidate["answer"].dropna().astype(str).drop_duplicates().tolist()
255
+ if len(distractor_answers) < 3:
256
+ return (
257
+ "Not enough distractors to generate a 4-option question.",
258
+ gr.update(choices=[], value=None),
259
+ "",
260
+ "",
261
+ "",
262
+ "",
263
+ )
264
 
265
+ distractors = random.sample(distractor_answers, 3)
266
+ options = distractors + [correct]
267
+ random.shuffle(options)
268
 
269
+ question_block = f"Topic: {topic}\n\nQuestion: {question}"
270
+
271
+ teach_note = (
272
+ f"Teaching note: This question belongs to {topic}. "
273
+ f"Focus on core definitions and tool usage terms."
274
+ )
275
+ related = related_examples(question, topic, k=3)
276
+
277
+ return (
278
+ question_block,
279
+ gr.update(choices=options, value=None),
280
+ correct,
281
+ question,
282
+ topic,
283
+ f"{teach_note}\n\nRelated questions:\n{related}",
284
+ )
285
+
286
+
287
+ def start_quiz(
288
+ topic_filter: str,
289
+ difficulty: str,
290
+ correct_count: int,
291
+ total_count: int,
292
+ streak: int,
293
+ best_streak: int
294
+ ):
295
+ q, choices, correct, raw_q, raw_topic, teach = generate_question(topic_filter, difficulty)
296
+ return (
297
+ q,
298
+ choices,
299
+ correct,
300
+ raw_q,
301
+ raw_topic,
302
+ teach,
303
+ score_text(correct_count, total_count, streak, best_streak),
304
+ "Quiz started. Select an answer and submit.",
305
+ )
306
 
307
 
308
  def submit_and_next(
309
+ selected: str,
310
+ current_correct: str,
311
+ current_q: str,
312
+ current_topic: str,
313
+ topic_filter: str,
314
+ difficulty: str,
315
+ correct_count: int,
316
+ total_count: int,
317
+ streak: int,
318
+ best_streak: int
319
  ):
320
+ if not current_correct or not current_q:
321
  return (
322
+ "Click Start Quiz first.",
 
323
  gr.update(),
324
  gr.update(),
325
+ current_correct,
326
+ current_q,
327
+ current_topic,
328
+ "",
329
+ score_text(correct_count, total_count, streak, best_streak),
330
+ "No active question.",
331
  correct_count,
332
  total_count,
333
  streak,
334
  best_streak,
335
  )
336
 
337
+ if not selected:
338
  return (
339
+ "Please select one option before submitting.",
 
340
  gr.update(),
341
  gr.update(),
342
+ current_correct,
343
+ current_q,
344
+ current_topic,
345
+ "",
346
+ score_text(correct_count, total_count, streak, best_streak),
347
+ "Waiting for answer selection.",
348
  correct_count,
349
  total_count,
350
  streak,
 
352
  )
353
 
354
  total_count += 1
355
+ if selected == current_correct:
356
  correct_count += 1
357
  streak += 1
358
  best_streak = max(best_streak, streak)
359
  result = (
360
  "Correct.\n\n"
361
+ f"Your answer: {selected}\n\n"
362
+ f"Reference answer: {current_correct}"
363
  )
364
  else:
365
  streak = 0
366
  result = (
367
  "Incorrect.\n\n"
368
+ f"Your answer: {selected}\n\n"
369
+ f"Correct answer: {current_correct}"
370
  )
371
 
372
+ next_q, next_choices, next_correct, next_raw_q, next_raw_topic, next_teach = generate_question(
373
+ topic_filter, difficulty
374
+ )
375
 
376
  return (
377
  result,
 
378
  next_q,
379
  next_choices,
380
  next_correct,
381
+ next_raw_q,
382
+ next_raw_topic,
383
+ next_teach,
384
+ score_text(correct_count, total_count, streak, best_streak),
385
+ "Auto-loaded next question.",
386
  correct_count,
387
  total_count,
388
  streak,
 
390
  )
391
 
392
 
393
+ def reset_score():
 
394
  return (
395
+ 0, 0, 0, 0,
396
+ score_text(0, 0, 0, 0),
397
+ "Score reset. Click Start Quiz."
 
 
 
398
  )
399
 
400
 
401
+ CSS = """
402
+ :root {
403
+ --brand: #0f766e;
404
+ --accent: #0ea5e9;
405
+ --bg-soft: #f8fafc;
406
+ --card: #ffffff;
407
+ --text: #0f172a;
408
+ --muted: #475569;
409
+ }
410
+ body {
411
+ background: linear-gradient(180deg, #f0fdfa 0%, #f8fafc 35%, #ffffff 100%);
412
+ }
413
+ .gradio-container {
414
+ max-width: 1280px !important;
415
+ }
416
+ #hero {
417
+ background: linear-gradient(135deg, rgba(15,118,110,0.10), rgba(14,165,233,0.10));
418
+ border: 1px solid rgba(15,118,110,0.20);
419
+ border-radius: 16px;
420
+ padding: 14px 16px;
421
+ }
422
+ #hero h1, #hero p {
423
+ color: var(--text);
424
+ }
425
+ .card {
426
+ background: var(--card);
427
+ border-radius: 14px;
428
+ border: 1px solid #e2e8f0;
429
+ padding: 10px 12px;
430
+ }
431
+ """
432
+
433
+ with gr.Blocks(
434
+ title="Bioinformatics QA Teaching Studio",
435
+ css=CSS,
436
+ theme=gr.themes.Soft(
437
+ primary_hue="teal",
438
+ secondary_hue="sky",
439
+ neutral_hue="slate"
440
+ ),
441
+ ) as demo:
442
+ gr.HTML(
443
+ """
444
+ <div id="hero">
445
+ <h1>Bioinformatics QA Teaching Studio</h1>
446
+ <p>
447
+ Explore the dataset, learn core concepts, and practice with teaching-mode multiple-choice quizzes.
448
+ This app is for learning and research purposes only. Validate content before high-stakes use.
449
+ </p>
450
+ </div>
451
+ """
452
  )
453
 
454
+ with gr.Tabs():
455
+ with gr.Tab("Explore"):
456
+ with gr.Row():
457
+ topic_dd = gr.Dropdown(
458
+ choices=["All"] + ALL_TOPICS,
459
+ value="All",
460
+ label="Topic"
461
+ )
462
+ split_dd = gr.Dropdown(
463
+ choices=["All"] + ALL_SPLITS,
464
+ value="All",
465
+ label="Split"
466
+ )
467
+ keyword_tb = gr.Textbox(
468
+ label="Keyword search",
469
+ placeholder="Search topic, question, or answer"
470
+ )
471
+
472
+ with gr.Row():
473
+ min_len = gr.Slider(0, int(max(DF["answer_len"].max(), 20)), value=0, step=1, label="Min answer length")
474
+ max_len = gr.Slider(0, int(max(DF["answer_len"].max(), 20)), value=int(DF["answer_len"].max()), step=1, label="Max answer length")
475
+ sort_by = gr.Dropdown(
476
+ choices=["ID", "Topic", "Question length", "Answer length", "Split"],
477
+ value="ID",
478
+ label="Sort by"
479
+ )
480
+ sort_dir = gr.Radio(
481
+ choices=["Ascending", "Descending"],
482
+ value="Ascending",
483
+ label="Order"
484
+ )
485
+
486
+ with gr.Row():
487
+ page_size = gr.Slider(5, 100, value=15, step=5, label="Rows per page")
488
+ page_number = gr.Slider(1, 1, value=1, step=1, label="Page")
489
+ run_btn = gr.Button("Apply filters", variant="primary")
490
+ export_btn = gr.Button("Export filtered CSV")
491
+
492
+ summary_md = gr.Markdown(value=compute_stats(DF))
493
+ table = gr.Dataframe(
494
+ headers=["id", "topic", "question", "answer", "split", "answer_len"],
495
+ wrap=True,
496
+ interactive=False,
497
+ label="Filtered results"
498
+ )
499
 
500
+ filtered_state = gr.State("")
501
+ row_slider = gr.Slider(1, 1, value=1, step=1, label="Inspect row on current page")
502
+ inspect_btn = gr.Button("Show row details")
503
+
504
+ detail_header = gr.Markdown(value="Select filters and click Apply.")
505
+ detail_topic = gr.Textbox(label="Topic", interactive=False)
506
+ detail_question = gr.Textbox(label="Question", lines=4, interactive=False)
507
+ detail_answer = gr.Textbox(label="Answer", lines=7, interactive=False)
508
+ detail_meta = gr.Textbox(label="Metadata", interactive=False)
509
+ csv_file = gr.File(label="Download CSV", interactive=False)
510
+
511
+ run_btn.click(
512
+ fn=run_explore,
513
+ inputs=[topic_dd, split_dd, keyword_tb, min_len, max_len, sort_by, sort_dir, page_size, page_number],
514
+ outputs=[summary_md, table, filtered_state, page_number, row_slider],
515
+ )
516
 
517
+ inspect_btn.click(
518
+ fn=show_row_detail,
519
+ inputs=[filtered_state, row_slider],
520
+ outputs=[detail_header, detail_topic, detail_question, detail_answer, detail_meta],
521
+ )
522
 
523
+ export_btn.click(
524
+ fn=export_filtered_csv,
525
+ inputs=[topic_dd, split_dd, keyword_tb, min_len, max_len, sort_by, sort_dir],
526
+ outputs=[csv_file],
527
+ )
528
 
529
+ with gr.Tab("Quiz"):
530
+ with gr.Row():
531
+ quiz_topic = gr.Dropdown(
532
+ choices=["All"] + ALL_TOPICS,
533
+ value="All",
534
+ label="Topic filter"
535
+ )
536
+ difficulty = gr.Radio(
537
+ choices=["Easy", "Medium", "Hard"],
538
+ value="Medium",
539
+ label="Difficulty"
540
+ )
541
+ start_btn = gr.Button("Start quiz", variant="primary")
542
+ reset_btn = gr.Button("Reset score")
543
+
544
+ quiz_score = gr.Markdown(value=score_text(0, 0, 0, 0))
545
+ quiz_status = gr.Textbox(label="Status", interactive=False)
546
+
547
+ question_box = gr.Textbox(label="Question", lines=5, interactive=False)
548
+ choices_radio = gr.Radio(choices=[], label="Choose one answer")
549
+ submit_btn = gr.Button("Submit and load next question", variant="primary")
550
+
551
+ result_box = gr.Textbox(label="Result", lines=6, interactive=False)
552
+ teaching_box = gr.Textbox(label="Teaching support", lines=8, interactive=False)
553
+
554
+ correct_state = gr.State("")
555
+ q_state = gr.State("")
556
+ topic_state = gr.State("")
557
+
558
+ correct_count_state = gr.State(0)
559
+ total_count_state = gr.State(0)
560
+ streak_state = gr.State(0)
561
+ best_streak_state = gr.State(0)
562
+
563
+ start_btn.click(
564
+ fn=start_quiz,
565
+ inputs=[
566
+ quiz_topic,
567
+ difficulty,
568
+ correct_count_state,
569
+ total_count_state,
570
+ streak_state,
571
+ best_streak_state,
572
+ ],
573
+ outputs=[
574
+ question_box,
575
+ choices_radio,
576
+ correct_state,
577
+ q_state,
578
+ topic_state,
579
+ teaching_box,
580
+ quiz_score,
581
+ quiz_status,
582
+ ],
583
+ )
584
 
585
+ submit_btn.click(
586
+ fn=submit_and_next,
587
+ inputs=[
588
+ choices_radio,
589
+ correct_state,
590
+ q_state,
591
+ topic_state,
592
+ quiz_topic,
593
+ difficulty,
594
+ correct_count_state,
595
+ total_count_state,
596
+ streak_state,
597
+ best_streak_state,
598
+ ],
599
+ outputs=[
600
+ result_box,
601
+ question_box,
602
+ choices_radio,
603
+ correct_state,
604
+ q_state,
605
+ topic_state,
606
+ teaching_box,
607
+ quiz_score,
608
+ quiz_status,
609
+ correct_count_state,
610
+ total_count_state,
611
+ streak_state,
612
+ best_streak_state,
613
+ ],
614
+ )
615
 
616
+ reset_btn.click(
617
+ fn=reset_score,
618
+ inputs=[],
619
+ outputs=[
620
+ correct_count_state,
621
+ total_count_state,
622
+ streak_state,
623
+ best_streak_state,
624
+ quiz_score,
625
+ quiz_status,
626
+ ],
627
+ )
628
 
629
+ with gr.Tab("About"):
630
+ gr.Markdown(
631
+ """
632
+ ## About this teaching app
 
633
 
634
+ This Space demonstrates:
635
+ - Practical dataset exploration for bioinformatics QA data
636
+ - Teaching-mode multiple-choice practice with topic-aware distractors
637
+ - Session score tracking with streak metrics
 
638
 
639
+ ## Important notice
 
 
 
 
640
 
641
+ This app is intended for learning and research use only.
642
+ Use with caution.
643
+ Do not use as a replacement for expert biomedical or clinical judgment.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
644
 
645
+ ## Dataset
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
646
 
647
+ - Source: yashm/bioinformatics-qa-dataset
648
+ - Citation and DOI are listed in the project README
649
+ """
650
+ )
 
 
 
 
 
 
 
 
651
 
652
  demo.launch()