yashm committed on
Commit
1ea3f1b
·
verified ·
1 Parent(s): 3c76fd2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +361 -0
app.py ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import pandas as pd
3
+ import gradio as gr
4
+ from datasets import load_dataset
5
+
6
# Dataset identifier handed to `load_dataset` when the app loads its data.
DATASET_REPO = "yashm/bioinformatics-qa-dataset"

# Seed the stdlib `random` module once at import so it starts from a fixed state.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
9
+
10
+
11
def load_data():
    """Load every split of the QA dataset into one cleaned DataFrame.

    Each split returned by ``load_dataset(DATASET_REPO)`` is converted to
    pandas and tagged with its split name in a ``split`` column; the splits
    are then concatenated. Only the ``id``, ``topic``, ``question``,
    ``answer`` and ``split`` columns are kept.

    Returns:
        A DataFrame with a fresh 0..n-1 index. ``topic``, ``question`` and
        ``answer`` are whitespace-stripped strings, rows with a missing value
        in any of those three columns are removed, and rows whose question or
        answer is empty after stripping are removed.

    Raises:
        ValueError: If any of ``id``, ``topic``, ``question`` or ``answer``
            is absent from the combined data.
    """
    ds = load_dataset(DATASET_REPO)

    # `assign` returns a new frame, so the converted split is never mutated.
    frames = [
        split.to_pandas().assign(split=split_name)
        for split_name, split in ds.items()
    ]
    df = pd.concat(frames, ignore_index=True)

    required = ["id", "topic", "question", "answer"]
    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns in dataset: {missing}")

    text_cols = ["topic", "question", "answer"]

    # Drop missing values BEFORE casting to str: astype(str) turns None/NaN
    # into the literal strings "None"/"nan", which dropna() cannot detect.
    df = df[required + ["split"]].dropna(subset=text_cols).copy()
    for col in text_cols:
        df[col] = df[col].astype(str).str.strip()

    # Whitespace-only cells are "" after stripping; such rows are unusable.
    df = df[(df["question"] != "") & (df["answer"] != "")]
    return df.reset_index(drop=True)
36
+
37
+
38
# Load the dataset once, at import time.
df = load_data()

# Topic choices: the "All" sentinel first, then the distinct topics in sorted order.
ALL_TOPICS = ["All", *sorted(df["topic"].unique().tolist())]

# Distinct non-null answers across the whole dataset.
GLOBAL_UNIQUE_ANSWERS = df["answer"].dropna().unique().tolist()
41
+
42
+
43
def stats_text(correct, total, streak, best_streak):
    """Format the quiz scoreboard as one pipe-separated line.

    Accuracy is ``correct / total`` as a percentage with one decimal place,
    and is reported as 0.0 when ``total`` is not positive.
    """
    accuracy = 0.0
    if total > 0:
        accuracy = correct / total * 100.0

    fields = [
        f"Score: {correct}/{total}",
        f"Accuracy: {accuracy:.1f}%",
        f"Streak: {streak}",
        f"Best Streak: {best_streak}",
    ]
    return " | ".join(fields)
51
+
52
+
53
def filter_df(topic, keyword, data=None):
    """Return the rows that match a topic and/or a free-text keyword.

    Args:
        topic: Exact topic to keep. ``"All"`` or any falsy value disables the
            topic filter.
        keyword: Case-insensitive substring looked up in the ``topic``,
            ``question`` and ``answer`` columns. Blank or falsy disables the
            keyword filter.
        data: Frame to filter. Defaults to the module-level ``df``.

    Returns:
        A new DataFrame holding the matching rows with a fresh 0..n-1 index.
    """
    out = df if data is None else data

    if topic and topic != "All":
        out = out[out["topic"] == topic]

    if keyword and keyword.strip():
        needle = keyword.strip().lower()

        def contains_needle(column):
            # regex=False: the keyword is user-typed text, so it must be
            # matched literally. As a regex, input such as "(" raises and
            # "c++" matches the wrong rows.
            return out[column].str.lower().str.contains(
                needle, na=False, regex=False
            )

        out = out[
            contains_needle("topic")
            | contains_needle("question")
            | contains_needle("answer")
        ]

    # reset_index returns a new frame, so the source frame is never mutated.
    return out.reset_index(drop=True)
68
+
69
+
70
def explore(topic, keyword, limit):
    """Return the first ``int(limit)`` rows that pass the topic/keyword filters."""
    matches = filter_df(topic, keyword)
    row_cap = int(limit)
    return matches.head(row_cap)
73
+
74
+
75
def get_random_example(topic, keyword):
    """Pick one random row that passes the topic/keyword filters.

    Returns:
        ``(topic, question, answer, split)`` of the sampled row, or a
        "no match" message followed by three empty strings when nothing
        passes the filters.
    """
    matches = filter_df(topic, keyword)
    if not matches.empty:
        picked = matches.sample(1).iloc[0]
        return picked["topic"], picked["question"], picked["answer"], picked["split"]
    return "No matching rows found.", "", "", ""
81
+
82
+
83
def _question_unavailable(message):
    """Build the 4-tuple returned when no multiple-choice question can be made.

    ``message`` takes the place of the question text, the choice list is
    cleared, and the two trailing string slots are empty.
    """
    return message, gr.update(choices=[], value=None), "", ""


def generate_question(topic):
    """Pick a random question and build a shuffled four-option answer list.

    Three distractors are taken from other answers within the sampled row's
    topic. When that topic has fewer than three, the remainder is drawn from
    ``GLOBAL_UNIQUE_ANSWERS``.

    Args:
        topic: Topic to draw the question from. ``"All"`` or any falsy value
            (for example ``None``) draws from the whole dataset.

    Returns:
        A 4-tuple ``(shown, choices_update, correct, q_text)``: the display
        text with topic and question, a ``gr.update`` carrying the shuffled
        choices with nothing selected, the reference answer, and the raw
        question text. When no question can be built, the first element is an
        explanatory message, the choices are empty and the last two elements
        are empty strings.
    """
    # A falsy topic means "no topic filter", the same convention filter_df uses.
    if topic and topic != "All":
        pool = df[df["topic"] == topic]
    else:
        pool = df
    if pool.empty:
        return _question_unavailable("No questions available for this topic.")

    row = pool.sample(1).iloc[0]
    q_topic = row["topic"]
    q_text = row["question"]
    correct = row["answer"]

    topic_answers = (
        df.loc[df["topic"] == q_topic, "answer"]
        .dropna()
        .astype(str)
        .str.strip()
        .unique()
        .tolist()
    )
    # The correct answer must never appear among the distractors.
    same_topic_answers = [a for a in topic_answers if a and a != correct]

    n_distractors = 3
    if len(same_topic_answers) >= n_distractors:
        distractors = random.sample(same_topic_answers, n_distractors)
    else:
        # Use everything the topic offers, then top up from the global pool.
        distractors = list(same_topic_answers)
        need = n_distractors - len(distractors)
        taken = {correct, *distractors}
        fallback_pool = [a for a in GLOBAL_UNIQUE_ANSWERS if a not in taken]
        if len(fallback_pool) < need:
            return _question_unavailable(
                "Not enough unique answers to build 4 choices."
            )
        distractors.extend(random.sample(fallback_pool, need))

    choices = distractors + [correct]
    random.shuffle(choices)

    shown = f"Topic: {q_topic}\n\nQuestion: {q_text}"
    return shown, gr.update(choices=choices, value=None), correct, q_text
132
+
133
+
134
def submit_and_next(
    selected_answer,
    current_correct_answer,
    current_question,
    topic_filter,
    correct_count,
    total_count,
    streak,
    best_streak,
):
    """Grade the submitted answer, update the score, and load the next question.

    Returns a 10-tuple: result text, scoreboard text, question display,
    choices update, correct answer, raw question, then the four counters
    (correct, total, streak, best streak).

    When no quiz is running or nothing is selected, the result slot carries a
    prompt instead, the question and choices are left untouched via empty
    ``gr.update()`` values, and all state is passed through unchanged.
    """
    # Both "nothing to grade" cases share one pass-through return shape.
    prompt = None
    if not current_correct_answer or not current_question:
        prompt = "Click 'Start Quiz' to begin."
    elif not selected_answer:
        prompt = "Please select one option."

    if prompt is not None:
        return (
            prompt,
            stats_text(correct_count, total_count, streak, best_streak),
            gr.update(),
            gr.update(),
            current_correct_answer,
            current_question,
            correct_count,
            total_count,
            streak,
            best_streak,
        )

    total_count += 1
    answered_correctly = selected_answer == current_correct_answer
    if answered_correctly:
        correct_count += 1
        streak += 1
        best_streak = max(best_streak, streak)
        result = (
            "Correct.\n\n"
            f"Your answer: {selected_answer}\n\n"
            f"Reference answer: {current_correct_answer}"
        )
    else:
        # A wrong answer ends the current streak; best_streak is kept.
        streak = 0
        result = (
            "Incorrect.\n\n"
            f"Your answer: {selected_answer}\n\n"
            f"Correct answer: {current_correct_answer}"
        )

    upcoming = generate_question(topic_filter)
    next_q, next_choices, next_correct, next_question = upcoming
    scoreboard = stats_text(correct_count, total_count, streak, best_streak)

    return (
        result,
        scoreboard,
        next_q,
        next_choices,
        next_correct,
        next_question,
        correct_count,
        total_count,
        streak,
        best_streak,
    )
204
+
205
+
206
def start_quiz(topic_filter, correct_count, total_count, streak, best_streak):
    """Produce the first question without changing the running score.

    Returns a 6-tuple: question display, choices update, correct answer, raw
    question, scoreboard text for the counters passed in, and a status message.
    """
    shown, choice_update, answer, raw_question = generate_question(topic_filter)
    scoreboard = stats_text(correct_count, total_count, streak, best_streak)
    status = "Quiz started. Pick an option and click Submit."
    return shown, choice_update, answer, raw_question, scoreboard, status
216
+
217
+
218
def reset_score():
    """Zero all four counters and return them with a fresh scoreboard.

    Returns a 6-tuple: correct count, total count, streak, best streak (all
    0), the scoreboard text for those zeros, and a status message.
    """
    zeroed = (0, 0, 0, 0)
    return (
        *zeroed,
        stats_text(*zeroed),
        "Score reset. Click Start Quiz.",
    )
231
+
232
+
233
# Build the two-tab UI. Components are created in display order; event
# wiring for each tab follows that tab's components.
with gr.Blocks(title="Bioinformatics QA Quiz Demo", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# Bioinformatics QA Demo

Interactive demo built from the public dataset:
**yashm/bioinformatics-qa-dataset**

This app is for learning and research purposes only.
Use with caution. Validate information before high-stakes use.
"""
    )

    # Tab 1: browse and search the dataset
    with gr.Tab("Explore Dataset"):
        with gr.Row():
            ex_topic = gr.Dropdown(choices=ALL_TOPICS, value="All", label="Topic")
            ex_keyword = gr.Textbox(
                label="Keyword",
                placeholder="Search topic, question, or answer",
            )
            ex_limit = gr.Slider(minimum=5, maximum=100, value=15, step=5, label="Rows")

        with gr.Row():
            ex_search_btn = gr.Button("Search")
            ex_random_btn = gr.Button("Random Example")

        ex_table = gr.Dataframe(
            headers=["id", "topic", "question", "answer", "split"],
            label="Matching Rows",
            wrap=True,
        )

        gr.Markdown("### Random Example")
        ex_r_topic = gr.Textbox(label="Topic", interactive=False)
        ex_r_question = gr.Textbox(label="Question", lines=4, interactive=False)
        ex_r_answer = gr.Textbox(label="Answer", lines=6, interactive=False)
        ex_r_split = gr.Textbox(label="Split", interactive=False)

        # Both buttons read the same topic/keyword filters.
        explore_filters = [ex_topic, ex_keyword]

        ex_search_btn.click(
            fn=explore,
            inputs=[*explore_filters, ex_limit],
            outputs=[ex_table],
        )

        ex_random_btn.click(
            fn=get_random_example,
            inputs=explore_filters,
            outputs=[ex_r_topic, ex_r_question, ex_r_answer, ex_r_split],
        )

    # Tab 2: multiple-choice quiz
    with gr.Tab("Quiz Yourself"):
        with gr.Row():
            quiz_topic = gr.Dropdown(choices=ALL_TOPICS, value="All", label="Topic Filter")
            start_btn = gr.Button("Start Quiz")
            reset_btn = gr.Button("Reset Score")

        quiz_stats = gr.Markdown(value=stats_text(0, 0, 0, 0))
        quiz_status = gr.Textbox(label="Status", interactive=False)

        quiz_question = gr.Textbox(label="Question", lines=5, interactive=False)
        quiz_choices = gr.Radio(choices=[], label="Choose one answer")
        submit_btn = gr.Button("Submit (auto next question)")

        quiz_result = gr.Textbox(label="Last Result", lines=6, interactive=False)

        # State: the current question's answer key and raw text.
        correct_state = gr.State("")
        question_state = gr.State("")

        # State: the running score counters.
        correct_count_state = gr.State(0)
        total_count_state = gr.State(0)
        streak_state = gr.State(0)
        best_streak_state = gr.State(0)

        # The four counters always travel together, in this order.
        score_states = [
            correct_count_state,
            total_count_state,
            streak_state,
            best_streak_state,
        ]

        start_btn.click(
            fn=start_quiz,
            inputs=[quiz_topic, *score_states],
            outputs=[
                quiz_question,
                quiz_choices,
                correct_state,
                question_state,
                quiz_stats,
                quiz_status,
            ],
        )

        submit_btn.click(
            fn=submit_and_next,
            inputs=[
                quiz_choices,
                correct_state,
                question_state,
                quiz_topic,
                *score_states,
            ],
            outputs=[
                quiz_result,
                quiz_stats,
                quiz_question,
                quiz_choices,
                correct_state,
                question_state,
                *score_states,
            ],
        )

        reset_btn.click(
            fn=reset_score,
            inputs=[],
            outputs=[*score_states, quiz_stats, quiz_status],
        )

demo.launch()