Spaces:

yashm
/

Bioinformatics

Sleeping

App Files Files Community

yashm committed on Apr 25

Commit

1ea3f1b

verified ·

1 Parent(s): 3c76fd2

Create app.py

Browse files

Files changed (1) hide show

app.py +361 -0

app.py ADDED Viewed

	@@ -0,0 +1,361 @@

+import random
+import pandas as pd
+import gradio as gr
+from datasets import load_dataset
+# Hugging Face Hub repository id handed to load_dataset() in load_data().
+DATASET_REPO = "yashm/bioinformatics-qa-dataset"
+# Seed applied to the stdlib `random` module right below.
+RANDOM_SEED = 42
+# NOTE(review): only the stdlib generator is seeded here; the DataFrame.sample()
+# calls further down are made without a random_state and do not receive this seed.
+random.seed(RANDOM_SEED)
+def load_data():
+    ds = load_dataset(DATASET_REPO)
+    frames = []
+    for split_name in ds.keys():
+        split_df = ds[split_name].to_pandas().copy()
+        split_df["split"] = split_name
+        frames.append(split_df)
+    df = pd.concat(frames, ignore_index=True)
+    required = ["id", "topic", "question", "answer"]
+    missing = [c for c in required if c not in df.columns]
+    if missing:
+        raise ValueError(f"Missing required columns in dataset: {missing}")
+    df = df[["id", "topic", "question", "answer", "split"]].copy()
+    for col in ["topic", "question", "answer"]:
+        df[col] = df[col].astype(str).str.strip()
+    df = df.dropna(subset=["topic", "question", "answer"])
+    df = df[(df["question"] != "") & (df["answer"] != "")]
+    df = df.reset_index(drop=True)
+    return df
+# Load the dataset once at import time; the handlers below read this module-level frame.
+df = load_data()
+# Dropdown choices: the "All" sentinel followed by the distinct topics in sorted order.
+ALL_TOPICS = ["All"] + sorted(df["topic"].unique().tolist())
+# Distinct answers across all topics; generate_question() draws fallback distractors from it.
+GLOBAL_UNIQUE_ANSWERS = df["answer"].dropna().unique().tolist()
+def stats_text(correct, total, streak, best_streak):
+    acc = (correct / total * 100.0) if total > 0 else 0.0
+    return (
+        f"Score: {correct}/{total} | "
+        f"Accuracy: {acc:.1f}% | "
+        f"Streak: {streak} | "
+        f"Best Streak: {best_streak}"
+    )
+def filter_df(topic, keyword):
+    out = df.copy()
+    if topic and topic != "All":
+        out = out[out["topic"] == topic]
+    if keyword and keyword.strip():
+        q = keyword.strip().lower()
+        out = out[
+            out["topic"].str.lower().str.contains(q, na=False)
+            | out["question"].str.lower().str.contains(q, na=False)
+            | out["answer"].str.lower().str.contains(q, na=False)
+        ]
+    return out.reset_index(drop=True)
+def explore(topic, keyword, limit):
+    out = filter_df(topic, keyword)
+    return out.head(int(limit))
+def get_random_example(topic, keyword):
+    out = filter_df(topic, keyword)
+    if out.empty:
+        return "No matching rows found.", "", "", ""
+    row = out.sample(1).iloc[0]
+    return row["topic"], row["question"], row["answer"], row["split"]
+def generate_question(topic):
+    pool = df if topic == "All" else df[df["topic"] == topic]
+    if pool.empty:
+        return (
+            "No questions available for this topic.",
+            gr.update(choices=[], value=None),
+            "",
+            "",
+        )
+    row = pool.sample(1).iloc[0]
+    q_topic = row["topic"]
+    q_text = row["question"]
+    correct = row["answer"]
+    same_topic_answers = (
+        df[df["topic"] == q_topic]["answer"]
+        .dropna()
+        .astype(str)
+        .str.strip()
+        .unique()
+        .tolist()
+    )
+    same_topic_answers = [a for a in same_topic_answers if a and a != correct]
+    distractors = []
+    if len(same_topic_answers) >= 3:
+        distractors = random.sample(same_topic_answers, 3)
+    else:
+        distractors.extend(same_topic_answers)
+        need = 3 - len(distractors)
+        fallback_pool = [
+            a for a in GLOBAL_UNIQUE_ANSWERS
+            if a != correct and a not in distractors
+        ]
+        if len(fallback_pool) < need:
+            return (
+                "Not enough unique answers to build 4 choices.",
+                gr.update(choices=[], value=None),
+                "",
+                "",
+            )
+        distractors.extend(random.sample(fallback_pool, need))
+    choices = distractors + [correct]
+    random.shuffle(choices)
+    shown = f"Topic: {q_topic}\n\nQuestion: {q_text}"
+    return shown, gr.update(choices=choices, value=None), correct, q_text
+def submit_and_next(
+    selected_answer,
+    current_correct_answer,
+    current_question,
+    topic_filter,
+    correct_count,
+    total_count,
+    streak,
+    best_streak,
+):
+    if not current_correct_answer or not current_question:
+        return (
+            "Click 'Start Quiz' to begin.",
+            stats_text(correct_count, total_count, streak, best_streak),
+            gr.update(),
+            gr.update(),
+            current_correct_answer,
+            current_question,
+            correct_count,
+            total_count,
+            streak,
+            best_streak,
+        )
+    if not selected_answer:
+        return (
+            "Please select one option.",
+            stats_text(correct_count, total_count, streak, best_streak),
+            gr.update(),
+            gr.update(),
+            current_correct_answer,
+            current_question,
+            correct_count,
+            total_count,
+            streak,
+            best_streak,
+        )
+    total_count += 1
+    if selected_answer == current_correct_answer:
+        correct_count += 1
+        streak += 1
+        best_streak = max(best_streak, streak)
+        result = (
+            "Correct.\n\n"
+            f"Your answer: {selected_answer}\n\n"
+            f"Reference answer: {current_correct_answer}"
+        )
+    else:
+        streak = 0
+        result = (
+            "Incorrect.\n\n"
+            f"Your answer: {selected_answer}\n\n"
+            f"Correct answer: {current_correct_answer}"
+        )
+    next_q, next_choices, next_correct, next_question = generate_question(topic_filter)
+    return (
+        result,
+        stats_text(correct_count, total_count, streak, best_streak),
+        next_q,
+        next_choices,
+        next_correct,
+        next_question,
+        correct_count,
+        total_count,
+        streak,
+        best_streak,
+    )
+def start_quiz(topic_filter, correct_count, total_count, streak, best_streak):
+    q, choices, correct, question = generate_question(topic_filter)
+    return (
+        q,
+        choices,
+        correct,
+        question,
+        stats_text(correct_count, total_count, streak, best_streak),
+        "Quiz started. Pick an option and click Submit.",
+    )
+def reset_score():
+    correct_count = 0
+    total_count = 0
+    streak = 0
+    best_streak = 0
+    return (
+        correct_count,
+        total_count,
+        streak,
+        best_streak,
+        stats_text(correct_count, total_count, streak, best_streak),
+        "Score reset. Click Start Quiz.",
+    )
+# Build the two-tab UI. Components are created in the order they appear here,
+# and every click handler's inputs/outputs lists are positional: they must
+# line up with the parameters and return tuple of the function they call.
+with gr.Blocks(title="Bioinformatics QA Quiz Demo", theme=gr.themes.Soft()) as demo:
+    gr.Markdown(
+        """
+# Bioinformatics QA Demo
+Interactive demo built from the public dataset:
+**yashm/bioinformatics-qa-dataset**
+This app is for learning and research purposes only.
+Use with caution. Validate information before high-stakes use.
+"""
+    )
+    # ---- Tab 1: browse and search the dataset ----
+    with gr.Tab("Explore Dataset"):
+        with gr.Row():
+            ex_topic = gr.Dropdown(choices=ALL_TOPICS, value="All", label="Topic")
+            ex_keyword = gr.Textbox(label="Keyword", placeholder="Search topic, question, or answer")
+            ex_limit = gr.Slider(minimum=5, maximum=100, value=15, step=5, label="Rows")
+        with gr.Row():
+            ex_search_btn = gr.Button("Search")
+            ex_random_btn = gr.Button("Random Example")
+        # Headers mirror the column order of the frame returned by load_data().
+        ex_table = gr.Dataframe(
+            headers=["id", "topic", "question", "answer", "split"],
+            label="Matching Rows",
+            wrap=True,
+        )
+        gr.Markdown("### Random Example")
+        # Read-only fields filled by get_random_example().
+        ex_r_topic = gr.Textbox(label="Topic", interactive=False)
+        ex_r_question = gr.Textbox(label="Question", lines=4, interactive=False)
+        ex_r_answer = gr.Textbox(label="Answer", lines=6, interactive=False)
+        ex_r_split = gr.Textbox(label="Split", interactive=False)
+        # Search: explore(topic, keyword, limit) -> table rows.
+        ex_search_btn.click(
+            fn=explore,
+            inputs=[ex_topic, ex_keyword, ex_limit],
+            outputs=[ex_table],
+        )
+        # Random Example: get_random_example(topic, keyword) -> the four text fields.
+        ex_random_btn.click(
+            fn=get_random_example,
+            inputs=[ex_topic, ex_keyword],
+            outputs=[ex_r_topic, ex_r_question, ex_r_answer, ex_r_split],
+        )
+    # ---- Tab 2: multiple-choice quiz ----
+    with gr.Tab("Quiz Yourself"):
+        with gr.Row():
+            quiz_topic = gr.Dropdown(choices=ALL_TOPICS, value="All", label="Topic Filter")
+            start_btn = gr.Button("Start Quiz")
+            reset_btn = gr.Button("Reset Score")
+        quiz_stats = gr.Markdown(value=stats_text(0, 0, 0, 0))
+        quiz_status = gr.Textbox(label="Status", interactive=False)
+        quiz_question = gr.Textbox(label="Question", lines=5, interactive=False)
+        # Starts with no choices; the handlers fill it through gr.update(choices=...).
+        quiz_choices = gr.Radio(choices=[], label="Choose one answer")
+        submit_btn = gr.Button("Submit (auto next question)")
+        quiz_result = gr.Textbox(label="Last Result", lines=6, interactive=False)
+        # Quiz bookkeeping carried between handler calls: these State components
+        # are passed in as inputs and written back as outputs of the handlers below.
+        # correct_state / question_state hold the current question's answer and text;
+        # the empty-string default is what submit_and_next() treats as "not started".
+        correct_state = gr.State("")
+        question_state = gr.State("")
+        correct_count_state = gr.State(0)
+        total_count_state = gr.State(0)
+        streak_state = gr.State(0)
+        best_streak_state = gr.State(0)
+        # Start Quiz: inputs follow start_quiz()'s parameter order, outputs its 6-tuple.
+        start_btn.click(
+            fn=start_quiz,
+            inputs=[
+                quiz_topic,
+                correct_count_state,
+                total_count_state,
+                streak_state,
+                best_streak_state,
+            ],
+            outputs=[
+                quiz_question,
+                quiz_choices,
+                correct_state,
+                question_state,
+                quiz_stats,
+                quiz_status,
+            ],
+        )
+        # Submit: inputs follow submit_and_next()'s parameter order, outputs its 10-tuple
+        # (result, stats, next question widgets, then the refreshed state values).
+        submit_btn.click(
+            fn=submit_and_next,
+            inputs=[
+                quiz_choices,
+                correct_state,
+                question_state,
+                quiz_topic,
+                correct_count_state,
+                total_count_state,
+                streak_state,
+                best_streak_state,
+            ],
+            outputs=[
+                quiz_result,
+                quiz_stats,
+                quiz_question,
+                quiz_choices,
+                correct_state,
+                question_state,
+                correct_count_state,
+                total_count_state,
+                streak_state,
+                best_streak_state,
+            ],
+        )
+        # Reset Score: writes the four counters, the stats line and the status box.
+        # The current question and its stored answer are not among the outputs.
+        reset_btn.click(
+            fn=reset_score,
+            inputs=[],
+            outputs=[
+                correct_count_state,
+                total_count_state,
+                streak_state,
+                best_streak_state,
+                quiz_stats,
+                quiz_status,
+            ],
+        )
+# Runs unconditionally when this module is executed or imported (no __main__ guard).
+demo.launch()