import os
from datetime import datetime
from typing import List

import black
import gradio as gr
from datasets import load_dataset, Dataset, DatasetDict
from huggingface_hub import whoami, InferenceClient

# Client used to grade submissions with an LLM; reads HF_TOKEN from the environment.
client = InferenceClient(
    api_key=os.getenv("HF_TOKEN"),
)

# Exam configuration, overridable via environment variables.
EXAM_MAX_QUESTIONS = int(os.getenv("EXAM_MAX_QUESTIONS", 5))
EXAM_PASSING_SCORE = float(os.getenv("EXAM_PASSING_SCORE", 0.8))
EXAM_DATASET_ID = "burtenshaw/dummy-code-quiz"

# Load the quiz questions and cap them at EXAM_MAX_QUESTIONS.
ds = load_dataset(EXAM_DATASET_ID, split="train", download_mode="force_redownload")
quiz_data = list(ds)

if EXAM_MAX_QUESTIONS:
    quiz_data = quiz_data[:EXAM_MAX_QUESTIONS]
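# Each quiz row is expected to carry at least the fields used further down:
# "challenge", "placeholder", "solution", "assessment_criteria", and "image"
# (inferred from how handle_quiz and check_code consume quiz_data).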


def format_python_code(code: str) -> str:
    """Format Python code using black."""
    try:
        return black.format_str(code, mode=black.Mode())
    except Exception as e:
        gr.Warning(f"Code formatting failed: {str(e)}")
        return code
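# Example behaviour: format_python_code("x=1") returns "x = 1\n", while code that
# black cannot parse is returned unchanged and a gr.Warning is shown to the user.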


def check_code(
    user_code: str, solution: str, challenge: str, assessment_criteria: List[str]
):
    """
    Use an LLM to evaluate whether the user's code solution is correct.
    Returns True if the solution is judged correct, False otherwise.
    """
    # Normalize both snippets with black so formatting differences don't matter.
    formatted_user_code = format_python_code(user_code)
    formatted_solution = format_python_code(solution)

    # Turn the criteria list into a numbered block for the prompt.
    assessment_criteria_str = "\n".join(
        [f"{i + 1}. {c}" for i, c in enumerate(assessment_criteria)]
    )

    prompt = f"""You are an expert Python programming instructor evaluating a student's code solution.

Challenge:
{challenge}

Reference Solution:
{formatted_solution}

Student's Solution:
{formatted_user_code}

Assessment Criteria:
{assessment_criteria_str}

Evaluate if the student's solution is functionally equivalent to the reference solution.
Consider:
1. Does it solve the problem correctly?
2. Does it handle edge cases appropriately?
3. Does it follow the requirements of the challenge?
4. Does it meet the assessment criteria?

Respond with ONLY "CORRECT" or "INCORRECT" followed by a brief explanation.
"""

    messages = [{"role": "user", "content": prompt}]

    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen2.5-Coder-32B-Instruct",
            messages=messages,
            max_tokens=500,
        )

        response = completion.choices[0].message.content.strip()

        # The model is instructed to lead with CORRECT/INCORRECT.
        is_correct = response.upper().startswith("CORRECT")

        # Anything after the first line is treated as the explanation.
        explanation = response.split("\n", 1)[1] if "\n" in response else ""
        status = "✅ Correct!" if is_correct else "❌ Incorrect!"
        gr.Info(f"{status}\n\n{explanation}")

        return is_correct

    except Exception as e:
        gr.Warning(f"Error checking code: {str(e)}")
        # Fall back to a literal string comparison if the LLM call fails.
        is_correct = formatted_user_code.strip() == formatted_solution.strip()
        status = "✅ Correct!" if is_correct else "❌ Incorrect!"
        gr.Info(f"{status} (Fallback comparison)")
        return is_correct
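# check_code returns only a boolean; the model's explanation is surfaced to the
# user via gr.Info. If the inference call fails, grading degrades to an exact
# (post-black) string comparison against the reference solution.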


def on_user_logged_in(token: gr.OAuthToken | None):
    """
    Handle user login state.
    On a valid token, hide the login button and reveal the Start button while
    keeping Next and Submit hidden. Also clear the question text, code input,
    status, and image.
    """
    if token is not None:
        return (
            gr.update(visible=False),
            gr.update(visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            "",
            gr.update(value="", visible=False),
            "",
            gr.update(value="", visible=False),
        )
    else:
        return (
            gr.update(visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            "",
            gr.update(value="", visible=False),
            "",
            gr.update(value="", visible=False),
        )
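# Note: both 8-tuples above must stay aligned with the `outputs` list wired to
# login_btn.click below: (login_btn, start_btn, next_btn, submit_btn,
# question_text, code_input, status_text, question_image).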


def push_results_to_hub(
    user_answers: list, token: gr.OAuthToken | None, signed_in_message: str
):
    """Push quiz results to the Hugging Face Hub."""
    print(f"signed_in_message: {signed_in_message}")

    if not user_answers:
        gr.Warning("No answers to submit!")
        return "No answers to submit!"

    if token is None:
        gr.Warning("Please log in to Hugging Face before pushing!")
        return "Please log in to Hugging Face before pushing!"

    # Compute the grade and refuse to submit below the passing threshold.
    correct_count = sum(1 for answer in user_answers if answer["is_correct"])
    total_questions = len(user_answers)
    grade = correct_count / total_questions if total_questions > 0 else 0

    if grade < float(EXAM_PASSING_SCORE):
        gr.Warning(
            f"Score {grade:.1%} below passing threshold of {float(EXAM_PASSING_SCORE):.1%}"
        )
        return f"You scored {grade:.1%}. Please try again to achieve at least {float(EXAM_PASSING_SCORE):.1%}"

    gr.Info("Submitting answers to the Hub. Please wait...", duration=2)

    user_info = whoami(token=token.token)
    username = user_info["name"]
    repo_id = f"{EXAM_DATASET_ID}_responses"
    submission_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # One record per answered question, annotated with user, time, and grade.
    submission_data = [
        {
            "username": username,
            "datetime": submission_time,
            "grade": grade,
            **answer,
        }
        for answer in user_answers
    ]

    try:
        # Load the existing responses dataset if it exists.
        existing_ds = load_dataset(repo_id)
        if not isinstance(existing_ds, dict):
            existing_ds = DatasetDict({"default": existing_ds})
    except Exception:
        # No existing dataset: start from an empty DatasetDict.
        existing_ds = DatasetDict()

    # Store this user's answers as their own split and push everything back.
    new_ds = Dataset.from_list(submission_data)
    existing_ds[username] = new_ds
    existing_ds.push_to_hub(
        repo_id,
        private=True,
    )

    return f"Your responses have been submitted to the Hub! Final grade: {grade:.1%}"
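# The responses end up in a private dataset named f"{EXAM_DATASET_ID}_responses",
# with one split per username; a re-submission overwrites that user's split.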


def handle_quiz(question_idx, user_answers, submitted_code, is_start):
    """Handle quiz state and progression."""
    # On start, begin at the first question; otherwise grade the submitted
    # answer (if any) for the current question before advancing.
    if is_start:
        question_idx = 0
    else:
        if question_idx < len(quiz_data) and submitted_code.strip():
            current_q = quiz_data[question_idx]
            formatted_code = format_python_code(submitted_code)
            is_correct = check_code(
                formatted_code,
                current_q["solution"],
                current_q["challenge"],
                current_q["assessment_criteria"],
            )
            user_answers.append(
                {
                    "challenge": current_q["challenge"],
                    "submitted_code": formatted_code,
                    "correct_solution": current_q["solution"],
                    "assessment_criteria": current_q["assessment_criteria"],
                    "is_correct": is_correct,
                }
            )
        question_idx += 1

    # All questions handled: show the results summary and the Submit button.
    if question_idx >= len(quiz_data):
        correct_count = sum(1 for answer in user_answers if answer["is_correct"])
        grade = correct_count / len(user_answers) if user_answers else 0.0
        results_text = (
            f"**Quiz Complete!**\n\n"
            f"Your score: {grade:.1%}\n"
            f"Passing score: {float(EXAM_PASSING_SCORE):.1%}\n\n"
            f"Your answers:\n\n"
        )
        for idx, answer in enumerate(user_answers):
            results_text += (
                f"Question {idx + 1}: {'✅' if answer['is_correct'] else '❌'}\n"
            )

        return (
            "",
            gr.update(value="", visible=False),
            "✅ Passed!" if grade >= EXAM_PASSING_SCORE else "❌ Did not pass",
            question_idx,
            user_answers,
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=True),
            gr.update(value=results_text, visible=True),
            gr.update(visible=False),
        )
    else:
        # Show the next question.
        q = quiz_data[question_idx]
        challenge_text = f"## Question {question_idx + 1} \n### {q['challenge']}"
        return (
            challenge_text,
            gr.update(value=q["placeholder"], visible=True),
            "Submit your code solution and click 'Next' to continue.",
            question_idx,
            user_answers,
            gr.update(visible=False),
            gr.update(visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(value=q["image"], visible=bool(q["image"])),
        )
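# The 10-tuples returned above must stay in the same order as the `outputs` lists
# wired to start_btn.click and next_btn.click below: (question_text, code_input,
# status_text, question_idx, user_answers, start_btn, next_btn, submit_btn,
# final_markdown, question_image).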


with gr.Blocks() as demo:
    demo.title = f"Coding Quiz: {EXAM_DATASET_ID}"

    # Quiz state shared across callbacks.
    question_idx = gr.State(value=0)
    user_answers = gr.State(value=[])

    with gr.Row(variant="compact"):
        gr.Markdown(f"## Welcome to the {EXAM_DATASET_ID} Quiz")
    with gr.Row(variant="compact"):
        gr.Markdown(
            "Log in first, then click 'Start' to begin. Complete each coding challenge, click 'Next', "
            "and finally click 'Submit' to publish your results to the Hugging Face Hub."
        )

    with gr.Row(variant="panel"):
        with gr.Column():
            question_text = gr.Markdown("")
            question_image = gr.Image(
                label="Question Image", visible=True, type="pil"
            )
        with gr.Column():
            code_input = gr.Code(language="python", label="Your Solution", visible=False)

    with gr.Row(variant="compact"):
        status_text = gr.Markdown("")

    with gr.Row(variant="compact"):
        login_btn = gr.LoginButton()
        start_btn = gr.Button("Start")
        next_btn = gr.Button("Next ⏭️", visible=False)
        submit_btn = gr.Button("Submit ✅", visible=False)

    with gr.Row(variant="compact"):
        final_markdown = gr.Markdown("", visible=False)

    # Gradio injects gr.OAuthToken arguments automatically from the handlers'
    # type hints, so the token never appears in the `inputs` lists below.
    login_btn.click(
        fn=on_user_logged_in,
        inputs=None,
        outputs=[
            login_btn,
            start_btn,
            next_btn,
            submit_btn,
            question_text,
            code_input,
            status_text,
            question_image,
        ],
    )

    start_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, code_input, gr.State(True)],
        outputs=[
            question_text,
            code_input,
            status_text,
            question_idx,
            user_answers,
            start_btn,
            next_btn,
            submit_btn,
            final_markdown,
            question_image,
        ],
    )

    next_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, code_input, gr.State(False)],
        outputs=[
            question_text,
            code_input,
            status_text,
            question_idx,
            user_answers,
            start_btn,
            next_btn,
            submit_btn,
            final_markdown,
            question_image,
        ],
    )

    submit_btn.click(
        fn=push_results_to_hub,
        inputs=[user_answers, login_btn],
        outputs=status_text,
    )


if __name__ == "__main__":
    demo.launch()
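# Launch notes (assumptions, not verified against the original deployment): run
# with an HF_TOKEN that can call the chat-completion endpoint used in check_code,
# e.g. `HF_TOKEN=hf_xxx EXAM_MAX_QUESTIONS=10 python app.py` (script name assumed).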