import base64
import json
import logging
import os
import re
import time
from datetime import datetime

import gradio as gr
import pandas as pd
from gradio_client import Client, handle_file
from huggingface_hub import hf_hub_download, upload_file
|
|
| |
# Hub identifiers and credentials used throughout this module.
PRIVATE_SPACE_ID = "LLM-course/lipogram_private"  # Space queried through gradio_client
DATASET_REPO_ID = "LLM-course/leaderboard-lipogram"  # dataset repo holding leaderboard.csv
HF_TOKEN = os.getenv("HF_TOKEN")  # None when the environment variable is unset
LOCAL_CSV = "leaderboard.csv"  # local file written before each upload
|
|
def extract_username_from_session(session_cookie):
    """Return the username stored in the ``session`` cookie, or ``None``.

    The cookie is treated as dot-separated fields whose first field is
    base64 encoded JSON (trailing ``=`` padding may have been stripped).
    The username is read from
    ``data["oauth_info"]["userinfo"]["preferred_username"]``.

    NOTE(security): the payload is only decoded here; the remaining
    dot-separated fields (presumably a timestamp and signature -- confirm
    against the session middleware in use) are never verified, so a client
    can craft a cookie carrying any username. Prefer an identity supplied by
    the framework's own OAuth support for anything that must be trusted.

    Args:
        session_cookie: Raw cookie value, or ``None`` when the cookie is absent.

    Returns:
        The ``preferred_username`` string, or ``None`` when the cookie is
        missing, malformed, or does not carry a string username.
    """
    if not session_cookie or not isinstance(session_cookie, str):
        return None

    # A cookie without any "." separator is rejected, as before.
    payload, separator, _ = session_cookie.partition('.')
    if not separator:
        return None

    # Restore the base64 padding that may have been stripped from the payload.
    payload += '=' * (-len(payload) % 4)

    try:
        data = json.loads(base64.urlsafe_b64decode(payload))
    except (ValueError, TypeError, RecursionError):
        # binascii.Error, JSONDecodeError and UnicodeDecodeError are all
        # ValueError subclasses; RecursionError guards against absurdly
        # nested JSON in a client-supplied cookie.
        return None

    # Walk oauth_info -> userinfo, bailing out as soon as a level is not a dict.
    for key in ('oauth_info', 'userinfo'):
        if not isinstance(data, dict):
            return None
        data = data.get(key)
    if not isinstance(data, dict):
        return None

    username = data.get('preferred_username')
    # Callers use the name as a dict key and DataFrame value; only accept str.
    return username if isinstance(username, str) else None
|
|
| |
def sync_leaderboard():
    """Download the latest leaderboard from the dataset repo, best effort.

    Fetches ``leaderboard.csv`` from ``DATASET_REPO_ID`` using ``HF_TOKEN``
    and returns it sorted by ``Score`` in descending order.

    Returns:
        pandas.DataFrame: the sorted leaderboard. If anything goes wrong
        (download failure, unreadable CSV, missing ``Score`` column) an empty
        frame with the columns ``Timestamp, User, Score, Ex 1, Ex 2`` is
        returned instead, so an empty result does NOT prove the remote
        leaderboard is empty.
    """
    try:
        path = hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename="leaderboard.csv",
            repo_type="dataset",
            token=HF_TOKEN,
        )
        df = pd.read_csv(path)
        return df.sort_values(by="Score", ascending=False)
    except Exception:
        # Deliberately broad: this feeds the UI and must never raise. The
        # failure is logged so it is not silently mistaken for "no scores yet".
        logging.getLogger(__name__).warning(
            "Could not load the leaderboard; showing an empty one instead.",
            exc_info=True,
        )
        return pd.DataFrame(columns=["Timestamp", "User", "Score", "Ex 1", "Ex 2"])
|
|
def _load_leaderboard_for_update():
    """Fetch the remote leaderboard, raising on anything but "no data yet".

    Only a missing or empty ``leaderboard.csv`` is treated as an empty
    leaderboard. Every other failure propagates so that the caller never
    overwrites the remote file with a leaderboard rebuilt from nothing.
    """
    # Imported here so the dependency stays local to the one place that needs it.
    from huggingface_hub.utils import EntryNotFoundError

    try:
        path = hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename="leaderboard.csv",
            repo_type="dataset",
            token=HF_TOKEN,
        )
        df = pd.read_csv(path)
    except (EntryNotFoundError, pd.errors.EmptyDataError):
        return pd.DataFrame(columns=["Timestamp", "User", "Score", "Ex 1", "Ex 2"])
    return df.sort_values(by="Score", ascending=False)


def save_score(user, score, ex1, ex2):
    """Record ``score`` for ``user`` if it beats their stored score, then upload.

    Args:
        user: Username identifying the leaderboard row(s).
        score: Numeric total score of this submission.
        ex1: Status text stored in the ``Ex 1`` column.
        ex2: Status text stored in the ``Ex 2`` column.

    Returns:
        pandas.DataFrame: the leaderboard after the operation. When the user
        already has an equal or better score, the frame is returned unchanged
        and nothing is written or uploaded.

    Raises:
        Exception: any download, CSV or upload error propagates. In
        particular a failed download aborts the save instead of being treated
        as an empty leaderboard, which previously caused the remote CSV to be
        replaced by a single-row file.
    """
    df = _load_leaderboard_for_update()
    # A CSV holding only whole-number scores is read back as int64; cast so a
    # fractional score can be stored without a dtype clash.
    df["Score"] = df["Score"].astype(float)

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
    is_user = df["User"] == user

    if is_user.any():
        best_so_far = df.loc[is_user, "Score"].max()
        # Keep the stored row unless the new score is strictly better. A NaN
        # stored score is always replaced.
        if pd.notna(best_so_far) and not score > best_so_far:
            return df
        df.loc[is_user, "Timestamp"] = timestamp
        df.loc[is_user, "Score"] = float(score)
        df.loc[is_user, "Ex 1"] = ex1
        df.loc[is_user, "Ex 2"] = ex2
    else:
        new_row = pd.DataFrame([{
            "Timestamp": timestamp,
            "User": user,
            "Score": float(score),
            "Ex 1": ex1,
            "Ex 2": ex2,
        }])
        # Concatenating onto an empty frame is deprecated in recent pandas.
        df = new_row if df.empty else pd.concat([df, new_row], ignore_index=True)

    df.to_csv(LOCAL_CSV, index=False)

    upload_file(
        path_or_fileobj=LOCAL_CSV,
        path_in_repo="leaderboard.csv",
        repo_id=DATASET_REPO_ID,
        repo_type="dataset",
        token=HF_TOKEN,
    )
    return df
|
|
| |
# Maps username -> time.time() of that user's last saved submission.
# Plain in-process dict: it is not persisted anywhere in this module.
user_last_submission = {}


def _parse_exercise(result_text, label, section):
    """Return ``(correct, quality, status)`` for one exercise of the report.

    ``label`` is the exercise tag (e.g. ``"Ex 1"``) and ``section`` is the
    slice of ``result_text`` that is searched for a ``TIMEOUT`` marker.
    ``correct`` is the count out of 5 and ``quality`` is a 0-1 fraction; both
    are zero unless a "<n>/5 correct | Quality: <p>%" line is found.
    """
    if label not in result_text:
        return 0, 0.0, "Not evaluated"
    if "TIMEOUT" in section:
        return 0, 0.0, "TIMEOUT"
    if f"{label} Error" in result_text:
        return 0, 0.0, "ERROR"

    match = re.search(
        re.escape(label) + r'[^:]*:\*?\*?\s*(\d+)/5\s*correct\s*\|\s*Quality:\s*(\d+)%',
        result_text,
    )
    if match is None:
        return 0, 0.0, "Not evaluated"

    correct = int(match.group(1))
    percent = match.group(2)
    return correct, int(percent) / 100.0, f"{correct}/5 ({percent}%)"


def _score_result(result_text):
    """Turn the evaluator report into ``(total_score, ex1_status, ex2_status)``.

    The total is the mean number of correct answers (0-5) plus the mean
    quality fraction scaled to 0-5, rounded to two decimals.
    """
    if not isinstance(result_text, str):
        # Anything but text cannot be parsed; score it as zero.
        return 0, f"Parse error: expected text, got {type(result_text).__name__}", "Parse error"

    # The text before the first "Ex 2" is scanned for an Ex 1 timeout; the
    # text between the first and second "Ex 2" is scanned for an Ex 2 timeout.
    segments = result_text.split("Ex 2")
    ex1_section = segments[0]
    ex2_section = segments[1] if len(segments) > 1 else ""

    ex1_score, ex1_quality, ex1_status = _parse_exercise(result_text, "Ex 1", ex1_section)
    ex2_score, ex2_quality, ex2_status = _parse_exercise(result_text, "Ex 2", ex2_section)

    correctness_part = (ex1_score + ex2_score) / 2.0
    quality_part = (ex1_quality + ex2_quality) / 2.0 * 5
    return round(correctness_part + quality_part, 2), ex1_status, ex2_status


def submit_challenge(file, request: gr.Request):
    """Evaluate an uploaded file remotely and record the resulting score.

    Args:
        file: The uploaded file from the ``gr.File`` input, either a path
            string or an object exposing the path as ``.name``; ``None`` when
            nothing was uploaded.
        request: The Gradio request; its ``session`` cookie identifies the user.

    Returns:
        tuple: ``(text, leaderboard)`` where ``text`` is the evaluator report
        (or ``"Error: ..."`` if evaluation or saving failed) and
        ``leaderboard`` is a DataFrame sorted by ``Score`` descending.

    Raises:
        gr.Error: if the user is not signed in, submitted less than ten
            minutes after their last saved submission, or uploaded no file.
    """
    # NOTE(security): the username comes straight from the cookie payload; see
    # extract_username_from_session for how (and how little) it is validated.
    session_cookie = request.cookies.get('session')
    user_name = extract_username_from_session(session_cookie)

    if not user_name:
        raise gr.Error("Please 'Sign in with Hugging Face' at the top of the page to submit.")

    # The timestamp is only stored after a successful save (below), so failed
    # evaluations do not consume the user's ten-minute slot.
    last_submission = user_last_submission.get(user_name)
    if last_submission is not None and (time.time() - last_submission) < 600:
        raise gr.Error("One submission every 10 minutes allowed.")

    if file is None:
        raise gr.Error("Please upload a file.")

    gr.Info(f"Hello {user_name}, sending your code to the evaluator...")

    try:
        # Accept both a plain path string and a file-like object with `.name`.
        file_path = file if isinstance(file, str) else file.name

        client = Client(PRIVATE_SPACE_ID, token=HF_TOKEN)
        result_text = client.predict(file_obj=handle_file(file_path), api_name="/predict")

        total_score, ex1_status, ex2_status = _score_result(result_text)

        updated_df = save_score(user_name, total_score, ex1_status, ex2_status)
        user_last_submission[user_name] = time.time()

        return result_text, updated_df.sort_values(by="Score", ascending=False)

    except gr.Error:
        # User-facing errors must reach Gradio untouched.
        raise
    except Exception as e:
        # Any other failure is reported in the output pane, next to a freshly
        # synced leaderboard.
        return f"Error: {e}", sync_leaderboard()
|
|
| |
# Markdown shown on the "Download the Template" tab.
_INSTRUCTIONS_MD = (
    "## Exercise Instructions\n\n"
    "### Exercise 1: La disparition (No 'e' or 'E')\n"
    "Generate text without ever using the letter 'e' or 'E'. "
    "For this, you must use `model()` directly: `model(input_ids)` yields logits. "
    "You need to manually adjust the logits to forbid tokens containing 'e' or 'E'. "
    "**REQUIREMENT: Do NOT use model.generate().**\n\n"
    "### Exercise 2: The Toulouse Sequence\n"
    "Generate text without ever using the word 'Toulouse'. "
    "For this, you must use `model()` directly: `model(input_ids)` yields logits. "
    "You need to manually adjust the logits. "
    "It is more difficult here because 'Toulouse' is a multi-token word. "
    "**REQUIREMENT: Do NOT use model.generate().**\n\n"
    "Download the `challenge.py` template below to get started:\n"
)

# Page layout: title, login button, then three tabs (template / submit / leaderboard).
with gr.Blocks() as demo:
    gr.Markdown("# LLM Lipogram Challenge Portal")

    # Sign-in control; submit_challenge reads the resulting session cookie.
    gr.LoginButton()

    # Hidden placeholder; nothing in the visible code writes to it.
    progress_status = gr.Markdown("", visible=False)

    with gr.Tabs():
        with gr.TabItem("Download the Template"):
            gr.Markdown(_INSTRUCTIONS_MD)
            gr.File(value="challenge.py", label="Download Template", interactive=False)

        with gr.TabItem("Submit"):
            gr.Markdown("### 1. Sign in above\n### 2. Upload challenge.py below")
            file_input = gr.File(label="challenge.py")
            submit_btn = gr.Button("Evaluate My Code", variant="primary")
            output_text = gr.Markdown()

        with gr.TabItem("Leaderboard"):
            # Passing the function itself (not its result) as the value.
            leaderboard_df = gr.DataFrame(value=sync_leaderboard, interactive=False)

    # One click evaluates the upload and refreshes both the report and the table.
    submit_btn.click(
        submit_challenge,
        file_input,
        [output_text, leaderboard_df],
        show_progress="hidden",
    )


demo.launch(theme=gr.themes.Soft())