# NOTE(review): the "Spaces / Sleeping" lines here were HuggingFace Spaces
# page chrome captured by the scrape, not part of the app source — removed.
| import os | |
| import json | |
| import time | |
| import gradio as gr | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import requests | |
| from typing import Dict, List, Any | |
| from datetime import datetime | |
| import re | |
| import io | |
| import base64 | |
| from PIL import Image | |
# API configuration
# Change this to your DigitalOcean API URL
API_URL = "https://s25-aisec-lab9-95qi2.ondigitalocean.app/api"

# Define the rubric - cached locally to avoid excessive API calls.
# Each (name, description, points) triple becomes one criterion dict below.
_RUBRIC_CRITERIA = [
    ("Implementation Correctness",
     "The neural network implementation correctly handles forward and backward propagation",
     40),
    ("Model Performance",
     "The model achieves at least 90% accuracy on the test set",
     30),
    ("Code Quality",
     "Code is well-organized, properly documented, and follows PEP 8 style guide",
     20),
    ("Analysis",
     "Student provides thoughtful analysis of model performance and limitations",
     10),
]

rubric = {
    "assignment_name": "Neural Network Implementation",
    "total_points": 100,
    "criteria": [
        {"name": name, "description": description, "points": points}
        for name, description, points in _RUBRIC_CRITERIA
    ],
}
# Sample legitimate submission (shortened for display purposes)
# Shown read-only in the UI so students can see what an honest answer to the
# rubric looks like before they start crafting injection attacks.
# NOTE(review): original indentation of this literal was lost in extraction;
# reconstructed with conventional 4-space indents — confirm against upstream.
sample_submission = """
# Neural Network Implementation for MNIST Digit Classification
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        # Initialize weights and biases
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        # Forward propagation
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = np.maximum(0, self.z1)  # ReLU activation
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        exp_scores = np.exp(self.z2)
        self.probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        return self.probs

    # More implementation details omitted for brevity

    def predict(self, X):
        probs = self.forward(X)
        return np.argmax(probs, axis=1)

# Analysis
print("Model Analysis:")
print("1. The implemented neural network achieves over 90% accuracy on the MNIST test set.")
print("2. Using ReLU activation helps avoid vanishing gradient problems.")
"""
# Function to check API connectivity
def check_api_connectivity(timeout=10):
    """Ping the grading API's health endpoint.

    Args:
        timeout: Seconds to wait for the API before giving up; prevents the
            UI from hanging indefinitely when the host is unreachable
            (the original call had no timeout and could block forever).

    Returns:
        ``(True, parsed_json)`` on HTTP 200, otherwise ``(False, error_text)``
        where *error_text* is the response body or the exception message.
    """
    try:
        response = requests.get(f"{API_URL}/health", timeout=timeout)
        if response.status_code == 200:
            return True, response.json()
        return False, response.text
    # Broad catch is deliberate at this boundary: any failure is rendered
    # as a status string rather than crashing the interface.
    except Exception as e:
        return False, str(e)
# Cache for student attempts, keyed by student_id.  Only successful API
# responses are cached; failures fall through so they can be retried.
student_attempts_cache = {}


# Helper function to get student attempts from API
def get_student_attempts(student_id):
    """Return all grading attempts for *student_id* from the API.

    Results are memoized in ``student_attempts_cache``.  Any HTTP or
    network error is printed and reported as an empty list, so callers
    see "no data" and "fetch failed" uniformly.

    Args:
        student_id: University email address identifying the student.

    Returns:
        A list of attempt dicts (possibly empty).
    """
    # Rough validity check: student IDs are university email addresses.
    if not student_id or '@' not in student_id:
        return []
    # Serve from the local cache to avoid excessive API calls.
    if student_id in student_attempts_cache:
        return student_attempts_cache[student_id]
    try:
        # timeout keeps the UI responsive when the API host is down
        # (the original call could block indefinitely).
        response = requests.get(
            f"{API_URL}/attempts",
            params={"student_id": student_id},
            timeout=15,
        )
        if response.status_code == 200:
            attempts = response.json().get("attempts", [])
            student_attempts_cache[student_id] = attempts
            return attempts
        print(f"Error fetching attempts: {response.status_code}")
        print(response.text)
        return []
    except Exception as e:
        print(f"Error: {str(e)}")
        return []
# Function to update the attack history chart
def update_attack_history_chart(student_id):
    """Build a bar chart of all grading attempts for *student_id*.

    Args:
        student_id: University email address identifying the student.

    Returns:
        A matplotlib ``Figure`` for Gradio's ``Plot`` component, or ``None``
        when the student has no recorded attempts.
    """
    attempts = get_student_attempts(student_id)
    if not attempts:
        return None
    # One bar per attempt, labeled Attempt 1..N in submission order.
    scores = [attempt["total_score"] for attempt in attempts]
    attempt_ids = [f"Attempt {i+1}" for i in range(len(attempts))]
    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar(attempt_ids, scores, color="skyblue")
    # Add a score label just above each bar.
    for bar, score in zip(bars, scores):
        ax.text(
            bar.get_x() + bar.get_width() / 2,
            bar.get_height() + 1,
            f"{score:.1f}",
            ha="center",
            va="bottom"
        )
    # Customize chart
    ax.set_title(f"Attack Attempts History for {student_id.split('@')[0]}")
    ax.set_ylabel("Score (out of 100)")
    ax.set_ylim(0, 110)  # Give some space for the labels
    # Use Axes/Figure-level calls rather than the pyplot state machine so
    # this stays correct when several figures are alive at once (the
    # original used plt.xticks/plt.tight_layout, which act on the
    # implicit "current" figure in this long-running server app).
    for label in ax.get_xticklabels():
        label.set_rotation(45)
        label.set_ha("right")
    fig.tight_layout()
    return fig
# Function to submit for grading
def submit_for_grading(student_id, submission_text, additional_instructions=""):
    """Submit the code for grading and return the result.

    Args:
        student_id: University email address identifying the student.
        submission_text: The (possibly adversarial) assignment submission.
        additional_instructions: Optional text appended to the system
            prompt server-side (Part 2 of the lab).

    Returns:
        ``(result_text, history_figure)`` — the pretty-printed JSON grading
        result plus a refreshed history chart on success, otherwise an
        error message and ``None``.
    """
    if not student_id or '@' not in student_id:
        return "Please enter a valid university email address.", None
    if not submission_text:
        return "Please enter a submission.", None
    # Clear cache for this student so the history chart reflects this attempt.
    student_attempts_cache.pop(student_id, None)
    payload = {
        "student_id": student_id,
        "submission": submission_text,
        "additional_instructions": additional_instructions
    }
    try:
        # Grading involves a server-side LLM call, so allow a generous
        # timeout — but never hang the UI forever (the original post had
        # no timeout at all).
        response = requests.post(f"{API_URL}/submit", json=payload, timeout=120)
        if response.status_code == 200:
            result = response.json()
            # Format the result for display
            formatted_result = json.dumps(result, indent=2)
            # Get updated history chart
            history_chart = update_attack_history_chart(student_id)
            return formatted_result, history_chart
        return f"Error: {response.status_code}\n{response.text}", None
    except Exception as e:
        return f"Error: {str(e)}", None
# Function to download a PDF report
def download_pdf_report(student_id):
    """Write a plain-text attack summary for *student_id* to disk.

    The real PDF generation happens on the server; this client produces a
    lightweight text summary only.

    Args:
        student_id: University email address identifying the student.

    Returns:
        ``(filename, status_message)``; *filename* is ``None`` when
        validation fails or the student has no recorded attempts.
    """
    if not student_id or '@' not in student_id:
        return None, "Please enter a valid university email address."
    attempts = get_student_attempts(student_id)
    if not attempts:
        return None, "No attempts found for this student ID."
    # Compute the score list once instead of rebuilding it for max and sum.
    scores = [attempt["total_score"] for attempt in attempts]
    # Create a simple text report
    report_text = f"""
PROMPT INJECTION LAB REPORT
Student ID: {student_id}
Generated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
ATTACK SUMMARY:
Total Attempts: {len(attempts)}
Best Score: {max(scores):.1f}
Average Score: {sum(scores) / len(attempts):.1f}
DETAILED ATTEMPTS:
"""
    for i, attempt in enumerate(attempts):
        report_text += f"""
Attack {i+1}:
- Timestamp: {attempt["timestamp"]}
- Score: {attempt["total_score"]:.1f}
- Processing Time: {attempt["processing_time"]:.2f} seconds
Submission:
{attempt["submission"][:500]}... (truncated)
"""
        if attempt.get("additional_instructions"):
            report_text += f"""
Additional Instructions:
{attempt["additional_instructions"][:300]}... (truncated)
"""
    report_text += """
NOTE: This is a simple text summary. To get the full PDF report,
download it from the API directly or from the Colab notebook.
"""
    # Sanitize the email local-part so a crafted student_id cannot inject
    # path separators or other unsafe characters into the filename.
    safe_id = re.sub(r"[^A-Za-z0-9._-]", "_", student_id.split('@')[0])
    filename = f"prompt_injection_report_{safe_id}.txt"
    # Explicit encoding: the report may contain non-ASCII submission text.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(report_text)
    return filename, f"Report generated for {student_id}"
# Create the Gradio interface.
# Layout: instructions and rubric accordions on top, then a two-column row
# (attack inputs on the left, grading results on the right).
with gr.Blocks(title="AI Autograder Prompt Injection Lab") as demo:
    gr.Markdown("""
    # Lab 1: Prompt Injection Attacks on AI Autograders
    In this lab, you'll explore how language models (LLMs) used for educational purposes can be vulnerable to prompt injection attacks.
    Your goal is to submit code that doesn't actually implement the required neural network but still receives a high score from the autograder.
    This interface connects to an API hosted on DigitalOcean that handles the actual grading.
    ## Instructions:
    1. Enter your university email address (this will be used to identify your submission).
    2. Type or paste your submission in the text area.
    3. For part 2 of the lab, you can experiment with adding text to the system prompt.
    4. Click "Submit for Grading" to see how the autograder evaluates your submission.
    5. When you're satisfied with your attacks, generate a report for submission.
    The rubric below shows what the autograder is looking for in a legitimate submission.
    """)
    # API status indicator (populated by the demo.load hook below)
    with gr.Row():
        api_status = gr.Textbox(label="API Status", value="Checking API connection...")
    with gr.Accordion("View Assignment Rubric", open=False):
        gr.JSON(value=rubric)
    with gr.Accordion("View Sample Legitimate Submission", open=False):
        gr.Textbox(value=sample_submission, label="Sample Submission")
    with gr.Row():
        with gr.Column():
            # Left column: student identity and attack inputs.
            student_id = gr.Textbox(
                label="Student ID (University Email Address)",
                placeholder="your.name@university.edu"
            )
            submission_input = gr.Textbox(
                lines=15,
                label="Your Submission",
                placeholder="Enter your submission here..."
            )
            additional_instructions = gr.Textbox(
                lines=5,
                label="Add to System Prompt (Part 2)",
                placeholder="Additional instructions to add to the system prompt..."
            )
            submit_button = gr.Button("Submit for Grading")
            view_history_button = gr.Button("View Your Attack History")
            generate_report_button = gr.Button("Download Simple Report")
        with gr.Column():
            # Right column: grading output, score-history chart, report file.
            grading_result = gr.Textbox(lines=15, label="Grading Result")
            attack_history_plot = gr.Plot(label="Attack History")
            report_output = gr.File(label="Report")
            report_status = gr.Textbox(label="Report Status", visible=False)

    # Define interactions
    def check_api_and_update():
        # Render the connectivity check as a one-line status string.
        status, details = check_api_connectivity()
        if status:
            return f"✅ Connected to API: {details.get('status', 'ok')}, version: {details.get('version', 'unknown')}"
        else:
            return f"❌ API Connection Failed: {details}"

    # Check API on load
    demo.load(check_api_and_update, [], [api_status])
    # Submit button
    submit_button.click(
        fn=submit_for_grading,
        inputs=[student_id, submission_input, additional_instructions],
        outputs=[grading_result, attack_history_plot]
    )
    # View history button — clears the result box and redraws the chart.
    view_history_button.click(
        fn=lambda student_id: (None, update_attack_history_chart(student_id)),
        inputs=[student_id],
        outputs=[grading_result, attack_history_plot]
    )
    # Generate report button
    generate_report_button.click(
        fn=download_pdf_report,
        inputs=[student_id],
        outputs=[report_output, report_status]
    )
    gr.Markdown("""
    ## Documentation
    For each successful attack, document:
    1. What vulnerability did you exploit?
    2. How did you structure your injection?
    3. What are possible mitigations?
    ## Note About Reports
    This simple interface provides a basic text report. For a more comprehensive PDF report
    with visualizations, use the Colab notebook which connects to the same API.
    Remember to include at least three different prompt injection attack examples in your final submission.
    """)
# Launch the app only when run as a script (not when imported as a module).
if __name__ == "__main__":
    demo.launch()