"""Gradio app that OCRs handwritten answer keys and student responses with
Gemini and scores how closely the two extracted texts match."""

import difflib
import io
import os
import re
from typing import List, Optional, Tuple

import fitz  # PyMuPDF for PDF processing
import google.generativeai as genai
import gradio as gr
from PIL import Image

from config import GEMINI_API_KEY, GEMINI_MODEL, OCR_PROMPT, DEFAULT_RESOLUTION

# Exact prefixes of the error strings returned by HandwritingRecognizer.
# Failure detection matches these (not the bare word "Error") so that OCR text
# which legitimately begins with "Error..." is not mistaken for a failure.
_IMAGE_ERROR_PREFIX = "Error in text extraction:"
_PDF_ERROR_PREFIX = "Error processing PDF:"
_OCR_ERROR_PREFIXES = (_IMAGE_ERROR_PREFIX, _PDF_ERROR_PREFIX)

_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')
_UNSUPPORTED_FORMAT_MESSAGE = "Unsupported file format. Please use PNG, JPG, JPEG, or PDF."

_PUNCTUATION_RE = re.compile(r'[^\w\s]')
_WHITESPACE_RE = re.compile(r'\s+')


def _is_ocr_error(text: str) -> bool:
    """Return True if *text* is one of the recognizer's error strings."""
    return text.startswith(_OCR_ERROR_PREFIXES)


def preprocess_image_for_gemma(image: Image.Image) -> Image.Image:
    """Letterbox an image onto a white canvas of ``DEFAULT_RESOLUTION``.

    The image is scaled (up or down) to fit inside the target resolution while
    preserving its aspect ratio, then centered on a white RGB canvas of exactly
    the target size.

    Args:
        image: Source image in any PIL mode.

    Returns:
        A new RGB image whose size equals ``DEFAULT_RESOLUTION``.
    """
    # Flatten transparency onto white first. A plain convert('RGB') drops the
    # alpha channel and typically leaves transparent regions black, which would
    # hide dark handwriting on a transparent background.
    has_alpha = image.mode in ('RGBA', 'LA') or (
        image.mode == 'P' and 'transparency' in image.info
    )
    if has_alpha:
        rgba = image.convert('RGBA')
        backdrop = Image.new('RGBA', rgba.size, (255, 255, 255, 255))
        image = Image.alpha_composite(backdrop, rgba).convert('RGB')
    elif image.mode != 'RGB':
        image = image.convert('RGB')

    original_width, original_height = image.size
    target_width, target_height = DEFAULT_RESOLUTION

    # Largest scale at which both dimensions still fit inside the target.
    scale_factor = min(target_width / original_width, target_height / original_height)

    # Clamp to 1px: an extreme aspect ratio would otherwise truncate a side to
    # 0, which is not a valid resize target.
    new_width = max(1, int(original_width * scale_factor))
    new_height = max(1, int(original_height * scale_factor))

    resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

    processed_image = Image.new('RGB', (target_width, target_height), 'white')
    x_offset = (target_width - new_width) // 2
    y_offset = (target_height - new_height) // 2
    processed_image.paste(resized_image, (x_offset, y_offset))

    return processed_image


class HandwritingRecognizer:
    """Thin wrapper around a Gemini generative model used for OCR."""

    def __init__(self, api_key: str):
        """Configure the Gemini client and create the model.

        Raises:
            ValueError: If *api_key* is empty.
        """
        if not api_key:
            raise ValueError("API key is required")

        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(GEMINI_MODEL)

    def extract_text_from_image(self, image: Image.Image) -> str:
        """Extract text from an image using the configured model.

        Returns:
            The stripped response text, or a string starting with
            ``"Error in text extraction:"`` if anything fails. This method
            does not raise for ordinary exceptions.
        """
        try:
            processed_image = preprocess_image_for_gemma(image)
            response = self.model.generate_content([OCR_PROMPT, processed_image])
            return response.text.strip()
        except Exception as e:
            return f"{_IMAGE_ERROR_PREFIX} {str(e)}"

    def extract_text_from_pdf(self, pdf_path: str) -> str:
        """Render each PDF page to an image and OCR it.

        Returns:
            The per-page texts, each under a ``--- Page N ---`` header, or a
            string starting with ``"Error processing PDF:"`` if the document
            cannot be processed or OCR fails on any page.
        """
        doc = None
        try:
            doc = fitz.open(pdf_path)
            zoom = fitz.Matrix(2.0, 2.0)  # render at 2x for better legibility
            sections = []

            for page_number, page in enumerate(doc, start=1):
                pix = page.get_pixmap(matrix=zoom)
                image = Image.open(io.BytesIO(pix.tobytes("png")))

                page_text = self.extract_text_from_image(image)
                if page_text.startswith(_IMAGE_ERROR_PREFIX):
                    # Surface the failure instead of embedding an error
                    # message in text that will later be scored.
                    return f"{_PDF_ERROR_PREFIX} page {page_number}: {page_text}"
                sections.append(f"\n--- Page {page_number} ---\n{page_text}\n")

            return "".join(sections).strip()
        except Exception as e:
            return f"{_PDF_ERROR_PREFIX} {str(e)}"
        finally:
            # Close the document on every path, including errors.
            if doc is not None:
                doc.close()


class TextScorer:
    """Text similarity scoring helpers."""

    @staticmethod
    def calculate_similarity(text1: str, text2: str) -> Tuple[float, dict]:
        """Calculate similarity between two texts.

        Returns:
            ``(combined_score, metrics)``. ``metrics`` has the keys
            ``sequence_similarity``, ``word_similarity``, ``char_similarity``
            and ``combined_score``. If either input is empty the result is
            ``(0.0, {})``.
        """
        if not text1 or not text2:
            return 0.0, {}

        clean_text1 = TextScorer.clean_text(text1)
        clean_text2 = TextScorer.clean_text(text2)

        sequence_similarity = difflib.SequenceMatcher(None, clean_text1, clean_text2).ratio()

        # Word-level Jaccard similarity
        words1 = set(clean_text1.split())
        words2 = set(clean_text2.split())
        word_similarity = len(words1 & words2) / max(len(words1 | words2), 1)

        # Character-level Jaccard similarity
        chars1 = set(clean_text1)
        chars2 = set(clean_text2)
        char_similarity = len(chars1 & chars2) / max(len(chars1 | chars2), 1)

        # Weighted average of the three metrics
        combined_score = (sequence_similarity * 0.5 + word_similarity * 0.3 + char_similarity * 0.2)

        metrics = {
            'sequence_similarity': sequence_similarity,
            'word_similarity': word_similarity,
            'char_similarity': char_similarity,
            'combined_score': combined_score
        }

        return combined_score, metrics

    @staticmethod
    def clean_text(text: str) -> str:
        """Lowercase, strip punctuation and normalize whitespace.

        Punctuation is removed *before* whitespace is collapsed; otherwise a
        standalone punctuation mark (``"a , b"``) leaves a double space behind.
        """
        if not text:
            return ""

        text = text.lower()
        text = _PUNCTUATION_RE.sub('', text)
        text = _WHITESPACE_RE.sub(' ', text)

        return text.strip()

    @staticmethod
    def get_score_interpretation(score: float) -> Tuple[str, str]:
        """Return ``(label, hex_color)`` for a score in the range 0..1."""
        if score >= 0.9:
            return "Excellent Match", "#4CAF50"  # Green
        elif score >= 0.8:
            return "Very Good Match", "#8BC34A"  # Light Green
        elif score >= 0.7:
            return "Good Match", "#FFEB3B"  # Yellow
        elif score >= 0.6:
            return "Fair Match", "#FF9800"  # Orange
        else:
            return "Poor Match", "#F44336"  # Red


# Global variables to store extracted texts
answer_key_text = ""
student_response_text = ""
recognizer: Optional[HandwritingRecognizer] = None
# API key the cached recognizer was built with, so a changed key is noticed.
_recognizer_api_key: Optional[str] = None

# Sample data for demo
SAMPLE_ANSWER_KEY = """
Question 1: What is the capital of France?
Answer: Paris

Question 2: Calculate 15 + 27
Answer: 42

Question 3: Name three primary colors
Answer: Red, Blue, Yellow
"""

SAMPLE_STUDENT_RESPONSE = """
Question 1: What is the capital of France?
Answer: Paris

Question 2: Calculate 15 + 27
Answer: 42

Question 3: Name three primary colors
Answer: Red, Blue, Yellow
"""


def _get_recognizer(api_key: str) -> HandwritingRecognizer:
    """Return the cached recognizer, rebuilding it if the API key changed."""
    global recognizer, _recognizer_api_key

    if recognizer is None or _recognizer_api_key != api_key:
        recognizer = HandwritingRecognizer(api_key)
        _recognizer_api_key = api_key
    return recognizer


def _open_image_detached(path: str) -> Image.Image:
    """Load an image fully into memory and close the underlying file."""
    with Image.open(path) as img:
        return img.copy()


def _ocr_uploaded_file(api_key: str, file) -> Tuple[bool, str]:
    """OCR an uploaded image or PDF.

    Args:
        api_key: Gemini API key used to obtain the recognizer.
        file: Either a path string or an object exposing the path as ``.name``.

    Returns:
        ``(True, extracted_text)`` on success, or ``(False, message)`` when the
        format is unsupported or the recognizer reported an error.
    """
    path = file if isinstance(file, str) else file.name
    lowered = path.lower()
    active = _get_recognizer(api_key)

    if lowered.endswith(_IMAGE_EXTENSIONS):
        with Image.open(path) as image:
            text = active.extract_text_from_image(image)
    elif lowered.endswith('.pdf'):
        text = active.extract_text_from_pdf(path)
    else:
        return False, _UNSUPPORTED_FORMAT_MESSAGE

    if _is_ocr_error(text):
        return False, text
    return True, text


def load_sample_data():
    """Run the bundled sample images through OCR for demonstration.

    Returns:
        ``(answer_key_status, answer_key_text, student_status, student_text)``.
        The module-level texts are updated only for the parts that succeed.
    """
    global answer_key_text, student_response_text

    # Every non-success path below blanks the displayed text, so drop stored
    # text up front to keep scoring in sync with what the user sees.
    answer_key_text = ""
    student_response_text = ""

    try:
        if not GEMINI_API_KEY:
            message = "❌ No API key found. Please check your environment configuration."
            return (message, "", message, "")

        try:
            active = _get_recognizer(GEMINI_API_KEY)
        except Exception as e:
            error_msg = f"❌ Error initializing Gemini API: {str(e)}"
            return (error_msg, "", error_msg, "")

        # Load pre-generated sample images
        try:
            answer_key_img = _open_image_detached("sample_answer_key.png")
            student_response_img = _open_image_detached("sample_student_response.png")
        except FileNotFoundError:
            # Fallback: generate the images if the files don't exist
            try:
                from sample_images import create_sample_answer_key_image, create_sample_student_response_image
                answer_key_img = create_sample_answer_key_image()
                student_response_img = create_sample_student_response_image()
            except Exception as e:
                error_msg = f"❌ Error creating sample images: {str(e)}"
                return (error_msg, "", error_msg, "")

        key_text = active.extract_text_from_image(answer_key_img)
        if _is_ocr_error(key_text):
            error_msg = f"❌ Error processing answer key: {key_text}"
            return (error_msg, "", error_msg, "")
        answer_key_text = key_text

        response_text = active.extract_text_from_image(student_response_img)
        if _is_ocr_error(response_text):
            return (
                "✅ Answer key processed successfully!",
                answer_key_text,
                f"❌ Error processing student response: {response_text}",
                ""
            )
        student_response_text = response_text

        success = "✅ Sample data processed through Gemini OCR successfully!"
        return (success, answer_key_text, success, student_response_text)

    except Exception as e:
        error_msg = f"❌ Unexpected error in demo: {str(e)}"
        return (error_msg, "", error_msg, "")


def process_answer_key(api_key: str, file) -> Tuple[str, str]:
    """Process the answer key file and extract text.

    Returns:
        ``(status_message, extracted_text)``; the text is empty on failure.
    """
    global answer_key_text

    # Keep stored text in sync with the display: anything but success blanks it,
    # so a failed or removed upload can never be scored.
    answer_key_text = ""

    if not api_key:
        return "Please enter your Google Gemini API key first.", ""

    if not file:
        return "Please upload an answer key file.", ""

    try:
        ok, result = _ocr_uploaded_file(api_key, file)
    except Exception as e:
        return f"Error processing answer key: {str(e)}", ""

    if not ok:
        return result, ""

    answer_key_text = result
    return "Answer key processed successfully!", answer_key_text


def process_student_response(api_key: str, file) -> Tuple[str, str]:
    """Process the student response file and extract text.

    Returns:
        ``(status_message, extracted_text)``; the text is empty on failure.
    """
    global student_response_text

    # Keep stored text in sync with the display: anything but success blanks it,
    # so a failed or removed upload can never be scored.
    student_response_text = ""

    if not api_key:
        return "Please enter your Google Gemini API key first.", ""

    if not file:
        return "Please upload a student response file.", ""

    try:
        ok, result = _ocr_uploaded_file(api_key, file)
    except Exception as e:
        return f"Error processing student response: {str(e)}", ""

    if not ok:
        return result, ""

    student_response_text = result
    return "Student response processed successfully!", student_response_text


def calculate_score() -> Tuple[str, str, str, str, str, str]:
    """Calculate similarity score between answer key and student response.

    Returns:
        ``(score_html, sequence_metric, word_metric, char_metric,
        clean_answer_key, clean_student_response)``. On failure the first
        element is a message and the remaining five are empty strings.
    """
    if not answer_key_text or not student_response_text:
        return "Please process both answer key and student response first.", "", "", "", "", ""

    try:
        score, metrics = TextScorer.calculate_similarity(answer_key_text, student_response_text)
        interpretation, color = TextScorer.get_score_interpretation(score)

        # Color-code the result card with the interpretation color; dark text
        # keeps it readable on all five background colors.
        score_html = (
            f'<div style="text-align: center; padding: 20px; border-radius: 10px; '
            f'background-color: {color}; color: #212121;">\n'
            f'<h2 style="margin: 0; color: #212121;">Overall Score: {score:.1%}</h2>\n'
            f'<p style="margin: 8px 0 0 0; font-weight: bold;">{interpretation}</p>\n'
            f'</div>'
        )

        sequence_metric = f"Sequence Similarity: {metrics['sequence_similarity']:.1%}"
        word_metric = f"Word Similarity: {metrics['word_similarity']:.1%}"
        char_metric = f"Character Similarity: {metrics['char_similarity']:.1%}"

        clean_answer_key = TextScorer.clean_text(answer_key_text)
        clean_student_response = TextScorer.clean_text(student_response_text)

        return score_html, sequence_metric, word_metric, char_metric, clean_answer_key, clean_student_response

    except Exception as e:
        return f"Error calculating score: {str(e)}", "", "", "", "", ""


def reset_all():
    """Reset all stored data and return eight empty strings for the UI."""
    global answer_key_text, student_response_text, recognizer, _recognizer_api_key

    answer_key_text = ""
    student_response_text = ""
    recognizer = None
    _recognizer_api_key = None

    return "", "", "", "", "", "", "", ""


def _reset_ui():
    """Reset stored data and also blank the two cleaned-text boxes."""
    return (*reset_all(), "", "")


# Create the Gradio interface
def create_app():
    """Build and return the Gradio Blocks application."""
    with gr.Blocks(title="✍️ Handwriting Assessment App", theme=gr.themes.Soft()) as app:
        gr.Markdown("# ✍️ Handwriting Assessment App")
        gr.Markdown("### Upload handwritten answer keys and student responses for automatic scoring using AI")

        # API Key section - hidden since it's predefined
        if not GEMINI_API_KEY:
            with gr.Row():
                api_key = gr.Textbox(
                    label="🔑 Google Gemini API Key",
                    placeholder="Enter your Google Gemini API key here...",
                    type="password",
                    value="",
                    info="Get your API key from: https://makersuite.google.com/app/apikey"
                )
        else:
            # API key is predefined, create hidden component
            api_key = gr.Textbox(value=GEMINI_API_KEY, visible=False)

        # Demo section
        with gr.Row():
            gr.Markdown("### 🎯 Quick Demo")
            demo_btn = gr.Button("Load Sample Data", variant="secondary", size="sm")

        with gr.Row():
            # Teacher Section
            with gr.Column(scale=1):
                gr.Markdown("## 👨‍🏫 Teacher Section")
                answer_key_file = gr.File(
                    label="Upload Answer Key",
                    file_types=[".png", ".jpg", ".jpeg", ".pdf"],
                    file_count="single"
                )
                answer_key_status = gr.Textbox(label="Status", interactive=False)
                answer_key_text_display = gr.Textbox(
                    label="Extracted Answer Key Text",
                    lines=8,
                    interactive=False
                )

            # Student Section
            with gr.Column(scale=1):
                gr.Markdown("## 👨‍🎓 Student Section")
                student_file = gr.File(
                    label="Upload Student Response",
                    file_types=[".png", ".jpg", ".jpeg", ".pdf"],
                    file_count="single"
                )
                student_status = gr.Textbox(label="Status", interactive=False)
                student_text_display = gr.Textbox(
                    label="Extracted Student Response Text",
                    lines=8,
                    interactive=False
                )

        # Scoring Section
        gr.Markdown("## 📊 Assessment Results")

        with gr.Row():
            calculate_btn = gr.Button("Calculate Score", variant="primary", size="lg")
            reset_btn = gr.Button("Reset All", variant="secondary")

        # Results display
        score_display = gr.HTML(label="Overall Score")

        with gr.Row():
            sequence_metric = gr.Textbox(label="Sequence Similarity", interactive=False)
            word_metric = gr.Textbox(label="Word Similarity", interactive=False)
            char_metric = gr.Textbox(label="Character Similarity", interactive=False)

        gr.Markdown("### Text Comparison")
        with gr.Row():
            clean_answer_key = gr.Textbox(
                label="Answer Key (Cleaned)",
                lines=5,
                interactive=False
            )
            clean_student_response = gr.Textbox(
                label="Student Response (Cleaned)",
                lines=5,
                interactive=False
            )

        # Event handlers
        demo_btn.click(
            fn=load_sample_data,
            outputs=[answer_key_status, answer_key_text_display, student_status, student_text_display]
        )

        answer_key_file.change(
            fn=process_answer_key,
            inputs=[api_key, answer_key_file],
            outputs=[answer_key_status, answer_key_text_display]
        )

        student_file.change(
            fn=process_student_response,
            inputs=[api_key, student_file],
            outputs=[student_status, student_text_display]
        )

        calculate_btn.click(
            fn=calculate_score,
            outputs=[score_display, sequence_metric, word_metric, char_metric,
                     clean_answer_key, clean_student_response]
        )

        # The wrapper also clears the cleaned-text boxes, which would otherwise
        # keep showing the previous comparison after a reset.
        reset_btn.click(
            fn=_reset_ui,
            outputs=[
                answer_key_status, answer_key_text_display,
                student_status, student_text_display,
                score_display, sequence_metric, word_metric, char_metric,
                clean_answer_key, clean_student_response
            ]
        )

        # Instructions
        with gr.Accordion("📖 How to Use", open=False):
            gr.Markdown("""
            1. **Enter API Key**: Input your Google Gemini API key at the top
            2. **Upload Answer Key**: Teachers upload the handwritten answer key (PNG, JPG, JPEG, or PDF)
            3. **Upload Student Response**: Students upload their handwritten answers (PNG, JPG, JPEG, or PDF)
            4. **Calculate Score**: Click the "Calculate Score" button to get detailed assessment results
            5. **View Results**: See the overall score, detailed metrics, and text comparison

            **Supported Formats**: PNG, JPG, JPEG, PDF

            **Score Interpretation**:
            - 90-100%: Excellent Match (Green)
            - 80-89%: Very Good Match (Light Green)
            - 70-79%: Good Match (Yellow)
            - 60-69%: Fair Match (Orange)
            - Below 60%: Poor Match (Red)
            """)

    return app


if __name__ == "__main__":
    app = create_app()
    app.launch()