import gradio as gr import google.generativeai as genai from PIL import Image import io import difflib import re import os from typing import Optional, Tuple, List import fitz # PyMuPDF for PDF processing from config import GEMINI_API_KEY, GEMINI_MODEL, OCR_PROMPT, DEFAULT_RESOLUTION def preprocess_image_for_gemma(image: Image.Image) -> Image.Image: """ Advanced auto resizer for gemini3n series requirements. Automatically resizes and optimizes images to 768x768 while preserving quality and aspect ratio. """ # Convert to RGB if necessary if image.mode != 'RGB': image = image.convert('RGB') # Get original dimensions original_width, original_height = image.size target_width, target_height = DEFAULT_RESOLUTION # Calculate scaling factor to fit within target dimensions while preserving aspect ratio scale_factor = min(target_width / original_width, target_height / original_height) # Calculate new dimensions new_width = int(original_width * scale_factor) new_height = int(original_height * scale_factor) # Resize image with high-quality resampling resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS) # Create a new image with exact target dimensions and white background processed_image = Image.new('RGB', DEFAULT_RESOLUTION, 'white') # Calculate position to center the resized image x_offset = (target_width - new_width) // 2 y_offset = (target_height - new_height) // 2 # Paste the resized image onto the centered position processed_image.paste(resized_image, (x_offset, y_offset)) return processed_image class HandwritingRecognizer: def __init__(self, api_key: str): """Initialize the Gemini API for handwriting recognition.""" if not api_key: raise ValueError("API key is required") genai.configure(api_key=api_key) self.model = genai.GenerativeModel(GEMINI_MODEL) def extract_text_from_image(self, image: Image.Image) -> str: """Extract text from an image using Gemini Vision.""" try: # Preprocess image for gemma-3n-e4b-it requirements processed_image = preprocess_image_for_gemma(image) response = self.model.generate_content([OCR_PROMPT, processed_image]) return response.text.strip() except Exception as e: return f"Error in text extraction: {str(e)}" def extract_text_from_pdf(self, pdf_path: str) -> str: """Extract text from PDF pages and convert to images for OCR.""" try: doc = fitz.open(pdf_path) extracted_text = "" for page_num in range(len(doc)): page = doc.load_page(page_num) # Convert PDF page to image mat = fitz.Matrix(2.0, 2.0) # Increase resolution pix = page.get_pixmap(matrix=mat) img_data = pix.tobytes("png") # Convert to PIL Image and preprocess image = Image.open(io.BytesIO(img_data)) # Extract text from the preprocessed image page_text = self.extract_text_from_image(image) extracted_text += f"\n--- Page {page_num + 1} ---\n{page_text}\n" doc.close() return extracted_text.strip() except Exception as e: return f"Error processing PDF: {str(e)}" class TextScorer: @staticmethod def calculate_similarity(text1: str, text2: str) -> Tuple[float, dict]: """Calculate similarity between two texts and provide detailed metrics.""" if not text1 or not text2: return 0.0, {} # Clean and normalize texts clean_text1 = TextScorer.clean_text(text1) clean_text2 = TextScorer.clean_text(text2) # Calculate different similarity metrics sequence_similarity = difflib.SequenceMatcher(None, clean_text1, clean_text2).ratio() # Word-level similarity words1 = set(clean_text1.split()) words2 = set(clean_text2.split()) word_similarity = len(words1.intersection(words2)) / max(len(words1.union(words2)), 1) # Character-level similarity (Jaccard similarity) chars1 = set(clean_text1) chars2 = set(clean_text2) char_similarity = len(chars1.intersection(chars2)) / max(len(chars1.union(chars2)), 1) # Combined score (weighted average) combined_score = (sequence_similarity * 0.5 + word_similarity * 0.3 + char_similarity * 0.2) metrics = { 'sequence_similarity': sequence_similarity, 'word_similarity': word_similarity, 'char_similarity': char_similarity, 'combined_score': combined_score } return combined_score, metrics @staticmethod def clean_text(text: str) -> str: """Clean and normalize text for comparison.""" if not text: return "" # Convert to lowercase text = text.lower() # Remove extra whitespace text = re.sub(r'\s+', ' ', text) # Remove punctuation (optional - you might want to keep some) text = re.sub(r'[^\w\s]', '', text) return text.strip() @staticmethod def get_score_interpretation(score: float) -> Tuple[str, str]: """Get interpretation and color for the score.""" if score >= 0.9: return "Excellent Match", "#4CAF50" # Green elif score >= 0.8: return "Very Good Match", "#8BC34A" # Light Green elif score >= 0.7: return "Good Match", "#FFEB3B" # Yellow elif score >= 0.6: return "Fair Match", "#FF9800" # Orange else: return "Poor Match", "#F44336" # Red # Global variables to store extracted texts answer_key_text = "" student_response_text = "" recognizer = None # Sample data for demo SAMPLE_ANSWER_KEY = """ Question 1: What is the capital of France? Answer: Paris Question 2: Calculate 15 + 27 Answer: 42 Question 3: Name three primary colors Answer: Red, Blue, Yellow """ SAMPLE_STUDENT_RESPONSE = """ Question 1: What is the capital of France? Answer: Paris Question 2: Calculate 15 + 27 Answer: 42 Question 3: Name three primary colors Answer: Red, Blue, Yellow """ def load_sample_data(): """Load sample data for demonstration purposes using pre-generated sample images.""" global answer_key_text, student_response_text, recognizer try: # Check if API key is available if not GEMINI_API_KEY: return ( "❌ No API key found. Please check your environment configuration.", "", "❌ No API key found. Please check your environment configuration.", "" ) # Initialize recognizer if needed if recognizer is None: try: recognizer = HandwritingRecognizer(GEMINI_API_KEY) except Exception as e: error_msg = f"❌ Error initializing Gemini API: {str(e)}" return (error_msg, "", error_msg, "") # Load pre-generated sample images try: answer_key_img = Image.open("sample_answer_key.png") student_response_img = Image.open("sample_student_response.png") except FileNotFoundError: # Fallback: use the sample_images module if files don't exist try: from sample_images import create_sample_answer_key_image, create_sample_student_response_image answer_key_img = create_sample_answer_key_image() student_response_img = create_sample_student_response_image() except Exception as e: error_msg = f"❌ Error creating sample images: {str(e)}" return (error_msg, "", error_msg, "") # Process through actual OCR try: answer_key_text = recognizer.extract_text_from_image(answer_key_img) if answer_key_text.startswith("Error"): return ( f"❌ Error processing answer key: {answer_key_text}", "", f"❌ Error processing answer key: {answer_key_text}", "" ) except Exception as e: error_msg = f"❌ Error processing answer key image: {str(e)}" return (error_msg, "", error_msg, "") try: student_response_text = recognizer.extract_text_from_image(student_response_img) if student_response_text.startswith("Error"): return ( "✅ Answer key processed successfully!", answer_key_text, f"❌ Error processing student response: {student_response_text}", "" ) except Exception as e: return ( "✅ Answer key processed successfully!", answer_key_text, f"❌ Error processing student response: {str(e)}", "" ) return ( "✅ Sample data processed through Gemini OCR successfully!", answer_key_text, "✅ Sample data processed through Gemini OCR successfully!", student_response_text ) except Exception as e: error_msg = f"❌ Unexpected error in demo: {str(e)}" return (error_msg, "", error_msg, "") def process_answer_key(api_key: str, file) -> Tuple[str, str]: """Process the answer key file and extract text.""" global answer_key_text, recognizer if not api_key: return "Please enter your Google Gemini API key first.", "" if not file: return "Please upload an answer key file.", "" try: # Initialize recognizer if not already done if recognizer is None: recognizer = HandwritingRecognizer(api_key) # Process based on file type if file.name.lower().endswith(('.png', '.jpg', '.jpeg')): image = Image.open(file.name) answer_key_text = recognizer.extract_text_from_image(image) elif file.name.lower().endswith('.pdf'): answer_key_text = recognizer.extract_text_from_pdf(file.name) else: return "Unsupported file format. Please use PNG, JPG, JPEG, or PDF.", "" if answer_key_text.startswith("Error"): return answer_key_text, "" return "Answer key processed successfully!", answer_key_text except Exception as e: return f"Error processing answer key: {str(e)}", "" def process_student_response(api_key: str, file) -> Tuple[str, str]: """Process the student response file and extract text.""" global student_response_text, recognizer if not api_key: return "Please enter your Google Gemini API key first.", "" if not file: return "Please upload a student response file.", "" try: # Initialize recognizer if not already done if recognizer is None: recognizer = HandwritingRecognizer(api_key) # Process based on file type if file.name.lower().endswith(('.png', '.jpg', '.jpeg')): image = Image.open(file.name) student_response_text = recognizer.extract_text_from_image(image) elif file.name.lower().endswith('.pdf'): student_response_text = recognizer.extract_text_from_pdf(file.name) else: return "Unsupported file format. Please use PNG, JPG, JPEG, or PDF.", "" if student_response_text.startswith("Error"): return student_response_text, "" return "Student response processed successfully!", student_response_text except Exception as e: return f"Error processing student response: {str(e)}", "" def calculate_score() -> Tuple[str, str, str, str, str, str]: """Calculate similarity score between answer key and student response.""" global answer_key_text, student_response_text if not answer_key_text or not student_response_text: return "Please process both answer key and student response first.", "", "", "", "", "" try: score, metrics = TextScorer.calculate_similarity(answer_key_text, student_response_text) interpretation, color = TextScorer.get_score_interpretation(score) # Format the main score display score_html = f"""