# Captured from a Hugging Face Space (status at capture time: Sleeping).
# Last commit 0b4d45d by Nur Arifin Akbar:
#   "Fix Gemini model name and add detailed error handling for sample data loading"
| import gradio as gr | |
| import google.generativeai as genai | |
| from PIL import Image | |
| import io | |
| import difflib | |
| import re | |
| import os | |
| from typing import Optional, Tuple, List | |
| import fitz # PyMuPDF for PDF processing | |
| from config import GEMINI_API_KEY, GEMINI_MODEL, OCR_PROMPT, DEFAULT_RESOLUTION | |
def preprocess_image_for_gemma(image: Image.Image) -> Image.Image:
    """
    Letterbox an image onto a white canvas of exactly ``DEFAULT_RESOLUTION``.

    The image is converted to RGB when necessary, scaled (up or down) by a
    single factor so that it fits inside the target size with its aspect
    ratio preserved, and pasted centred on a white background.

    Args:
        image: Source image in any PIL mode.

    Returns:
        A new RGB image whose size is exactly ``DEFAULT_RESOLUTION``.
    """
    # Convert to RGB if necessary
    if image.mode != 'RGB':
        image = image.convert('RGB')

    original_width, original_height = image.size
    target_width, target_height = DEFAULT_RESOLUTION

    # Largest uniform scale at which both sides still fit inside the target.
    scale_factor = min(target_width / original_width, target_height / original_height)

    # int() truncates, so an extreme aspect ratio (e.g. a 10000x1 strip) would
    # yield a 0-pixel side, which resize() rejects. Keep every side >= 1 px.
    new_width = max(1, int(original_width * scale_factor))
    new_height = max(1, int(original_height * scale_factor))

    # Resize image with high-quality resampling
    resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

    # White canvas of the exact target size; the resized image is centred on
    # it, so any unused margin stays white.
    processed_image = Image.new('RGB', DEFAULT_RESOLUTION, 'white')
    x_offset = (target_width - new_width) // 2
    y_offset = (target_height - new_height) // 2
    processed_image.paste(resized_image, (x_offset, y_offset))

    return processed_image
class HandwritingRecognizer:
    """OCR helper that sends page images to a Gemini generative model.

    Both extraction methods report failure by *returning* a string that
    starts with ``"Error"`` instead of raising; callers test for that prefix.
    """

    def __init__(self, api_key: str):
        """Initialize the Gemini API for handwriting recognition.

        Args:
            api_key: Google Gemini API key.

        Raises:
            ValueError: If ``api_key`` is empty.
        """
        if not api_key:
            raise ValueError("API key is required")
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(GEMINI_MODEL)

    def extract_text_from_image(self, image: Image.Image) -> str:
        """Extract text from an image using Gemini Vision.

        Args:
            image: Image to transcribe; it is normalised with
                ``preprocess_image_for_gemma`` before being sent.

        Returns:
            The stripped response text, or a string starting with
            ``"Error in text extraction:"`` if anything fails.
        """
        try:
            processed_image = preprocess_image_for_gemma(image)
            response = self.model.generate_content([OCR_PROMPT, processed_image])
            return response.text.strip()
        except Exception as e:
            # Deliberately broad: this class reports failures as strings.
            return f"Error in text extraction: {str(e)}"

    def extract_text_from_pdf(self, pdf_path: str) -> str:
        """Render every PDF page to an image and OCR it.

        Args:
            pdf_path: Path of the PDF file to open.

        Returns:
            The per-page texts, each preceded by a ``--- Page N ---`` marker,
            or a string starting with ``"Error processing PDF:"`` if the
            document cannot be processed or OCR fails on any page.
        """
        try:
            doc = fitz.open(pdf_path)
            try:
                page_sections = []
                zoom = fitz.Matrix(2.0, 2.0)  # Render at 2x for more legible glyphs
                for page_number, page in enumerate(doc, start=1):
                    pix = page.get_pixmap(matrix=zoom)
                    image = Image.open(io.BytesIO(pix.tobytes("png")))
                    page_text = self.extract_text_from_image(image)
                    # A failed page must fail the whole document. Otherwise the
                    # error message is buried after the page marker, callers'
                    # startswith("Error") check never fires, and the message
                    # would be scored as if it were handwriting.
                    if page_text.startswith("Error in text extraction:"):
                        return f"Error processing PDF: page {page_number}: {page_text}"
                    page_sections.append(f"\n--- Page {page_number} ---\n{page_text}\n")
            finally:
                # Release the document even when rendering or OCR raises.
                doc.close()
            return "".join(page_sections).strip()
        except Exception as e:
            return f"Error processing PDF: {str(e)}"
class TextScorer:
    """Stateless text-similarity helpers used to grade OCR output.

    All methods are static, so they work both as ``TextScorer.method(...)``
    and on an instance.
    """

    @staticmethod
    def calculate_similarity(text1: str, text2: str) -> Tuple[float, dict]:
        """Calculate similarity between two texts and provide detailed metrics.

        Both texts are normalised with ``clean_text`` and compared three ways:
        ``difflib.SequenceMatcher`` ratio, Jaccard overlap of the word sets,
        and Jaccard overlap of the character sets. The combined score weights
        them 0.5 / 0.3 / 0.2 respectively.

        Args:
            text1: First text (e.g. the answer key).
            text2: Second text (e.g. the student response).

        Returns:
            ``(combined_score, metrics)`` where ``metrics`` holds the keys
            ``sequence_similarity``, ``word_similarity``, ``char_similarity``
            and ``combined_score``. If either input is empty the result is
            ``(0.0, {})``.
        """
        if not text1 or not text2:
            return 0.0, {}

        clean_text1 = TextScorer.clean_text(text1)
        clean_text2 = TextScorer.clean_text(text2)

        sequence_similarity = difflib.SequenceMatcher(None, clean_text1, clean_text2).ratio()

        # Word-level Jaccard similarity; max(..., 1) avoids dividing by zero
        # when both cleaned texts contain no words.
        words1 = set(clean_text1.split())
        words2 = set(clean_text2.split())
        word_similarity = len(words1 & words2) / max(len(words1 | words2), 1)

        # Character-level Jaccard similarity over the distinct characters.
        chars1 = set(clean_text1)
        chars2 = set(clean_text2)
        char_similarity = len(chars1 & chars2) / max(len(chars1 | chars2), 1)

        # Combined score (weighted average)
        combined_score = (sequence_similarity * 0.5 + word_similarity * 0.3 + char_similarity * 0.2)

        metrics = {
            'sequence_similarity': sequence_similarity,
            'word_similarity': word_similarity,
            'char_similarity': char_similarity,
            'combined_score': combined_score
        }
        return combined_score, metrics

    @staticmethod
    def clean_text(text: str) -> str:
        """Clean and normalize text for comparison.

        Lower-cases the text, removes every character that is neither a word
        character nor whitespace, and collapses whitespace runs to one space.

        Args:
            text: Raw text; falsy input yields ``""``.

        Returns:
            The normalised text without leading or trailing whitespace.
        """
        if not text:
            return ""
        text = text.lower()
        # Strip punctuation BEFORE collapsing whitespace: removing a
        # free-standing mark such as the dash in "a - b" would otherwise
        # leave a double space behind.
        text = re.sub(r'[^\w\s]', '', text)
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    @staticmethod
    def get_score_interpretation(score: float) -> Tuple[str, str]:
        """Get interpretation and color for the score.

        Args:
            score: Similarity score, compared against 0.9 / 0.8 / 0.7 / 0.6.

        Returns:
            ``(label, hex_color)`` for the band the score falls into.
        """
        if score >= 0.9:
            return "Excellent Match", "#4CAF50"  # Green
        elif score >= 0.8:
            return "Very Good Match", "#8BC34A"  # Light Green
        elif score >= 0.7:
            return "Good Match", "#FFEB3B"  # Yellow
        elif score >= 0.6:
            return "Fair Match", "#FF9800"  # Orange
        else:
            return "Poor Match", "#F44336"  # Red
# Global variables to store extracted texts.
# The callback functions in this file read and write these via ``global``.
# NOTE(review): being module-level, they are presumably shared by everyone
# using the same running app process — confirm whether that is intended.
answer_key_text = ""  # Text most recently extracted from the answer key
student_response_text = ""  # Text most recently extracted from the student response
recognizer = None  # HandwritingRecognizer instance, created on first use

# Sample data for demo.
# NOTE(review): nothing in the visible part of this file reads these two
# constants (load_sample_data() works from sample images instead) — check
# for other users before changing or removing them.
SAMPLE_ANSWER_KEY = """
Question 1: What is the capital of France?
Answer: Paris
Question 2: Calculate 15 + 27
Answer: 42
Question 3: Name three primary colors
Answer: Red, Blue, Yellow
"""
SAMPLE_STUDENT_RESPONSE = """
Question 1: What is the capital of France?
Answer: Paris
Question 2: Calculate 15 + 27
Answer: 42
Question 3: Name three primary colors
Answer: Red, Blue, Yellow
"""
def load_sample_data():
    """Load sample data for demonstration purposes using pre-generated sample images.

    The two sample images are read from ``sample_answer_key.png`` and
    ``sample_student_response.png``; if either file is missing they are
    generated by the ``sample_images`` module instead. Both images are then
    sent through the recognizer.

    The stored answer-key and student-response texts are cleared first and
    only set again for an image whose OCR succeeded, so a failure message can
    never end up being scored as if it were extracted text.

    Returns:
        ``(answer_key_status, answer_key_text, student_status, student_text)``
        for the four UI components; a text is ``""`` whenever its status
        reports a failure.
    """
    global answer_key_text, student_response_text, recognizer

    def both_failed(message):
        # The same message is shown in both status boxes, with no text.
        return (message, "", message, "")

    try:
        # Every outcome below overwrites both text boxes, so start from a
        # clean state and commit new text only on success.
        answer_key_text = ""
        student_response_text = ""

        # Check if API key is available
        if not GEMINI_API_KEY:
            return both_failed("❌ No API key found. Please check your environment configuration.")

        # Initialize recognizer if needed
        if recognizer is None:
            try:
                recognizer = HandwritingRecognizer(GEMINI_API_KEY)
            except Exception as e:
                return both_failed(f"❌ Error initializing Gemini API: {str(e)}")

        # Load pre-generated sample images
        try:
            answer_key_img = Image.open("sample_answer_key.png")
            student_response_img = Image.open("sample_student_response.png")
        except FileNotFoundError:
            # Fallback: use the sample_images module if files don't exist
            try:
                from sample_images import create_sample_answer_key_image, create_sample_student_response_image
                answer_key_img = create_sample_answer_key_image()
                student_response_img = create_sample_student_response_image()
            except Exception as e:
                return both_failed(f"❌ Error creating sample images: {str(e)}")

        # Process through actual OCR. Results stay in locals until they are
        # known to be good; the recognizer signals failure with an
        # "Error"-prefixed string rather than an exception.
        try:
            key_text = recognizer.extract_text_from_image(answer_key_img)
        except Exception as e:
            return both_failed(f"❌ Error processing answer key image: {str(e)}")
        if key_text.startswith("Error"):
            return both_failed(f"❌ Error processing answer key: {key_text}")
        answer_key_text = key_text

        try:
            response_text = recognizer.extract_text_from_image(student_response_img)
            student_failure = response_text if response_text.startswith("Error") else None
        except Exception as e:
            student_failure = str(e)
        if student_failure is not None:
            return (
                "✅ Answer key processed successfully!",
                answer_key_text,
                f"❌ Error processing student response: {student_failure}",
                ""
            )
        student_response_text = response_text

        success = "✅ Sample data processed through Gemini OCR successfully!"
        return (success, answer_key_text, success, student_response_text)
    except Exception as e:
        return both_failed(f"❌ Unexpected error in demo: {str(e)}")
def _ocr_uploaded_file(api_key: str, file, missing_file_message: str, subject: str) -> Tuple[str, str]:
    """Validate an uploaded file and run OCR on it.

    Args:
        api_key: Gemini API key used if the shared recognizer must be created.
        file: Uploaded file; either an object exposing the path as ``.name``
            or the path itself.
        missing_file_message: Status text to return when ``file`` is empty.
        subject: Human-readable name used in the error text
            (``"answer key"`` / ``"student response"``).

    Returns:
        ``(error_status, text)``. On success ``error_status`` is ``""`` and
        ``text`` is the extracted text; on failure ``text`` is ``""``.
    """
    global recognizer

    if not api_key:
        return "Please enter your Google Gemini API key first.", ""
    if not file:
        return missing_file_message, ""

    try:
        # Accept both an upload object with a ``.name`` path and a bare path.
        # NOTE(review): which of the two Gradio passes presumably depends on
        # the installed version / File ``type`` setting — confirm.
        file_path = str(getattr(file, "name", file))
        lowered = file_path.lower()
        is_image = lowered.endswith(('.png', '.jpg', '.jpeg'))
        is_pdf = lowered.endswith('.pdf')

        # Reject unsupported files before creating the API client.
        if not (is_image or is_pdf):
            return "Unsupported file format. Please use PNG, JPG, JPEG, or PDF.", ""

        # Initialize recognizer if not already done
        if recognizer is None:
            recognizer = HandwritingRecognizer(api_key)

        if is_image:
            # The context manager closes the uploaded file once OCR is done.
            with Image.open(file_path) as image:
                text = recognizer.extract_text_from_image(image)
        else:
            text = recognizer.extract_text_from_pdf(file_path)
    except Exception as e:
        return f"Error processing {subject}: {str(e)}", ""

    # The recognizer reports failures as "Error..." strings, not exceptions.
    if text.startswith("Error"):
        return text, ""
    return "", text


def process_answer_key(api_key: str, file) -> Tuple[str, str]:
    """Process the answer key file and extract text.

    The stored answer-key text is cleared first and only set again when OCR
    succeeds, so it always matches what the UI shows and never holds an
    error message.

    Args:
        api_key: Google Gemini API key.
        file: Uploaded PNG/JPG/JPEG/PDF file, or an empty value.

    Returns:
        ``(status_message, extracted_text)``; the text is ``""`` on failure.
    """
    global answer_key_text
    answer_key_text = ""

    error, text = _ocr_uploaded_file(
        api_key, file, "Please upload an answer key file.", "answer key"
    )
    if error:
        return error, ""

    answer_key_text = text
    return "Answer key processed successfully!", answer_key_text


def process_student_response(api_key: str, file) -> Tuple[str, str]:
    """Process the student response file and extract text.

    The stored student-response text is cleared first and only set again
    when OCR succeeds, so it always matches what the UI shows and never
    holds an error message.

    Args:
        api_key: Google Gemini API key.
        file: Uploaded PNG/JPG/JPEG/PDF file, or an empty value.

    Returns:
        ``(status_message, extracted_text)``; the text is ``""`` on failure.
    """
    global student_response_text
    student_response_text = ""

    error, text = _ocr_uploaded_file(
        api_key, file, "Please upload a student response file.", "student response"
    )
    if error:
        return error, ""

    student_response_text = text
    return "Student response processed successfully!", student_response_text
def calculate_score() -> Tuple[str, str, str, str, str, str]:
    """Score the stored student response against the stored answer key.

    Returns:
        Six strings for the UI: the score banner HTML, the sequence / word /
        character similarity lines, and the cleaned answer-key and
        student-response texts. When a text is missing or scoring raises,
        the first element carries the message and the other five are ``""``.
    """
    global answer_key_text, student_response_text

    both_available = bool(answer_key_text) and bool(student_response_text)
    if not both_available:
        return "Please process both answer key and student response first.", "", "", "", "", ""

    try:
        score, metrics = TextScorer.calculate_similarity(answer_key_text, student_response_text)
        interpretation, color = TextScorer.get_score_interpretation(score)

        # Banner coloured according to the score band.
        score_html = f"""
<div style="text-align: center; padding: 20px; border-radius: 10px; background-color: {color}; color: white; margin: 10px 0;">
<h2 style="margin: 0;">Overall Score: {score:.1%}</h2>
<h3 style="margin: 0;">{interpretation}</h3>
</div>
"""

        # One "<Label> Similarity: NN.N%" line per individual metric.
        sequence_metric, word_metric, char_metric = (
            f"{label} Similarity: {metrics[key]:.1%}"
            for label, key in (
                ("Sequence", 'sequence_similarity'),
                ("Word", 'word_similarity'),
                ("Character", 'char_similarity'),
            )
        )

        # Normalised texts, shown side by side for comparison.
        cleaned_pair = (
            TextScorer.clean_text(answer_key_text),
            TextScorer.clean_text(student_response_text),
        )
        return (score_html, sequence_metric, word_metric, char_metric) + cleaned_pair
    except Exception as e:
        return f"Error calculating score: {str(e)}", "", "", "", "", ""
def reset_all():
    """Forget the stored texts and the recognizer.

    Returns:
        A tuple of eight empty strings, one per UI output that the reset
        button blanks.
    """
    global answer_key_text, student_response_text, recognizer
    answer_key_text = student_response_text = ""
    recognizer = None
    return ("",) * 8
# Create the Gradio interface
def create_app():
    """Build the Gradio Blocks UI and wire its callbacks.

    Layout: optional API-key box, demo button, teacher and student upload
    columns, then the scoring section and usage instructions.

    When ``GEMINI_API_KEY`` is configured, the key is injected into the
    processing callbacks on the server and is never placed in a UI component.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(title="✍️ Handwriting Assessment App", theme=gr.themes.Soft()) as app:
        gr.Markdown("# ✍️ Handwriting Assessment App")
        gr.Markdown("### Upload handwritten answer keys and student responses for automatic scoring using AI")

        # API Key section - only shown when no key is predefined
        if not GEMINI_API_KEY:
            with gr.Row():
                api_key = gr.Textbox(
                    label="🔑 Google Gemini API Key",
                    placeholder="Enter your Google Gemini API key here...",
                    type="password",
                    value="",
                    info="Get your API key from: https://makersuite.google.com/app/apikey"
                )
            key_inputs = [api_key]
            on_answer_key_upload = process_answer_key
            on_student_upload = process_student_response
        else:
            # A hidden Textbox is still a client-side component, so using one
            # to carry the predefined key would hand the secret to every
            # visitor's browser. Bind the key on the server instead.
            key_inputs = []

            def on_answer_key_upload(file):
                return process_answer_key(GEMINI_API_KEY, file)

            def on_student_upload(file):
                return process_student_response(GEMINI_API_KEY, file)

        # Demo section
        with gr.Row():
            gr.Markdown("### 🎯 Quick Demo")
            demo_btn = gr.Button("Load Sample Data", variant="secondary", size="sm")

        with gr.Row():
            # Teacher Section
            with gr.Column(scale=1):
                gr.Markdown("## 👨🏫 Teacher Section")
                answer_key_file = gr.File(
                    label="Upload Answer Key",
                    file_types=[".png", ".jpg", ".jpeg", ".pdf"],
                    file_count="single"
                )
                answer_key_status = gr.Textbox(label="Status", interactive=False)
                answer_key_text_display = gr.Textbox(
                    label="Extracted Answer Key Text",
                    lines=8,
                    interactive=False
                )
            # Student Section
            with gr.Column(scale=1):
                gr.Markdown("## 👨🎓 Student Section")
                student_file = gr.File(
                    label="Upload Student Response",
                    file_types=[".png", ".jpg", ".jpeg", ".pdf"],
                    file_count="single"
                )
                student_status = gr.Textbox(label="Status", interactive=False)
                student_text_display = gr.Textbox(
                    label="Extracted Student Response Text",
                    lines=8,
                    interactive=False
                )

        # Scoring Section
        gr.Markdown("## 📊 Assessment Results")
        with gr.Row():
            calculate_btn = gr.Button("Calculate Score", variant="primary", size="lg")
            reset_btn = gr.Button("Reset All", variant="secondary")

        # Results display
        score_display = gr.HTML(label="Overall Score")
        with gr.Row():
            sequence_metric = gr.Textbox(label="Sequence Similarity", interactive=False)
            word_metric = gr.Textbox(label="Word Similarity", interactive=False)
            char_metric = gr.Textbox(label="Character Similarity", interactive=False)

        gr.Markdown("### Text Comparison")
        with gr.Row():
            clean_answer_key = gr.Textbox(
                label="Answer Key (Cleaned)",
                lines=5,
                interactive=False
            )
            clean_student_response = gr.Textbox(
                label="Student Response (Cleaned)",
                lines=5,
                interactive=False
            )

        def reset_everything():
            # reset_all() supplies blanks for the first eight outputs; the two
            # cleaned-text boxes are appended here so that "Reset All" does
            # not leave the previous comparison on screen.
            return (*reset_all(), "", "")

        # Event handlers
        demo_btn.click(
            fn=load_sample_data,
            outputs=[answer_key_status, answer_key_text_display, student_status, student_text_display]
        )
        answer_key_file.change(
            fn=on_answer_key_upload,
            inputs=key_inputs + [answer_key_file],
            outputs=[answer_key_status, answer_key_text_display]
        )
        student_file.change(
            fn=on_student_upload,
            inputs=key_inputs + [student_file],
            outputs=[student_status, student_text_display]
        )
        calculate_btn.click(
            fn=calculate_score,
            outputs=[score_display, sequence_metric, word_metric, char_metric, clean_answer_key, clean_student_response]
        )
        reset_btn.click(
            fn=reset_everything,
            outputs=[
                answer_key_status, answer_key_text_display,
                student_status, student_text_display,
                score_display, sequence_metric, word_metric, char_metric,
                clean_answer_key, clean_student_response
            ]
        )

        # Instructions
        with gr.Accordion("📖 How to Use", open=False):
            gr.Markdown("""
1. **Enter API Key**: Input your Google Gemini API key at the top
2. **Upload Answer Key**: Teachers upload the handwritten answer key (PNG, JPG, JPEG, or PDF)
3. **Upload Student Response**: Students upload their handwritten answers (PNG, JPG, JPEG, or PDF)
4. **Calculate Score**: Click the "Calculate Score" button to get detailed assessment results
5. **View Results**: See the overall score, detailed metrics, and text comparison
**Supported Formats**: PNG, JPG, JPEG, PDF
**Score Interpretation**:
- 90-100%: Excellent Match (Green)
- 80-89%: Very Good Match (Light Green)
- 70-79%: Good Match (Yellow)
- 60-69%: Fair Match (Orange)
- Below 60%: Poor Match (Red)
""")
    return app
# Script entry point: build the UI and serve it with Gradio's default
# launch settings. Nothing is launched when the module is merely imported.
if __name__ == "__main__":
    app = create_app()
    app.launch()