# handschecker / app.py
# Nur Arifin Akbar
# Fix Gemini model name and add detailed error handling for sample data loading
# 0b4d45d
import gradio as gr
import google.generativeai as genai
from PIL import Image
import io
import difflib
import re
import os
from typing import Optional, Tuple, List
import fitz # PyMuPDF for PDF processing
from config import GEMINI_API_KEY, GEMINI_MODEL, OCR_PROMPT, DEFAULT_RESOLUTION
def preprocess_image_for_gemma(image: Image.Image) -> Image.Image:
    """Letterbox *image* onto a white canvas of exactly ``DEFAULT_RESOLUTION``.

    The image is converted to RGB, scaled (up or down) so that it fits inside
    the target size while keeping its aspect ratio, and centered on a white
    background of the target size.

    Args:
        image: Source image in any PIL mode.

    Returns:
        A new RGB image whose size is ``DEFAULT_RESOLUTION``.

    Raises:
        ValueError: If the source image has a zero-sized dimension.
    """
    target_width, target_height = DEFAULT_RESOLUTION
    original_width, original_height = image.size
    if original_width <= 0 or original_height <= 0:
        # Fail with a clear message instead of a bare ZeroDivisionError below.
        raise ValueError("Cannot preprocess an image with a zero-sized dimension")

    # A plain convert('RGB') simply drops the alpha channel, so fully
    # transparent pixels keep whatever colour they carry (often black), which
    # can turn dark ink on a transparent page into dark-on-dark. Flatten onto
    # white first so the result matches the white letterbox background.
    has_alpha = image.mode in ('RGBA', 'LA', 'PA') or (
        image.mode == 'P' and 'transparency' in image.info
    )
    if has_alpha:
        rgba = image.convert('RGBA')
        flattened = Image.new('RGB', rgba.size, 'white')
        flattened.paste(rgba, mask=rgba.split()[-1])
        image = flattened
    elif image.mode != 'RGB':
        image = image.convert('RGB')

    # Largest uniform scale at which the image still fits inside the target.
    scale_factor = min(target_width / original_width, target_height / original_height)

    # Clamp to [1, target]: very elongated inputs would otherwise round a side
    # down to 0 pixels (which resize() rejects), and float error must never
    # push a side past the canvas.
    new_width = min(target_width, max(1, round(original_width * scale_factor)))
    new_height = min(target_height, max(1, round(original_height * scale_factor)))

    # High-quality resampling keeps thin pen strokes legible.
    resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

    # Center the resized image on a white canvas of the exact target size.
    processed_image = Image.new('RGB', DEFAULT_RESOLUTION, 'white')
    x_offset = (target_width - new_width) // 2
    y_offset = (target_height - new_height) // 2
    processed_image.paste(resized_image, (x_offset, y_offset))
    return processed_image
class HandwritingRecognizer:
    """Handwriting OCR backed by a Gemini generative model.

    Both ``extract_*`` methods report failure by *returning* a string that
    starts with ``"Error"`` instead of raising; the callers in this module
    test for that prefix.
    """

    def __init__(self, api_key: str):
        """Initialize the Gemini API for handwriting recognition.

        Args:
            api_key: Google Gemini API key.

        Raises:
            ValueError: If ``api_key`` is empty.
        """
        if not api_key:
            raise ValueError("API key is required")
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(GEMINI_MODEL)

    def extract_text_from_image(self, image: Image.Image) -> str:
        """Extract text from an image using the configured model.

        Args:
            image: Image containing the handwriting.

        Returns:
            The stripped model response, or a message starting with
            ``"Error in text extraction:"`` if anything goes wrong.
        """
        try:
            # Normalize the image to the resolution the model is configured for.
            processed_image = preprocess_image_for_gemma(image)
            response = self.model.generate_content([OCR_PROMPT, processed_image])
            return response.text.strip()
        except Exception as e:
            return f"Error in text extraction: {str(e)}"

    def extract_text_from_pdf(self, pdf_path: str) -> str:
        """Render every PDF page to an image and OCR it.

        Args:
            pdf_path: Path of the PDF file to read.

        Returns:
            The text of all pages, each preceded by a ``--- Page N ---``
            header, or a message starting with ``"Error processing PDF:"``.
            If OCR fails on any page the whole document is reported as an
            error, so a failure message is never mistaken for extracted text.
        """
        try:
            page_sections = []
            # The context manager closes the document even if a page fails.
            with fitz.open(pdf_path) as doc:
                zoom = fitz.Matrix(2.0, 2.0)  # Render at 2x for better legibility
                for page_num, page in enumerate(doc, start=1):
                    pix = page.get_pixmap(matrix=zoom)
                    with Image.open(io.BytesIO(pix.tobytes("png"))) as image:
                        page_text = self.extract_text_from_image(image)
                    if page_text.startswith("Error in text extraction"):
                        # Surface the failure with the "Error" prefix callers
                        # check for, rather than burying it in the page text.
                        return f"Error processing PDF: page {page_num}: {page_text}"
                    page_sections.append(f"\n--- Page {page_num} ---\n{page_text}\n")
            return "".join(page_sections).strip()
        except Exception as e:
            return f"Error processing PDF: {str(e)}"
class TextScorer:
    """Stateless helpers that compare two texts and grade their similarity."""

    @staticmethod
    def calculate_similarity(text1: str, text2: str) -> Tuple[float, dict]:
        """Calculate similarity between two texts and provide detailed metrics.

        Args:
            text1: First text (e.g. the answer key).
            text2: Second text (e.g. the student response).

        Returns:
            ``(combined_score, metrics)``. ``metrics`` holds
            ``sequence_similarity``, ``word_similarity``, ``char_similarity``
            and ``combined_score``, each in ``[0, 1]``. If either input is
            empty the result is ``(0.0, {})``.
        """
        if not text1 or not text2:
            return 0.0, {}

        # Compare normalized text so case, punctuation and spacing don't count.
        clean_text1 = TextScorer.clean_text(text1)
        clean_text2 = TextScorer.clean_text(text2)

        # autojunk must be off: with the default heuristic, any character that
        # is "popular" in a 200+ character string is ignored as a match
        # anchor, which for ordinary prose is nearly every character and can
        # drive the ratio of two identical long texts down to 0.
        sequence_similarity = difflib.SequenceMatcher(
            None, clean_text1, clean_text2, autojunk=False
        ).ratio()

        # Word-level Jaccard similarity (max(..., 1) avoids division by zero).
        words1 = set(clean_text1.split())
        words2 = set(clean_text2.split())
        word_similarity = len(words1 & words2) / max(len(words1 | words2), 1)

        # Character-level Jaccard similarity over the sets of distinct characters.
        chars1 = set(clean_text1)
        chars2 = set(clean_text2)
        char_similarity = len(chars1 & chars2) / max(len(chars1 | chars2), 1)

        # Weighted average: sequence order matters most, character overlap least.
        combined_score = (sequence_similarity * 0.5 + word_similarity * 0.3 + char_similarity * 0.2)

        metrics = {
            'sequence_similarity': sequence_similarity,
            'word_similarity': word_similarity,
            'char_similarity': char_similarity,
            'combined_score': combined_score
        }
        return combined_score, metrics

    @staticmethod
    def clean_text(text: str) -> str:
        """Clean and normalize text for comparison.

        Lowercases, removes punctuation, collapses every whitespace run to a
        single space and strips the ends. Returns ``""`` for empty input.
        """
        if not text:
            return ""
        text = text.lower()
        # Drop punctuation first: removing a free-standing mark such as the
        # dash in "a - b" leaves two adjacent spaces, so whitespace has to be
        # collapsed afterwards to really end up with single spaces.
        text = re.sub(r'[^\w\s]', '', text)
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    @staticmethod
    def get_score_interpretation(score: float) -> Tuple[str, str]:
        """Get interpretation and color for the score.

        Returns:
            ``(label, hex_color)`` for the band that ``score`` falls into.
        """
        if score >= 0.9:
            return "Excellent Match", "#4CAF50"  # Green
        elif score >= 0.8:
            return "Very Good Match", "#8BC34A"  # Light Green
        elif score >= 0.7:
            return "Good Match", "#FFEB3B"  # Yellow
        elif score >= 0.6:
            return "Fair Match", "#FF9800"  # Orange
        else:
            return "Poor Match", "#F44336"  # Red
# Module-level state shared by the Gradio callbacks below: the most recently
# extracted texts and the lazily created OCR client.
answer_key_text = ""
student_response_text = ""
recognizer = None

# Sample question/answer sheet for the demo.
SAMPLE_ANSWER_KEY = (
    "\n"
    "Question 1: What is the capital of France?\n"
    "Answer: Paris\n"
    "Question 2: Calculate 15 + 27\n"
    "Answer: 42\n"
    "Question 3: Name three primary colors\n"
    "Answer: Red, Blue, Yellow\n"
)

# The sample student response is word-for-word the same sheet as the key.
SAMPLE_STUDENT_RESPONSE = SAMPLE_ANSWER_KEY
def load_sample_data():
    """Run the bundled sample images through OCR for the demo button.

    The sample images are read from ``sample_answer_key.png`` and
    ``sample_student_response.png``; if a file is missing they are generated
    by the ``sample_images`` module instead.

    Returns:
        A 4-tuple ``(answer_key_status, answer_key_text, student_status,
        student_text)``. A text element is ``""`` whenever the matching
        extraction did not succeed.

    Side effects:
        Updates the module-level ``answer_key_text`` and
        ``student_response_text`` (only ever with successfully extracted
        text) and creates the shared ``recognizer`` if needed.
    """
    global answer_key_text, student_response_text, recognizer

    def _both_failed(message):
        # Same message in both status boxes, both text boxes empty.
        return (message, "", message, "")

    # Keep the stored texts in step with what is returned: a failed run must
    # not leave an older text, or an error message, behind to be scored later.
    answer_key_text = ""
    student_response_text = ""

    try:
        # Check if API key is available
        if not GEMINI_API_KEY:
            return _both_failed("❌ No API key found. Please check your environment configuration.")

        # Initialize recognizer if needed
        if recognizer is None:
            try:
                recognizer = HandwritingRecognizer(GEMINI_API_KEY)
            except Exception as e:
                return _both_failed(f"❌ Error initializing Gemini API: {str(e)}")

        # Load pre-generated sample images. copy() reads the pixel data so the
        # underlying file handle can be closed immediately.
        try:
            with Image.open("sample_answer_key.png") as key_file:
                answer_key_img = key_file.copy()
            with Image.open("sample_student_response.png") as response_file:
                student_response_img = response_file.copy()
        except FileNotFoundError:
            # Fallback: use the sample_images module if files don't exist
            try:
                from sample_images import create_sample_answer_key_image, create_sample_student_response_image
                answer_key_img = create_sample_answer_key_image()
                student_response_img = create_sample_student_response_image()
            except Exception as e:
                return _both_failed(f"❌ Error creating sample images: {str(e)}")

        # OCR the answer key. The result is kept in a local until it is known
        # to be real text, because failures come back as "Error..." strings.
        try:
            key_text = recognizer.extract_text_from_image(answer_key_img)
        except Exception as e:
            return _both_failed(f"❌ Error processing answer key image: {str(e)}")
        if key_text.startswith("Error"):
            return _both_failed(f"❌ Error processing answer key: {key_text}")
        answer_key_text = key_text

        # OCR the student response; the answer key result is still reported
        # if only this step fails.
        try:
            response_text = recognizer.extract_text_from_image(student_response_img)
        except Exception as e:
            return (
                "✅ Answer key processed successfully!",
                answer_key_text,
                f"❌ Error processing student response: {str(e)}",
                ""
            )
        if response_text.startswith("Error"):
            return (
                "✅ Answer key processed successfully!",
                answer_key_text,
                f"❌ Error processing student response: {response_text}",
                ""
            )
        student_response_text = response_text

        success_msg = "✅ Sample data processed through Gemini OCR successfully!"
        return (success_msg, answer_key_text, success_msg, student_response_text)
    except Exception as e:
        # Both text boxes are reported empty, so the stored texts must be too.
        answer_key_text = ""
        student_response_text = ""
        return _both_failed(f"❌ Unexpected error in demo: {str(e)}")
def process_answer_key(api_key: str, file) -> Tuple[str, str]:
    """Process the answer key file and extract text.

    Args:
        api_key: Google Gemini API key used to create the shared recognizer
            if none exists yet.
        file: Uploaded file: either an object exposing the path as ``.name``
            or the path itself. PNG, JPG, JPEG and PDF are supported.

    Returns:
        ``(status_message, extracted_text)``; ``extracted_text`` is ``""``
        unless extraction succeeded.

    Side effects:
        Stores the extracted text in the module-level ``answer_key_text``
        (reset to ``""`` on every failure) and may create ``recognizer``.
    """
    global answer_key_text, recognizer

    # Every failure path returns "" for the text box, so drop the previously
    # stored text up front; otherwise a later score would silently use text
    # (or an error message) that no longer matches what is displayed.
    answer_key_text = ""

    if not api_key:
        return "Please enter your Google Gemini API key first.", ""
    if not file:
        return "Please upload an answer key file.", ""
    try:
        # Accept both file-like wrappers (path in .name) and plain path strings.
        path = getattr(file, "name", file)
        lowered = str(path).lower()
        is_image = lowered.endswith(('.png', '.jpg', '.jpeg'))
        if not is_image and not lowered.endswith('.pdf'):
            return "Unsupported file format. Please use PNG, JPG, JPEG, or PDF.", ""

        # Initialize recognizer if not already done.
        # NOTE: an existing recognizer is reused even if api_key has changed
        # since it was created; it is only rebuilt after reset_all().
        if recognizer is None:
            recognizer = HandwritingRecognizer(api_key)

        if is_image:
            # Close the image file as soon as OCR is done.
            with Image.open(path) as image:
                extracted = recognizer.extract_text_from_image(image)
        else:
            extracted = recognizer.extract_text_from_pdf(path)

        # The recognizer reports failures as strings starting with "Error".
        if extracted.startswith("Error"):
            return extracted, ""

        answer_key_text = extracted
        return "Answer key processed successfully!", extracted
    except Exception as e:
        return f"Error processing answer key: {str(e)}", ""
def process_student_response(api_key: str, file) -> Tuple[str, str]:
    """Process the student response file and extract text.

    Args:
        api_key: Google Gemini API key used to create the shared recognizer
            if none exists yet.
        file: Uploaded file: either an object exposing the path as ``.name``
            or the path itself. PNG, JPG, JPEG and PDF are supported.

    Returns:
        ``(status_message, extracted_text)``; ``extracted_text`` is ``""``
        unless extraction succeeded.

    Side effects:
        Stores the extracted text in the module-level
        ``student_response_text`` (reset to ``""`` on every failure) and may
        create ``recognizer``.
    """
    global student_response_text, recognizer

    # Every failure path returns "" for the text box, so drop the previously
    # stored text up front; otherwise a later score would silently use text
    # (or an error message) that no longer matches what is displayed.
    student_response_text = ""

    if not api_key:
        return "Please enter your Google Gemini API key first.", ""
    if not file:
        return "Please upload a student response file.", ""
    try:
        # Accept both file-like wrappers (path in .name) and plain path strings.
        path = getattr(file, "name", file)
        lowered = str(path).lower()
        is_image = lowered.endswith(('.png', '.jpg', '.jpeg'))
        if not is_image and not lowered.endswith('.pdf'):
            return "Unsupported file format. Please use PNG, JPG, JPEG, or PDF.", ""

        # Initialize recognizer if not already done.
        # NOTE: an existing recognizer is reused even if api_key has changed
        # since it was created; it is only rebuilt after reset_all().
        if recognizer is None:
            recognizer = HandwritingRecognizer(api_key)

        if is_image:
            # Close the image file as soon as OCR is done.
            with Image.open(path) as image:
                extracted = recognizer.extract_text_from_image(image)
        else:
            extracted = recognizer.extract_text_from_pdf(path)

        # The recognizer reports failures as strings starting with "Error".
        if extracted.startswith("Error"):
            return extracted, ""

        student_response_text = extracted
        return "Student response processed successfully!", extracted
    except Exception as e:
        return f"Error processing student response: {str(e)}", ""
def calculate_score() -> Tuple[str, str, str, str, str, str]:
    """Score the stored student response against the stored answer key.

    Reads the module-level ``answer_key_text`` and ``student_response_text``.

    Returns:
        ``(score_html, sequence_metric, word_metric, char_metric,
        clean_answer_key, clean_student_response)``. When a text is missing
        or scoring fails, the first element carries the message and the
        remaining five are empty strings.
    """
    if not (answer_key_text and student_response_text):
        return "Please process both answer key and student response first.", "", "", "", "", ""

    try:
        overall, details = TextScorer.calculate_similarity(answer_key_text, student_response_text)
        label, background = TextScorer.get_score_interpretation(overall)

        # Banner with the overall percentage, tinted by the score band.
        score_html = (
            "\n"
            f'<div style="text-align: center; padding: 20px; border-radius: 10px; background-color: {background}; color: white; margin: 10px 0;">\n'
            f'<h2 style="margin: 0;">Overall Score: {overall:.1%}</h2>\n'
            f'<h3 style="margin: 0;">{label}</h3>\n'
            "</div>\n"
        )

        # One human-readable line per individual metric.
        sequence_line = f"Sequence Similarity: {details['sequence_similarity']:.1%}"
        word_line = f"Word Similarity: {details['word_similarity']:.1%}"
        char_line = f"Character Similarity: {details['char_similarity']:.1%}"

        # Normalized texts, shown side by side for comparison.
        normalized_key = TextScorer.clean_text(answer_key_text)
        normalized_response = TextScorer.clean_text(student_response_text)

        return (
            score_html,
            sequence_line,
            word_line,
            char_line,
            normalized_key,
            normalized_response,
        )
    except Exception as e:
        return f"Error calculating score: {str(e)}", "", "", "", "", ""
def reset_all():
    """Reset all stored data and blank every result widget.

    Clears the stored texts and drops the shared recognizer so that the next
    upload creates a fresh one.

    Returns:
        A tuple of ten empty strings, one per output wired to the reset
        button in ``create_app``: the two status boxes, the two extracted
        text boxes, the score display, the three metric boxes and the two
        cleaned-text comparison boxes.
    """
    global answer_key_text, student_response_text, recognizer
    answer_key_text = ""
    student_response_text = ""
    recognizer = None
    return ("",) * 10


# Create the Gradio interface
def create_app():
    """Build and return the Gradio Blocks UI for the assessment app.

    The layout has a demo button, a teacher column and a student column
    (file upload, status, extracted text), a results area, and a collapsible
    help section. Nothing is launched here; the caller calls ``launch()``.
    """

    def _effective_key(entered_key):
        # A key configured on the server always wins over the textbox value.
        return GEMINI_API_KEY or entered_key

    def _on_answer_key_upload(entered_key, file):
        return process_answer_key(_effective_key(entered_key), file)

    def _on_student_upload(entered_key, file):
        return process_student_response(_effective_key(entered_key), file)

    with gr.Blocks(title="✍️ Handwriting Assessment App", theme=gr.themes.Soft()) as app:
        gr.Markdown("# ✍️ Handwriting Assessment App")
        gr.Markdown("### Upload handwritten answer keys and student responses for automatic scoring using AI")

        # API Key section - hidden since it's predefined
        if not GEMINI_API_KEY:
            with gr.Row():
                api_key = gr.Textbox(
                    label="🔑 Google Gemini API Key",
                    placeholder="Enter your Google Gemini API key here...",
                    type="password",
                    value="",
                    info="Get your API key from: https://makersuite.google.com/app/apikey"
                )
        else:
            # The key is predefined on the server. Keep an empty hidden
            # placeholder so the event wiring is the same in both modes, but
            # never put the real key into a component value: component
            # values are delivered to the browser with the page, hidden or
            # not, which would leak the secret to every visitor. The
            # handlers substitute the server-side key instead.
            api_key = gr.Textbox(value="", visible=False)

        # Demo section
        with gr.Row():
            gr.Markdown("### 🎯 Quick Demo")
            demo_btn = gr.Button("Load Sample Data", variant="secondary", size="sm")

        with gr.Row():
            # Teacher Section
            with gr.Column(scale=1):
                gr.Markdown("## 👨‍🏫 Teacher Section")
                answer_key_file = gr.File(
                    label="Upload Answer Key",
                    file_types=[".png", ".jpg", ".jpeg", ".pdf"],
                    file_count="single"
                )
                answer_key_status = gr.Textbox(label="Status", interactive=False)
                answer_key_text_display = gr.Textbox(
                    label="Extracted Answer Key Text",
                    lines=8,
                    interactive=False
                )

            # Student Section
            with gr.Column(scale=1):
                gr.Markdown("## 👨‍🎓 Student Section")
                student_file = gr.File(
                    label="Upload Student Response",
                    file_types=[".png", ".jpg", ".jpeg", ".pdf"],
                    file_count="single"
                )
                student_status = gr.Textbox(label="Status", interactive=False)
                student_text_display = gr.Textbox(
                    label="Extracted Student Response Text",
                    lines=8,
                    interactive=False
                )

        # Scoring Section
        gr.Markdown("## 📊 Assessment Results")
        with gr.Row():
            calculate_btn = gr.Button("Calculate Score", variant="primary", size="lg")
            reset_btn = gr.Button("Reset All", variant="secondary")

        # Results display
        score_display = gr.HTML(label="Overall Score")
        with gr.Row():
            sequence_metric = gr.Textbox(label="Sequence Similarity", interactive=False)
            word_metric = gr.Textbox(label="Word Similarity", interactive=False)
            char_metric = gr.Textbox(label="Character Similarity", interactive=False)

        gr.Markdown("### Text Comparison")
        with gr.Row():
            clean_answer_key = gr.Textbox(
                label="Answer Key (Cleaned)",
                lines=5,
                interactive=False
            )
            clean_student_response = gr.Textbox(
                label="Student Response (Cleaned)",
                lines=5,
                interactive=False
            )

        # Event handlers
        demo_btn.click(
            fn=load_sample_data,
            outputs=[answer_key_status, answer_key_text_display, student_status, student_text_display]
        )
        answer_key_file.change(
            fn=_on_answer_key_upload,
            inputs=[api_key, answer_key_file],
            outputs=[answer_key_status, answer_key_text_display]
        )
        student_file.change(
            fn=_on_student_upload,
            inputs=[api_key, student_file],
            outputs=[student_status, student_text_display]
        )
        calculate_btn.click(
            fn=calculate_score,
            outputs=[score_display, sequence_metric, word_metric, char_metric, clean_answer_key, clean_student_response]
        )
        # Reset blanks every result widget, including the cleaned-text
        # comparison boxes, so no stale comparison survives a reset. The
        # number of outputs must match the tuple returned by reset_all().
        reset_btn.click(
            fn=reset_all,
            outputs=[
                answer_key_status, answer_key_text_display,
                student_status, student_text_display,
                score_display, sequence_metric, word_metric, char_metric,
                clean_answer_key, clean_student_response
            ]
        )

        # Instructions
        with gr.Accordion("📖 How to Use", open=False):
            # Blank lines separate the numbered list from the paragraphs that
            # follow; without them Markdown folds those paragraphs into the
            # last list item.
            gr.Markdown(
                "\n"
                "1. **Enter API Key**: Input your Google Gemini API key at the top\n"
                "2. **Upload Answer Key**: Teachers upload the handwritten answer key (PNG, JPG, JPEG, or PDF)\n"
                "3. **Upload Student Response**: Students upload their handwritten answers (PNG, JPG, JPEG, or PDF)\n"
                "4. **Calculate Score**: Click the \"Calculate Score\" button to get detailed assessment results\n"
                "5. **View Results**: See the overall score, detailed metrics, and text comparison\n"
                "\n"
                "**Supported Formats**: PNG, JPG, JPEG, PDF\n"
                "\n"
                "**Score Interpretation**:\n"
                "\n"
                "- 90-100%: Excellent Match (Green)\n"
                "- 80-89%: Very Good Match (Light Green)\n"
                "- 70-79%: Good Match (Yellow)\n"
                "- 60-69%: Fair Match (Orange)\n"
                "- Below 60%: Poor Match (Red)\n"
            )
    return app
# Script entry point: build the UI and start the Gradio server. Nothing is
# launched when this module is merely imported.
if __name__ == "__main__":
    app = create_app()
    app.launch()