# ocr-omr-backend / app.py
# OCR/OMR evaluation backend (Flask) deployed to Hugging Face Spaces.
import sys
import platform
import easyocr
from pdf2image import convert_from_path, convert_from_bytes
from flask import Flask, request, jsonify
from flask_cors import CORS
from dataclasses import dataclass
from typing import List, Tuple, Optional, Dict, Any
from collections import defaultdict
import numpy as np
import cv2
import pytesseract
from PIL import Image
import os
import tempfile
import difflib
import re
from fuzzywuzzy import fuzz
from dotenv import load_dotenv
import google.generativeai as genai
import asyncio
import base64
import io
import json
import pandas as pd
import subprocess
# Import the SupabaseHandler
import uuid
from datetime import datetime
from supabase import create_client, Client
# Point pytesseract at the Tesseract binary.
# An explicit TESSERACT_CMD env var always wins; otherwise fall back to the
# default Windows install path. On Linux/macOS the binary is expected on PATH.
_tesseract_cmd = os.getenv("TESSERACT_CMD")
if _tesseract_cmd:
    pytesseract.pytesseract.tesseract_cmd = _tesseract_cmd
elif platform.system() == "Windows":
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
def _get_poppler_path():
env_path = os.getenv("POPPLER_PATH")
if env_path:
return env_path
if platform.system() == "Windows":
# Check common install locations
candidates = [
r'C:\Program Files\poppler\Library\bin',
r'C:\Program Files\poppler\poppler-24.08.0\Library\bin',
]
# Also scan for any versioned poppler directory
poppler_base = r'C:\Program Files\poppler'
if os.path.isdir(poppler_base):
for entry in os.listdir(poppler_base):
candidate = os.path.join(poppler_base, entry, 'Library', 'bin')
if candidate not in candidates:
candidates.append(candidate)
for path in candidates:
if os.path.isdir(path):
return path
return None
# Load environment variables from a local .env file (no-op if absent).
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)
app = Flask(__name__)
CORS(app)
# Shared EasyOCR reader (English). Built once at import time because model
# loading is expensive.
reader = easyocr.Reader(['en'])
# Global variables to store processing results
ocr_extracted_texts = []  # OCR text accumulated by the /easyocr and /tesseract endpoints
last_processed_question_paper_object = None
last_processed_omr_key = None  # Global variable to store OMR answer key
last_processed_omr_results = None  # Global variable to store OMR processing results
porcessed_omr_results = []  # NOTE(review): name misspelled ("porcessed") — kept as-is for compatibility with callers
OMR_IMAGES = []
class SupabaseHandler:
    """Thin wrapper around the Supabase client for storing and retrieving
    exam evaluation results in the ``evaluation_results`` table."""

    def __init__(self):
        # Credentials come from the environment; fail fast if either is missing.
        url: str = os.getenv("SUPABASE_URL")
        key: str = os.getenv("SUPABASE_ANON_KEY")
        if not url or not key:
            raise ValueError("Supabase URL and ANON_KEY must be set in environment variables")
        self.supabase: Client = create_client(url, key)

    def store_evaluation_result(self, teacher_email, evaluation_data, exam_name=None):
        """
        Store evaluation result in Supabase with a unique key and exam name
        Returns the unique key for retrieval, or None on failure.
        """
        try:
            # Generate unique key
            unique_key = str(uuid.uuid4())
            # Prepare data for storage
            storage_data = {
                "unique_key": unique_key,
                "teacher_email": teacher_email,
                "evaluation_data": evaluation_data,
                "exam_name": exam_name,  # Add exam name field
                # NOTE(review): datetime.utcnow() is naive and deprecated in 3.12;
                # consider datetime.now(timezone.utc) — confirm DB column expectations.
                "created_at": datetime.utcnow().isoformat(),
                "total_students": evaluation_data.get("total_students", 0)
            }
            # Insert into Supabase
            result = self.supabase.table("evaluation_results").insert(storage_data).execute()
            if result.data:
                print(f"Successfully stored evaluation result with key: {unique_key} for exam: {exam_name}")
                return unique_key
            else:
                print("Failed to store evaluation result")
                return None
        except Exception as e:
            # Best-effort: storage failures are logged and reported as None.
            print(f"Error storing evaluation result: {str(e)}")
            return None

    def get_evaluation_result(self, unique_key):
        """
        Retrieve evaluation result by unique key.
        Returns the first matching row dict, or None when absent / on error.
        """
        try:
            result = self.supabase.table("evaluation_results").select("*").eq("unique_key", unique_key).execute()
            if result.data and len(result.data) > 0:
                return result.data[0]
            else:
                return None
        except Exception as e:
            print(f"Error retrieving evaluation result: {str(e)}")
            return None

    def get_teacher_evaluations(self, teacher_email):
        """
        Get all evaluation results for a specific teacher,
        newest first, fetching only summary columns.
        """
        try:
            result = self.supabase.table("evaluation_results").select("unique_key", "created_at", "total_students", "exam_name").eq("teacher_email", teacher_email).order("created_at", desc=True).execute()
            if result.data:
                return result.data
            else:
                return []
        except Exception as e:
            print(f"Error retrieving teacher evaluations: {str(e)}")
            return []
class QuestionPaper:
    """Container for the questions and expected answers parsed from a
    question-paper upload, plus the path the upload was saved to."""

    def __init__(self, path=None):
        self.questions = []
        self.answers = []
        self.path = path

    def clean_answers(self):
        """Drop header/footer artefacts that OCR sometimes captures as answers."""
        unwanted_patterns = [
            "Time: 15 MinutesMarks: 20",
            "Time: 15 Minutes Marks: 20",
            "GENERAL KNOWLEDGE QUESTION PAPER WITH ANSWERS",
            "GENERAL KNOWLEDGE QUESTION PAPER",
        ]

        def _keep(raw):
            text = raw.strip()
            if not text or text in unwanted_patterns:
                return False
            # Reject anything that merely *contains* a header phrase too.
            return not any(
                pat and re.search(re.escape(pat), raw, re.IGNORECASE)
                for pat in unwanted_patterns
            )

        self.answers = [a.strip() for a in self.answers if _keep(a)]

    def add_question(self, question_text):
        """Append one question string."""
        self.questions.append(question_text)

    def add_answer(self, answer_text):
        """Append one answer string."""
        self.answers.append(answer_text)

    def to_dict(self):
        """JSON-ready representation of the parsed paper."""
        return {'questions': self.questions, 'answers': self.answers}
class OMRAnswerKey:
    """Answer key for an OMR exam: per-question correct options plus the
    marking scheme and exam metadata."""

    def __init__(self):
        self.answers = {}        # question number -> correct option letter
        self.total_marks = 0
        self.marks_per_question = 1
        self.negative_marking = 0
        self.title = ""
        self.duration = ""
        self.total_questions = 0
        self.path = None
        self.questions = []      # formatted "N. question" strings
        self.question_data = []  # full question dicts (number/question/answer, ...)

    def __str__(self):
        return (f"OMR Answer Key: {self.title}\n"
                f"Total Questions: {self.total_questions}\n"
                f"Answers: {self.answers}")

    def set_answers(self, answers: dict):
        """Set the answer key with question numbers as keys and correct options (A,B,C,D) as values"""
        valid_options = ('A', 'B', 'C', 'D')
        self.answers = {
            int(num): opt.upper()
            for num, opt in answers.items()
            if opt.upper() in valid_options
        }
        self.total_questions = len(self.answers)

    def set_marking_scheme(self, marks_per_question: float, negative_marking: float = 0):
        """Set the marking scheme for the answer key"""
        self.marks_per_question = marks_per_question
        self.negative_marking = negative_marking
        self.total_marks = self.total_questions * marks_per_question

    def set_metadata(self, title: str, duration: str):
        """Set metadata for the answer key"""
        self.title = title
        self.duration = duration

    def set_question_data(self, question_data):
        """Store complete question data including options"""
        self.question_data = question_data
        self.questions = [f"{q['number']}. {q['question']}" for q in question_data]
        self.answers = {q['number']: q['answer'] for q in question_data if q['answer']}
        self.total_questions = len(question_data)

    def get_question_details(self, question_number):
        """Return the stored dict for one question, or None if absent."""
        target = str(question_number)
        for q in self.question_data:
            print(f"Checking question number: {q['number']} with {question_number}")
            if str(q['number']) == target:
                return q
        return None

    def to_dict(self):
        """JSON-ready representation of the full key."""
        return {
            'title': self.title,
            'duration': self.duration,
            'total_questions': self.total_questions,
            'answers': self.answers,
            'total_marks': self.total_marks,
            'marks_per_question': self.marks_per_question,
            'negative_marking': self.negative_marking,
            'questions': self.questions,
            'question_data': self.question_data,
        }
def parse_question_paper_text(text):
    """Split OCR'd question-paper text into parallel question/answer lists.

    A question line starts with a number followed by '.' or ')'. The line
    immediately after a question is treated as its answer unless it is
    itself a question. Header/footer noise is discarded first, and the two
    result lists are padded to equal length before returning.
    """
    ignore_res = [
        r'GENERAL KNOWLEDGE QUESTION PAPER.*',
        r'Time:\s*\d+\s*Minutes.*Marks:\s*\d+',
        r'Time:\s*\d+\s*MinutesMarks:\s*\d+',
        r'^\s*$',  # empty lines
    ]
    q_re = re.compile(r'^\d+\s*[.)]\s*(.+)')

    # Strip blank lines and known header/footer noise.
    lines = []
    for raw in text.split('\n'):
        line = raw.strip()
        if not line:
            continue
        if any(re.match(p, line, re.IGNORECASE) for p in ignore_res):
            continue
        lines.append(line)

    questions, answers = [], []
    idx = 0
    while idx < len(lines):
        line = lines[idx].strip()
        if q_re.match(line):
            questions.append(line)  # keep the number prefix on the question
            nxt = lines[idx + 1].strip() if idx + 1 < len(lines) else None
            if nxt is not None and not q_re.match(nxt):
                # The following non-question line is this question's answer.
                answers.append(nxt)
                idx += 2
            else:
                # Next line is another question (or end of text): no answer.
                answers.append("")
                idx += 1
        else:
            # Stray line: pair it with the previous question when one is waiting.
            if len(questions) > len(answers):
                answers.append(line)
            idx += 1

    # Pad so both lists come out the same length.
    while len(answers) < len(questions):
        answers.append("")
    while len(questions) < len(answers):
        questions.append(f"Question {len(questions) + 1}")
    return questions, answers
def improved_clean_and_parse_ocr_text(ocr_text):
    """Extract individual answers from raw OCR text.

    First strips OCR noise characters, then tries to split on numbered
    items like ``1. answer`` / ``2) answer``; if no numbering is found it
    falls back to one answer per non-trivial line.

    Bug fix: the noise-stripping character class previously removed '.'
    and ')' as well, which destroyed the question-number delimiters and
    made both the numbered-pattern branch and the fallback's
    number-stripping regex unreachable. Those two characters are now kept.
    """
    # Remove special characters but keep '.' and ')' (needed to detect numbering).
    cleaned_text = re.sub(r'[|@~¥#$%^&*(_+=\[\]{}\\:";\'<>?,/]', ' ', ocr_text)
    # Split by newlines and filter out empty strings.
    lines = [line.strip() for line in cleaned_text.split('\n') if line.strip()]

    individual_answers = []
    # Preferred path: explicit "N." / "N)" item markers.
    numbered_pattern = re.compile(
        r'(\d+)\s*[.)]\s*([^0-9]+?)(?=\d+\s*[.)]|$)', re.MULTILINE | re.DOTALL
    )
    matches = numbered_pattern.findall(cleaned_text)
    if matches:
        for _number, answer in matches:
            answer = answer.strip()
            if answer and len(answer) > 1:
                individual_answers.append(answer)
    else:
        # Fallback: treat each line as an answer, dropping any leading numbering.
        for line in lines:
            cleaned_line = re.sub(r'^\d+\s*[.)]\s*', '', line).strip()
            if cleaned_line and len(cleaned_line) > 1:
                individual_answers.append(cleaned_line)
    return individual_answers
def find_best_match(student_answer, correct_answers, threshold=0.6):
    """Return ``(best_match, score)`` for *student_answer* against the list
    of *correct_answers*, or ``(None, score)`` when no candidate reaches
    *threshold*.

    Each candidate's score is the max of difflib's sequence ratio and
    fuzzywuzzy's full/partial ratios (all normalised to 0-1).
    """
    student_lc = student_answer.lower()

    def _score(candidate):
        cand_lc = candidate.lower()
        return max(
            difflib.SequenceMatcher(None, student_lc, cand_lc).ratio(),
            fuzz.ratio(student_lc, cand_lc) / 100.0,
            fuzz.partial_ratio(student_lc, cand_lc) / 100.0,
        )

    best_match, best_score = None, 0
    for candidate in correct_answers:
        score = _score(candidate)
        if score > best_score:
            best_score, best_match = score, candidate

    if best_score >= threshold:
        return best_match, best_score
    return None, best_score
def extract_roll_number(student_answer_path):
    """OCR the answer sheet at *student_answer_path* and pull out a roll
    number, trying explicit labels first, then any 2+ digit sequence.
    Returns "Unknown" when nothing is found or OCR fails.
    """
    label_patterns = (
        r'(?i)roll\s*no\s*[:\-]?\s*(\w+)',
        r'(?i)roll\s*number\s*[:\-]?\s*(\w+)',
        r'(?i)roll\s*[:\-]?\s*(\w+)',
        r'(?i)reg\s*no\s*[:\-]?\s*(\w+)',
        r'(?i)registration\s*[:\-]?\s*(\w+)',
    )
    try:
        sheet_text = pytesseract.image_to_string(Image.open(student_answer_path))
        for pattern in label_patterns:
            match = re.search(pattern, sheet_text)
            if match:
                return match.group(1).strip()
        # No explicit label: fall back to the first multi-digit sequence.
        digit_runs = re.findall(r'\b\d{2,}\b', sheet_text)
        return digit_runs[0] if digit_runs else "Unknown"
    except Exception as e:
        print(f"Error extracting roll number: {str(e)}")
        return "Unknown"
# OMR Section
@dataclass
class BubbleLocation:
    """Stores information about each bubble"""
    question_num: int        # 1-based question number the bubble belongs to
    option: str              # option letter: 'A', 'B', 'C' or 'D'
    center: Tuple[int, int]  # (x, y) pixel centre in the sheet image
    radius: int              # bubble radius in pixels
    filled: bool = False     # set by CorrectedOMRReader.analyze_bubble_fill
    fill_ratio: float = 0.0  # reserved; not populated anywhere in this file
    # NOTE: CorrectedOMRReader.analyze_bubble_fill also attaches a dynamic
    # `darkness_score` attribute used for tie-breaking between marks.
class CorrectedOMRReader:
    """Reads a scanned OMR answer sheet laid out as 3 columns of A-D
    bubbles (Q1-20, Q21-40, Q41-50) and extracts one answer per question.

    Pipeline (see process()): detect candidate bubbles with several
    strategies, organise them into rows, map rows to question numbers,
    score each bubble's darkness, then pick the marked option.
    """

    def __init__(self, image_path: str = None, image_array: np.ndarray = None):
        """Initialize the OMR Reader with an image (BGR array or file path)."""
        if image_array is not None:
            self.image = image_array
            self.image_path = None
        elif image_path is not None:
            self.image = cv2.imread(image_path)
            self.image_path = image_path
        else:
            raise ValueError("Either image_array or image_path must be provided")
        if self.image is None:
            raise ValueError("Could not load image")
        self.gray = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
        self.height, self.width = self.gray.shape
        self.bubbles = []  # BubbleLocation objects, populated by process()
        self.answers = {}  # question number -> option letter or "---"
        # Expected grid parameters
        self.expected_radius = 15  # Approximate bubble radius (pixels)
        self.grid_params = {
            'rows': 20,  # Maximum rows
            'cols': 3,  # 3 columns of questions
            'options': 4  # 4 options per question (A, B, C, D)
        }

    def preprocess_for_detection(self):
        """Preprocess specifically for bubble DETECTION (not fill detection).

        NOTE(review): not invoked by process(); appears to be kept for
        external/debug use — confirm before removing.
        """
        blurred = cv2.GaussianBlur(self.gray, (3, 3), 0)
        # Bright background -> white, then invert so outlines become white.
        _, thresh = cv2.threshold(blurred, 200, 255, cv2.THRESH_BINARY)
        self.detection_thresh = cv2.bitwise_not(thresh)
        return self.detection_thresh

    def find_bubble_grid(self):
        """Find bubble locations using Hough circle detection.

        Several parameter sets are tried and merged; circles within 15px
        of an already-kept bubble are dropped as duplicates. If fewer than
        180 bubbles are found (200 expected = 50 questions x 4 options),
        template matching tops up the result.
        """
        bubbles = []
        param_sets = [
            {'dp': 1.0, 'minDist': 20, 'param1': 50, 'param2': 28, 'minRadius': 10, 'maxRadius': 20},
            {'dp': 1.1, 'minDist': 22, 'param1': 45, 'param2': 25, 'minRadius': 11, 'maxRadius': 19},
            {'dp': 1.2, 'minDist': 25, 'param1': 40, 'param2': 30, 'minRadius': 9, 'maxRadius': 21},
        ]
        for params in param_sets:
            circles = cv2.HoughCircles(
                self.gray,
                cv2.HOUGH_GRADIENT,
                dp=params['dp'],
                minDist=params['minDist'],
                param1=params['param1'],
                param2=params['param2'],
                minRadius=params['minRadius'],
                maxRadius=params['maxRadius']
            )
            if circles is not None:
                circles = np.round(circles[0, :]).astype("int")
                for (x, y, r) in circles:
                    is_dup = False
                    for bub in bubbles:
                        if np.sqrt((x - bub[0])**2 + (y - bub[1])**2) < 15:
                            is_dup = True
                            break
                    if not is_dup:
                        bubbles.append((x, y, r))
        print(f" Found {len(bubbles)} bubbles with Hough Circles")
        if len(bubbles) < 180:
            # NOTE: template results are not deduped against Hough results
            # here; organize_and_filter_bubbles removes <15px duplicates later.
            template_bubbles = self.template_matching_detection()
            bubbles.extend(template_bubbles)
            print(f" Added {len(template_bubbles)} bubbles with template matching")
        return bubbles

    def template_matching_detection(self):
        """Use template matching to find bubble locations.

        The template is a synthetic 2px-thick circle outline of radius 12;
        matches closer than 20px to an accepted match are skipped.
        """
        bubbles = []
        template_size = 30
        template = np.zeros((template_size, template_size), dtype=np.uint8)
        cv2.circle(template, (template_size//2, template_size//2), 12, 255, 2)
        result = cv2.matchTemplate(self.gray, template, cv2.TM_CCOEFF_NORMED)
        threshold = 0.5
        locations = np.where(result >= threshold)
        for pt in zip(*locations[::-1]):  # iterate match points in (x, y) order
            center_x = pt[0] + template_size // 2
            center_y = pt[1] + template_size // 2
            too_close = False
            for (bx, by, _) in bubbles:
                if np.sqrt((center_x - bx)**2 + (center_y - by)**2) < 20:
                    too_close = True
                    break
            if not too_close:
                bubbles.append((center_x, center_y, 12))
        return bubbles

    def detect_bubbles_by_contours(self):
        """Detect bubbles using contours - focusing on circular shapes.

        Runs Canny at three sensitivities; a contour is accepted when its
        area (150-900), circularity (4*pi*A/P^2 > 0.6) and enclosing
        radius (8-22px) all look bubble-like.
        """
        bubbles = []
        edge_params = [(30, 100), (50, 150), (20, 80)]
        for low, high in edge_params:
            edges = cv2.Canny(self.gray, low, high)
            contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            for contour in contours:
                area = cv2.contourArea(contour)
                if 150 < area < 900:
                    (x, y), radius = cv2.minEnclosingCircle(contour)
                    perimeter = cv2.arcLength(contour, True)
                    if perimeter > 0:
                        circularity = 4 * np.pi * area / (perimeter * perimeter)
                        if circularity > 0.6 and 8 < radius < 22:
                            is_dup = False
                            for bub in bubbles:
                                if np.sqrt((x - bub[0])**2 + (y - bub[1])**2) < 15:
                                    is_dup = True
                                    break
                            if not is_dup:
                                bubbles.append((int(x), int(y), int(radius)))
        return bubbles

    def organize_and_filter_bubbles(self, all_bubbles):
        """Deduplicate candidate bubbles and group them into horizontal rows.

        Bubbles whose centres are within 15px are merged (first one wins);
        rows are built top-to-bottom with a 20px y-tolerance against the
        row's running mean, and kept only when they contain at least 4
        bubbles (one full option group).
        """
        if not all_bubbles:
            return []
        filtered_bubbles = []
        for bubble in all_bubbles:
            is_duplicate = False
            for existing in filtered_bubbles:
                dist = np.sqrt((bubble[0] - existing[0])**2 + (bubble[1] - existing[1])**2)
                if dist < 15:
                    is_duplicate = True
                    break
            if not is_duplicate:
                filtered_bubbles.append(bubble)
        # Sort top-to-bottom, then left-to-right, and cut into rows.
        filtered_bubbles.sort(key=lambda b: (b[1], b[0]))
        rows = []
        current_row = []
        row_threshold = 20  # max y-distance from the row's running mean
        for bubble in filtered_bubbles:
            if not current_row:
                current_row.append(bubble)
            else:
                avg_y = np.mean([b[1] for b in current_row])
                if abs(bubble[1] - avg_y) < row_threshold:
                    current_row.append(bubble)
                else:
                    if len(current_row) >= 4:
                        current_row.sort(key=lambda b: b[0])
                        rows.append(current_row)
                    current_row = [bubble]
        # Flush the last row.
        if len(current_row) >= 4:
            current_row.sort(key=lambda b: b[0])
            rows.append(current_row)
        return rows

    def map_to_questions(self, bubble_rows):
        """Assign each row's bubbles to question numbers by x-position.

        Column 1 (< 35% of width) -> Q1-20, column 2 -> Q21-40, column 3
        (>= 68% of width, first 10 rows only) -> Q41-50. Within a column
        the 4 left-most bubbles map to options A-D in x order.
        """
        mapped_bubbles = []
        options = ['A', 'B', 'C', 'D']
        if not bubble_rows:
            return mapped_bubbles
        col1_max = self.width * 0.35
        col2_max = self.width * 0.68
        for row_idx, row in enumerate(bubble_rows[:20]):
            col1 = [b for b in row if b[0] < col1_max]
            col2 = [b for b in row if col1_max <= b[0] < col2_max]
            col3 = [b for b in row if b[0] >= col2_max]
            if len(col1) >= 4:
                col1_sorted = sorted(col1, key=lambda b: b[0])[:4]
                q_num = row_idx + 1
                for opt_idx, bubble in enumerate(col1_sorted):
                    mapped_bubbles.append(BubbleLocation(q_num, options[opt_idx], (bubble[0], bubble[1]), bubble[2]))
            if len(col2) >= 4:
                col2_sorted = sorted(col2, key=lambda b: b[0])[:4]
                q_num = row_idx + 21
                for opt_idx, bubble in enumerate(col2_sorted):
                    mapped_bubbles.append(BubbleLocation(q_num, options[opt_idx], (bubble[0], bubble[1]), bubble[2]))
            if row_idx < 10 and len(col3) >= 4:
                col3_sorted = sorted(col3, key=lambda b: b[0])[:4]
                q_num = row_idx + 41
                for opt_idx, bubble in enumerate(col3_sorted):
                    mapped_bubbles.append(BubbleLocation(q_num, options[opt_idx], (bubble[0], bubble[1]), bubble[2]))
        return mapped_bubbles

    def analyze_bubble_fill(self, bubble: BubbleLocation):
        """Decide whether *bubble* is filled; sets and returns bubble.filled.

        Compares the mean grey level inside the bubble with a ring just
        outside it (relative darkness), then applies two fallbacks: high
        pixel variance with a moderately dark mean (partial shading), or a
        very dark mean overall. Also attaches a dynamic ``darkness_score``
        attribute used later for tie-breaking between multiple marks.
        """
        # Mean intensity inside the bubble (shrunk mask avoids the outline).
        mask = np.zeros(self.gray.shape, dtype=np.uint8)
        cv2.circle(mask, bubble.center, max(bubble.radius - 5, 5), 255, -1)
        mean_val = cv2.mean(self.gray, mask=mask)[0]
        # Mean intensity of a ring just outside the bubble (local background).
        large_ring_mask = np.zeros(self.gray.shape, dtype=np.uint8)
        cv2.circle(large_ring_mask, bubble.center, bubble.radius + 10, 255, -1)
        cv2.circle(large_ring_mask, bubble.center, bubble.radius + 5, 0, -1)
        surrounding_mean = cv2.mean(self.gray, mask=large_ring_mask)[0]
        # How much darker the interior is than its surroundings.
        bubble.darkness_score = surrounding_mean - mean_val
        darkness_threshold = 50
        absolute_darkness_threshold = 150  # 150
        bubble.filled = (bubble.darkness_score > darkness_threshold) and (mean_val < absolute_darkness_threshold)
        # Fallback 1: noisy but moderately dark interior (partial shading).
        pixels = self.gray[mask > 0]
        if len(pixels) > 0:
            std_dev = np.std(pixels)
            if std_dev > 25 and mean_val < 170:
                bubble.filled = True
        # Fallback 2: very dark interior is always considered filled.
        if mean_val < 120:
            bubble.filled = True
        return bubble.filled

    def process(self):
        """Main processing pipeline"""
        print("Starting corrected OMR processing...")
        print("Detecting bubble locations...")
        all_bubbles = []
        circles = self.find_bubble_grid()
        all_bubbles.extend(circles)
        contour_bubbles = self.detect_bubbles_by_contours()
        all_bubbles.extend(contour_bubbles)
        print(f" Contour bubbles found: {len(contour_bubbles)}")
        print(f"Total bubbles detected: {len(all_bubbles)}")
        if len(all_bubbles) < 180:
            # Still short of the expected 200: synthesise from the fixed layout.
            print("Not enough bubbles detected, using grid-based approach...")
            grid_bubbles = self.detect_by_grid_assumption()
            all_bubbles.extend(grid_bubbles)
            print(f"Added {len(grid_bubbles)} bubbles from grid assumption")
        print("Organizing bubbles into grid...")
        bubble_rows = self.organize_and_filter_bubbles(all_bubbles)
        print(f"Organized into {len(bubble_rows)} rows")
        print("Mapping bubbles to questions...")
        self.bubbles = self.map_to_questions(bubble_rows)
        print(f"Mapped {len(self.bubbles)} bubble locations")
        print("Analyzing filled bubbles...")
        for bubble in self.bubbles:
            self.analyze_bubble_fill(bubble)
        print("Extracting final answers...")
        self.extract_answers()
        return self.answers

    def detect_by_grid_assumption(self):
        """Fallback: synthesise bubble positions from a fixed layout when
        detection finds too few bubbles.

        NOTE(review): the hard-coded x-origins/spacings assume a specific
        scan resolution — confirm against real sheet scans.
        """
        bubbles = []
        col_starts = [60, 360, 660]  # x origin of each answer column
        bubble_spacing_x = 45
        bubble_spacing_y = 28
        start_y = 50
        for col_idx, col_x in enumerate(col_starts):
            num_rows = 20 if col_idx < 2 else 10  # third column holds Q41-50 only
            for row in range(num_rows):
                y = start_y + row * bubble_spacing_y
                for opt in range(4):
                    x = col_x + opt * bubble_spacing_x
                    exists = False
                    for existing in bubbles:
                        if np.sqrt((x - existing[0])**2 + (y - existing[1])**2) < 20:
                            exists = True
                            break
                    if not exists:
                        bubbles.append((x, y, 13))
        return bubbles

    def extract_answers(self):
        """Reduce per-bubble fill flags to one answer per question.

        No filled bubble -> "---"; multiple filled bubbles -> the one with
        the highest darkness_score wins.
        """
        questions = defaultdict(list)
        for bubble in self.bubbles:
            questions[bubble.question_num].append(bubble)
        self.answers = {}
        for q_num in sorted(questions.keys()):
            q_bubbles = questions[q_num]
            filled = [b for b in q_bubbles if b.filled]
            if not filled:
                self.answers[q_num] = "---"
            elif len(filled) == 1:
                self.answers[q_num] = filled[0].option
            else:
                filled.sort(key=lambda b: b.darkness_score, reverse=True)
                self.answers[q_num] = filled[0].option
        return self.answers

    def visualize_results(self):
        """Return a copy of the sheet with filled bubbles outlined green
        (with Q/option labels) and unfilled bubbles outlined light red."""
        result_img = self.image.copy()
        for bubble in self.bubbles:
            if bubble.filled:
                cv2.circle(result_img, bubble.center, bubble.radius, (0, 255, 0), 2)
                text = f"Q{bubble.question_num}:{bubble.option}"
                cv2.putText(result_img, text,
                    (bubble.center[0] - 25, bubble.center[1] - bubble.radius - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 1)
            else:
                cv2.circle(result_img, bubble.center, bubble.radius, (100, 100, 255), 1)
        return result_img

    def display_results(self):
        """Print the detected answers as a 3-column table plus a summary."""
        print("\n" + "="*60)
        print("DETECTED ANSWERS")
        print("="*60)
        for i in range(1, 21):
            row_str = ""
            ans1 = self.answers.get(i, "---")
            row_str += f"Q{i:2d}: {ans1:^4} | "
            if i + 20 <= 40:
                ans2 = self.answers.get(i + 20, "---")
                row_str += f"Q{i+20:2d}: {ans2:^4} | "
            else:
                row_str += " " * 13 + "| "
            if i + 40 <= 50:
                ans3 = self.answers.get(i + 40, "---")
                row_str += f"Q{i+40:2d}: {ans3:^4}"
            print(row_str)
        print("\n" + "="*60)
        print("SUMMARY")
        print("="*60)
        answered = sum(1 for v in self.answers.values() if v != "---")
        print(f"Questions detected: {len(self.answers)}")
        print(f"Answered: {answered}")
        print(f"Unanswered: {len(self.answers) - answered}")
def process_single_image(image_data) -> Dict[str, Any]:
    """Process a single OMR sheet and return a JSON-ready result dict.

    Args:
        image_data: either a base64-encoded image string or a file-like
            object (e.g. an uploaded file).

    Returns:
        dict with "success", per-question "answers" (keys "1".."50",
        unanswered questions as "null") and a "summary" block.

    Bug fix: CorrectedOMRReader.extract_answers marks unanswered
    questions with the sentinel "---" (never None), so the previous
    ``is not None`` checks counted every question as answered and never
    emitted "null". Both None and "---" are now treated as unanswered.
    """
    try:
        # Decode the input into an OpenCV BGR array.
        if isinstance(image_data, str):
            # Base64 encoded image
            image_bytes = base64.b64decode(image_data)
            image = Image.open(io.BytesIO(image_bytes))
        else:
            # Direct file upload
            image = Image.open(image_data)
        image_array = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        omr_reader = CorrectedOMRReader(image_array=image_array)
        answers = omr_reader.process()

        total_questions = 50  # sheets always carry 50 questions
        unanswered_sentinels = (None, "---")
        formatted_answers = {}
        answered = 0
        for q_num in range(1, total_questions + 1):
            answer = answers.get(q_num)
            if answer in unanswered_sentinels:
                formatted_answers[str(q_num)] = "null"
            else:
                formatted_answers[str(q_num)] = answer
                answered += 1

        return {
            "success": True,
            "answers": formatted_answers,
            "summary": {
                "total_questions": total_questions,
                "answered": answered,
                "unanswered": total_questions - answered,
            },
        }
    except Exception as e:
        # Any failure yields a well-formed error payload.
        return {
            "success": False,
            "error": str(e),
            "answers": {},
            "summary": {
                "total_questions": 50,
                "answered": 0,
                "unanswered": 50,
            },
        }
@app.route('/health', methods=['GET'])
def health_check():
    """Liveness probe: confirms the API process is up."""
    payload = {
        "status": "healthy",
        "message": "OMR API is running",
    }
    return jsonify(payload)
@app.route('/', methods=['GET'])
def home():
    """Root endpoint: self-describing API documentation."""
    endpoint_docs = {
        "/process_omr": {
            "method": "POST",
            "description": "Process OMR answer sheets",
            "accepts": [
                "Multipart form data with 'images' field",
                "JSON with base64 encoded images in 'images' array",
            ],
            "returns": "JSON with detected answers and summary",
        },
        "/health": {
            "method": "GET",
            "description": "Health check endpoint",
        },
    }
    sample_response = {
        "success": True,
        "answers": {"1": "A", "2": "B", "3": "null"},
        "summary": {"total_questions": 50, "answered": 45, "unanswered": 5},
    }
    return jsonify({
        "message": "OMR Processing API",
        "version": "1.0",
        "endpoints": endpoint_docs,
        "example_response": sample_response,
    })
# <----------------->
@app.route('/easyocr', methods=['POST'])
def easyocr_image():
    """Run EasyOCR over each uploaded image and return the extracted text.

    Fixes: the previous version staged each upload in a temp file and read
    it back via an ``open(...)`` whose handle was never closed (a file
    descriptor leak). The image bytes are now decoded straight from the
    upload stream, which also removes the temp-file round trip.
    """
    if 'images' not in request.files:
        return jsonify({'error': 'No image files provided'}), 400
    extracted_texts = []
    for image_file in request.files.getlist('images'):
        try:
            # Decode the uploaded bytes directly into an OpenCV image.
            data = np.frombuffer(image_file.read(), np.uint8)
            image = cv2.imdecode(data, cv2.IMREAD_COLOR)
            # Perform OCR and join recognised fragments into one string.
            result = reader.readtext(image)
            text = " ".join(item[1] for item in result)
            extracted_texts.append(text)
            ocr_extracted_texts.append(text)  # global log, used by evaluation
        except Exception as e:
            extracted_texts.append(f"Error processing image with EasyOCR: {str(e)}")
    return jsonify({'extracted_texts': extracted_texts})
@app.route('/tesseract', methods=['POST'])
def tesseract_image():
    """Run Tesseract OCR over each uploaded image and return the text."""
    if 'images' not in request.files:
        return jsonify({'error': 'No image files provided'}), 400
    extracted_texts = []
    for image_file in request.files.getlist('images'):
        try:
            # Stage the upload on disk; PIL/Tesseract read it from there.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp:
                image_file.save(tmp.name)
                temp_path = tmp.name
            try:
                with Image.open(temp_path) as image:
                    text = pytesseract.image_to_string(image).strip()
                extracted_texts.append(text)
                ocr_extracted_texts.append(text)  # global log, used by evaluation
            finally:
                # Always remove the staging file.
                if os.path.exists(temp_path):
                    os.unlink(temp_path)
        except Exception as e:
            extracted_texts.append(f"Error processing image with Tesseract: {str(e)}")
    return jsonify({'extracted_texts': extracted_texts})
@app.route('/process_question_paper', methods=['POST'])
def process_question_paper():
    """Accept a question-paper upload (PDF or image), OCR it, parse out the
    questions/answers, cache the result globally, and return it as JSON."""
    global last_processed_question_paper_object
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No file selected'}), 400

    question_paper = QuestionPaper()
    try:
        # Uploads are kept under <app root>/Images for later Gemini checks.
        images_dir = os.path.join(app.root_path, 'Images')
        os.makedirs(images_dir, exist_ok=True)

        is_pdf = file.filename.lower().endswith('.pdf')
        saved_name = "question_paper.pdf" if is_pdf else "question_paper.png"
        saved_path = os.path.join(images_dir, saved_name)
        file.save(saved_path)
        question_paper.path = saved_path

        if is_pdf:
            # OCR every page of the PDF and concatenate the text.
            pages = convert_from_path(saved_path, poppler_path=_get_poppler_path())
            all_text = ""
            for page_image in pages:
                all_text += pytesseract.image_to_string(page_image) + "\n"
        else:
            all_text = pytesseract.image_to_string(Image.open(saved_path))

        questions, answers = parse_question_paper_text(all_text)
        question_paper.questions = questions
        question_paper.answers = answers
        # Strip any header/footer noise that slipped into the answers.
        question_paper.clean_answers()
        # Cache for subsequent evaluation requests.
        last_processed_question_paper_object = question_paper
        return jsonify(question_paper.to_dict())
    except Exception as e:
        return jsonify({'error': str(e)}), 500
def gemini_evaluate_answer_sheet_with_roll(question_paper_path, student_answer_path, questions, correct_answers, paddle_results):
    """
    Evaluate entire answer sheet using Gemini and extract roll number.

    Sends the question-paper image, the student's answer-sheet image and
    the traditional OCR results to Gemini, asking it to double-check the
    OCR and return per-question Correct/Wrong/Missing statuses plus the
    student's roll number.

    Returns:
        tuple: (roll_number, evaluations) where evaluations is a list of
        {"question_number": int, "status": str} dicts. On any failure the
        status for every question is "Error" and the roll number falls
        back to plain OCR extraction via extract_roll_number().

    NOTE(review): the `questions` parameter is unused in this function.
    """
    try:
        model = genai.GenerativeModel('gemini-2.5-flash')
        # Create the expected answers list for the prompt
        expected_answers_text = "\n".join([f"{i+1}. {answer}" for i, answer in enumerate(correct_answers)])
        prompt_text = f"""You are an OCR Assitant for an evaluvation script.
You will be given an image of a question paper and an image of a student's handwritten answers along with traditional OCR evaluvations.
Your task is assist the traditional OCR in overcoming its limitation with handwritten text the image may have bad quality handwritten text which the OCR may fail to extract and evaluvate properly, this is where you come in.
Your task is to Just do a double check of the OCR results and correct any mistakes or missing answers. and provide the result in a structured way.
Expected correct answers:
{expected_answers_text}
Traditional OCR Evaluation Results:
{paddle_results}
Instructions:
- First, identify and extract the student's roll number from the answer sheet
- Compare the student's handwritten answers with the expected answers above
- Small spelling mistakes should be ignored and considered correct
- If an answer has been crossed out or strikethrough, consider it incorrect
- Be lenient with handwriting recognition issues
- Look for answers by question numbers (1, 2, 3, etc.)
Please evaluate ALL questions and respond in this EXACT JSON format:
{{
"roll_number": "extracted_roll_number_here",
"evaluations": [
{{"question_number": 1, "status": "Correct"}},
{{"question_number": 2, "status": "Wrong"}},
{{"question_number": 3, "status": "Missing"}},
...
]
}}
For roll_number: Look for patterns like "Roll No:", "Roll Number:", "Reg No:", or any number sequence that appears to be a student identifier.
For each question, use ONLY one of these three status values:
- "Correct" - if the student's answer matches the expected answer (allowing for minor spelling)
- "Wrong" - if the student's answer is clearly different from the expected answer
- "Missing" - if no answer is visible for this question number
Respond with ONLY the JSON format above, no other text.
! Note
Ignore texts like `GENERAL KNOWLEDGE QUESTION PAPER WITH ANSWERS` and the final output should only have actual questions.
"""
        # Handle PDF vs Image for question paper
        if question_paper_path.lower().endswith('.pdf'):
            # Convert PDF to images
            pdf_images = convert_from_path(question_paper_path, poppler_path=_get_poppler_path())
            question_paper_img = pdf_images[0]  # Use first page
        else:
            question_paper_img = Image.open(question_paper_path)
        # Load student answer image
        student_answer_img = Image.open(student_answer_path)
        # Create content for the model
        content = [prompt_text, question_paper_img, student_answer_img]
        response = model.generate_content(content)
        result_text = response.text.strip()
        print(f"Gemini response: {result_text}")
        # Try to parse JSON response
        import json  # NOTE(review): redundant — json is already imported at module level
        try:
            # Clean the response - sometimes Gemini adds markdown formatting
            if "```json" in result_text:
                result_text = result_text.split("```json")[1].split("```")[0].strip()
            elif "```" in result_text:
                result_text = result_text.split("```")[1].strip()
            parsed_result = json.loads(result_text)
            return parsed_result["roll_number"], parsed_result["evaluations"]
        except (json.JSONDecodeError, KeyError) as e:
            print(f"Failed to parse JSON response: {e}")
            print(f"Raw response: {result_text}")
            # Fallback - extract roll number using OCR and create default "Error" results
            roll_number = extract_roll_number(student_answer_path)
            return roll_number, [{"question_number": i+1, "status": "Error"} for i in range(len(correct_answers))]
    except Exception as e:
        print(f"Error in Gemini evaluation: {str(e)}")
        # Return error status for all questions with OCR extracted roll number
        roll_number = extract_roll_number(student_answer_path)
        return roll_number, [{"question_number": i+1, "status": "Error"} for i in range(len(correct_answers))]
def quick_match(correct_list, messy_student_list, min_score=80):
    """Fuzzy-match messy OCR'd student answers to the list of correct answers.

    Each student line has any leading "N." question number stripped, then is
    matched against correct_list with fuzzywuzzy. A correct answer's position
    in correct_list determines its question number, and each question number
    is claimed at most once.

    Returns a list of (raw_item, question_number, matched_answer, score)
    tuples for matches scoring at least min_score.
    """
    from fuzzywuzzy import process
    import re
    matched = []
    claimed_questions = set()
    for raw_item in messy_student_list:
        # Drop any leading question number such as "12." or "3"
        cleaned = re.sub(r'^\d+\.?\s*', '', str(raw_item)).strip()
        if not cleaned or cleaned == '-':
            continue
        best = process.extractOne(cleaned, correct_list)
        if best is None or best[1] < min_score:
            continue
        question_number = correct_list.index(best[0]) + 1
        if question_number in claimed_questions:
            continue
        claimed_questions.add(question_number)
        matched.append((raw_item, question_number, best[0], best[1]))
    return matched
def process_with_paddle_ocr(image_path, correct_answers):
    """
    Run PaddleOCR on an answer-sheet image and fuzzy-match the recognized
    text lines against the expected correct answers.

    Args:
        image_path: Path to the student answer-sheet image.
        correct_answers: Expected answers, ordered by question number.

    Returns:
        On success: the quick_match() result — a list of
        (raw_ocr_line, question_number, matched_answer, score) tuples.
        On failure: the legacy 3-tuple (None, [], 0), kept for backward
        compatibility with existing callers.
    """
    try:
        # Imported lazily so the app can start without PaddleOCR installed.
        from paddleocr import PaddleOCR
        print("Initializing PaddleOCR...")
        ocr = PaddleOCR(
            use_doc_orientation_classify=True,
            use_doc_unwarping=False,
            use_textline_orientation=False
        )
        print("PaddleOCR initialized.")
        print("Predicting")
        ocr_results = ocr.predict(image_path)
        print("PaddleOCR processing completed.")
        print("Correct Answers are:")
        print(correct_answers)
        # Collect recognized text from every result page/region first.
        # The previous code re-ran quick_match inside this loop while
        # rebinding the loop's iterable name, so only the final page's words
        # were ever matched; accumulating then matching once fixes that.
        all_words = []
        for res in ocr_results:
            words = res["rec_texts"]
            print(f"PaddleOCR extracted words: {words}")
            all_words.extend(words)
        matched = quick_match(correct_answers, all_words, min_score=85)
        print(f"PaddleOCR matched results: {matched}")
        return matched
    except Exception as e:
        print(f"Error in PaddleOCR processing: {str(e)}")
        return None, [], 0
# OCR Evaluation Endpoint
@app.route('/evaluate_answers', methods=['POST'])
def evaluate_answers():
    """
    Evaluate uploaded student answer sheets against the last processed
    question paper.

    Expects multipart/form-data with:
      - student_answers: one or more answer-sheet image files (required)
      - teacher_email:   owner of the evaluation (optional)
      - exam_name:       label stored with the results (optional)

    Each sheet is OCR'd with PaddleOCR, then graded by Gemini (which also
    extracts the roll number). Per-student summaries are built, the combined
    results are stored in Supabase (best effort), and the full results JSON
    is returned.
    """
    # NOTE(review): declared but never read or assigned in this function —
    # presumably left over from an earlier OCR flow; confirm before removing.
    global ocr_extracted_texts
    if 'student_answers' not in request.files:
        return jsonify({"error": "Missing student answers"}), 400
    student_answer_files = request.files.getlist('student_answers')
    # Get teacher email and exam name from the request
    teacher_email = request.form.get('teacher_email', 'unknown@example.com')
    exam_name = request.form.get('exam_name', 'Untitled Exam')  # Get exam name from form data
    # Retrieve the question paper object cached by the upload/processing endpoint
    question_paper = last_processed_question_paper_object
    if last_processed_question_paper_object is None:
        return jsonify({'error': 'Question paper not found or processed yet'}), 404
    student_answer_paths = []
    try:
        # Save student answer files temporarily. delete=False keeps the file
        # on disk after the `with` block; cleanup happens in `finally`.
        for student_answer_file in student_answer_files:
            with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_ans_file:
                student_answer_file.save(temp_ans_file.name)
                student_answer_paths.append(temp_ans_file.name)
        # Process each student's answer sheet
        all_students_results = []
        if question_paper.path and os.path.exists(question_paper.path):
            print(f"Starting Gemini evaluation for exam: {exam_name} with {len(student_answer_paths)} students...")
            for idx, student_answer_path in enumerate(student_answer_paths):
                print(f"Processing answer sheet {idx + 1} with PaddleOCR...")
                # First pass: PaddleOCR extraction + fuzzy matching against
                # the expected answers.
                results = process_with_paddle_ocr(
                    student_answer_path,
                    question_paper.answers
                )
                # Second pass: Gemini grades the sheet and extracts the roll
                # number, with the OCR matches passed in as extra context.
                roll_number, sheet_evaluations = gemini_evaluate_answer_sheet_with_roll(
                    question_paper.path,
                    student_answer_path,
                    question_paper.questions,
                    question_paper.answers,
                    results
                )
                # Process the results for this student
                student_results = []
                for eval_result in sheet_evaluations:
                    question_num = eval_result["question_number"]
                    # Guard against out-of-range question numbers from Gemini
                    if 1 <= question_num <= len(question_paper.questions):
                        student_results.append({
                            'question_number': question_num,
                            'question_text': question_paper.questions[question_num - 1],
                            'correct_answer': question_paper.answers[question_num - 1],
                            'status': eval_result["status"]
                        })
                # Calculate summary for this student
                correct_count = sum(1 for result in student_results if result['status'] == 'Correct')
                total_questions = len(student_results)
                score_percentage = (correct_count / total_questions) * 100 if total_questions > 0 else 0
                student_summary = {
                    'roll_number': roll_number,
                    'total_questions': len(question_paper.answers),
                    'correct_answers': correct_count,
                    'wrong_answers': sum(1 for result in student_results if result['status'] == 'Wrong'),
                    'missing_answers': sum(1 for result in student_results if result['status'] == 'Missing'),
                    'error_answers': sum(1 for result in student_results if result['status'] == 'Error'),
                    'score_percentage': round(score_percentage, 2),
                    'evaluation_results': student_results,
                    'ocr_results': {
                        'extracted_text': results,
                    }
                }
                all_students_results.append(student_summary)
            final_results = {
                'exam_name': exam_name,  # Include exam name in results
                'total_students': len(student_answer_paths),
                'students_evaluated': all_students_results
            }
            # STORE THE RESULTS IN SUPABASE WITH EXAM NAME
            try:
                supabase_handler = SupabaseHandler()
                unique_key = supabase_handler.store_evaluation_result(teacher_email, final_results, exam_name)
                if unique_key:
                    # Add the unique key to the response
                    final_results['unique_key'] = unique_key
                    final_results['storage_success'] = True
                    print(f"Results stored successfully with key: {unique_key} for exam: {exam_name}")
                else:
                    final_results['storage_success'] = False
                    final_results['storage_error'] = "Failed to store results in database"
                    print("Failed to store results in Supabase")
            except Exception as storage_error:
                # Storage failure is non-fatal: results are still returned.
                print(f"Error storing results: {str(storage_error)}")
                final_results['storage_success'] = False
                final_results['storage_error'] = str(storage_error)
            return jsonify(final_results)
        else:
            # NOTE(review): returns HTTP 200 with an error body — consider 404.
            return jsonify({
                'error': 'Question paper file not found for Gemini evaluation.'
            })
    except Exception as e:
        return jsonify({'error': str(e)}), 500
    finally:
        # Clean up temporary student answer files
        for path in student_answer_paths:
            try:
                if os.path.exists(path):
                    os.unlink(path)
            except PermissionError:
                pass  # File still locked on Windows; OS will clean up temp dir
# Get Evaluation
@app.route('/get_evaluation_result/<unique_key>', methods=['GET'])
def get_evaluation_result(unique_key):
    """Fetch a single stored evaluation result by its unique key."""
    try:
        record = SupabaseHandler().get_evaluation_result(unique_key)
        if not record:
            return jsonify({
                'error': 'Evaluation result not found'
            }), 404
        return jsonify({
            'success': True,
            'data': record
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500
# Get Teacher Evaluation
@app.route('/get_teacher_evaluations/<teacher_email>', methods=['GET'])
def get_teacher_evaluations(teacher_email):
    """Return every stored evaluation result belonging to the given teacher."""
    try:
        evaluations = SupabaseHandler().get_teacher_evaluations(teacher_email)
        payload = {
            'success': True,
            'data': evaluations,
            'total_evaluations': len(evaluations)
        }
        return jsonify(payload)
    except Exception as e:
        return jsonify({'error': str(e)}), 500
# Get OMR Answer Key
@app.route('/get_omr_answer_key', methods=['GET'])
def get_omr_answer_key():
    """Return the most recently processed OMR answer key, if one exists."""
    global last_processed_omr_key
    if last_processed_omr_key is not None:
        return jsonify({
            'success': True,
            'answer_key': last_processed_omr_key.to_dict()
        })
    return jsonify({
        'error': 'No answer key has been processed yet'
    }), 404
def omr_gemini_process(error_questions, correct_answers, image_file):
    """
    Use Gemini to assist in evaluating OMR sheets, especially for error questions.

    Args:
        error_questions: Question numbers the bubble-detection pipeline could
            not resolve (blank, multi-marked, or unreadable).
        correct_answers: Mapping of question number -> correct option letter.
        image_file: PIL image of the scanned OMR sheet.

    Returns:
        (roll_number, evaluations) where evaluations is a list of dicts with
        "question_number" and "status" ("Correct"/"Wrong"/"Missing"/"Error").
    """
    try:
        model = genai.GenerativeModel('gemini-2.5-flash')
        prompt_text = f"""
You are an OMR Assistant for an evaluvation script.
Your main purpose is to assist in the process.
Correct Answers to questions sorted by question number: {correct_answers}
Error Question numbers: {error_questions}
Your task:
- From the given image identify the student name and roll number
- if for some reason the traditional OMR Processing failed to detect some answers, those question numbers will be provided to you, you should look into those questions form the given image and correct answers.
- Only provide answer for the questions that are in the error list.
- You can ignore the rest of the question
- if Error question is empty, just extract the roll number and name
Please evaluate ALL questions and respond in this EXACT JSON format:
{{
"roll_number": "extracted_roll_number_here",
"evaluations": [
    {{"question_number": 1, "status": "Correct"}},
    {{"question_number": 2, "status": "Wrong"}},
    {{"question_number": 3, "status": "Missing"}},
    ...
]
}}
"""
        student_answer_img = image_file
        content = [prompt_text, student_answer_img]
        response = model.generate_content(content)
        result_text = response.text.strip()
        print(f"Gemini response: {result_text}")
        import json
        try:
            # Clean the response - sometimes Gemini adds markdown formatting
            if "```json" in result_text:
                result_text = result_text.split("```json")[1].split("```")[0].strip()
            elif "```" in result_text:
                result_text = result_text.split("```")[1].strip()
            parsed_result = json.loads(result_text)
            return parsed_result["roll_number"], parsed_result["evaluations"]
        except (json.JSONDecodeError, KeyError) as e:
            print(f"Failed to parse JSON response: {e}")
            print(f"Raw response: {result_text}")
            # Fallback: OCR the roll number from the fixed input image path.
            # NOTE(review): assumes the .jpg filename written by /process_omr;
            # confirm the extension matches the last upload.
            roll_number = extract_roll_number(os.path.join("OMRChecker", "inputs", "OMRImage.jpg"))
            # Report "Error" only for the unresolved questions, using their
            # real question numbers. (correct_answers is a dict keyed by
            # question number, so the old `i+1 for i in range(len(...))`
            # enumeration could mislabel questions; this also matches the
            # outer exception handler's shape.)
            return roll_number, [{"question_number": q, "status": "Error"} for q in error_questions]
    except Exception as e:
        print(f"Error in OMR Gemini processing: {str(e)}")
        return "Unknown", [{"question_number": q, "status": "Error"} for q in error_questions]
@app.route('/evaluate_omr', methods=['POST'])
def evaluate_omr():
    """
    Evaluate every processed OMR sheet against the stored answer key.

    Reads the globals populated by /process_omr (porcessed_omr_results,
    OMR_IMAGES) and the answer-key endpoint (last_processed_omr_key), grades
    each sheet, lets Gemini resolve bubbles the traditional pipeline could
    not read, stores the combined results in Supabase (best effort), and
    returns a summary JSON payload.
    """
    global last_processed_omr_key, last_processed_omr_results, porcessed_omr_results, OMR_IMAGES
    # Metadata sent by the frontend alongside the evaluation request
    teacher_email = request.form.get('teacher_email', 'unknown@example.com')
    exam_name = request.form.get('exam_name', 'Untitled Exam')  # Get exam name from form data
    if not last_processed_omr_key:
        return jsonify({
            'error': 'No answer key has been processed. Please process an answer key first.'
        }), 400
    if not last_processed_omr_results:
        return jsonify({
            'error': 'No OMR sheet has been processed. Please process an OMR sheet first.'
        }), 400
    try:
        student_datas = []
        # One iteration per processed answer sheet
        for idx, omr_data in enumerate(porcessed_omr_results):
            marked_answers = omr_data
            image_file = OMR_IMAGES[idx]
            # Evaluate only the questions that exist in the answer key
            correct_answers = last_processed_omr_key.answers
            total_questions_in_key = len(correct_answers)
            evaluation_details = []
            correct_count = 0
            wrong_count = 0
            missing_count = 0
            error_questions = []  # questions the OMR pipeline could not read
            for q_num in sorted(correct_answers.keys()):
                print(f"Evaluating Question {q_num}")
                print(f"Correct Answer: {correct_answers[q_num]} | Marked Answer: {marked_answers.get(str(q_num))}")
                correct_ans = correct_answers[q_num]
                # OMRChecker CSV columns are string-keyed ("1", "2", ...)
                marked_ans = marked_answers.get(str(q_num))
                # Blank / multi-marked / unreadable bubbles are provisionally
                # counted as Missing and queued for a Gemini second opinion.
                if marked_ans is None or marked_ans == '' or len(str(marked_ans)) > 1 or marked_ans == 'nan':
                    status = 'Missing'
                    error_questions.append(q_num)
                    missing_count += 1
                elif marked_ans.upper() == correct_ans.upper():
                    status = 'Correct'
                    correct_count += 1
                else:
                    status = 'Wrong'
                    wrong_count += 1
                evaluation_details.append({
                    'question_number': q_num,
                    'question_text': last_processed_omr_key.questions[q_num - 1] if q_num <= len(last_processed_omr_key.questions) else f"Question {q_num}",
                    'correct_answer': correct_ans,
                    'marked_answer': marked_ans if marked_ans != 'null' else None,
                    'status': status
                })
            # Ask Gemini to re-read the unresolved questions (and extract the
            # student's roll number) straight from the sheet image.
            roll_no, gemini_result = omr_gemini_process(
                error_questions,
                last_processed_omr_key.answers,
                image_file
            )
            for err_idx in error_questions:
                for gemini_eval in gemini_result:
                    if gemini_eval["question_number"] == err_idx:
                        correct_ans = last_processed_omr_key.answers[err_idx]
                        marked_ans = None  # the bubble itself was unreadable
                        status = gemini_eval["status"]
                        # Each error question was provisionally counted as
                        # Missing above; move it into the bucket Gemini chose.
                        if status == "Correct":
                            correct_count += 1
                            missing_count -= 1
                        elif status == "Wrong":
                            wrong_count += 1
                            missing_count -= 1
                        # status == "Missing": already counted in the first
                        # pass, so no change. (The previous code incremented
                        # missing_count again here, double-counting it.)
                        # Update the evaluation details
                        for eval_detail in evaluation_details:
                            if eval_detail['question_number'] == err_idx:
                                eval_detail.update({
                                    'marked_answer': marked_ans,
                                    'status': status
                                })
                                break
                        break
            # Score with optional negative marking; currently computed for
            # completeness — the response reports only percentages.
            total_score = correct_count * last_processed_omr_key.marks_per_question
            if last_processed_omr_key.negative_marking > 0:
                total_score -= wrong_count * last_processed_omr_key.negative_marking
            max_score = total_questions_in_key * last_processed_omr_key.marks_per_question
            student_summary = {
                'roll_number': roll_no,
                'total_questions': len(last_processed_omr_key.answers),
                'correct_answers': correct_count,
                'wrong_answers': wrong_count,
                'missing_answers': missing_count,
                'error_answers': len(error_questions),
                'score_percentage': correct_count / len(last_processed_omr_key.answers) * 100 if len(last_processed_omr_key.answers) > 0 else 0,
                'evaluation_results': evaluation_details,
                'ocr_results': {
                    'extracted_text': gemini_result,
                }
            }
            student_datas.append(student_summary)
        # Format the data in the required structure for Supabase
        formatted_evaluation_data = {
            'exam_name': exam_name,  # Include exam name in results
            'total_students': len(student_datas),
            'students_evaluated': student_datas
        }
        # Store results in Supabase (optional — skip if credentials not configured)
        unique_key = None
        try:
            supabase_handler = SupabaseHandler()
            unique_key = supabase_handler.store_evaluation_result(teacher_email, formatted_evaluation_data, exam_name)
        except Exception as supa_err:
            print(f"Supabase storage skipped: {supa_err}")
        # Prepare answer key info
        answer_key_info = {
            "title": getattr(last_processed_omr_key, 'title', 'Untitled'),
            "marks_per_question": last_processed_omr_key.marks_per_question,
            "negative_marking": last_processed_omr_key.negative_marking
        }
        # Response intentionally echoes only the storage key and key info;
        # the full evaluation payload lives in Supabase.
        final_result = {
            "success": True,
            "unique_key": unique_key,
            "additional_info": {
                "answer_key_info": answer_key_info
            }
        }
        return jsonify(final_result)
    except Exception as e:
        return jsonify({
            "success": False,
            "error": f"Evaluation failed: {str(e)}"
        }), 500
def process_with_gemini(evaluation_details, evaluation_summary, omr_data):
    """
    Use Gemini to independently evaluate the OMR sheet and extract student details.

    Args:
        evaluation_details: Per-question results from the automated pipeline.
            NOTE(review): not referenced in this function body — confirm intent.
        evaluation_summary: Aggregate counts from the automated pipeline.
            NOTE(review): not referenced in this function body — confirm intent.
        omr_data: Dict describing the processed sheet; the sheet image is
            located via an 'image_data' (base64) or 'filename' entry.

    Returns:
        dict with 'student_info' and 'gemini_evaluation' (plus a computed
        'summary' on success), or a fallback dict with a 'verification'
        section describing why Gemini evaluation was unavailable.
        NOTE(review): the prompt asks for "roll_no" inside student_info while
        every fallback dict uses "roll_number" — confirm which key consumers
        actually read.
    """
    global last_processed_omr_key
    try:
        model = genai.GenerativeModel('gemini-2.5-flash')
        # Prepare the questions and correct answers for Gemini.
        # NOTE(review): pairs questions by list position `i` with answers
        # sorted by key `q_num` — these only line up if question numbers are
        # contiguous from 1; confirm for sparse keys.
        questions_and_answers = ""
        for i, (q_num, correct_answer) in enumerate(sorted(last_processed_omr_key.answers.items())):
            question_text = last_processed_omr_key.questions[i] if i < len(last_processed_omr_key.questions) else f"Question {q_num}"
            questions_and_answers += f"Question {q_num}: {question_text}\nCorrect Answer: {correct_answer}\n\n"
        prompt = f"""
You are a teacher grading an OMR answer sheet.
STUDENT INFO: Extract the student's name and roll number from the image.
GRADING TASK: For each question, identify which bubble (A, B, C, or D) is filled/darkened, then compare with the correct answer.
QUESTIONS AND CORRECT ANSWERS:
{questions_and_answers}
IMPORTANT: Look carefully at each row of bubbles. A filled bubble will be darkened/shaded, while empty bubbles will be white/clear.
Respond in this EXACT JSON format:
{{
"student_info": {{
"name": "extracted student name",
"roll_no": "extracted roll number"
}},
"gemini_evaluation": [
{{"question": 1, "marked_answer": "C", "correct_answer": "C", "status": "Correct"}},
{{"question": 2, "marked_answer": "D", "correct_answer": "D", "status": "Correct"}},
// ... continue for all questions
]
}}
For status: use "Correct", "Wrong", or "Missing" only.
For marked_answer: use "A", "B", "C", "D", or null if no bubble is clearly filled.
"""
        # Get the image - we need to retrieve it from the last processed OMR.
        # Since the image is not stored directly, locate it from whatever
        # reference omr_data carries.
        if 'image_data' in omr_data:
            # Base64-encoded image bytes supplied inline
            image_data = omr_data['image_data']
            image_bytes = base64.b64decode(image_data)
            image = Image.open(io.BytesIO(image_bytes))
        elif 'filename' in omr_data:
            # Try to find the image file on disk
            try:
                # Look for the image in common locations
                possible_paths = [
                    f"Images/{omr_data['filename']}",
                    f"temp/{omr_data['filename']}",
                    omr_data['filename']
                ]
                image = None
                for path in possible_paths:
                    if os.path.exists(path):
                        image = Image.open(path)
                        break
                if image is None:
                    # If we can't find the image, return a fallback result
                    return {
                        "student_info": {
                            "name": "Image not available",
                            "roll_number": "Image not available"
                        },
                        "verification": {
                            "evaluation_correct": "unknown",
                            "confidence": "low",
                            "discrepancies": ["Original image not available for verification"],
                            "notes": "Could not verify due to missing image file"
                        },
                        "gemini_evaluation": []
                    }
            except Exception as e:
                print(f"Error loading image: {str(e)}")
                return {
                    "student_info": {
                        "name": "Error loading image",
                        "roll_number": "Error loading image"
                    },
                    "verification": {
                        "evaluation_correct": "unknown",
                        "confidence": "low",
                        "discrepancies": [f"Error loading image: {str(e)}"],
                        "notes": "Image processing failed"
                    },
                    "gemini_evaluation": []
                }
        else:
            # No image reference available at all
            return {
                "student_info": {
                    "name": "No image data",
                    "roll_number": "No image data"
                },
                "verification": {
                    "evaluation_correct": "unknown",
                    "confidence": "low",
                    "discrepancies": ["No image data available"],
                    "notes": "Cannot verify without image"
                },
                "gemini_evaluation": []
            }
        # Generate content with Gemini
        response = model.generate_content([prompt, image])
        result_text = response.text.strip()
        print(f"Gemini raw response: {result_text}")
        # Parse the JSON response
        try:
            # Clean the response - remove markdown formatting if present
            if "```json" in result_text:
                result_text = result_text.split("```json")[1].split("```")[0].strip()
            elif "```" in result_text:
                result_text = result_text.split("```")[1].strip()
            parsed_result = json.loads(result_text)
            # Derive summary counts and score from Gemini's evaluation
            if 'gemini_evaluation' in parsed_result:
                correct_count = sum(1 for item in parsed_result['gemini_evaluation'] if item.get('status') == 'Correct')
                wrong_count = sum(1 for item in parsed_result['gemini_evaluation'] if item.get('status') == 'Wrong')
                missing_count = sum(1 for item in parsed_result['gemini_evaluation'] if item.get('status') == 'Missing')
                score = (correct_count * last_processed_omr_key.marks_per_question) - (wrong_count * last_processed_omr_key.negative_marking)
                max_score = len(last_processed_omr_key.answers) * last_processed_omr_key.marks_per_question
                parsed_result['summary'] = {
                    "total_questions": len(last_processed_omr_key.answers),
                    "correct_count": correct_count,
                    "wrong_count": wrong_count,
                    "missing_count": missing_count,
                    "score": score,
                    "max_score": max_score,
                    "percentage": round((score / max_score) * 100, 2) if max_score > 0 else 0
                }
            return parsed_result
        except json.JSONDecodeError as e:
            print(f"Failed to parse Gemini JSON response: {e}")
            print(f"Raw response: {result_text}")
            # Fallback response when Gemini's output is not valid JSON
            return {
                "student_info": {
                    "name": "Parse error",
                    "roll_number": "Parse error"
                },
                "verification": {
                    "evaluation_correct": "unknown",
                    "confidence": "low",
                    "discrepancies": ["Failed to parse Gemini response"],
                    "notes": f"JSON parse error: {str(e)}"
                },
                "gemini_evaluation": [],
                "raw_response": result_text  # Include raw response for debugging
            }
    except Exception as e:
        # Catch-all: API errors, missing answer key, etc.
        return {
            "student_info": {
                "name": "Processing error",
                "roll_number": "Processing error"
            },
            "verification": {
                "evaluation_correct": "unknown",
                "confidence": "low",
                "discrepancies": [f"Gemini processing error: {str(e)}"],
                "notes": "Failed to process with Gemini"
            },
            "gemini_evaluation": []
        }
def compare_evaluations(our_evaluation, gemini_evaluation):
    """
    Compare the automated pipeline's evaluation with Gemini's independent one.

    Questions present in both evaluations are compared by status; agreements
    are counted and per-question disagreements are collected. Returns a dict
    describing the agreement rate, or a "not available" marker when Gemini
    produced no evaluation.
    """
    if not gemini_evaluation:
        return {
            "comparison_available": False,
            "reason": "Gemini evaluation not available"
        }
    # Index our evaluation by question number for O(1) lookup
    by_question = {detail['question_number']: detail for detail in our_evaluation}
    agreements = 0
    total_compared = 0
    differences = []
    for gemini_entry in gemini_evaluation:
        q_num = gemini_entry.get('question')
        if q_num not in by_question:
            continue
        total_compared += 1
        ours = by_question[q_num]
        gemini_status = gemini_entry.get('status')
        if ours['status'] == gemini_status:
            agreements += 1
            continue
        differences.append({
            "question": q_num,
            "our_evaluation": {
                "marked_answer": ours['marked_answer'],
                "status": ours['status']
            },
            "gemini_evaluation": {
                "marked_answer": gemini_entry.get('marked_answer'),
                "status": gemini_status
            }
        })
    rate = round((agreements / total_compared) * 100, 2) if total_compared > 0 else 0
    return {
        "comparison_available": True,
        "total_questions_compared": total_compared,
        "agreements": agreements,
        "differences_count": len(differences),
        "agreement_rate": rate,
        "differences": differences
    }
# /process_omr endpoint: runs OMRChecker on uploaded sheets and keeps each sheet's image in memory for later Gemini-based verification.
@app.route('/process_omr', methods=['POST'])
def process_omr_enhanced():
    """
    Enhanced OMR processing that stores image data for later Gemini processing.

    Expects multipart/form-data with an 'images' field (one or more scanned
    OMR sheets). Each sheet is written to OMRChecker's fixed input path, the
    external OMRChecker pipeline is run on it, and the resulting CSV is
    parsed into a {question_number: marked_answer} dict that is appended to
    the module-level porcessed_omr_results (note: name typo is load-bearing —
    evaluate_omr reads the same global). The PIL image for each sheet is kept
    in OMR_IMAGES for Gemini reprocessing.
    """
    global last_processed_omr_results
    global OMR_IMAGES
    global porcessed_omr_results
    # Reset per-request accumulators shared with /evaluate_omr
    OMR_IMAGES = []
    porcessed_omr_results = []
    try:
        results = []
        print("Starting OMR processing...")
        # Check if files were uploaded
        if 'images' in request.files:
            files = request.files.getlist('images')
            results = []
            for idx, file in enumerate(files):
                if file.filename == '':
                    continue
                print(f"===================================== Processing file {file.filename} =====================================")
                name, extension = os.path.splitext(file.filename)
                # Every upload is saved to the same fixed path OMRChecker scans,
                # so sheets are processed strictly one at a time.
                filename = os.path.join("OMRChecker", "inputs", "OMRImage" + extension)
                file.save(filename)
                OMR_IMAGES.append(Image.open(filename))
                # Run the external OMRChecker pipeline on the inputs folder.
                # NOTE(review): the return code of `result` is never checked —
                # a failed run would surface later as a CSV error.
                result = subprocess.run([sys.executable, os.path.join('OMRChecker', 'main.py'), '--inputDir=' + os.path.join('OMRChecker', 'inputs')])
                print("OMR Finished Processing Successfully")
                folder = os.path.join("outputs", "Results")
                csv_files = [f for f in os.listdir(folder) if f.endswith(".csv")]
                print("CSV FILES:", csv_files)
                # NOTE(review): assumes exactly one CSV is present; the
                # per-sheet delete below keeps the folder clean between runs.
                result_file = os.path.join(folder, csv_files[0])
                print("Found Result File", result_file)
                df = pd.read_csv(result_file)
                # Convert to JSON
                data_json = df.to_json(orient="records")
                parsed_json = json.loads(data_json)
                columns_dict = df.to_dict(orient="list")
                print(columns_dict)
                # Keep only the q<N> columns, keyed by question-number string
                questions_only = {k.replace("q", ""): v[0] for k, v in columns_dict.items() if k.startswith("q")}
                last_processed_omr_results = questions_only
                porcessed_omr_results.append(questions_only)
                # Delete the CSV so the next sheet's run starts clean
                if os.path.exists(result_file):
                    os.remove(result_file)
                    print(f"{result_file} deleted")
            # NOTE(review): only the last sheet's parsed CSV is returned here
            # (and this raises NameError if no file was processed) — confirm
            # whether all sheets' records should be returned instead.
            return jsonify(parsed_json)
        else:
            return jsonify({
                "success": False,
                "error": "No images provided. Use 'images' field for file uploads.",
                "results": []
            }), 400
    except Exception as e:
        return jsonify({
            "success": False,
            "error": f"Server error: {str(e)}",
            "results": []
        }), 500
@app.route('/get_question_details/<int:question_number>', methods=['GET'])
def get_question_details(question_number):
    """Look up one question's full details from the stored OMR answer key."""
    global last_processed_omr_key
    answer_key = last_processed_omr_key
    if answer_key is None:
        return jsonify({
            'error': 'No answer key has been processed yet'
        }), 404
    details = answer_key.get_question_details(question_number)
    if details is None:
        return jsonify({
            'error': f'Question number {question_number} not found'
        }), 404
    return jsonify({
        'success': True,
        'question_data': details
    })
@app.route('/debug_parsing', methods=['GET'])
def debug_parsing():
    """Debug endpoint: show how each stored OCR text parses into answers."""
    if not ocr_extracted_texts:
        return jsonify({'error': 'No OCR extracted texts available.'}), 400
    debug_results = [
        {
            'original_ocr_text': raw_text,
            'parsed_answers': improved_clean_and_parse_ocr_text(raw_text)
        }
        for raw_text in ocr_extracted_texts
    ]
    return jsonify({'debug_results': debug_results})
def extract_omr_metadata(text: str) -> tuple:
    """Extract (title, duration) from raw question-paper text.

    The title is the first of the first five lines that is fully uppercase
    and longer than 10 characters; the duration comes from a "Time: N
    minutes/mins/min" pattern anywhere in the text. Either value may be the
    empty string when not found.
    """
    title = ""
    duration = ""
    # Titles are conventionally near the top and written in capitals
    for candidate in text.split('\n')[:5]:
        stripped = candidate.strip()
        if stripped == stripped.upper() and len(stripped) > 10:
            title = stripped
            break
    duration_match = re.search(r'Time:\s*(\d+)\s*(minutes|mins|min)', text, re.IGNORECASE)
    if duration_match:
        duration = f"{duration_match.group(1)} minutes"
    return title, duration
def extract_omr_answers(text: str) -> tuple:
    """
    Parse questions, options, and answers out of raw question-paper text.

    Expected line shapes:
        1. Question text          (or "1) Question text")
        A) option text            (or "A. option text")
        Answer: B                 (or the full option text)

    Lines before the first question (a line starting with "1.") are treated
    as header and skipped. When an answer is given as full option text it is
    mapped back to its option letter.

    Returns:
        tuple: (answers, questions) where
            answers   -- dict mapping question number (int) -> answer letter/text
            questions -- list of "N. question text" strings, answered questions only
        (The previous annotation claimed ``dict``, but a 2-tuple has always
        been returned and callers unpack it accordingly.)
    """
    answers = {}
    questions = []
    question_data = []
    print("\nStarting answer extraction...")
    # Split text into non-empty, stripped lines
    lines = [line.strip() for line in text.split('\n') if line.strip()]
    # Skip header lines until we find the first question
    started = False
    current_dict = None
    for line in lines:
        print(f"Processing line: {line}")
        if not started:
            if line.startswith('1.'):
                started = True
            else:
                continue
        # New question, e.g. "3. What is ...?" or "3) What is ...?"
        question_match = re.match(r'^(\d+)[.)](.*?)$', line)
        if question_match:
            # Save previous question if exists
            if current_dict:
                question_data.append(current_dict)
            q_num = int(question_match.group(1))
            q_text = question_match.group(2).strip()
            current_dict = {
                'number': q_num,
                'question': q_text,
                'options': {},
                'answer': None
            }
            continue
        # Option line, e.g. "A) Paris" or "A. Paris"
        option_match = re.match(r'^([A-D])[).](.*?)$', line)
        if option_match and current_dict is not None:
            opt_letter = option_match.group(1)
            opt_text = option_match.group(2).strip()
            current_dict['options'][opt_letter] = opt_text
            continue
        # Answer line; captures a bare letter or the full option text
        answer_match = re.match(r'^\s*Answer[:\s]*([A-D]|.+)$', line, re.IGNORECASE)
        if answer_match and current_dict is not None:
            answer = answer_match.group(1).strip()
            # Map full option text back to its letter when possible
            for opt_letter, opt_text in current_dict['options'].items():
                if answer.lower() == opt_text.lower():
                    answer = opt_letter
                    break
            current_dict['answer'] = answer
            continue
    # Add last question
    if current_dict:
        question_data.append(current_dict)
    print("\nExtracted Question Data:")
    for q in question_data:
        print(f"\nQuestion {q['number']}:")
        print(f"Text: {q['question']}")
        print(f"Options: {q['options']}")
        print(f"Answer: {q['answer']}")
        # Only questions with a detected answer make it into the output
        if q['answer']:
            answers[q['number']] = q['answer']
            questions.append(f"{q['number']}. {q['question']}")
    print(f"\nExtracted {len(questions)} questions and {len(answers)} answers")
    print("Questions:", questions)
    print("Answers:", answers)
    return answers, questions
def debug_text_extraction(text: str):
    """Print the raw extracted text plus a per-line breakdown for OCR debugging."""
    print("=== Extracted Text ===")
    print(text)
    print("\n=== Line by Line Analysis ===")
    for raw_line in text.split('\n'):
        stripped = raw_line.strip()
        if stripped:
            print(f"Line: {stripped}")
@app.route('/process_omr_answer_key', methods=['POST'])
def process_omr_answer_key():
    """
    Process OMR answer key from either:
    1. JSON format with direct answers
    2. PDF/Image of question paper with answers marked

    For JSON format:
    {
        "answers": {
            "1": "A",
            "2": "B",
            ...
        },
        "marks_per_question": 1.0,  # optional, defaults to 1
        "negative_marking": 0.0     # optional, defaults to 0
    }

    For PDF/Image:
    multipart/form-data with 'file' field containing the question paper

    On success returns a JSON payload with the parsed key and caches it in
    the module-level ``last_processed_omr_key`` for later grading calls.
    Returns 400 for malformed input, 500 for any processing failure.
    """
    global last_processed_omr_key
    try:
        omr_key = OMRAnswerKey()
        # Branch on upload style: multipart file vs. JSON body.
        if 'file' in request.files:
            file = request.files['file']
            if file.filename == '':
                return jsonify({'error': 'No file selected'}), 400

            # Create Images directory if it doesn't exist (uploads + debug output).
            images_dir = os.path.join(app.root_path, 'Images')
            os.makedirs(images_dir, exist_ok=True)

            if file.filename.lower().endswith('.pdf'):
                # Save and process PDF
                answer_key_path = os.path.join(images_dir, "omr_answer_key.pdf")
                file.save(answer_key_path)
                omr_key.path = answer_key_path

                # Convert PDF to images and OCR each page, accumulating the text.
                all_text = ""
                try:
                    print(f"\nProcessing PDF file: {answer_key_path}")
                    images_from_pdf = convert_from_path(
                        answer_key_path,
                        poppler_path=_get_poppler_path(),
                        dpi=300  # Increase DPI for better quality
                    )
                    print(f"Converted PDF to {len(images_from_pdf)} images")
                    for idx, page_image in enumerate(images_from_pdf):
                        print(f"\nProcessing page {idx + 1}")
                        # Preprocess for OCR: grayscale + Otsu threshold yields
                        # a clean black-and-white page for Tesseract.
                        img_np = np.array(page_image)
                        gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
                        _, threshold = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
                        # Save processed image so OCR problems can be inspected later.
                        debug_image_path = os.path.join(images_dir, f"debug_page_{idx + 1}.png")
                        cv2.imwrite(debug_image_path, threshold)
                        print(f"Saved processed image to {debug_image_path}")
                        # --oem 3 = default engine, --psm 6 = assume one uniform text block.
                        custom_config = r'--oem 3 --psm 6'
                        text = pytesseract.image_to_string(threshold, config=custom_config)
                        print(f"Extracted text length: {len(text)}")
                        all_text += text + "\n"
                    print("\nTotal extracted text length:", len(all_text))
                except Exception as e:
                    # Log and re-raise so the outer handler reports the failure.
                    print(f"Error during PDF processing: {str(e)}")
                    raise

                # Debug the extracted text
                print("\nDebugging PDF extraction:")
                debug_text_extraction(all_text)

                # Extract metadata and answers from the combined page text.
                title, duration = extract_omr_metadata(all_text)
                answers, questions = extract_omr_answers(all_text)
                print("\nExtracted answers:", answers)
                omr_key.set_metadata(title, duration)
                omr_key.set_answers(answers)
                omr_key.questions = questions
            else:
                # Process as image
                answer_key_path = os.path.join(images_dir, "omr_answer_key.png")
                file.save(answer_key_path)
                omr_key.path = answer_key_path

                # Fix: context manager closes the image file handle
                # (the original Image.open left it open).
                with Image.open(answer_key_path) as image:
                    text = pytesseract.image_to_string(image)

                # Debug the extracted text
                print("\nDebugging Image extraction:")
                debug_text_extraction(text)

                # Extract metadata and answers
                title, duration = extract_omr_metadata(text)
                answers, questions = extract_omr_answers(text)
                print("\nStructured Extraction Results:")
                print("Title:", title)
                print("Duration:", duration)
                print("\nQuestions found:", len(questions))
                print("Answers found:", len(answers))
                print("\nAnswers:", answers)
                omr_key.set_metadata(title, duration)
                omr_key.set_answers(answers)
                omr_key.questions = questions

            # Marking scheme for ANY file upload comes from the form fields.
            # Fix: set at this level so the PDF path also defines these names
            # before set_marking_scheme is called below.
            marks_per_question = float(request.form.get('marks_per_question', 1.0))
            negative_marking = float(request.form.get('negative_marking', 0.0))
        else:
            # Process JSON input
            if not request.is_json:
                return jsonify({'error': 'Request must be JSON or file upload'}), 400
            data = request.get_json()
            if 'answers' not in data:
                return jsonify({'error': 'Answer key must be provided'}), 400

            # Validate answer format: integer question numbers, answers in A-D.
            answer_key = data['answers']
            for q_num, answer in answer_key.items():
                try:
                    q_num = int(q_num)
                    if not isinstance(answer, str) or answer.upper() not in ['A', 'B', 'C', 'D']:
                        return jsonify({
                            'error': f'Invalid answer format for question {q_num}. Must be A, B, C, or D'
                        }), 400
                except ValueError:
                    return jsonify({
                        'error': f'Question numbers must be integers, got {q_num}'
                    }), 400

            # Set the answers
            omr_key.set_answers(answer_key)

            # Set metadata if provided
            title = data.get('title', '')
            duration = data.get('duration', '')
            omr_key.set_metadata(title, duration)

            # Marking scheme from the JSON body (defaults: 1 mark, no negatives).
            marks_per_question = float(data.get('marks_per_question', 1.0))
            negative_marking = float(data.get('negative_marking', 0.0))

        # Apply the marking scheme chosen by whichever branch ran.
        omr_key.set_marking_scheme(marks_per_question, negative_marking)

        # Cache globally so later grading endpoints can reuse this key.
        last_processed_omr_key = omr_key

        return jsonify({
            'success': True,
            'message': 'OMR answer key processed successfully',
            'answer_key': omr_key.to_dict()
        })
    except Exception as e:
        # Boundary handler: surface any unexpected failure as a 500.
        return jsonify({
            'error': f'Failed to process answer key: {str(e)}'
        }), 500
if __name__ == '__main__':
    # Bind on all interfaces; port and debug flag are environment-driven
    # (PORT defaults to 5000, FLASK_DEBUG must be the string "true").
    serve_port = int(os.environ.get("PORT", 5000))
    debug_enabled = os.environ.get("FLASK_DEBUG", "false").lower() == "true"
    app.run(host="0.0.0.0", port=serve_port, debug=debug_enabled)