Spaces:

mohammedriza-rahman
/

AutoGraderPro

Sleeping

App Files Files Community

AutoGraderPro / ocr_processing.py

mohammedriza-rahman

Update ocr_processing.py

efa7e0c verified 11 months ago

raw

history blame contribute delete

3.35 kB

	import os
	import base64
	import cv2
	import numpy as np
	import fitz # PyMuPDF
	from dotenv import load_dotenv
	from groq import Groq

	# Let python-dotenv populate the process environment first, then look up
	# the Groq credential. API_KEY ends up as None when the variable is unset.
	load_dotenv()
	API_KEY = os.environ.get("GROQ_API_KEY")

	class OCRProcessor:
	    """Prepare page images and send them to a Groq-hosted vision model.

	    The class bundles the file-handling steps around the model call:
	    re-encoding an image with OpenCV, rasterising PDF pages with PyMuPDF,
	    base64-encoding an image file and posting it in a chat completion.
	    """

	    # MIME type placed in the data URL when the payload is not recognised.
	    _DEFAULT_MIME_TYPE = "image/jpeg"

	    # File signatures (leading bytes) mapped to the MIME type they identify.
	    _IMAGE_SIGNATURES = (
	        (b"\x89PNG\r\n\x1a\n", "image/png"),
	        (b"\xff\xd8\xff", "image/jpeg"),
	        (b"GIF87a", "image/gif"),
	        (b"GIF89a", "image/gif"),
	    )

	    def __init__(self, model="llama-3.2-90b-vision-preview"):
	        """Create a processor bound to one model and one Groq client.

	        Args:
	            model: Model identifier passed to every chat completion call.
	                NOTE(review): the default is a preview model id; confirm
	                the provider still serves it.

	        Raises:
	            ValueError: If the module-level ``API_KEY`` is empty or None.
	        """
	        if not API_KEY:
	            raise ValueError("GROQ_API_KEY is missing! Please set it as an environment variable.")

	        self.model = model
	        self.client = Groq(api_key=API_KEY)

	    def enhance_image(self, input_path, output_path):
	        """Re-encode an image file through OpenCV and return the new path.

	        No filtering is applied: the image is read with ``cv2.imread`` and
	        written unchanged with ``cv2.imwrite``.

	        Args:
	            input_path: Path of the image to read.
	            output_path: Path the re-encoded image is written to.

	        Returns:
	            ``output_path``.

	        Raises:
	            FileNotFoundError: If ``input_path`` does not exist.
	            ValueError: If OpenCV cannot decode ``input_path``.
	            OSError: If OpenCV reports that the output was not written.
	        """
	        if not os.path.exists(input_path):
	            raise FileNotFoundError(f"File not found: {input_path}")

	        image = cv2.imread(input_path)
	        if image is None:
	            raise ValueError(f"Could not process image: {input_path}")

	        # cv2.imwrite reports failure through its boolean result, so an
	        # unchecked call could hand back a path that was never written.
	        if not cv2.imwrite(output_path, image):
	            raise OSError(f"Failed to save image: {output_path}")
	        return output_path

	    def convert_pdf_to_images(self, pdf_path, save_dir="./uploads"):
	        """Render every page of a PDF to a PNG file.

	        Files are named ``page_<n>.png`` (1-based) inside ``save_dir``, so a
	        second PDF rendered into the same directory overwrites the first.

	        Args:
	            pdf_path: Path of the PDF to rasterise.
	            save_dir: Directory for the page images; created if missing.

	        Returns:
	            List of written image paths, in page order.

	        Raises:
	            FileNotFoundError: If ``pdf_path`` does not exist.
	            Exception: If a page image is missing after saving.
	        """
	        if not os.path.exists(pdf_path):
	            raise FileNotFoundError(f"PDF file not found: {pdf_path}")

	        os.makedirs(save_dir, exist_ok=True)
	        image_paths = []

	        # The context manager closes the document even when rendering or
	        # saving a page raises part-way through.
	        with fitz.open(pdf_path) as doc:
	            for page_number, page in enumerate(doc, start=1):
	                img = page.get_pixmap()

	                image_file = os.path.join(save_dir, f"page_{page_number}.png")
	                img.save(image_file)

	                if not os.path.exists(image_file):
	                    raise Exception(f"Failed to save image: {image_file}")

	                image_paths.append(image_file)

	        return image_paths

	    def encode_image(self, img_path):
	        """Read a file and return its contents as a base64 string.

	        Args:
	            img_path: Path of the file to encode.

	        Returns:
	            The base64 text (UTF-8 decoded) of the file's bytes.

	        Raises:
	            FileNotFoundError: If ``img_path`` does not exist.
	            Exception: If reading fails or the encoded text is shorter than
	                50 characters; the underlying error is attached as the cause.
	        """
	        if not os.path.exists(img_path):
	            raise FileNotFoundError(f"File not found: {img_path}")

	        try:
	            with open(img_path, "rb") as img_file:
	                encoded_data = base64.b64encode(img_file.read()).decode("utf-8")

	            if not encoded_data or len(encoded_data) < 50:
	                raise ValueError("Encoded image data is too short, possibly corrupted.")

	            return encoded_data

	        except Exception as e:
	            # Chain the cause so the original traceback stays visible.
	            raise Exception(f"Failed to encode image: {e}") from e

	    @staticmethod
	    def _sniff_mime_type(encoded_img):
	        """Guess an image MIME type from the start of a base64 payload.

	        Returns ``image/jpeg`` when the payload cannot be decoded or does
	        not begin with a known PNG, JPEG, GIF or WebP signature.
	        """
	        try:
	            # 16 base64 characters decode to 12 bytes, which covers the
	            # longest signature checked below (WebP needs bytes 8-11).
	            header = base64.b64decode(encoded_img[:16])
	        except (ValueError, TypeError):
	            return OCRProcessor._DEFAULT_MIME_TYPE

	        for signature, mime_type in OCRProcessor._IMAGE_SIGNATURES:
	            if header.startswith(signature):
	                return mime_type
	        if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
	            return "image/webp"
	        return OCRProcessor._DEFAULT_MIME_TYPE

	    def extract_text_from_image(self, encoded_img, prompt_text, mime_type=None):
	        """Send a base64 image and a prompt to the model and return its reply.

	        Args:
	            encoded_img: Base64 text of the image; must be at least 50
	                characters long.
	            prompt_text: Instruction sent alongside the image.
	            mime_type: MIME type for the data URL. When None it is guessed
	                from the payload, falling back to ``image/jpeg``.

	        Returns:
	            ``response.choices[0].message`` exactly as the client returns it
	            (a message object, not a plain string; the text is presumably on
	            its ``content`` attribute - confirm against the Groq SDK).

	        Raises:
	            ValueError: If ``encoded_img`` is empty or shorter than 50 characters.
	            Exception: If the API call fails; the original error is the cause.
	        """
	        if not encoded_img or len(encoded_img) < 50:  # Ensures valid base64 string
	            raise ValueError("Invalid base64-encoded image data!")

	        if mime_type is None:
	            # Pages rendered by convert_pdf_to_images are PNG, so a fixed
	            # image/jpeg label would mislabel them.
	            mime_type = self._sniff_mime_type(encoded_img)

	        messages = [
	            {
	                "role": "user",
	                "content": [
	                    {"type": "text", "text": prompt_text},
	                    {
	                        "type": "image_url",
	                        "image_url": {"url": f"data:{mime_type};base64,{encoded_img}"},
	                    },
	                ],
	            }
	        ]

	        try:
	            response = self.client.chat.completions.create(model=self.model, messages=messages)
	            return response.choices[0].message

	        except Exception as err:
	            raise Exception(f"OCR extraction failed: {err}") from err