# Source: AutoGraderPro / ocr_processing.py
# Commit efa7e0c ("Update ocr_processing.py") by mohammedriza-rahman
import os
import base64
import cv2
import numpy as np
import fitz # PyMuPDF
from dotenv import load_dotenv
from groq import Groq
# Let python-dotenv populate the process environment first, then look up the
# Groq credential. API_KEY ends up as None when GROQ_API_KEY is not set.
load_dotenv()
API_KEY = os.environ.get("GROQ_API_KEY")
class OCRProcessor:
    """Prepare images / PDF pages and send them to a Groq vision model.

    The class bundles four steps of an OCR pipeline: re-encoding an image
    with OpenCV, rasterising PDF pages with PyMuPDF, base64-encoding an
    image file, and submitting the encoded image plus a prompt to the Groq
    chat-completions endpoint.
    """

    # Base64 payloads shorter than this are treated as empty or corrupted.
    _MIN_ENCODED_LENGTH = 50

    # MIME type used when the image format cannot be recognised; this is the
    # value the data URL was historically hard-coded to.
    _DEFAULT_MIME_TYPE = "image/jpeg"

    def __init__(self, model="llama-3.2-90b-vision-preview"):
        """Create a processor bound to a Groq client.

        Args:
            model: Name of the Groq vision model used for extraction.

        Raises:
            ValueError: If the module-level ``API_KEY`` is empty or unset.
        """
        if not API_KEY:
            raise ValueError("GROQ_API_KEY is missing! Please set it as an environment variable.")
        self.model = model
        self.client = Groq(api_key=API_KEY)

    def enhance_image(self, input_path, output_path):
        """Re-encode the image at ``input_path`` into ``output_path``.

        NOTE: no enhancement filter is applied; the image is decoded with
        ``cv2.imread`` and written back unchanged with ``cv2.imwrite``
        (the output format follows the extension of ``output_path``).

        Args:
            input_path: Path of the image to read.
            output_path: Path the re-encoded image is written to.

        Returns:
            ``output_path``.

        Raises:
            FileNotFoundError: If ``input_path`` does not exist.
            ValueError: If OpenCV cannot decode the input file.
            OSError: If OpenCV reports that the output could not be written.
        """
        if not os.path.exists(input_path):
            raise FileNotFoundError(f"File not found: {input_path}")

        image = cv2.imread(input_path)
        if image is None:
            raise ValueError(f"Could not process image: {input_path}")

        # cv2.imwrite signals failure through its return value rather than
        # by raising, so it must be checked explicitly.
        if not cv2.imwrite(output_path, image):
            raise OSError(f"Failed to save image: {output_path}")
        return output_path

    def convert_pdf_to_images(self, pdf_path, save_dir="./uploads"):
        """Render every page of a PDF to a PNG file.

        Pages are written as ``page_<n>.png`` (1-based) inside ``save_dir``,
        which is created when missing. Because the file names do not depend
        on the PDF, converting a second PDF into the same directory
        overwrites the pages of the first.

        Args:
            pdf_path: Path of the PDF to convert.
            save_dir: Directory that receives the page images.

        Returns:
            List of the written image paths, in page order.

        Raises:
            FileNotFoundError: If ``pdf_path`` does not exist.
            Exception: If a page image is missing after saving.
        """
        if not os.path.exists(pdf_path):
            raise FileNotFoundError(f"PDF file not found: {pdf_path}")

        os.makedirs(save_dir, exist_ok=True)
        doc = fitz.open(pdf_path)
        image_paths = []
        try:
            for page_number, page in enumerate(doc, start=1):
                image_file = os.path.join(save_dir, f"page_{page_number}.png")
                page.get_pixmap().save(image_file)
                if not os.path.exists(image_file):
                    raise Exception(f"Failed to save image: {image_file}")
                image_paths.append(image_file)
        finally:
            # Release the document even when rendering or saving fails.
            doc.close()
        return image_paths

    def encode_image(self, img_path):
        """Return the content of ``img_path`` as a base64 string.

        Args:
            img_path: Path of the image file to encode.

        Returns:
            The file content, base64-encoded and decoded to ``str``.

        Raises:
            FileNotFoundError: If ``img_path`` does not exist.
            Exception: If the file cannot be opened or read; the original
                ``OSError`` is attached as ``__cause__``.
            ValueError: If the encoded data is shorter than
                ``_MIN_ENCODED_LENGTH`` characters.
        """
        if not os.path.exists(img_path):
            raise FileNotFoundError(f"File not found: {img_path}")

        # Only the I/O is guarded, so the validation error below is not
        # swallowed and re-wrapped by this handler.
        try:
            with open(img_path, "rb") as img_file:
                encoded_data = base64.b64encode(img_file.read()).decode("utf-8")
        except OSError as e:
            raise Exception(f"Failed to encode image: {e}") from e

        if len(encoded_data) < self._MIN_ENCODED_LENGTH:
            raise ValueError("Encoded image data is too short, possibly corrupted.")
        return encoded_data

    @staticmethod
    def _detect_mime_type(encoded_img):
        """Guess the image MIME type from the magic bytes of base64 data."""
        try:
            # 16 base64 characters decode to the first 12 bytes of the file.
            header = base64.b64decode(encoded_img[:16])
        except (ValueError, TypeError):
            return OCRProcessor._DEFAULT_MIME_TYPE

        if header.startswith(b"\x89PNG\r\n\x1a\n"):
            return "image/png"
        if header.startswith(b"GIF8"):
            return "image/gif"
        if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
            return "image/webp"
        return OCRProcessor._DEFAULT_MIME_TYPE

    def extract_text_from_image(self, encoded_img, prompt_text, mime_type=None):
        """Send a base64 image and a prompt to the configured vision model.

        Args:
            encoded_img: Base64-encoded image data (no ``data:`` prefix).
            prompt_text: Instruction sent alongside the image.
            mime_type: MIME type placed in the data URL. When ``None`` it is
                detected from the image's magic bytes (PNG, GIF, WEBP) and
                falls back to ``"image/jpeg"``.

        Returns:
            ``response.choices[0].message`` -- the message object of the
            first completion choice, not a plain string.

        Raises:
            ValueError: If ``encoded_img`` is empty or shorter than
                ``_MIN_ENCODED_LENGTH`` characters.
            Exception: If the API call fails or returns no choices; the
                original error is attached as ``__cause__``.
        """
        if not encoded_img or len(encoded_img) < self._MIN_ENCODED_LENGTH:
            raise ValueError("Invalid base64-encoded image data!")

        if mime_type is None:
            # Pages produced by convert_pdf_to_images are PNG, so labelling
            # every payload as JPEG would mis-describe them.
            mime_type = self._detect_mime_type(encoded_img)

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt_text},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{encoded_img}"},
                    },
                ],
            }
        ]

        try:
            response = self.client.chat.completions.create(model=self.model, messages=messages)
            return response.choices[0].message
        except Exception as err:
            raise Exception(f"OCR extraction failed: {err}") from err