Spaces:

mohammedriza-rahman
/

AutoGraderPro

Sleeping

File size: 3,347 Bytes

291afdf
 
 
 
 
 
 
 
 
 
 
 
 
 
efa7e0c
 
 
291afdf
 
 
 
efa7e0c
 
 
291afdf
 
 
 
 
 
 
 
 
 
 
efa7e0c
 
 
 
 
 
291afdf
 
 
 
 
 
 
efa7e0c
291afdf
 
efa7e0c
 
 
 
291afdf
 
 
 
 
 
efa7e0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291afdf
 
efa7e0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291afdf
 
 
 
efa7e0c
291afdf

import os
import base64
import cv2
import numpy as np
import fitz  # PyMuPDF
from dotenv import load_dotenv
from groq import Groq

# Let python-dotenv populate the process environment, then look up the Groq
# credential. API_KEY is None when GROQ_API_KEY is not set.
load_dotenv()
API_KEY = os.environ.get("GROQ_API_KEY")

class OCRProcessor:
    """Prepare page images and submit them to a Groq vision model.

    The class can render a PDF into one PNG file per page, base64-encode an
    image file, and send an encoded image together with a text prompt to the
    chat-completions endpoint of the configured model.
    """

    # MIME type used in the data URL when the image format is not recognised.
    _DEFAULT_MIME_TYPE = "image/jpeg"

    # Shortest base64 payload that is accepted as a plausible image.
    _MIN_ENCODED_LENGTH = 50

    def __init__(self, model="llama-3.2-90b-vision-preview"):
        """Create a Groq client bound to *model*.

        Args:
            model: Identifier of the vision model used for every request.

        Raises:
            ValueError: If the module-level ``API_KEY`` is empty or unset.
        """
        if not API_KEY:
            raise ValueError("GROQ_API_KEY is missing! Please set it as an environment variable.")

        self.model = model
        self.client = Groq(api_key=API_KEY)

    def enhance_image(self, input_path, output_path):
        """Read an image with OpenCV and write it to *output_path*.

        No filtering or correction is applied: the image is only decoded and
        written back out, so this is a re-encode step rather than a true
        enhancement.

        Args:
            input_path: Path of the image to read.
            output_path: Path the image is written to.

        Returns:
            ``output_path``, once the file has been written.

        Raises:
            FileNotFoundError: If *input_path* does not exist.
            ValueError: If OpenCV cannot decode *input_path*.
            RuntimeError: If OpenCV reports that the write failed.
        """
        if not os.path.exists(input_path):
            raise FileNotFoundError(f"File not found: {input_path}")

        image = cv2.imread(input_path)
        if image is None:
            raise ValueError(f"Could not process image: {input_path}")

        # cv2.imwrite signals failure through its return value, so an
        # unchecked call would hand back a path that was never written.
        if not cv2.imwrite(output_path, image):
            raise RuntimeError(f"Failed to save image: {output_path}")
        return output_path

    def convert_pdf_to_images(self, pdf_path, save_dir="./uploads"):
        """Render every page of a PDF to a PNG file.

        Pages are saved as ``page_1.png``, ``page_2.png``, ... inside
        *save_dir*; files with those names are overwritten.

        Args:
            pdf_path: Path of the PDF to convert.
            save_dir: Directory for the page images; created if missing.

        Returns:
            The image file paths, in page order.

        Raises:
            FileNotFoundError: If *pdf_path* does not exist.
            RuntimeError: If a page image is missing after being saved.
        """
        if not os.path.exists(pdf_path):
            raise FileNotFoundError(f"PDF file not found: {pdf_path}")

        os.makedirs(save_dir, exist_ok=True)
        image_paths = []

        # The context manager closes the document even when rendering or
        # saving a page raises part-way through.
        with fitz.open(pdf_path) as doc:
            for page_idx in range(len(doc)):
                pixmap = doc.load_page(page_idx).get_pixmap()

                image_file = os.path.join(save_dir, f"page_{page_idx + 1}.png")
                pixmap.save(image_file)

                if not os.path.exists(image_file):
                    raise RuntimeError(f"Failed to save image: {image_file}")

                image_paths.append(image_file)

        return image_paths

    def encode_image(self, img_path):
        """Return the contents of an image file as a base64 string.

        Args:
            img_path: Path of the file to encode.

        Returns:
            The base64 text (UTF-8 decoded) of the file's bytes.

        Raises:
            FileNotFoundError: If *img_path* does not exist.
            RuntimeError: If the file cannot be read.
            ValueError: If the encoded data is shorter than 50 characters.
        """
        if not os.path.exists(img_path):
            raise FileNotFoundError(f"File not found: {img_path}")

        # Only the file read is guarded, so the length check below surfaces
        # as a ValueError instead of being re-wrapped as a read failure.
        try:
            with open(img_path, "rb") as img_file:
                raw_bytes = img_file.read()
        except OSError as e:
            raise RuntimeError(f"Failed to encode image: {e}") from e

        encoded_data = base64.b64encode(raw_bytes).decode("utf-8")
        if len(encoded_data) < self._MIN_ENCODED_LENGTH:
            raise ValueError("Encoded image data is too short, possibly corrupted.")

        return encoded_data

    @staticmethod
    def _detect_mime_type(encoded_img):
        """Guess an image MIME type from the first bytes of base64 data.

        Recognises PNG, JPEG, GIF and WebP signatures. Anything else,
        including data that cannot be decoded, yields ``image/jpeg``.
        """
        try:
            # 24 base64 characters decode to 18 bytes, which covers the
            # longest signature checked here (WebP needs bytes 0-11).
            header = base64.b64decode(encoded_img[:24])
        except ValueError:  # binascii.Error is a ValueError subclass
            return OCRProcessor._DEFAULT_MIME_TYPE

        if header.startswith(b"\x89PNG\r\n\x1a\n"):
            return "image/png"
        if header.startswith(b"\xff\xd8\xff"):
            return "image/jpeg"
        if header.startswith((b"GIF87a", b"GIF89a")):
            return "image/gif"
        if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
            return "image/webp"
        return OCRProcessor._DEFAULT_MIME_TYPE

    def extract_text_from_image(self, encoded_img, prompt_text):
        """Send a base64 image and a prompt to the configured vision model.

        The image is embedded as a data URL whose MIME type is taken from the
        image's own signature, so PNG pages are not mislabelled as JPEG.

        Args:
            encoded_img: Base64-encoded image data.
            prompt_text: Instruction text sent alongside the image.

        Returns:
            The ``message`` attribute of the first choice in the response,
            returned unchanged.

        Raises:
            ValueError: If *encoded_img* is empty or under 50 characters.
            RuntimeError: If the request fails or the response has no choices.
        """
        if not encoded_img or len(encoded_img) < self._MIN_ENCODED_LENGTH:
            raise ValueError("Invalid base64-encoded image data!")

        mime_type = self._detect_mime_type(encoded_img)
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt_text},
                    {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{encoded_img}"}},
                ],
            }
        ]

        try:
            response = self.client.chat.completions.create(model=self.model, messages=messages)
            return response.choices[0].message

        except Exception as err:
            raise RuntimeError(f"OCR extraction failed: {err}") from err