Spaces:
Sleeping
Sleeping
| import os | |
| import base64 | |
| import cv2 | |
| import numpy as np | |
| import fitz # PyMuPDF | |
| from dotenv import load_dotenv | |
| from groq import Groq | |
# Let python-dotenv populate the process environment before configuration is read.
load_dotenv()

# Groq API key taken from the environment; None when GROQ_API_KEY is unset.
API_KEY = os.environ.get("GROQ_API_KEY")
class OCRProcessor:
    """OCR helper that sends images to a Groq-hosted vision model.

    Bundles the steps needed to go from a file on disk to a model response:
    re-saving an image with OpenCV, rasterising PDF pages with PyMuPDF,
    base64-encoding image files, and sending an image plus a prompt to the
    configured model.
    """

    # Base64 payloads shorter than this many characters are rejected as
    # empty or corrupted.
    MIN_ENCODED_LENGTH = 50

    # Media type used in the data URL when the image format is not recognised.
    DEFAULT_MIME_TYPE = "image/jpeg"

    # Resolution PyMuPDF renders at when no zoom matrix is supplied.
    PDF_BASE_DPI = 72

    # Leading file-signature bytes mapped to the matching media type.
    _MAGIC_PREFIXES = (
        (b"\x89PNG\r\n\x1a\n", "image/png"),
        (b"\xff\xd8\xff", "image/jpeg"),
        (b"GIF87a", "image/gif"),
        (b"GIF89a", "image/gif"),
    )

    def __init__(self, model="llama-3.2-90b-vision-preview", api_key=None):
        """Create a processor bound to one Groq model.

        Args:
            model: Name of the Groq model used for every request.
            api_key: Optional explicit API key. When omitted, the
                module-level ``API_KEY`` (read from ``GROQ_API_KEY``) is used.

        Raises:
            ValueError: If no API key is available.
        """
        resolved_key = api_key or API_KEY
        if not resolved_key:
            raise ValueError("GROQ_API_KEY is missing! Please set it as an environment variable.")
        self.model = model
        self.client = Groq(api_key=resolved_key)

    def enhance_image(self, input_path, output_path):
        """Re-save an image through OpenCV and return the output path.

        NOTE: no filtering is applied yet. The image is decoded with
        ``cv2.imread`` and written back as-is with ``cv2.imwrite``; this is
        the hook where real pre-processing would go.

        Args:
            input_path: Path of the image to read.
            output_path: Path the image is written to.

        Returns:
            ``output_path``.

        Raises:
            FileNotFoundError: If ``input_path`` does not exist.
            ValueError: If OpenCV cannot decode the file.
            OSError: If OpenCV reports that the output could not be written.
        """
        if not os.path.exists(input_path):
            raise FileNotFoundError(f"File not found: {input_path}")

        image = cv2.imread(input_path)
        if image is None:
            raise ValueError(f"Could not process image: {input_path}")

        # imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(output_path, image):
            raise OSError(f"Failed to save image: {output_path}")
        return output_path

    def convert_pdf_to_images(self, pdf_path, save_dir="./uploads", dpi=None):
        """Render every page of a PDF to a PNG file.

        Files are named ``page_<n>.png`` (1-based) inside ``save_dir``.
        NOTE: the name depends only on the page number, so converting a
        second PDF into the same directory overwrites earlier pages.

        Args:
            pdf_path: Path of the PDF to convert.
            save_dir: Directory for the PNG files; created if missing.
            dpi: Optional render resolution. ``None`` keeps PyMuPDF's default
                rendering; a higher value yields larger, sharper images.

        Returns:
            List of the written image paths, in page order.

        Raises:
            FileNotFoundError: If ``pdf_path`` does not exist.
            ValueError: If ``dpi`` is given but not positive.
            RuntimeError: If a page image is missing after saving.
        """
        if not os.path.exists(pdf_path):
            raise FileNotFoundError(f"PDF file not found: {pdf_path}")
        if dpi is not None and dpi <= 0:
            raise ValueError(f"dpi must be positive, got {dpi}")

        os.makedirs(save_dir, exist_ok=True)

        # A zoom matrix scales the render relative to the base resolution.
        matrix = None
        if dpi is not None:
            zoom = dpi / self.PDF_BASE_DPI
            matrix = fitz.Matrix(zoom, zoom)

        image_paths = []
        # The context manager closes the document even if a page fails.
        with fitz.open(pdf_path) as doc:
            for page_number, page in enumerate(doc, start=1):
                if matrix is None:
                    pixmap = page.get_pixmap()
                else:
                    pixmap = page.get_pixmap(matrix=matrix)

                image_file = os.path.join(save_dir, f"page_{page_number}.png")
                pixmap.save(image_file)
                if not os.path.exists(image_file):
                    raise RuntimeError(f"Failed to save image: {image_file}")
                image_paths.append(image_file)
        return image_paths

    def encode_image(self, img_path):
        """Read an image file and return its contents as a base64 string.

        Args:
            img_path: Path of the file to encode.

        Returns:
            The file's bytes, base64-encoded and decoded to ``str``.

        Raises:
            FileNotFoundError: If ``img_path`` does not exist.
            RuntimeError: If the file cannot be read.
            ValueError: If the encoded data is shorter than
                ``MIN_ENCODED_LENGTH`` characters.
        """
        if not os.path.exists(img_path):
            raise FileNotFoundError(f"File not found: {img_path}")

        # Only the read can fail with an I/O error, so only it is guarded.
        try:
            with open(img_path, "rb") as img_file:
                raw_bytes = img_file.read()
        except OSError as err:
            raise RuntimeError(f"Failed to encode image: {err}") from err

        encoded_data = base64.b64encode(raw_bytes).decode("utf-8")
        if len(encoded_data) < self.MIN_ENCODED_LENGTH:
            raise ValueError(
                "Failed to encode image: "
                "Encoded image data is too short, possibly corrupted."
            )
        return encoded_data

    @classmethod
    def _guess_mime_type(cls, encoded_img):
        """Return the media type implied by the payload's leading bytes.

        Falls back to ``DEFAULT_MIME_TYPE`` when the header cannot be decoded
        or does not match a known signature.
        """
        try:
            # 16 base64 characters decode to the first 12 raw bytes, enough
            # for every signature checked below.
            header = base64.b64decode(encoded_img[:16])
        except (ValueError, TypeError):
            return cls.DEFAULT_MIME_TYPE

        for signature, mime_type in cls._MAGIC_PREFIXES:
            if header.startswith(signature):
                return mime_type
        if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
            return "image/webp"
        return cls.DEFAULT_MIME_TYPE

    def extract_text_from_image(self, encoded_img, prompt_text, mime_type=None):
        """Send a base64 image and a prompt to the model.

        Args:
            encoded_img: Base64-encoded image data.
            prompt_text: Instruction sent alongside the image.
            mime_type: Optional media type for the data URL. When omitted it
                is detected from the image bytes, so PNG pages produced by
                ``convert_pdf_to_images`` are no longer labelled as JPEG.

        Returns:
            The ``message`` object of the first completion choice (not a
            plain string; the text is presumably in its ``content``
            attribute - confirm against the Groq client).

        Raises:
            ValueError: If ``encoded_img`` is empty or shorter than
                ``MIN_ENCODED_LENGTH`` characters.
            RuntimeError: If the API request fails.
        """
        if not encoded_img or len(encoded_img) < self.MIN_ENCODED_LENGTH:
            raise ValueError("Invalid base64-encoded image data!")

        media_type = mime_type or self._guess_mime_type(encoded_img)
        data_url = f"data:{media_type};base64,{encoded_img}"
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt_text},
                    {"type": "image_url", "image_url": {"url": data_url}},
                ],
            }
        ]

        try:
            response = self.client.chat.completions.create(
                model=self.model, messages=messages
            )
            return response.choices[0].message
        except Exception as err:
            raise RuntimeError(f"OCR extraction failed: {err}") from err