mohammedriza-rahman committed on
Commit
efa7e0c
·
verified ·
1 Parent(s): a7b827c

Update ocr_processing.py

Browse files
Files changed (1) hide show
  1. ocr_processing.py +50 -3
ocr_processing.py CHANGED
@@ -12,10 +12,16 @@ API_KEY = os.getenv("GROQ_API_KEY") # Fetch API key from environment
12
 
13
  class OCRProcessor:
14
  def __init__(self, model="llama-3.2-90b-vision-preview"):
 
 
 
15
  self.model = model
16
  self.client = Groq(api_key=API_KEY)
17
 
18
  def enhance_image(self, input_path, output_path):
 
 
 
19
  if not os.path.exists(input_path):
20
  raise FileNotFoundError(f"File not found: {input_path}")
21
 
@@ -27,6 +33,12 @@ class OCRProcessor:
27
  return output_path
28
 
29
  def convert_pdf_to_images(self, pdf_path, save_dir="./uploads"):
 
 
 
 
 
 
30
  os.makedirs(save_dir, exist_ok=True)
31
  doc = fitz.open(pdf_path)
32
  image_paths = []
@@ -34,22 +46,57 @@ class OCRProcessor:
34
  for page_idx in range(len(doc)):
35
  page = doc.load_page(page_idx)
36
  img = page.get_pixmap()
 
37
  image_file = os.path.join(save_dir, f"page_{page_idx + 1}.png")
38
  img.save(image_file)
 
 
 
 
39
  image_paths.append(image_file)
40
 
41
  doc.close()
42
  return image_paths
43
 
44
  def encode_image(self, img_path):
45
- with open(img_path, "rb") as img_file:
46
- return base64.b64encode(img_file.read()).decode("utf-8")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  def extract_text_from_image(self, encoded_img, prompt_text):
49
- messages = [{"role": "user", "content": [{"type": "text", "text": prompt_text}, {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_img}"}}]}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  try:
52
  response = self.client.chat.completions.create(model=self.model, messages=messages)
53
  return response.choices[0].message
 
54
  except Exception as err:
55
  raise Exception(f"OCR extraction failed: {err}")
 
12
 
13
  class OCRProcessor:
14
  def __init__(self, model="llama-3.2-90b-vision-preview"):
15
+ if not API_KEY:
16
+ raise ValueError("GROQ_API_KEY is missing! Please set it as an environment variable.")
17
+
18
  self.model = model
19
  self.client = Groq(api_key=API_KEY)
20
 
21
  def enhance_image(self, input_path, output_path):
22
+ """
23
+ Enhances the quality of an image for OCR processing.
24
+ """
25
  if not os.path.exists(input_path):
26
  raise FileNotFoundError(f"File not found: {input_path}")
27
 
 
33
  return output_path
34
 
35
  def convert_pdf_to_images(self, pdf_path, save_dir="./uploads"):
36
+ """
37
+ Converts a PDF to images and returns the image file paths.
38
+ """
39
+ if not os.path.exists(pdf_path):
40
+ raise FileNotFoundError(f"PDF file not found: {pdf_path}")
41
+
42
  os.makedirs(save_dir, exist_ok=True)
43
  doc = fitz.open(pdf_path)
44
  image_paths = []
 
46
  for page_idx in range(len(doc)):
47
  page = doc.load_page(page_idx)
48
  img = page.get_pixmap()
49
+
50
  image_file = os.path.join(save_dir, f"page_{page_idx + 1}.png")
51
  img.save(image_file)
52
+
53
+ if not os.path.exists(image_file):
54
+ raise Exception(f"Failed to save image: {image_file}")
55
+
56
  image_paths.append(image_file)
57
 
58
  doc.close()
59
  return image_paths
60
 
61
  def encode_image(self, img_path):
62
+ """
63
+ Encodes an image to base64 format after verifying its existence.
64
+ """
65
+ if not os.path.exists(img_path):
66
+ raise FileNotFoundError(f"File not found: {img_path}")
67
+
68
+ try:
69
+ with open(img_path, "rb") as img_file:
70
+ encoded_data = base64.b64encode(img_file.read()).decode("utf-8")
71
+
72
+ if not encoded_data or len(encoded_data) < 50:
73
+ raise ValueError("Encoded image data is too short, possibly corrupted.")
74
+
75
+ return encoded_data
76
+
77
+ except Exception as e:
78
+ raise Exception(f"Failed to encode image: {e}")
79
 
80
  def extract_text_from_image(self, encoded_img, prompt_text):
81
+ """
82
+ Extracts text from an image using OCR. Ensures base64 encoding is valid.
83
+ """
84
+ if not encoded_img or len(encoded_img) < 50: # Ensures valid base64 string
85
+ raise ValueError("Invalid base64-encoded image data!")
86
+
87
+ messages = [
88
+ {
89
+ "role": "user",
90
+ "content": [
91
+ {"type": "text", "text": prompt_text},
92
+ {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_img}"}}
93
+ ]
94
+ }
95
+ ]
96
 
97
  try:
98
  response = self.client.chat.completions.create(model=self.model, messages=messages)
99
  return response.choices[0].message
100
+
101
  except Exception as err:
102
  raise Exception(f"OCR extraction failed: {err}")