dev2607 committed on
Commit
447cd51
·
verified ·
1 Parent(s): c0630c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -95
app.py CHANGED
@@ -8,18 +8,6 @@ import gradio as gr
8
  import requests
9
  import json
10
  from dotenv import load_dotenv
11
- import spaces
12
- from transformers import AutoModel, AutoTokenizer
13
- from PIL import Image
14
- import numpy as np
15
- import os
16
- import base64
17
- import io
18
- import uuid
19
- import tempfile
20
- import time
21
- import shutil
22
- from pathlib import Path
23
 
24
  # Attempt to install pytesseract if not found
25
  try:
@@ -146,91 +134,55 @@ def dummy_analyze(ingredients_list, health_conditions=None):
146
  return report
147
 
148
  # Function to extract text from images using OCR
149
- tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
150
- model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True)
151
- model = model.eval().cuda()
152
-
153
- UPLOAD_FOLDER = "./uploads"
154
- RESULTS_FOLDER = "./results"
155
-
156
- for folder in [UPLOAD_FOLDER, RESULTS_FOLDER]:
157
- if not os.path.exists(folder):
158
- os.makedirs(folder)
159
-
160
- def image_to_base64(image):
161
- buffered = io.BytesIO()
162
- image.save(buffered, format="PNG")
163
- return base64.b64encode(buffered.getvalue()).decode()
164
-
165
- @spaces.GPU
166
- def run_GOT(image, got_mode, fine_grained_mode="", ocr_color="", ocr_box=""):
167
- unique_id = str(uuid.uuid4())
168
- image_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}.png")
169
- result_path = os.path.join(RESULTS_FOLDER, f"{unique_id}.html")
170
-
171
- shutil.copy(image, image_path)
172
-
173
  try:
174
- if got_mode == "plain texts OCR":
175
- res = model.chat(tokenizer, image_path, ocr_type='ocr')
176
- return res, None
177
- elif got_mode == "format texts OCR":
178
- res = model.chat(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
179
- elif got_mode == "plain multi-crop OCR":
180
- res = model.chat_crop(tokenizer, image_path, ocr_type='ocr')
181
- return res, None
182
- elif got_mode == "format multi-crop OCR":
183
- res = model.chat_crop(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
184
- elif got_mode == "plain fine-grained OCR":
185
- res = model.chat(tokenizer, image_path, ocr_type='ocr', ocr_box=ocr_box, ocr_color=ocr_color)
186
- return res, None
187
- elif got_mode == "format fine-grained OCR":
188
- res = model.chat(tokenizer, image_path, ocr_type='format', ocr_box=ocr_box, ocr_color=ocr_color, render=True, save_render_file=result_path)
189
-
190
- # res_markdown = f"$$ {res} $$"
191
- res_markdown = res
192
-
193
- if "format" in got_mode and os.path.exists(result_path):
194
- with open(result_path, 'r') as f:
195
- html_content = f.read()
196
- encoded_html = base64.b64encode(html_content.encode('utf-8')).decode('utf-8')
197
- iframe_src = f"data:text/html;base64,{encoded_html}"
198
- iframe = f'<iframe src="{iframe_src}" width="100%" height="600px"></iframe>'
199
- download_link = f'<a href="data:text/html;base64,{encoded_html}" download="result_{unique_id}.html">Download Full Result</a>'
200
- return res_markdown, f"{download_link}<br>{iframe}"
201
- else:
202
- return res_markdown, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  except Exception as e:
204
- return f"Error: {str(e)}", None
205
- finally:
206
- if os.path.exists(image_path):
207
- os.remove(image_path)
208
-
209
- def task_update(task):
210
- if "fine-grained" in task:
211
- return [
212
- gr.update(visible=True),
213
- gr.update(visible=False),
214
- gr.update(visible=False),
215
- ]
216
- else:
217
- return [
218
- gr.update(visible=False),
219
- gr.update(visible=False),
220
- gr.update(visible=False),
221
- ]
222
-
223
- def fine_grained_update(task):
224
- if task == "box":
225
- return [
226
- gr.update(visible=False, value = ""),
227
- gr.update(visible=True),
228
- ]
229
- elif task == 'color':
230
- return [
231
- gr.update(visible=True),
232
- gr.update(visible=False, value = ""),
233
- ]
234
 
235
  # Function to parse ingredients from text
236
  def parse_ingredients(text):
 
8
  import requests
9
  import json
10
  from dotenv import load_dotenv
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  # Attempt to install pytesseract if not found
13
  try:
 
134
  return report
135
 
136
  # Function to extract text from images using OCR
137
+ def extract_text_from_image(image):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  try:
139
+ if image is None:
140
+ return "No image captured. Please try again."
141
+
142
+ # Verify Tesseract executable is accessible
143
+ try:
144
+ subprocess.run([pytesseract.pytesseract.tesseract_cmd, "--version"],
145
+ check=True, capture_output=True, text=True)
146
+ except (subprocess.SubprocessError, FileNotFoundError):
147
+ return "Tesseract OCR is not installed or not properly configured. Please check installation."
148
+
149
+ # Image preprocessing for better OCR
150
+ import cv2
151
+ import numpy as np
152
+
153
+ # Convert PIL image to OpenCV format
154
+ img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
155
+
156
+ # Convert to grayscale
157
+ gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
158
+
159
+ # Apply thresholding to get black and white image
160
+ _, binary = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
161
+
162
+ # Noise removal
163
+ kernel = np.ones((1, 1), np.uint8)
164
+ binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
165
+
166
+ # Dilate to connect text
167
+ binary = cv2.dilate(binary, kernel, iterations=1)
168
+
169
+ # Convert back to PIL image for tesseract
170
+ binary_pil = Image.fromarray(cv2.bitwise_not(binary))
171
+
172
+ # Run OCR with improved configuration
173
+ custom_config = r'--oem 3 --psm 6 -l eng'
174
+ text = pytesseract.image_to_string(binary_pil, config=custom_config)
175
+
176
+ if not text.strip():
177
+ # Try original image as fallback
178
+ text = pytesseract.image_to_string(image, config=custom_config)
179
+
180
+ if not text.strip():
181
+ return "No text could be extracted. Ensure image is clear and readable."
182
+
183
+ return text
184
  except Exception as e:
185
+ return f"Error extracting text: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
  # Function to parse ingredients from text
188
  def parse_ingredients(text):