staghado committed on
Commit
a036cd1
·
verified ·
1 Parent(s): 6a172b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -26
app.py CHANGED
@@ -18,6 +18,8 @@ if not ENDPOINT or not MODEL:
18
 
19
  def image_to_base64(image):
20
  buffered = BytesIO()
 
 
21
  image.save(buffered, format="PNG")
22
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
23
 
@@ -61,25 +63,27 @@ def process_single_page(pdf_path, page_number):
61
  return img, total_pages
62
 
63
 
64
- def process_input(image, pdf_file, temperature, page_number):
65
- if image is None and pdf_file is None:
66
  yield "Please upload an image or PDF first.", "", ""
67
  return
68
 
69
  images_to_process = []
70
  page_info = ""
71
 
72
- if pdf_file is not None:
 
 
73
  try:
74
  if page_number > 0:
75
- img, total_pages = process_single_page(pdf_file, page_number)
76
  if img is None:
77
  yield f"Error: Page {page_number} does not exist. PDF has {total_pages} pages.", "", ""
78
  return
79
  images_to_process = [img]
80
  page_info = f"Processing page {page_number} of {total_pages}"
81
  else:
82
- images_to_process, total_pages = process_pdf(pdf_file, max_pages=5)
83
  if len(images_to_process) == 0:
84
  yield "Error: Could not extract pages from PDF.", "", ""
85
  return
@@ -87,17 +91,31 @@ def process_input(image, pdf_file, temperature, page_number):
87
  except Exception as e:
88
  yield f"Error processing PDF: {str(e)}", "", ""
89
  return
90
- elif image is not None:
91
- images_to_process = [image]
 
 
 
 
 
 
 
 
 
 
92
 
93
  content = [{"type": "text", "text": ""}]
94
 
95
  for img in images_to_process:
96
- b64_image = image_to_base64(img)
97
- content.append({
98
- "type": "image_url",
99
- "image_url": {"url": f"data:image/png;base64,{b64_image}"}
100
- })
 
 
 
 
101
 
102
  payload = {
103
  "model": MODEL,
@@ -162,19 +180,13 @@ with gr.Blocks(title="📖 Image/PDF OCR", theme=gr.themes.Soft()) as demo:
162
 
163
  with gr.Row():
164
  with gr.Column(scale=1):
165
- image_input = gr.Image(
166
- type="pil",
167
- label="🖼️ Upload Image",
168
- sources=["upload", "clipboard"],
169
- height=400
170
- )
171
- pdf_input = gr.File(
172
- label="📄 Upload PDF",
173
- file_types=[".pdf"],
174
  type="filepath"
175
  )
176
  page_number = gr.Number(
177
- label="Specific Page Number (0 = first 5 pages)",
178
  value=0,
179
  minimum=0,
180
  step=1,
@@ -185,7 +197,7 @@ with gr.Blocks(title="📖 Image/PDF OCR", theme=gr.themes.Soft()) as demo:
185
  value="",
186
  interactive=False
187
  )
188
- gr.Markdown("*Upload either an image or PDF. For PDF: set page number to 0 for first 5 pages, or specify a page*")
189
  temperature = gr.Slider(
190
  minimum=0.1,
191
  maximum=1.0,
@@ -215,13 +227,13 @@ with gr.Blocks(title="📖 Image/PDF OCR", theme=gr.themes.Soft()) as demo:
215
 
216
  submit_btn.click(
217
  fn=process_input,
218
- inputs=[image_input, pdf_input, temperature, page_number],
219
  outputs=[output_text, raw_output, page_info]
220
  )
221
 
222
  clear_btn.click(
223
- fn=lambda: (None, None, "", "", 0, ""),
224
- outputs=[image_input, pdf_input, output_text, raw_output, page_number, page_info]
225
  )
226
 
227
 
 
18
 
19
  def image_to_base64(image):
20
  buffered = BytesIO()
21
+ if image.mode == 'RGBA':
22
+ image = image.convert('RGB')
23
  image.save(buffered, format="PNG")
24
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
25
 
 
63
  return img, total_pages
64
 
65
 
66
+ def process_input(file_input, temperature, page_number):
67
+ if file_input is None:
68
  yield "Please upload an image or PDF first.", "", ""
69
  return
70
 
71
  images_to_process = []
72
  page_info = ""
73
 
74
+ file_path = file_input if isinstance(file_input, str) else file_input.name
75
+
76
+ if file_path.lower().endswith('.pdf'):
77
  try:
78
  if page_number > 0:
79
+ img, total_pages = process_single_page(file_path, page_number)
80
  if img is None:
81
  yield f"Error: Page {page_number} does not exist. PDF has {total_pages} pages.", "", ""
82
  return
83
  images_to_process = [img]
84
  page_info = f"Processing page {page_number} of {total_pages}"
85
  else:
86
+ images_to_process, total_pages = process_pdf(file_path, max_pages=5)
87
  if len(images_to_process) == 0:
88
  yield "Error: Could not extract pages from PDF.", "", ""
89
  return
 
91
  except Exception as e:
92
  yield f"Error processing PDF: {str(e)}", "", ""
93
  return
94
+ else:
95
+ try:
96
+ img = Image.open(file_path)
97
+ images_to_process = [img]
98
+ except Exception as e:
99
+ yield f"Error opening image: {str(e)}", "", ""
100
+ return
101
+
102
+ for img in images_to_process:
103
+ if not isinstance(img, Image.Image):
104
+ yield "Error: Invalid image format.", "", ""
105
+ return
106
 
107
  content = [{"type": "text", "text": ""}]
108
 
109
  for img in images_to_process:
110
+ try:
111
+ b64_image = image_to_base64(img)
112
+ content.append({
113
+ "type": "image_url",
114
+ "image_url": {"url": f"data:image/png;base64,{b64_image}"}
115
+ })
116
+ except Exception as e:
117
+ yield f"Error encoding image: {str(e)}", "", ""
118
+ return
119
 
120
  payload = {
121
  "model": MODEL,
 
180
 
181
  with gr.Row():
182
  with gr.Column(scale=1):
183
+ file_input = gr.File(
184
+ label="🖼️ Upload Image or PDF",
185
+ file_types=[".pdf", ".png", ".jpg", ".jpeg"],
 
 
 
 
 
 
186
  type="filepath"
187
  )
188
  page_number = gr.Number(
189
+ label="PDF: Page Number (0 = first 5 pages)",
190
  value=0,
191
  minimum=0,
192
  step=1,
 
197
  value="",
198
  interactive=False
199
  )
200
+ gr.Markdown("*Upload an image (PNG/JPG) or PDF. For PDF: 0 = first 5 pages, or specify page number*")
201
  temperature = gr.Slider(
202
  minimum=0.1,
203
  maximum=1.0,
 
227
 
228
  submit_btn.click(
229
  fn=process_input,
230
+ inputs=[file_input, temperature, page_number],
231
  outputs=[output_text, raw_output, page_info]
232
  )
233
 
234
  clear_btn.click(
235
+ fn=lambda: (None, "", "", 0, ""),
236
+ outputs=[file_input, output_text, raw_output, page_number, page_info]
237
  )
238
 
239