staghado committed on
Commit
6a172b5
·
verified ·
1 Parent(s): c2ad295

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -19
app.py CHANGED
@@ -22,18 +22,19 @@ def image_to_base64(image):
22
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
23
 
24
 
25
- def render_pdf_page(page, max_resolution=1540, scale=2.77):
26
  width, height = page.get_size()
27
  pixel_width = width * scale
28
  pixel_height = height * scale
29
- resize_factor = min(1, max_resolution / pixel_width, max_resolution / pixel_height)
30
  target_scale = scale * resize_factor
31
  return page.render(scale=target_scale, rev_byteorder=True).to_pil()
32
 
33
 
34
  def process_pdf(pdf_path, max_pages=5):
35
  pdf = pdfium.PdfDocument(pdf_path)
36
- num_pages = min(len(pdf), max_pages)
 
37
  images = []
38
 
39
  for i in range(num_pages):
@@ -42,24 +43,49 @@ def process_pdf(pdf_path, max_pages=5):
42
  images.append(img)
43
 
44
  pdf.close()
45
- return images
46
 
47
 
48
- def process_input(image, pdf_file, temperature):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  if image is None and pdf_file is None:
50
- yield "Please upload an image or PDF first.", ""
51
  return
52
 
53
  images_to_process = []
 
54
 
55
  if pdf_file is not None:
56
  try:
57
- images_to_process = process_pdf(pdf_file, max_pages=5)
58
- if len(images_to_process) == 0:
59
- yield "Error: Could not extract pages from PDF.", ""
60
- return
 
 
 
 
 
 
 
 
 
61
  except Exception as e:
62
- yield f"Error processing PDF: {str(e)}", ""
63
  return
64
  elif image is not None:
65
  images_to_process = [image]
@@ -112,13 +138,13 @@ def process_input(image, pdf_file, temperature):
112
  content_delta = delta.get('content', '')
113
  if content_delta:
114
  accumulated_response += content_delta
115
- yield accumulated_response, accumulated_response
116
  except json.JSONDecodeError:
117
  continue
118
 
119
  except Exception as e:
120
  error_msg = f"Error: {str(e)}"
121
- yield error_msg, error_msg
122
 
123
 
124
  with gr.Blocks(title="📖 Image/PDF OCR", theme=gr.themes.Soft()) as demo:
@@ -143,11 +169,23 @@ with gr.Blocks(title="📖 Image/PDF OCR", theme=gr.themes.Soft()) as demo:
143
  height=400
144
  )
145
  pdf_input = gr.File(
146
- label="📄 Upload PDF (max 5 pages)",
147
  file_types=[".pdf"],
148
  type="filepath"
149
  )
150
- gr.Markdown("*Upload either an image or PDF, not both*")
 
 
 
 
 
 
 
 
 
 
 
 
151
  temperature = gr.Slider(
152
  minimum=0.1,
153
  maximum=1.0,
@@ -177,13 +215,13 @@ with gr.Blocks(title="📖 Image/PDF OCR", theme=gr.themes.Soft()) as demo:
177
 
178
  submit_btn.click(
179
  fn=process_input,
180
- inputs=[image_input, pdf_input, temperature],
181
- outputs=[output_text, raw_output]
182
  )
183
 
184
  clear_btn.click(
185
- fn=lambda: (None, None, "", ""),
186
- outputs=[image_input, pdf_input, output_text, raw_output]
187
  )
188
 
189
 
 
22
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
23
 
24
 
25
+ def render_pdf_page(page, max_resolution=1280, scale=2.77):
26
  width, height = page.get_size()
27
  pixel_width = width * scale
28
  pixel_height = height * scale
29
+ resize_factor = min(max_resolution / pixel_width, max_resolution / pixel_height)
30
  target_scale = scale * resize_factor
31
  return page.render(scale=target_scale, rev_byteorder=True).to_pil()
32
 
33
 
34
  def process_pdf(pdf_path, max_pages=5):
35
  pdf = pdfium.PdfDocument(pdf_path)
36
+ total_pages = len(pdf)
37
+ num_pages = min(total_pages, max_pages)
38
  images = []
39
 
40
  for i in range(num_pages):
 
43
  images.append(img)
44
 
45
  pdf.close()
46
+ return images, total_pages
47
 
48
 
49
+ def process_single_page(pdf_path, page_number):
50
+ pdf = pdfium.PdfDocument(pdf_path)
51
+ total_pages = len(pdf)
52
+
53
+ if page_number < 1 or page_number > total_pages:
54
+ pdf.close()
55
+ return None, total_pages
56
+
57
+ page = pdf[page_number - 1]
58
+ img = render_pdf_page(page)
59
+ pdf.close()
60
+
61
+ return img, total_pages
62
+
63
+
64
+ def process_input(image, pdf_file, temperature, page_number):
65
  if image is None and pdf_file is None:
66
+ yield "Please upload an image or PDF first.", "", ""
67
  return
68
 
69
  images_to_process = []
70
+ page_info = ""
71
 
72
  if pdf_file is not None:
73
  try:
74
+ if page_number > 0:
75
+ img, total_pages = process_single_page(pdf_file, page_number)
76
+ if img is None:
77
+ yield f"Error: Page {page_number} does not exist. PDF has {total_pages} pages.", "", ""
78
+ return
79
+ images_to_process = [img]
80
+ page_info = f"Processing page {page_number} of {total_pages}"
81
+ else:
82
+ images_to_process, total_pages = process_pdf(pdf_file, max_pages=5)
83
+ if len(images_to_process) == 0:
84
+ yield "Error: Could not extract pages from PDF.", "", ""
85
+ return
86
+ page_info = f"Processing first {len(images_to_process)} pages of {total_pages}"
87
  except Exception as e:
88
+ yield f"Error processing PDF: {str(e)}", "", ""
89
  return
90
  elif image is not None:
91
  images_to_process = [image]
 
138
  content_delta = delta.get('content', '')
139
  if content_delta:
140
  accumulated_response += content_delta
141
+ yield accumulated_response, accumulated_response, page_info
142
  except json.JSONDecodeError:
143
  continue
144
 
145
  except Exception as e:
146
  error_msg = f"Error: {str(e)}"
147
+ yield error_msg, error_msg, page_info
148
 
149
 
150
  with gr.Blocks(title="📖 Image/PDF OCR", theme=gr.themes.Soft()) as demo:
 
169
  height=400
170
  )
171
  pdf_input = gr.File(
172
+ label="📄 Upload PDF",
173
  file_types=[".pdf"],
174
  type="filepath"
175
  )
176
+ page_number = gr.Number(
177
+ label="Specific Page Number (0 = first 5 pages)",
178
+ value=0,
179
+ minimum=0,
180
+ step=1,
181
+ precision=0
182
+ )
183
+ page_info = gr.Textbox(
184
+ label="Page Info",
185
+ value="",
186
+ interactive=False
187
+ )
188
+ gr.Markdown("*Upload either an image or PDF. For PDF: set page number to 0 for first 5 pages, or specify a page*")
189
  temperature = gr.Slider(
190
  minimum=0.1,
191
  maximum=1.0,
 
215
 
216
  submit_btn.click(
217
  fn=process_input,
218
+ inputs=[image_input, pdf_input, temperature, page_number],
219
+ outputs=[output_text, raw_output, page_info]
220
  )
221
 
222
  clear_btn.click(
223
+ fn=lambda: (None, None, "", "", 0, ""),
224
+ outputs=[image_input, pdf_input, output_text, raw_output, page_number, page_info]
225
  )
226
 
227