staghado committed on
Commit
a02a7ea
·
verified ·
1 Parent(s): 85c77a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -59
app.py CHANGED
@@ -7,7 +7,6 @@ import gradio as gr
7
  from PIL import Image
8
  from io import BytesIO
9
  import pypdfium2 as pdfium
10
- from pathlib import Path
11
 
12
  ENDPOINT = os.environ.get("VLLM_ENDPOINT")
13
  MODEL = os.environ.get("VLLM_MODEL")
@@ -24,22 +23,22 @@ def image_to_base64(image):
24
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
25
 
26
 
27
- def render_pdf_page(page, max_resolution=1540, scale=2.77):
28
  width, height = page.get_size()
29
  pixel_width = width * scale
30
  pixel_height = height * scale
31
- resize_factor = min(1, max_resolution / pixel_width, max_resolution / pixel_height)
32
  target_scale = scale * resize_factor
33
  return page.render(scale=target_scale, rev_byteorder=True).to_pil()
34
 
35
 
36
- def process_pdf(pdf_path, max_pages=5):
37
  pdf = pdfium.PdfDocument(pdf_path)
38
  total_pages = len(pdf)
39
- num_pages = min(total_pages, max_pages)
40
  images = []
41
 
42
- for i in range(num_pages):
43
  page = pdf[i]
44
  img = render_pdf_page(page)
45
  images.append(img)
@@ -48,22 +47,7 @@ def process_pdf(pdf_path, max_pages=5):
48
  return images, total_pages
49
 
50
 
51
- def process_single_page(pdf_path, page_number):
52
- pdf = pdfium.PdfDocument(pdf_path)
53
- total_pages = len(pdf)
54
-
55
- if page_number < 1 or page_number > total_pages:
56
- pdf.close()
57
- return None, total_pages
58
-
59
- page = pdf[page_number - 1]
60
- img = render_pdf_page(page)
61
- pdf.close()
62
-
63
- return img, total_pages
64
-
65
-
66
- def process_input(file_input, temperature, page_number):
67
  if file_input is None:
68
  yield "Please upload an image or PDF first.", "", "", None
69
  return
@@ -76,22 +60,15 @@ def process_input(file_input, temperature, page_number):
76
 
77
  if file_path.lower().endswith('.pdf'):
78
  try:
79
- if page_number > 0:
80
- img, total_pages = process_single_page(file_path, page_number)
81
- if img is None:
82
- yield f"Error: Page {page_number} does not exist. PDF has {total_pages} pages.", "", "", None
83
- return
84
- images_to_process = [img]
85
- display_image = img
86
- page_info = f"Processing page {page_number} of {total_pages}"
87
- else:
88
- img, total_pages = process_single_page(file_path, 1)
89
- if img is None:
90
- yield f"Error: Could not read PDF.", "", "", None
91
- return
92
- images_to_process = [img]
93
- display_image = img
94
  page_info = f"Processing page 1 of {total_pages}"
 
 
95
  except Exception as e:
96
  yield f"Error processing PDF: {str(e)}", "", "", None
97
  return
@@ -100,15 +77,11 @@ def process_input(file_input, temperature, page_number):
100
  img = Image.open(file_path)
101
  images_to_process = [img]
102
  display_image = img
 
103
  except Exception as e:
104
  yield f"Error opening image: {str(e)}", "", "", None
105
  return
106
 
107
- for img in images_to_process:
108
- if not isinstance(img, Image.Image):
109
- yield "Error: Invalid image format.", "", "", None
110
- return
111
-
112
  content = [{"type": "text", "text": ""}]
113
 
114
  for img in images_to_process:
@@ -144,6 +117,7 @@ def process_input(file_input, temperature, page_number):
144
  response.raise_for_status()
145
 
146
  accumulated_response = ""
 
147
 
148
  for line in response.iter_lines():
149
  if line:
@@ -161,7 +135,11 @@ def process_input(file_input, temperature, page_number):
161
  content_delta = delta.get('content', '')
162
  if content_delta:
163
  accumulated_response += content_delta
164
- yield accumulated_response, accumulated_response, page_info, display_image
 
 
 
 
165
  except json.JSONDecodeError:
166
  continue
167
 
@@ -175,10 +153,10 @@ with gr.Blocks(title="📖 Image/PDF OCR", theme=gr.themes.Soft()) as demo:
175
  """
176
  # 📖 Image/PDF to Text Extraction
177
  **💡 How to use:**
178
- 1. Upload an image OR a PDF (max 5 pages)
179
- 2. Click "Extract Text" to process
180
-
181
- The model will extract and format text from your document.
182
  """
183
  )
184
 
@@ -190,24 +168,24 @@ with gr.Blocks(title="📖 Image/PDF OCR", theme=gr.themes.Soft()) as demo:
190
  type="filepath"
191
  )
192
  rendered_image = gr.Image(
193
- label="📄 Current Page/Image",
194
  type="pil",
195
- height=400,
196
  interactive=False
197
  )
198
- page_number = gr.Number(
199
- label="PDF: Page Number (0 = auto first page, or specify 1, 2, 3...)",
200
- value=0,
201
- minimum=0,
202
  step=1,
203
- precision=0
 
204
  )
205
  page_info = gr.Textbox(
206
- label="Page Info",
207
  value="",
208
  interactive=False
209
  )
210
- gr.Markdown("*Upload an image (PNG/JPG) or PDF. For PDF: 0 = page 1 automatically, or specify any page number*")
211
  temperature = gr.Slider(
212
  minimum=0.1,
213
  maximum=1.0,
@@ -237,13 +215,13 @@ with gr.Blocks(title="πŸ“– Image/PDF OCR", theme=gr.themes.Soft()) as demo:
237
 
238
  submit_btn.click(
239
  fn=process_input,
240
- inputs=[file_input, temperature, page_number],
241
  outputs=[output_text, raw_output, page_info, rendered_image]
242
  )
243
 
244
  clear_btn.click(
245
- fn=lambda: (None, "", "", 0, "", None),
246
- outputs=[file_input, output_text, raw_output, page_number, page_info, rendered_image]
247
  )
248
 
249
 
 
7
  from PIL import Image
8
  from io import BytesIO
9
  import pypdfium2 as pdfium
 
10
 
11
  ENDPOINT = os.environ.get("VLLM_ENDPOINT")
12
  MODEL = os.environ.get("VLLM_MODEL")
 
23
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
24
 
25
 
26
+ def render_pdf_page(page, max_resolution=1280, scale=2.77):
27
  width, height = page.get_size()
28
  pixel_width = width * scale
29
  pixel_height = height * scale
30
+ resize_factor = min(max_resolution / pixel_width, max_resolution / pixel_height)
31
  target_scale = scale * resize_factor
32
  return page.render(scale=target_scale, rev_byteorder=True).to_pil()
33
 
34
 
35
+ def process_pdf(pdf_path, num_pages=1):
36
  pdf = pdfium.PdfDocument(pdf_path)
37
  total_pages = len(pdf)
38
+ pages_to_process = min(num_pages, total_pages, 5)
39
  images = []
40
 
41
+ for i in range(pages_to_process):
42
  page = pdf[i]
43
  img = render_pdf_page(page)
44
  images.append(img)
 
47
  return images, total_pages
48
 
49
 
50
+ def process_input(file_input, temperature, num_pages):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  if file_input is None:
52
  yield "Please upload an image or PDF first.", "", "", None
53
  return
 
60
 
61
  if file_path.lower().endswith('.pdf'):
62
  try:
63
+ images_to_process, total_pages = process_pdf(file_path, num_pages)
64
+ if len(images_to_process) == 0:
65
+ yield "Error: Could not extract pages from PDF.", "", "", None
66
+ return
67
+ display_image = images_to_process[0]
68
+ if len(images_to_process) == 1:
 
 
 
 
 
 
 
 
 
69
  page_info = f"Processing page 1 of {total_pages}"
70
+ else:
71
+ page_info = f"Processing {len(images_to_process)} pages of {total_pages}"
72
  except Exception as e:
73
  yield f"Error processing PDF: {str(e)}", "", "", None
74
  return
 
77
  img = Image.open(file_path)
78
  images_to_process = [img]
79
  display_image = img
80
+ page_info = "Processing image"
81
  except Exception as e:
82
  yield f"Error opening image: {str(e)}", "", "", None
83
  return
84
 
 
 
 
 
 
85
  content = [{"type": "text", "text": ""}]
86
 
87
  for img in images_to_process:
 
117
  response.raise_for_status()
118
 
119
  accumulated_response = ""
120
+ first_chunk = True
121
 
122
  for line in response.iter_lines():
123
  if line:
 
135
  content_delta = delta.get('content', '')
136
  if content_delta:
137
  accumulated_response += content_delta
138
+ if first_chunk:
139
+ yield accumulated_response, accumulated_response, page_info, display_image
140
+ first_chunk = False
141
+ else:
142
+ yield accumulated_response, accumulated_response, page_info, gr.update()
143
  except json.JSONDecodeError:
144
  continue
145
 
 
153
  """
154
  # 📖 Image/PDF to Text Extraction
155
  **💡 How to use:**
156
+ 1. Upload an image or PDF
157
+ 2. For PDFs: choose how many pages to process (1-5, default is 1)
158
+ 3. Adjust temperature if needed
159
+ 4. Click "Extract Text"
160
  """
161
  )
162
 
 
168
  type="filepath"
169
  )
170
  rendered_image = gr.Image(
171
+ label="📄 Preview (First Page)",
172
  type="pil",
173
+ height=600,
174
  interactive=False
175
  )
176
+ num_pages = gr.Slider(
177
+ minimum=1,
178
+ maximum=5,
179
+ value=1,
180
  step=1,
181
+ label="PDF: Number of Pages to Process",
182
+ info="Only applies to PDF files (max 5 pages)"
183
  )
184
  page_info = gr.Textbox(
185
+ label="Processing Info",
186
  value="",
187
  interactive=False
188
  )
 
189
  temperature = gr.Slider(
190
  minimum=0.1,
191
  maximum=1.0,
 
215
 
216
  submit_btn.click(
217
  fn=process_input,
218
+ inputs=[file_input, temperature, num_pages],
219
  outputs=[output_text, raw_output, page_info, rendered_image]
220
  )
221
 
222
  clear_btn.click(
223
+ fn=lambda: (None, "", "", "", None, 1),
224
+ outputs=[file_input, output_text, raw_output, page_info, rendered_image, num_pages]
225
  )
226
 
227