staghado committed on
Commit
734c7e1
·
verified ·
1 Parent(s): 72c3b35

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -41
app.py CHANGED
@@ -32,68 +32,50 @@ def render_pdf_page(page, max_resolution=1540, scale=2.77):
32
  return page.render(scale=target_scale, rev_byteorder=True).to_pil()
33
 
34
 
35
- def process_pdf(pdf_path, num_pages=1):
36
  pdf = pdfium.PdfDocument(pdf_path)
37
  total_pages = len(pdf)
38
- pages_to_process = min(int(num_pages), total_pages, 5)
39
- images = []
40
 
41
- for i in range(pages_to_process):
42
- page = pdf[i]
43
- img = render_pdf_page(page)
44
- images.append(img)
45
 
46
  pdf.close()
47
- return images, total_pages
48
 
49
 
50
- def process_input(file_input, temperature, num_pages):
51
  if file_input is None:
52
  yield "Please upload an image or PDF first.", "", "", None
53
  return
54
 
55
- images_to_process = []
56
  page_info = ""
57
- display_image = None
58
 
59
  file_path = file_input if isinstance(file_input, str) else file_input.name
60
 
61
  if file_path.lower().endswith('.pdf'):
62
  try:
63
- images_to_process, total_pages = process_pdf(file_path, int(num_pages))
64
- if len(images_to_process) == 0:
65
- yield "Error: Could not extract pages from PDF.", "", "", None
66
- return
67
- display_image = images_to_process[0]
68
- if len(images_to_process) == 1:
69
- page_info = f"Processing page 1 of {total_pages}"
70
- else:
71
- page_info = f"Processing {len(images_to_process)} pages of {total_pages}"
72
  except Exception as e:
73
  yield f"Error processing PDF: {str(e)}", "", "", None
74
  return
75
  else:
76
  try:
77
- img = Image.open(file_path)
78
- images_to_process = [img]
79
- display_image = img
80
  page_info = "Processing image"
81
  except Exception as e:
82
  yield f"Error opening image: {str(e)}", "", "", None
83
  return
84
 
85
- content = [{"type": "text", "text": "Extract the text from this image."}]
86
-
87
- for img in images_to_process:
88
- try:
89
- b64_image = image_to_base64(img)
90
- content.append({
91
- "type": "image_url",
92
- "image_url": {"url": f"data:image/png;base64,{b64_image}"}
93
- })
94
- except Exception as e:
95
- yield f"Error encoding image: {str(e)}", "", "", display_image
96
- return
97
 
98
  payload = {
99
  "model": MODEL,
@@ -131,7 +113,7 @@ def process_input(file_input, temperature, num_pages):
131
  if content_delta:
132
  accumulated_response += content_delta
133
  if first_chunk:
134
- yield accumulated_response, accumulated_response, page_info, display_image
135
  first_chunk = False
136
  else:
137
  yield accumulated_response, accumulated_response, page_info, gr.update()
@@ -140,7 +122,7 @@ def process_input(file_input, temperature, num_pages):
140
 
141
  except Exception as e:
142
  error_msg = f"Error: {str(e)}"
143
- yield error_msg, error_msg, page_info, display_image
144
 
145
 
146
  with gr.Blocks(title="📖 Image/PDF OCR", theme=gr.themes.Soft()) as demo:
@@ -149,7 +131,7 @@ with gr.Blocks(title="📖 Image/PDF OCR", theme=gr.themes.Soft()) as demo:
149
 
150
  **💡 How to use:**
151
  1. Upload an image or PDF
152
- 2. For PDFs: choose how many pages to process (1-5, default is 1)
153
  3. Adjust temperature if needed
154
  4. Click "Extract Text"
155
 
@@ -171,11 +153,11 @@ with gr.Blocks(title="📖 Image/PDF OCR", theme=gr.themes.Soft()) as demo:
171
  )
172
  num_pages = gr.Slider(
173
  minimum=1,
174
- maximum=5,
175
  value=1,
176
  step=1,
177
- label="PDF: Number of Pages to Process",
178
- info="Only applies to PDF files (max 5 pages)"
179
  )
180
  page_info = gr.Textbox(
181
  label="Processing Info",
 
32
  return page.render(scale=target_scale, rev_byteorder=True).to_pil()
33
 
34
 
35
+ def process_pdf(pdf_path, page_num=1):
36
  pdf = pdfium.PdfDocument(pdf_path)
37
  total_pages = len(pdf)
38
+ page_idx = min(max(int(page_num) - 1, 0), total_pages - 1)
 
39
 
40
+ page = pdf[page_idx]
41
+ img = render_pdf_page(page)
 
 
42
 
43
  pdf.close()
44
+ return img, total_pages, page_idx + 1
45
 
46
 
47
+ def process_input(file_input, temperature, page_num):
48
  if file_input is None:
49
  yield "Please upload an image or PDF first.", "", "", None
50
  return
51
 
52
+ image_to_process = None
53
  page_info = ""
 
54
 
55
  file_path = file_input if isinstance(file_input, str) else file_input.name
56
 
57
  if file_path.lower().endswith('.pdf'):
58
  try:
59
+ image_to_process, total_pages, actual_page = process_pdf(file_path, int(page_num))
60
+ page_info = f"Processing page {actual_page} of {total_pages}"
 
 
 
 
 
 
 
61
  except Exception as e:
62
  yield f"Error processing PDF: {str(e)}", "", "", None
63
  return
64
  else:
65
  try:
66
+ image_to_process = Image.open(file_path)
 
 
67
  page_info = "Processing image"
68
  except Exception as e:
69
  yield f"Error opening image: {str(e)}", "", "", None
70
  return
71
 
72
+ content = [
73
+ {"type": "text", "text": "Extract the text from this image."},
74
+ {
75
+ "type": "image_url",
76
+ "image_url": {"url": f"data:image/png;base64,{image_to_base64(image_to_process)}"}
77
+ }
78
+ ]
 
 
 
 
 
79
 
80
  payload = {
81
  "model": MODEL,
 
113
  if content_delta:
114
  accumulated_response += content_delta
115
  if first_chunk:
116
+ yield accumulated_response, accumulated_response, page_info, image_to_process
117
  first_chunk = False
118
  else:
119
  yield accumulated_response, accumulated_response, page_info, gr.update()
 
122
 
123
  except Exception as e:
124
  error_msg = f"Error: {str(e)}"
125
+ yield error_msg, error_msg, page_info, image_to_process
126
 
127
 
128
  with gr.Blocks(title="📖 Image/PDF OCR", theme=gr.themes.Soft()) as demo:
 
131
 
132
  **💡 How to use:**
133
  1. Upload an image or PDF
134
+ 2. For PDFs: select which page to extract (1-20)
135
  3. Adjust temperature if needed
136
  4. Click "Extract Text"
137
 
 
153
  )
154
  num_pages = gr.Slider(
155
  minimum=1,
156
+ maximum=20,
157
  value=1,
158
  step=1,
159
+ label="PDF: Page Number",
160
+ info="Select which page to extract"
161
  )
162
  page_info = gr.Textbox(
163
  label="Processing Info",