staghado committed on
Commit
6804c82
·
verified ·
1 Parent(s): 3c5f2af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -24
app.py CHANGED
@@ -6,12 +6,14 @@ import requests
6
  import gradio as gr
7
  from PIL import Image
8
  from io import BytesIO
 
 
9
 
10
  ENDPOINT = os.environ.get("VLLM_ENDPOINT")
11
  MODEL = os.environ.get("VLLM_MODEL")
12
 
13
  if not ENDPOINT or not MODEL:
14
- raise ValueError("VLLM_ENDPOINT and VLLM_MODEL environment variables must be set. Please add them as secrets in your Space settings.")
15
 
16
 
17
  def image_to_base64(image):
@@ -20,22 +22,63 @@ def image_to_base64(image):
20
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
21
 
22
 
23
- def process_image(image, temperature):
24
- if image is None:
25
- yield "Please upload an image first.", ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  return
27
 
28
- b64_image = image_to_base64(image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  payload = {
31
  "model": MODEL,
32
  "messages": [
33
  {
34
  "role": "user",
35
- "content": [
36
- {"type": "text", "text": ""},
37
- {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64_image}"}}
38
- ]
39
  }
40
  ],
41
  "temperature": temperature,
@@ -66,9 +109,9 @@ def process_image(image, temperature):
66
  chunk = json.loads(line)
67
  if 'choices' in chunk and len(chunk['choices']) > 0:
68
  delta = chunk['choices'][0].get('delta', {})
69
- content = delta.get('content', '')
70
- if content:
71
- accumulated_response += content
72
  yield accumulated_response, accumulated_response
73
  except json.JSONDecodeError:
74
  continue
@@ -78,27 +121,33 @@ def process_image(image, temperature):
78
  yield error_msg, error_msg
79
 
80
 
81
- with gr.Blocks(title="πŸ“– Image OCR", theme=gr.themes.Soft()) as demo:
82
  gr.Markdown(
83
  """
84
- # πŸ“– Image to Text Extraction
85
  **πŸ’‘ How to use:**
86
- 1. Upload an image using the upload box
87
  2. Adjust temperature if needed
88
  3. Click "Extract Text" to process
89
 
90
- The model will extract and format text from your image.
91
  """
92
  )
93
 
94
  with gr.Row():
95
- with gr.Column():
96
  image_input = gr.Image(
97
  type="pil",
98
  label="πŸ–ΌοΈ Upload Image",
99
  sources=["upload", "clipboard"],
100
- height=600
 
 
 
 
 
101
  )
 
102
  temperature = gr.Slider(
103
  minimum=0.1,
104
  maximum=1.0,
@@ -109,7 +158,7 @@ with gr.Blocks(title="📖 Image OCR", theme=gr.themes.Soft()) as demo:
109
  submit_btn = gr.Button("Extract Text", variant="primary")
110
  clear_btn = gr.Button("Clear", variant="secondary")
111
 
112
- with gr.Column():
113
  output_text = gr.Markdown(
114
  label="πŸ“„ Extracted Text (Rendered)",
115
  value="<div style='min-height: 600px; padding: 10px; border: 1px solid #e0e0e0; border-radius: 4px; background-color: #f9f9f9;'><em>Extracted text will appear here...</em></div>",
@@ -121,19 +170,20 @@ with gr.Blocks(title="📖 Image OCR", theme=gr.themes.Soft()) as demo:
121
  raw_output = gr.Textbox(
122
  label="Raw Markdown Output",
123
  placeholder="Raw text will appear here...",
124
- lines=30,
 
125
  show_copy_button=True
126
  )
127
 
128
  submit_btn.click(
129
- fn=process_image,
130
- inputs=[image_input, temperature],
131
  outputs=[output_text, raw_output]
132
  )
133
 
134
  clear_btn.click(
135
- fn=lambda: (None, "", ""),
136
- outputs=[image_input, output_text, raw_output]
137
  )
138
 
139
 
 
6
  import gradio as gr
7
  from PIL import Image
8
  from io import BytesIO
9
+ import pypdfium2 as pdfium
10
+ from pathlib import Path
11
 
12
  ENDPOINT = os.environ.get("VLLM_ENDPOINT")
13
  MODEL = os.environ.get("VLLM_MODEL")
14
 
15
  if not ENDPOINT or not MODEL:
16
+ raise ValueError("VLLM_ENDPOINT and VLLM_MODEL environment variables must be set.")
17
 
18
 
19
  def image_to_base64(image):
 
22
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
23
 
24
 
25
def render_pdf_page(page, max_resolution=1540, scale=2.77):
    """Render one PDF page to a PIL image with a cap on its pixel size.

    The page is rendered at ``scale`` unless that would make either side
    larger than ``max_resolution`` pixels; in that case the scale is
    reduced by a single common factor so both sides fit.

    Args:
        page: Object exposing ``get_size()`` -> ``(width, height)`` and
            ``render(scale=..., rev_byteorder=...)`` whose result has
            ``to_pil()`` (a pypdfium2 page in this file's usage).
        max_resolution: Upper bound, in pixels, for each rendered side.
        scale: Preferred multiplier applied to the page's own size.

    Returns:
        Whatever ``page.render(...).to_pil()`` returns.
    """
    page_width, page_height = page.get_size()

    # The leading 1 keeps small pages at the preferred scale: this only
    # ever shrinks, never enlarges.
    shrink = min(
        1,
        max_resolution / (page_width * scale),
        max_resolution / (page_height * scale),
    )

    # NOTE(review): rev_byteorder=True is presumed to select RGB rather than
    # BGR channel order in pypdfium2 - confirm against the installed version.
    bitmap = page.render(scale=scale * shrink, rev_byteorder=True)
    return bitmap.to_pil()
32
+
33
+
34
def process_pdf(pdf_path, max_pages=5):
    """Render the leading pages of a PDF to images.

    Args:
        pdf_path: Value passed straight to ``pdfium.PdfDocument`` (the UI
            supplies a file path).
        max_pages: Maximum number of pages, counted from the first page,
            to render. Pages beyond this limit are ignored.

    Returns:
        A list with one rendered image per processed page, in page order.
        The list is empty when the document has no pages or ``max_pages``
        is zero or negative.

    Raises:
        Any exception raised while opening the document or rendering a
        page propagates to the caller; the document is closed first.
    """
    pdf = pdfium.PdfDocument(pdf_path)
    try:
        page_count = min(len(pdf), max_pages)
        return [render_pdf_page(pdf[index]) for index in range(page_count)]
    finally:
        # Close the document even if a page fails to render, so a bad PDF
        # does not leak the open handle.
        pdf.close()
46
+
47
+
48
+ def process_input(image, pdf_file, temperature):
49
+ if image is None and pdf_file is None:
50
+ yield "Please upload an image or PDF first.", ""
51
  return
52
 
53
+ images_to_process = []
54
+
55
+ if pdf_file is not None:
56
+ try:
57
+ images_to_process = process_pdf(pdf_file, max_pages=5)
58
+ if len(images_to_process) == 0:
59
+ yield "Error: Could not extract pages from PDF.", ""
60
+ return
61
+ except Exception as e:
62
+ yield f"Error processing PDF: {str(e)}", ""
63
+ return
64
+ elif image is not None:
65
+ images_to_process = [image]
66
+
67
+ content = [{"type": "text", "text": ""}]
68
+
69
+ for img in images_to_process:
70
+ b64_image = image_to_base64(img)
71
+ content.append({
72
+ "type": "image_url",
73
+ "image_url": {"url": f"data:image/png;base64,{b64_image}"}
74
+ })
75
 
76
  payload = {
77
  "model": MODEL,
78
  "messages": [
79
  {
80
  "role": "user",
81
+ "content": content
 
 
 
82
  }
83
  ],
84
  "temperature": temperature,
 
109
  chunk = json.loads(line)
110
  if 'choices' in chunk and len(chunk['choices']) > 0:
111
  delta = chunk['choices'][0].get('delta', {})
112
+ content_delta = delta.get('content', '')
113
+ if content_delta:
114
+ accumulated_response += content_delta
115
  yield accumulated_response, accumulated_response
116
  except json.JSONDecodeError:
117
  continue
 
121
  yield error_msg, error_msg
122
 
123
 
124
+ with gr.Blocks(title="πŸ“– Image/PDF OCR", theme=gr.themes.Soft()) as demo:
125
  gr.Markdown(
126
  """
127
+ # πŸ“– Image/PDF to Text Extraction
128
  **πŸ’‘ How to use:**
129
+ 1. Upload an image OR a PDF (max 5 pages)
130
  2. Adjust temperature if needed
131
  3. Click "Extract Text" to process
132
 
133
+ The model will extract and format text from your document.
134
  """
135
  )
136
 
137
  with gr.Row():
138
+ with gr.Column(scale=1):
139
  image_input = gr.Image(
140
  type="pil",
141
  label="πŸ–ΌοΈ Upload Image",
142
  sources=["upload", "clipboard"],
143
+ height=400
144
+ )
145
+ pdf_input = gr.File(
146
+ label="πŸ“„ Upload PDF (max 5 pages)",
147
+ file_types=[".pdf"],
148
+ type="filepath"
149
  )
150
+ gr.Markdown("*Upload either an image or PDF, not both*")
151
  temperature = gr.Slider(
152
  minimum=0.1,
153
  maximum=1.0,
 
158
  submit_btn = gr.Button("Extract Text", variant="primary")
159
  clear_btn = gr.Button("Clear", variant="secondary")
160
 
161
+ with gr.Column(scale=2):
162
  output_text = gr.Markdown(
163
  label="πŸ“„ Extracted Text (Rendered)",
164
  value="<div style='min-height: 600px; padding: 10px; border: 1px solid #e0e0e0; border-radius: 4px; background-color: #f9f9f9;'><em>Extracted text will appear here...</em></div>",
 
170
  raw_output = gr.Textbox(
171
  label="Raw Markdown Output",
172
  placeholder="Raw text will appear here...",
173
+ lines=20,
174
+ max_lines=30,
175
  show_copy_button=True
176
  )
177
 
178
  submit_btn.click(
179
+ fn=process_input,
180
+ inputs=[image_input, pdf_input, temperature],
181
  outputs=[output_text, raw_output]
182
  )
183
 
184
  clear_btn.click(
185
+ fn=lambda: (None, None, "", ""),
186
+ outputs=[image_input, pdf_input, output_text, raw_output]
187
  )
188
 
189