prithivMLmods committed on
Commit
8d58865
·
verified ·
1 Parent(s): 2c5ad69

update app

Browse files

Implemented an image scaling feature by adding a slider to the user interface and integrating the scaling logic into the document processing function. This lets the user upscale the image before it is sent to the model, which can significantly improve the accuracy of text extraction, especially for documents with small characters.

Files changed (1) hide show
  1. app.py +29 -2
app.py CHANGED
@@ -129,7 +129,8 @@ def generate_and_preview_pdf(image: Image.Image, text_content: str, font_size: i
129
  @spaces.GPU
130
  def process_document_stream(
131
  image: Image.Image,
132
- prompt_input: str,
 
133
  max_new_tokens: int,
134
  temperature: float,
135
  top_p: float,
@@ -146,6 +147,21 @@ def process_document_stream(
146
  yield "Please enter a prompt.", ""
147
  return
148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  temp_image_path = None
150
  try:
151
  # --- FIX: Save the PIL Image to a temporary file ---
@@ -230,6 +246,16 @@ def create_gradio_interface():
230
  image_input = gr.Image(label="Upload Image", type="pil", sources=['upload'])
231
 
232
  with gr.Accordion("Advanced Settings", open=False):
 
 
 
 
 
 
 
 
 
 
233
  max_new_tokens = gr.Slider(minimum=512, maximum=8192, value=2048, step=256, label="Max New Tokens")
234
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.05, value=0.7)
235
  top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.8)
@@ -276,7 +302,8 @@ def create_gradio_interface():
276
 
277
  process_btn.click(
278
  fn=process_document_stream,
279
- inputs=[image_input, prompt_input, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
 
280
  outputs=[raw_output_stream, markdown_output]
281
  )
282
 
 
129
  @spaces.GPU
130
  def process_document_stream(
131
  image: Image.Image,
132
+ prompt_input: str,
133
+ image_scale_factor: float, # New parameter for image scaling
134
  max_new_tokens: int,
135
  temperature: float,
136
  top_p: float,
 
147
  yield "Please enter a prompt.", ""
148
  return
149
 
150
+ # --- IMPLEMENTATION: Image Scaling based on user input ---
151
+ if image_scale_factor > 1.0:
152
+ try:
153
+ original_width, original_height = image.size
154
+ new_width = int(original_width * image_scale_factor)
155
+ new_height = int(original_height * image_scale_factor)
156
+ print(f"Scaling image from {image.size} to ({new_width}, {new_height}) with factor {image_scale_factor}.")
157
+ # Use a high-quality resampling filter for better results
158
+ image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
159
+ except Exception as e:
160
+ print(f"Error during image scaling: {e}")
161
+ # Continue with the original image if scaling fails
162
+ pass
163
+ # --- END IMPLEMENTATION ---
164
+
165
  temp_image_path = None
166
  try:
167
  # --- FIX: Save the PIL Image to a temporary file ---
 
246
  image_input = gr.Image(label="Upload Image", type="pil", sources=['upload'])
247
 
248
  with gr.Accordion("Advanced Settings", open=False):
249
+ # --- NEW UI ELEMENT: Image Scaling Slider ---
250
+ image_scale_factor = gr.Slider(
251
+ minimum=1.0,
252
+ maximum=3.0,
253
+ value=1.0,
254
+ step=0.1,
255
+ label="Image Upscale Factor",
256
+ info="Increases image size before processing. Can improve OCR on small text. Default: 1.0 (no change)."
257
+ )
258
+ # --- END NEW UI ELEMENT ---
259
  max_new_tokens = gr.Slider(minimum=512, maximum=8192, value=2048, step=256, label="Max New Tokens")
260
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.05, value=0.7)
261
  top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.8)
 
302
 
303
  process_btn.click(
304
  fn=process_document_stream,
305
+ # --- UPDATE: Add the new slider to the inputs list ---
306
+ inputs=[image_input, prompt_input, image_scale_factor, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
307
  outputs=[raw_output_stream, markdown_output]
308
  )
309