Spaces:
Running
on
Zero
Running
on
Zero
update app (#5)
Browse files
- update app (8d588651c3738ab3959d1101635561a8cc68f8fb)
app.py
CHANGED
|
@@ -129,7 +129,8 @@ def generate_and_preview_pdf(image: Image.Image, text_content: str, font_size: i
|
|
| 129 |
@spaces.GPU
|
| 130 |
def process_document_stream(
|
| 131 |
image: Image.Image,
|
| 132 |
-
prompt_input: str,
|
|
|
|
| 133 |
max_new_tokens: int,
|
| 134 |
temperature: float,
|
| 135 |
top_p: float,
|
|
@@ -146,6 +147,21 @@ def process_document_stream(
|
|
| 146 |
yield "Please enter a prompt.", ""
|
| 147 |
return
|
| 148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
temp_image_path = None
|
| 150 |
try:
|
| 151 |
# --- FIX: Save the PIL Image to a temporary file ---
|
|
@@ -230,6 +246,16 @@ def create_gradio_interface():
|
|
| 230 |
image_input = gr.Image(label="Upload Image", type="pil", sources=['upload'])
|
| 231 |
|
| 232 |
with gr.Accordion("Advanced Settings", open=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
max_new_tokens = gr.Slider(minimum=512, maximum=8192, value=2048, step=256, label="Max New Tokens")
|
| 234 |
temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.05, value=0.7)
|
| 235 |
top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.8)
|
|
@@ -276,7 +302,8 @@ def create_gradio_interface():
|
|
| 276 |
|
| 277 |
process_btn.click(
|
| 278 |
fn=process_document_stream,
|
| 279 |
-
|
|
|
|
| 280 |
outputs=[raw_output_stream, markdown_output]
|
| 281 |
)
|
| 282 |
|
|
|
|
| 129 |
@spaces.GPU
|
| 130 |
def process_document_stream(
|
| 131 |
image: Image.Image,
|
| 132 |
+
prompt_input: str,
|
| 133 |
+
image_scale_factor: float, # New parameter for image scaling
|
| 134 |
max_new_tokens: int,
|
| 135 |
temperature: float,
|
| 136 |
top_p: float,
|
|
|
|
| 147 |
yield "Please enter a prompt.", ""
|
| 148 |
return
|
| 149 |
|
| 150 |
+
# --- IMPLEMENTATION: Image Scaling based on user input ---
|
| 151 |
+
if image_scale_factor > 1.0:
|
| 152 |
+
try:
|
| 153 |
+
original_width, original_height = image.size
|
| 154 |
+
new_width = int(original_width * image_scale_factor)
|
| 155 |
+
new_height = int(original_height * image_scale_factor)
|
| 156 |
+
print(f"Scaling image from {image.size} to ({new_width}, {new_height}) with factor {image_scale_factor}.")
|
| 157 |
+
# Use a high-quality resampling filter for better results
|
| 158 |
+
image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
|
| 159 |
+
except Exception as e:
|
| 160 |
+
print(f"Error during image scaling: {e}")
|
| 161 |
+
# Continue with the original image if scaling fails
|
| 162 |
+
pass
|
| 163 |
+
# --- END IMPLEMENTATION ---
|
| 164 |
+
|
| 165 |
temp_image_path = None
|
| 166 |
try:
|
| 167 |
# --- FIX: Save the PIL Image to a temporary file ---
|
|
|
|
| 246 |
image_input = gr.Image(label="Upload Image", type="pil", sources=['upload'])
|
| 247 |
|
| 248 |
with gr.Accordion("Advanced Settings", open=False):
|
| 249 |
+
# --- NEW UI ELEMENT: Image Scaling Slider ---
|
| 250 |
+
image_scale_factor = gr.Slider(
|
| 251 |
+
minimum=1.0,
|
| 252 |
+
maximum=3.0,
|
| 253 |
+
value=1.0,
|
| 254 |
+
step=0.1,
|
| 255 |
+
label="Image Upscale Factor",
|
| 256 |
+
info="Increases image size before processing. Can improve OCR on small text. Default: 1.0 (no change)."
|
| 257 |
+
)
|
| 258 |
+
# --- END NEW UI ELEMENT ---
|
| 259 |
max_new_tokens = gr.Slider(minimum=512, maximum=8192, value=2048, step=256, label="Max New Tokens")
|
| 260 |
temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.05, value=0.7)
|
| 261 |
top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.8)
|
|
|
|
| 302 |
|
| 303 |
process_btn.click(
|
| 304 |
fn=process_document_stream,
|
| 305 |
+
# --- UPDATE: Add the new slider to the inputs list ---
|
| 306 |
+
inputs=[image_input, prompt_input, image_scale_factor, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
|
| 307 |
outputs=[raw_output_stream, markdown_output]
|
| 308 |
)
|
| 309 |
|