Bapt120 committed on
Commit
a86e158
·
verified ·
1 Parent(s): d40348f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +179 -82
app.py CHANGED
@@ -1,24 +1,28 @@
1
  #!/usr/bin/env python3
 
2
  import os
 
3
  import subprocess
4
  import sys
5
  import threading
 
 
6
 
 
 
7
  import spaces
8
  import torch
9
-
10
- import gradio as gr
11
  from PIL import Image
12
- from io import BytesIO
13
- import pypdfium2 as pdfium
14
  from transformers import (
15
  LightOnOcrForConditionalGeneration,
16
  LightOnOcrProcessor,
17
  TextIteratorStreamer,
18
  )
19
- import re
20
- import base64
21
- from collections import OrderedDict
 
22
 
23
  # Model Registry with all supported models
24
  MODEL_REGISTRY = {
@@ -26,11 +30,13 @@ MODEL_REGISTRY = {
26
  "model_id": "lightonai/LightOnOCR-2-1B",
27
  "has_bbox": False,
28
  "description": "Best overall OCR performance",
 
29
  },
30
  "LightOnOCR-2-1B-bbox (Best Bbox)": {
31
  "model_id": "lightonai/LightOnOCR-2-1B-bbox",
32
  "has_bbox": True,
33
  "description": "Best bounding box detection",
 
34
  },
35
  "LightOnOCR-2-1B-base": {
36
  "model_id": "lightonai/LightOnOCR-2-1B-base",
@@ -102,18 +108,20 @@ class ModelManager:
102
  # Load new model
103
  print(f"Loading model: {model_name} ({model_id})...")
104
  hf_token = os.environ.get("HF_TOKEN")
105
- model = LightOnOcrForConditionalGeneration.from_pretrained(
106
- model_id,
107
- attn_implementation=attn_implementation,
108
- torch_dtype=dtype,
109
- trust_remote_code=True,
110
- token=hf_token
111
- ).to(device).eval()
 
 
 
 
112
 
113
  processor = LightOnOcrProcessor.from_pretrained(
114
- model_id,
115
- trust_remote_code=True,
116
- token=hf_token
117
  )
118
 
119
  # Add to cache
@@ -147,10 +155,10 @@ def process_pdf(pdf_path, page_num=1):
147
  pdf = pdfium.PdfDocument(pdf_path)
148
  total_pages = len(pdf)
149
  page_idx = min(max(int(page_num) - 1, 0), total_pages - 1)
150
-
151
  page = pdf[page_idx]
152
  img = render_pdf_page(page)
153
-
154
  pdf.close()
155
  return img, total_pages, page_idx + 1
156
 
@@ -159,31 +167,31 @@ def clean_output_text(text):
159
  """Remove chat template artifacts from output."""
160
  # Remove common chat template markers
161
  markers_to_remove = ["system", "user", "assistant"]
162
-
163
  # Split by lines and filter
164
- lines = text.split('\n')
165
  cleaned_lines = []
166
-
167
  for line in lines:
168
  stripped = line.strip()
169
  # Skip lines that are just template markers
170
  if stripped.lower() not in markers_to_remove:
171
  cleaned_lines.append(line)
172
-
173
  # Join back and strip leading/trailing whitespace
174
- cleaned = '\n'.join(cleaned_lines).strip()
175
-
176
  # Alternative approach: if there's an "assistant" marker, take everything after it
177
  if "assistant" in text.lower():
178
  parts = text.split("assistant", 1)
179
  if len(parts) > 1:
180
  cleaned = parts[1].strip()
181
-
182
  return cleaned
183
 
184
 
185
  # Bbox parsing pattern: ![image](image_N.png)x1,y1,x2,y2 (no space between)
186
- BBOX_PATTERN = r'!\[image\]\((image_\d+\.png)\)\s*(\d+),(\d+),(\d+),(\d+)'
187
 
188
 
189
  def parse_bbox_output(text):
@@ -191,12 +199,11 @@ def parse_bbox_output(text):
191
  detections = []
192
  for match in re.finditer(BBOX_PATTERN, text):
193
  image_ref, x1, y1, x2, y2 = match.groups()
194
- detections.append({
195
- "ref": image_ref,
196
- "coords": (int(x1), int(y1), int(x2), int(y2))
197
- })
198
  # Clean text: remove coordinates, keep markdown image refs
199
- cleaned = re.sub(BBOX_PATTERN, r'![image](\1)', text)
200
  return cleaned, detections
201
 
202
 
@@ -226,6 +233,71 @@ def image_to_data_uri(image):
226
  return f"data:image/png;base64,{b64}"
227
 
228
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  def render_bbox_with_crops(raw_output, source_image):
230
  """Replace markdown image placeholders with actual cropped images."""
231
  cleaned, detections = parse_bbox_output(raw_output)
@@ -236,8 +308,7 @@ def render_bbox_with_crops(raw_output, source_image):
236
  data_uri = image_to_data_uri(cropped)
237
  # Replace ![image](image_N.png) with ![Cropped](data:...)
238
  cleaned = cleaned.replace(
239
- f"![image]({bbox['ref']})",
240
- f"![Cropped region]({data_uri})"
241
  )
242
  except Exception as e:
243
  print(f"Error cropping bbox {bbox}: {e}")
@@ -250,6 +321,13 @@ def render_bbox_with_crops(raw_output, source_image):
250
  @spaces.GPU
251
  def extract_text_from_image(image, model_name, temperature=0.2, stream=False):
252
  """Extract text from image using LightOnOCR model."""
 
 
 
 
 
 
 
253
  # Get model and processor from cache or load
254
  model, processor = model_manager.get_model(model_name)
255
 
@@ -269,13 +347,16 @@ def extract_text_from_image(image, model_name, temperature=0.2, stream=False):
269
  add_generation_prompt=True,
270
  tokenize=True,
271
  return_dict=True,
272
- return_tensors="pt"
273
  )
274
 
275
  # Move inputs to device AND convert to the correct dtype
276
  inputs = {
277
- k: v.to(device=device, dtype=dtype) if isinstance(v, torch.Tensor) and v.dtype in [torch.float32, torch.float16, torch.bfloat16]
278
- else v.to(device) if isinstance(v, torch.Tensor)
 
 
 
279
  else v
280
  for k, v in inputs.items()
281
  }
@@ -293,9 +374,7 @@ def extract_text_from_image(image, model_name, temperature=0.2, stream=False):
293
  if stream:
294
  # Setup streamer for streaming generation
295
  streamer = TextIteratorStreamer(
296
- processor.tokenizer,
297
- skip_prompt=True,
298
- skip_special_tokens=True
299
  )
300
  generation_kwargs["streamer"] = streamer
301
 
@@ -338,9 +417,11 @@ def process_input(file_input, model_name, temperature, page_num, enable_streamin
338
  file_path = file_input if isinstance(file_input, str) else file_input.name
339
 
340
  # Handle PDF files
341
- if file_path.lower().endswith('.pdf'):
342
  try:
343
- image_to_process, total_pages, actual_page = process_pdf(file_path, int(page_num))
 
 
344
  page_info = f"Processing page {actual_page} of {total_pages}"
345
  except Exception as e:
346
  yield f"Error processing PDF: {str(e)}", "", "", None, gr.update()
@@ -360,13 +441,21 @@ def process_input(file_input, model_name, temperature, page_num, enable_streamin
360
 
361
  try:
362
  # Extract text using LightOnOCR with optional streaming
363
- for extracted_text in extract_text_from_image(image_to_process, model_name, temperature, stream=enable_streaming):
 
 
364
  # For bbox models, render cropped images inline
365
  if has_bbox:
366
  rendered_text = render_bbox_with_crops(extracted_text, image_to_process)
367
  else:
368
  rendered_text = extracted_text
369
- yield rendered_text, extracted_text, page_info, image_to_process, gr.update()
 
 
 
 
 
 
370
 
371
  except Exception as e:
372
  error_msg = f"Error during text extraction: {str(e)}"
@@ -377,10 +466,10 @@ def update_slider(file_input):
377
  """Update page slider based on PDF page count."""
378
  if file_input is None:
379
  return gr.update(maximum=20, value=1)
380
-
381
  file_path = file_input if isinstance(file_input, str) else file_input.name
382
-
383
- if file_path.lower().endswith('.pdf'):
384
  try:
385
  pdf = pdfium.PdfDocument(file_path)
386
  total_pages = len(pdf)
@@ -396,7 +485,11 @@ def update_slider(file_input):
396
  def get_model_info_text(model_name):
397
  """Return formatted model info string."""
398
  info = MODEL_REGISTRY.get(model_name, {})
399
- has_bbox = "Yes - will show cropped regions inline" if info.get("has_bbox", False) else "No"
 
 
 
 
400
  return f"**Description:** {info.get('description', 'N/A')}\n**Bounding Box Detection:** {has_bbox}"
401
 
402
 
@@ -415,29 +508,25 @@ with gr.Blocks(title="LightOnOCR-2 Multi-Model OCR") as demo:
415
 
416
  **Device:** {device.upper()} | **Attention:** {attn_implementation}
417
  """)
418
-
419
  with gr.Row():
420
  with gr.Column(scale=1):
421
  model_selector = gr.Dropdown(
422
  choices=list(MODEL_REGISTRY.keys()),
423
  value=DEFAULT_MODEL,
424
  label="Model",
425
- info="Select OCR model variant"
426
  )
427
  model_info = gr.Markdown(
428
- value=get_model_info_text(DEFAULT_MODEL),
429
- label="Model Info"
430
  )
431
  file_input = gr.File(
432
  label="Upload Image or PDF",
433
  file_types=[".pdf", ".png", ".jpg", ".jpeg"],
434
- type="filepath"
435
  )
436
  rendered_image = gr.Image(
437
- label="Preview",
438
- type="pil",
439
- height=400,
440
- interactive=False
441
  )
442
  num_pages = gr.Slider(
443
  minimum=1,
@@ -445,68 +534,76 @@ with gr.Blocks(title="LightOnOCR-2 Multi-Model OCR") as demo:
445
  value=1,
446
  step=1,
447
  label="PDF: Page Number",
448
- info="Select which page to extract"
449
- )
450
- page_info = gr.Textbox(
451
- label="Processing Info",
452
- value="",
453
- interactive=False
454
  )
 
455
  temperature = gr.Slider(
456
  minimum=0.0,
457
  maximum=1.0,
458
  value=0.2,
459
  step=0.05,
460
  label="Temperature",
461
- info="0.0 = deterministic, Higher = more varied"
462
  )
463
  enable_streaming = gr.Checkbox(
464
  label="Enable Streaming",
465
  value=True,
466
- info="Show text progressively as it's generated"
467
  )
468
  submit_btn = gr.Button("Extract Text", variant="primary")
469
  clear_btn = gr.Button("Clear", variant="secondary")
470
-
471
  with gr.Column(scale=2):
472
  output_text = gr.Markdown(
473
  label="📄 Extracted Text (Rendered)",
474
- value="*Extracted text will appear here...*"
475
  )
476
-
477
  with gr.Row():
478
  with gr.Column():
479
  raw_output = gr.Textbox(
480
  label="Raw Markdown Output",
481
  placeholder="Raw text will appear here...",
482
  lines=20,
483
- max_lines=30
484
  )
485
-
486
  # Event handlers
487
  submit_btn.click(
488
  fn=process_input,
489
  inputs=[file_input, model_selector, temperature, num_pages, enable_streaming],
490
- outputs=[output_text, raw_output, page_info, rendered_image, num_pages]
491
  )
492
 
493
- file_input.change(
494
- fn=update_slider,
495
- inputs=[file_input],
496
- outputs=[num_pages]
497
- )
498
 
499
  model_selector.change(
500
- fn=get_model_info_text,
501
- inputs=[model_selector],
502
- outputs=[model_info]
503
  )
504
 
505
  clear_btn.click(
506
- fn=lambda: (None, DEFAULT_MODEL, get_model_info_text(DEFAULT_MODEL), "*Extracted text will appear here...*", "", "", None, 1),
507
- outputs=[file_input, model_selector, model_info, output_text, raw_output, page_info, rendered_image, num_pages]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
508
  )
509
 
510
 
511
  if __name__ == "__main__":
512
- demo.launch(theme=gr.themes.Soft())
 
1
  #!/usr/bin/env python3
2
+ import base64
3
  import os
4
+ import re
5
  import subprocess
6
  import sys
7
  import threading
8
+ from collections import OrderedDict
9
+ from io import BytesIO
10
 
11
+ import gradio as gr
12
+ import pypdfium2 as pdfium
13
  import spaces
14
  import torch
15
+ from openai import OpenAI
 
16
  from PIL import Image
 
 
17
  from transformers import (
18
  LightOnOcrForConditionalGeneration,
19
  LightOnOcrProcessor,
20
  TextIteratorStreamer,
21
  )
22
+
23
+ # vLLM endpoint configuration from environment variables
24
+ VLLM_ENDPOINT_OCR = os.environ.get("VLLM_ENDPOINT_OCR")
25
+ VLLM_ENDPOINT_BBOX = os.environ.get("VLLM_ENDPOINT_BBOX")
26
 
27
  # Model Registry with all supported models
28
  MODEL_REGISTRY = {
 
30
  "model_id": "lightonai/LightOnOCR-2-1B",
31
  "has_bbox": False,
32
  "description": "Best overall OCR performance",
33
+ "vllm_endpoint": VLLM_ENDPOINT_OCR,
34
  },
35
  "LightOnOCR-2-1B-bbox (Best Bbox)": {
36
  "model_id": "lightonai/LightOnOCR-2-1B-bbox",
37
  "has_bbox": True,
38
  "description": "Best bounding box detection",
39
+ "vllm_endpoint": VLLM_ENDPOINT_BBOX,
40
  },
41
  "LightOnOCR-2-1B-base": {
42
  "model_id": "lightonai/LightOnOCR-2-1B-base",
 
108
  # Load new model
109
  print(f"Loading model: {model_name} ({model_id})...")
110
  hf_token = os.environ.get("HF_TOKEN")
111
+ model = (
112
+ LightOnOcrForConditionalGeneration.from_pretrained(
113
+ model_id,
114
+ attn_implementation=attn_implementation,
115
+ torch_dtype=dtype,
116
+ trust_remote_code=True,
117
+ token=hf_token,
118
+ )
119
+ .to(device)
120
+ .eval()
121
+ )
122
 
123
  processor = LightOnOcrProcessor.from_pretrained(
124
+ model_id, trust_remote_code=True, token=hf_token
 
 
125
  )
126
 
127
  # Add to cache
 
155
  pdf = pdfium.PdfDocument(pdf_path)
156
  total_pages = len(pdf)
157
  page_idx = min(max(int(page_num) - 1, 0), total_pages - 1)
158
+
159
  page = pdf[page_idx]
160
  img = render_pdf_page(page)
161
+
162
  pdf.close()
163
  return img, total_pages, page_idx + 1
164
 
 
def clean_output_text(text):
    """Remove chat template artifacts (role markers) from decoded model output.

    Two passes:
      1. Drop any line that consists solely of a role marker
         ("system", "user", "assistant").
      2. If an "assistant" marker appears anywhere in the raw text, prefer
         everything after its first occurrence (the actual model response),
         overriding the result of pass 1.

    Args:
        text: Raw decoded model output, possibly containing chat template
            role markers.

    Returns:
        The cleaned text with leading/trailing whitespace stripped.
    """
    markers_to_remove = {"system", "user", "assistant"}

    # Pass 1: filter out lines that are bare role markers.
    kept_lines = [
        line
        for line in text.split("\n")
        if line.strip().lower() not in markers_to_remove
    ]
    cleaned = "\n".join(kept_lines).strip()

    # Pass 2: take everything after the first "assistant" marker.
    # Search case-insensitively: the original checked membership on
    # text.lower() but then split case-sensitively, so a capitalized
    # "Assistant" marker was detected yet never actually stripped.
    marker_at = text.lower().find("assistant")
    if marker_at != -1:
        cleaned = text[marker_at + len("assistant"):].strip()

    return cleaned
191
 
192
 
193
  # Bbox parsing pattern: ![image](image_N.png)x1,y1,x2,y2 (no space between)
194
+ BBOX_PATTERN = r"!\[image\]\((image_\d+\.png)\)\s*(\d+),(\d+),(\d+),(\d+)"
195
 
196
 
197
  def parse_bbox_output(text):
 
199
  detections = []
200
  for match in re.finditer(BBOX_PATTERN, text):
201
  image_ref, x1, y1, x2, y2 = match.groups()
202
+ detections.append(
203
+ {"ref": image_ref, "coords": (int(x1), int(y1), int(x2), int(y2))}
204
+ )
 
205
  # Clean text: remove coordinates, keep markdown image refs
206
+ cleaned = re.sub(BBOX_PATTERN, r"![image](\1)", text)
207
  return cleaned, detections
208
 
209
 
 
233
  return f"data:image/png;base64,{b64}"
234
 
235
 
236
def extract_text_via_vllm(image, model_name, temperature=0.2, stream=False):
    """Extract text from an image by querying a remote vLLM endpoint.

    Generator that yields cleaned OCR text. In streaming mode it yields the
    progressively accumulated text after each content-bearing chunk;
    otherwise it yields the full response exactly once.

    Args:
        image: PIL image to OCR, or a string assumed to already be a
            data URI / URL.
        model_name: Key into MODEL_REGISTRY; the entry must have a
            "vllm_endpoint" configured.
        temperature: Sampling temperature; values <= 0 are clamped to 0.0.
        stream: Whether to stream partial results.

    Raises:
        ValueError: If the model is unknown or has no vLLM endpoint.

    Yields:
        str: Extracted text, cleaned via clean_output_text.
    """
    config = MODEL_REGISTRY.get(model_name)
    if config is None:
        raise ValueError(f"Unknown model: {model_name}")

    endpoint = config.get("vllm_endpoint")
    if endpoint is None:
        raise ValueError(f"Model {model_name} does not have a vLLM endpoint")

    model_id = config["model_id"]

    # Convert image to a base64 data URI; pass strings through unchanged
    # (assumed to already be a data URI or URL).
    if isinstance(image, Image.Image):
        image_uri = image_to_data_uri(image)
    else:
        image_uri = image

    # OpenAI-compatible client pointing at the vLLM server; vLLM does not
    # check the API key, but the client requires a non-empty value.
    client = OpenAI(base_url=endpoint, api_key="not-needed")

    # Single user message carrying only the image (no text prompt needed
    # for an OCR model).
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_uri}},
            ],
        }
    ]

    # Sampling parameters shared by both modes (previously duplicated in
    # each branch). Negative temperatures are clamped to greedy decoding.
    request_kwargs = dict(
        model=model_id,
        messages=messages,
        max_tokens=2048,
        temperature=max(temperature, 0.0),
        top_p=0.9,
    )

    if stream:
        response = client.chat.completions.create(stream=True, **request_kwargs)
        full_text = ""
        for chunk in response:
            # Some chunks (e.g. role-only or final chunks) carry no delta
            # content; skip them so we don't re-yield identical text.
            if chunk.choices and chunk.choices[0].delta.content:
                full_text += chunk.choices[0].delta.content
                yield clean_output_text(full_text)
    else:
        response = client.chat.completions.create(stream=False, **request_kwargs)
        # message.content can be None (e.g. empty completion) — avoid
        # passing None into clean_output_text, which would crash.
        output_text = response.choices[0].message.content or ""
        yield clean_output_text(output_text)
300
+
301
  def render_bbox_with_crops(raw_output, source_image):
302
  """Replace markdown image placeholders with actual cropped images."""
303
  cleaned, detections = parse_bbox_output(raw_output)
 
308
  data_uri = image_to_data_uri(cropped)
309
  # Replace ![image](image_N.png) with ![Cropped](data:...)
310
  cleaned = cleaned.replace(
311
+ f"![image]({bbox['ref']})", f"![Cropped region]({data_uri})"
 
312
  )
313
  except Exception as e:
314
  print(f"Error cropping bbox {bbox}: {e}")
 
321
  @spaces.GPU
322
  def extract_text_from_image(image, model_name, temperature=0.2, stream=False):
323
  """Extract text from image using LightOnOCR model."""
324
+ # Check if model has a vLLM endpoint configured
325
+ config = MODEL_REGISTRY.get(model_name, {})
326
+ if config.get("vllm_endpoint"):
327
+ # Use vLLM endpoint instead of local model
328
+ yield from extract_text_via_vllm(image, model_name, temperature, stream)
329
+ return
330
+
331
  # Get model and processor from cache or load
332
  model, processor = model_manager.get_model(model_name)
333
 
 
347
  add_generation_prompt=True,
348
  tokenize=True,
349
  return_dict=True,
350
+ return_tensors="pt",
351
  )
352
 
353
  # Move inputs to device AND convert to the correct dtype
354
  inputs = {
355
+ k: v.to(device=device, dtype=dtype)
356
+ if isinstance(v, torch.Tensor)
357
+ and v.dtype in [torch.float32, torch.float16, torch.bfloat16]
358
+ else v.to(device)
359
+ if isinstance(v, torch.Tensor)
360
  else v
361
  for k, v in inputs.items()
362
  }
 
374
  if stream:
375
  # Setup streamer for streaming generation
376
  streamer = TextIteratorStreamer(
377
+ processor.tokenizer, skip_prompt=True, skip_special_tokens=True
 
 
378
  )
379
  generation_kwargs["streamer"] = streamer
380
 
 
417
  file_path = file_input if isinstance(file_input, str) else file_input.name
418
 
419
  # Handle PDF files
420
+ if file_path.lower().endswith(".pdf"):
421
  try:
422
+ image_to_process, total_pages, actual_page = process_pdf(
423
+ file_path, int(page_num)
424
+ )
425
  page_info = f"Processing page {actual_page} of {total_pages}"
426
  except Exception as e:
427
  yield f"Error processing PDF: {str(e)}", "", "", None, gr.update()
 
441
 
442
  try:
443
  # Extract text using LightOnOCR with optional streaming
444
+ for extracted_text in extract_text_from_image(
445
+ image_to_process, model_name, temperature, stream=enable_streaming
446
+ ):
447
  # For bbox models, render cropped images inline
448
  if has_bbox:
449
  rendered_text = render_bbox_with_crops(extracted_text, image_to_process)
450
  else:
451
  rendered_text = extracted_text
452
+ yield (
453
+ rendered_text,
454
+ extracted_text,
455
+ page_info,
456
+ image_to_process,
457
+ gr.update(),
458
+ )
459
 
460
  except Exception as e:
461
  error_msg = f"Error during text extraction: {str(e)}"
 
466
  """Update page slider based on PDF page count."""
467
  if file_input is None:
468
  return gr.update(maximum=20, value=1)
469
+
470
  file_path = file_input if isinstance(file_input, str) else file_input.name
471
+
472
+ if file_path.lower().endswith(".pdf"):
473
  try:
474
  pdf = pdfium.PdfDocument(file_path)
475
  total_pages = len(pdf)
 
485
def get_model_info_text(model_name):
    """Return formatted model info string."""
    info = MODEL_REGISTRY.get(model_name, {})
    # Describe bbox support in plain language for the UI panel.
    if info.get("has_bbox", False):
        has_bbox = "Yes - will show cropped regions inline"
    else:
        has_bbox = "No"
    return f"**Description:** {info.get('description', 'N/A')}\n**Bounding Box Detection:** {has_bbox}"
494
 
495
 
 
508
 
509
  **Device:** {device.upper()} | **Attention:** {attn_implementation}
510
  """)
511
+
512
  with gr.Row():
513
  with gr.Column(scale=1):
514
  model_selector = gr.Dropdown(
515
  choices=list(MODEL_REGISTRY.keys()),
516
  value=DEFAULT_MODEL,
517
  label="Model",
518
+ info="Select OCR model variant",
519
  )
520
  model_info = gr.Markdown(
521
+ value=get_model_info_text(DEFAULT_MODEL), label="Model Info"
 
522
  )
523
  file_input = gr.File(
524
  label="Upload Image or PDF",
525
  file_types=[".pdf", ".png", ".jpg", ".jpeg"],
526
+ type="filepath",
527
  )
528
  rendered_image = gr.Image(
529
+ label="Preview", type="pil", height=400, interactive=False
 
 
 
530
  )
531
  num_pages = gr.Slider(
532
  minimum=1,
 
534
  value=1,
535
  step=1,
536
  label="PDF: Page Number",
537
+ info="Select which page to extract",
 
 
 
 
 
538
  )
539
+ page_info = gr.Textbox(label="Processing Info", value="", interactive=False)
540
  temperature = gr.Slider(
541
  minimum=0.0,
542
  maximum=1.0,
543
  value=0.2,
544
  step=0.05,
545
  label="Temperature",
546
+ info="0.0 = deterministic, Higher = more varied",
547
  )
548
  enable_streaming = gr.Checkbox(
549
  label="Enable Streaming",
550
  value=True,
551
+ info="Show text progressively as it's generated",
552
  )
553
  submit_btn = gr.Button("Extract Text", variant="primary")
554
  clear_btn = gr.Button("Clear", variant="secondary")
555
+
556
  with gr.Column(scale=2):
557
  output_text = gr.Markdown(
558
  label="📄 Extracted Text (Rendered)",
559
+ value="*Extracted text will appear here...*",
560
  )
561
+
562
  with gr.Row():
563
  with gr.Column():
564
  raw_output = gr.Textbox(
565
  label="Raw Markdown Output",
566
  placeholder="Raw text will appear here...",
567
  lines=20,
568
+ max_lines=30,
569
  )
570
+
571
  # Event handlers
572
  submit_btn.click(
573
  fn=process_input,
574
  inputs=[file_input, model_selector, temperature, num_pages, enable_streaming],
575
+ outputs=[output_text, raw_output, page_info, rendered_image, num_pages],
576
  )
577
 
578
+ file_input.change(fn=update_slider, inputs=[file_input], outputs=[num_pages])
 
 
 
 
579
 
580
  model_selector.change(
581
+ fn=get_model_info_text, inputs=[model_selector], outputs=[model_info]
 
 
582
  )
583
 
584
  clear_btn.click(
585
+ fn=lambda: (
586
+ None,
587
+ DEFAULT_MODEL,
588
+ get_model_info_text(DEFAULT_MODEL),
589
+ "*Extracted text will appear here...*",
590
+ "",
591
+ "",
592
+ None,
593
+ 1,
594
+ ),
595
+ outputs=[
596
+ file_input,
597
+ model_selector,
598
+ model_info,
599
+ output_text,
600
+ raw_output,
601
+ page_info,
602
+ rendered_image,
603
+ num_pages,
604
+ ],
605
  )
606
 
607
 
608
  if __name__ == "__main__":
609
+ demo.launch(theme=gr.themes.Soft())