prithivMLmods committed on
Commit
083e7de
·
verified ·
1 Parent(s): 9ee38c4

update app

Browse files
Files changed (1) hide show
  1. app.py +62 -65
app.py CHANGED
@@ -82,7 +82,6 @@ model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
82
  torch_dtype=torch.float16
83
  ).to(device).eval()
84
 
85
-
86
  MODEL_MAP = {
87
  "Nanonets-OCR2-3B": (processor_v, model_v),
88
  "olmOCR-7B-0725": (processor_w, model_w),
@@ -90,6 +89,7 @@ MODEL_MAP = {
90
  "Aya-Vision-8B": (processor_a, model_a),
91
  "Qwen2-VL-OCR-2B": (processor_x, model_x),
92
  }
 
93
  MODEL_CHOICES = list(MODEL_MAP.keys())
94
 
95
  image_examples = [
@@ -97,7 +97,7 @@ image_examples = [
97
  {"query": "Run OCR on the image and ensure high accuracy.", "image": "examples/4.jpg", "model": "olmOCR-7B-0725"},
98
  {"query": "Conduct OCR on the image with exact text recognition.", "image": "examples/2.jpg", "model": "RolmOCR-7B"},
99
  {"query": "Perform precise OCR extraction on the image.", "image": "examples/1.jpg", "model": "Qwen2-VL-OCR-2B"},
100
- {"query": "Describe the image and read visible text in detail.", "image": "examples/3.jpg", "model": "Aya-Vision-8B"},
101
  ]
102
 
103
 
@@ -185,7 +185,7 @@ def calc_timeout_duration(model_name, text, image, max_new_tokens, temperature,
185
 
186
  @spaces.GPU(duration=calc_timeout_duration)
187
  def generate_image(model_name, text, image, max_new_tokens, temperature, top_p, top_k, repetition_penalty, gpu_timeout):
188
- if model_name not in MODEL_MAP:
189
  raise gr.Error("Please select a valid model.")
190
  if image is None:
191
  raise gr.Error("Please upload an image.")
@@ -239,35 +239,6 @@ def generate_image(model_name, text, image, max_new_tokens, temperature, top_p,
239
  torch.cuda.empty_cache()
240
 
241
 
242
- def b64_to_pil(b64_str):
243
- if not b64_str:
244
- return None
245
- try:
246
- if b64_str.startswith("data:image"):
247
- _, data = b64_str.split(",", 1)
248
- else:
249
- data = b64_str
250
- image_data = base64.b64decode(data)
251
- return Image.open(BytesIO(image_data)).convert("RGB")
252
- except Exception:
253
- return None
254
-
255
-
256
- def run_ocr(model_name, text, image_b64, max_new_tokens_v, temperature_v, top_p_v, top_k_v, repetition_penalty_v, gpu_timeout_v):
257
- image = b64_to_pil(image_b64)
258
- yield from generate_image(
259
- model_name=model_name,
260
- text=text,
261
- image=image,
262
- max_new_tokens=max_new_tokens_v,
263
- temperature=temperature_v,
264
- top_p=top_p_v,
265
- top_k=top_k_v,
266
- repetition_penalty=repetition_penalty_v,
267
- gpu_timeout=gpu_timeout_v,
268
- )
269
-
270
-
271
  def noop():
272
  return None
273
 
@@ -302,7 +273,7 @@ footer{display:none!important}
302
  }
303
  .app-header-left{display:flex;align-items:center;gap:12px}
304
  .app-logo{
305
- width:38px;height:38px;background:linear-gradient(135deg,#ADFF2F,#C9FF66,#DFFF93);
306
  border-radius:10px;display:flex;align-items:center;justify-content:center;
307
  box-shadow:0 4px 12px rgba(173,255,47,.28);
308
  }
@@ -313,9 +284,9 @@ footer{display:none!important}
313
  }
314
  .app-badge{
315
  font-size:11px;font-weight:600;padding:3px 10px;border-radius:20px;
316
- background:rgba(173,255,47,.10);color:#D6FFA0;border:1px solid rgba(173,255,47,.25);letter-spacing:.3px;
317
  }
318
- .app-badge.fast{background:rgba(173,255,47,.08);color:#C9FF66;border:1px solid rgba(173,255,47,.22)}
319
 
320
  .model-tabs-bar{
321
  background:#18181b;border-bottom:1px solid #27272a;padding:10px 16px;
@@ -327,9 +298,8 @@ footer{display:none!important}
327
  border-radius:999px;cursor:pointer;font-size:12px;font-weight:600;padding:0 12px;
328
  color:#ffffff!important;transition:all .15s ease;
329
  }
330
- .model-tab:hover{background:rgba(173,255,47,.12);border-color:rgba(173,255,47,.35)}
331
- .model-tab.active{background:rgba(173,255,47,.20);border-color:#ADFF2F;color:#111!important;box-shadow:0 0 0 2px rgba(173,255,47,.10)}
332
- .model-tab.active .model-tab-label{color:#111!important}
333
  .model-tab-label{font-size:12px;color:#ffffff!important;font-weight:600}
334
 
335
  .app-main-row{display:flex;gap:0;flex:1;overflow:hidden}
@@ -383,10 +353,10 @@ footer{display:none!important}
383
  .preview-action-btn:hover{background:#ADFF2F;border-color:#ADFF2F;color:#111!important}
384
 
385
  .hint-bar{
386
- background:rgba(173,255,47,.06);border-top:1px solid #27272a;border-bottom:1px solid #27272a;
387
  padding:10px 20px;font-size:13px;color:#a1a1aa;line-height:1.7;
388
  }
389
- .hint-bar b{color:#D6FFA0;font-weight:600}
390
  .hint-bar kbd{
391
  display:inline-block;padding:1px 6px;background:#27272a;border:1px solid #3f3f46;
392
  border-radius:4px;font-family:'JetBrains Mono',monospace;font-size:11px;color:#a1a1aa;
@@ -406,7 +376,7 @@ footer{display:none!important}
406
  flex-shrink:0;width:220px;background:#09090b;border:1px solid #27272a;
407
  border-radius:10px;overflow:hidden;cursor:pointer;transition:all .2s ease;
408
  }
409
- .example-card:hover{border-color:#ADFF2F;transform:translateY(-2px);box-shadow:0 4px 12px rgba(173,255,47,.14)}
410
  .example-card.loading{opacity:.5;pointer-events:none}
411
  .example-thumb-wrap{height:120px;overflow:hidden;background:#18181b}
412
  .example-thumb-wrap img{width:100%;height:100%;object-fit:cover}
@@ -417,7 +387,7 @@ footer{display:none!important}
417
  .example-meta-row{padding:6px 10px;display:flex;align-items:center;gap:6px}
418
  .example-badge{
419
  display:inline-flex;padding:2px 7px;background:rgba(173,255,47,.12);border-radius:4px;
420
- font-size:10px;font-weight:600;color:#D6FFA0;font-family:'JetBrains Mono',monospace;white-space:nowrap;
421
  }
422
  .example-prompt-text{
423
  padding:0 10px 8px;font-size:11px;color:#a1a1aa;line-height:1.4;
@@ -436,7 +406,7 @@ footer{display:none!important}
436
  padding:10px 14px;font-family:'Inter',sans-serif;font-size:14px;color:#e4e4e7;
437
  resize:none;outline:none;min-height:100px;transition:border-color .2s;
438
  }
439
- .modern-textarea:focus{border-color:#ADFF2F;box-shadow:0 0 0 3px rgba(173,255,47,.15)}
440
  .modern-textarea::placeholder{color:#3f3f46}
441
  .modern-textarea.error-flash{
442
  border-color:#ef4444!important;box-shadow:0 0 0 3px rgba(239,68,68,.2)!important;animation:shake .4s ease;
@@ -452,23 +422,23 @@ footer{display:none!important}
452
  }
453
  .toast-notification.visible{transform:translateX(-50%) translateY(0);opacity:1;pointer-events:auto}
454
  .toast-notification.error{background:linear-gradient(135deg,#dc2626,#b91c1c);color:#fff;border:1px solid rgba(255,255,255,.15)}
455
- .toast-notification.warning{background:linear-gradient(135deg,#d97706,#b45309);color:#fff;border:1px solid rgba(255,255,255,.15)}
456
- .toast-notification.info{background:linear-gradient(135deg,#65a30d,#4d7c0f);color:#fff;border:1px solid rgba(255,255,255,.15)}
457
  .toast-notification .toast-icon{font-size:16px;line-height:1}
458
  .toast-notification .toast-text{line-height:1.3}
459
 
460
  .btn-run{
461
  display:flex;align-items:center;justify-content:center;gap:8px;width:100%;
462
- background:linear-gradient(135deg,#ADFF2F,#95DE2B);border:none;border-radius:10px;
463
  padding:12px 24px;cursor:pointer;font-size:15px;font-weight:700;font-family:'Inter',sans-serif;
464
  color:#111!important;transition:all .2s ease;letter-spacing:-.2px;
465
- box-shadow:0 4px 16px rgba(173,255,47,.24),inset 0 1px 0 rgba(255,255,255,.15);
466
  }
467
  .btn-run:hover{
468
- background:linear-gradient(135deg,#C9FF66,#ADFF2F);transform:translateY(-1px);
469
- box-shadow:0 6px 24px rgba(173,255,47,.34),inset 0 1px 0 rgba(255,255,255,.2);
470
  }
471
- .btn-run:active{transform:translateY(0);box-shadow:0 2px 8px rgba(173,255,47,.24)}
472
  .btn-run svg{width:18px;height:18px;fill:#111!important}
473
  .btn-run svg path{fill:#111!important}
474
 
@@ -480,12 +450,12 @@ footer{display:none!important}
480
  }
481
  .out-title-right{display:flex;gap:8px;align-items:center}
482
  .out-action-btn{
483
- display:inline-flex;align-items:center;justify-content:center;background:rgba(173,255,47,.1);
484
  border:1px solid rgba(173,255,47,.2);border-radius:6px;cursor:pointer;padding:3px 10px;
485
- font-size:11px;font-weight:500;color:#D6FFA0!important;gap:4px;height:24px;transition:all .15s;
486
  }
487
  .out-action-btn:hover{background:rgba(173,255,47,.2);border-color:rgba(173,255,47,.35);color:#111!important}
488
- .out-action-btn svg{width:12px;height:12px;fill:#D6FFA0}
489
  .output-frame .out-body{
490
  flex:1;background:#09090b;display:flex;align-items:stretch;justify-content:stretch;
491
  overflow:hidden;min-height:320px;position:relative;
@@ -515,7 +485,7 @@ footer{display:none!important}
515
  .modern-loader .loader-text{font-size:13px;color:#a1a1aa;font-weight:500}
516
  .loader-bar-track{width:200px;height:4px;background:#27272a;border-radius:2px;overflow:hidden}
517
  .loader-bar-fill{
518
- height:100%;background:linear-gradient(90deg,#ADFF2F,#C9FF66,#ADFF2F);
519
  background-size:200% 100%;animation:shimmer 1.5s ease-in-out infinite;border-radius:2px;
520
  }
521
  @keyframes shimmer{0%{background-position:200% 0}100%{background-position:-200% 0}}
@@ -533,12 +503,12 @@ footer{display:none!important}
533
  border-radius:3px;outline:none;min-width:0;
534
  }
535
  .slider-row input[type="range"]::-webkit-slider-thumb{
536
- -webkit-appearance:none;width:16px;height:16px;background:linear-gradient(135deg,#ADFF2F,#95DE2B);
537
  border-radius:50%;cursor:pointer;box-shadow:0 2px 6px rgba(173,255,47,.35);transition:transform .15s;
538
  }
539
  .slider-row input[type="range"]::-webkit-slider-thumb:hover{transform:scale(1.2)}
540
  .slider-row input[type="range"]::-moz-range-thumb{
541
- width:16px;height:16px;background:linear-gradient(135deg,#ADFF2F,#95DE2B);
542
  border-radius:50%;cursor:pointer;border:none;box-shadow:0 2px 6px rgba(173,255,47,.35);
543
  }
544
  .slider-row .slider-val{
@@ -557,11 +527,11 @@ footer{display:none!important}
557
  }
558
  .app-statusbar .sb-section.sb-fixed{
559
  flex:0 0 auto;min-width:110px;text-align:center;justify-content:center;
560
- padding:3px 12px;background:rgba(173,255,47,.08);border-radius:6px;color:#D6FFA0;font-weight:500;
561
  }
562
 
563
  .exp-note{padding:10px 20px;font-size:12px;color:#52525b;border-top:1px solid #27272a;text-align:center}
564
- .exp-note a{color:#D6FFA0;text-decoration:none}
565
  .exp-note a:hover{text-decoration:underline}
566
 
567
  ::-webkit-scrollbar{width:8px;height:8px}
@@ -579,7 +549,7 @@ footer{display:none!important}
579
  gallery_js = r"""
580
  () => {
581
  function init() {
582
- if (window.__ocrGreenInitDone) return;
583
 
584
  const dropZone = document.getElementById('image-drop-zone');
585
  const uploadPrompt = document.getElementById('upload-prompt');
@@ -600,7 +570,7 @@ function init() {
600
  return;
601
  }
602
 
603
- window.__ocrGreenInitDone = true;
604
  let imageState = null;
605
  let toastTimer = null;
606
 
@@ -887,11 +857,11 @@ function init() {
887
  if (!exampleResultContainer) return;
888
  const el = exampleResultContainer.querySelector('textarea') || exampleResultContainer.querySelector('input');
889
  if (!el || !el.value) return;
890
- if (window.__lastExampleVal === el.value) return;
891
  try {
892
  const data = JSON.parse(el.value);
893
  if (data.status === 'ok') {
894
- window.__lastExampleVal = el.value;
895
  if (data.image) setPreview(data.image, data.name || 'example.jpg');
896
  if (data.query) {
897
  promptInput.value = data.query;
@@ -965,7 +935,7 @@ UPLOAD_PREVIEW_SVG = """
965
  <svg viewBox="0 0 80 80" fill="none" xmlns="http://www.w3.org/2000/svg">
966
  <rect x="8" y="14" width="64" height="52" rx="6" fill="none" stroke="#ADFF2F" stroke-width="2" stroke-dasharray="4 3"/>
967
  <polygon points="12,62 30,40 42,50 54,34 68,62" fill="rgba(173,255,47,0.15)" stroke="#ADFF2F" stroke-width="1.5"/>
968
- <circle cx="28" cy="30" r="6" fill="rgba(173,255,47,0.2)" stroke="#ADFF2F" stroke-width="1.5"/>
969
  </svg>
970
  """
971
 
@@ -1054,7 +1024,7 @@ with gr.Blocks() as demo:
1054
  <div class="panel-card-title">OCR / Vision Instruction</div>
1055
  <div class="panel-card-body">
1056
  <label class="modern-label" for="custom-query-input">Query Input</label>
1057
- <textarea id="custom-query-input" class="modern-textarea" rows="4" placeholder="e.g., perform OCR on the image precisely, extract all text, read the document, summarize visual content, describe the image with text..."></textarea>
1058
  </div>
1059
  </div>
1060
 
@@ -1124,7 +1094,7 @@ with gr.Blocks() as demo:
1124
  </div>
1125
 
1126
  <div class="exp-note">
1127
- Experimental OCR Suite &middot; Open on <a href="https://github.com/PRITHIVSAKTHIUR/Multimodal-OCR" target="_blank">GitHub</a>
1128
  </div>
1129
 
1130
  <div class="app-statusbar">
@@ -1136,6 +1106,33 @@ with gr.Blocks() as demo:
1136
 
1137
  run_btn = gr.Button("Run", elem_id="gradio-run-btn")
1138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1139
  demo.load(fn=noop, inputs=None, outputs=None, js=gallery_js)
1140
  demo.load(fn=noop, inputs=None, outputs=None, js=wire_outputs_js)
1141
 
 
82
  torch_dtype=torch.float16
83
  ).to(device).eval()
84
 
 
85
  MODEL_MAP = {
86
  "Nanonets-OCR2-3B": (processor_v, model_v),
87
  "olmOCR-7B-0725": (processor_w, model_w),
 
89
  "Aya-Vision-8B": (processor_a, model_a),
90
  "Qwen2-VL-OCR-2B": (processor_x, model_x),
91
  }
92
+
93
  MODEL_CHOICES = list(MODEL_MAP.keys())
94
 
95
  image_examples = [
 
97
  {"query": "Run OCR on the image and ensure high accuracy.", "image": "examples/4.jpg", "model": "olmOCR-7B-0725"},
98
  {"query": "Conduct OCR on the image with exact text recognition.", "image": "examples/2.jpg", "model": "RolmOCR-7B"},
99
  {"query": "Perform precise OCR extraction on the image.", "image": "examples/1.jpg", "model": "Qwen2-VL-OCR-2B"},
100
+ {"query": "Describe the visual content and extract visible text from the image.", "image": "examples/3.jpg", "model": "Aya-Vision-8B"},
101
  ]
102
 
103
 
 
185
 
186
  @spaces.GPU(duration=calc_timeout_duration)
187
  def generate_image(model_name, text, image, max_new_tokens, temperature, top_p, top_k, repetition_penalty, gpu_timeout):
188
+ if not model_name or model_name not in MODEL_MAP:
189
  raise gr.Error("Please select a valid model.")
190
  if image is None:
191
  raise gr.Error("Please upload an image.")
 
239
  torch.cuda.empty_cache()
240
 
241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  def noop():
243
  return None
244
 
 
273
  }
274
  .app-header-left{display:flex;align-items:center;gap:12px}
275
  .app-logo{
276
+ width:38px;height:38px;background:linear-gradient(135deg,#ADFF2F,#C6FF66,#D8FF8A);
277
  border-radius:10px;display:flex;align-items:center;justify-content:center;
278
  box-shadow:0 4px 12px rgba(173,255,47,.28);
279
  }
 
284
  }
285
  .app-badge{
286
  font-size:11px;font-weight:600;padding:3px 10px;border-radius:20px;
287
+ background:rgba(173,255,47,.12);color:#D6FF8C;border:1px solid rgba(173,255,47,.28);letter-spacing:.3px;
288
  }
289
+ .app-badge.fast{background:rgba(173,255,47,.08);color:#C6FF66;border:1px solid rgba(173,255,47,.22)}
290
 
291
  .model-tabs-bar{
292
  background:#18181b;border-bottom:1px solid #27272a;padding:10px 16px;
 
298
  border-radius:999px;cursor:pointer;font-size:12px;font-weight:600;padding:0 12px;
299
  color:#ffffff!important;transition:all .15s ease;
300
  }
301
+ .model-tab:hover{background:rgba(173,255,47,.10);border-color:rgba(173,255,47,.35)}
302
+ .model-tab.active{background:rgba(173,255,47,.18);border-color:#ADFF2F;color:#fff!important;box-shadow:0 0 0 2px rgba(173,255,47,.08)}
 
303
  .model-tab-label{font-size:12px;color:#ffffff!important;font-weight:600}
304
 
305
  .app-main-row{display:flex;gap:0;flex:1;overflow:hidden}
 
353
  .preview-action-btn:hover{background:#ADFF2F;border-color:#ADFF2F;color:#111!important}
354
 
355
  .hint-bar{
356
+ background:rgba(173,255,47,.05);border-top:1px solid #27272a;border-bottom:1px solid #27272a;
357
  padding:10px 20px;font-size:13px;color:#a1a1aa;line-height:1.7;
358
  }
359
+ .hint-bar b{color:#D6FF8C;font-weight:600}
360
  .hint-bar kbd{
361
  display:inline-block;padding:1px 6px;background:#27272a;border:1px solid #3f3f46;
362
  border-radius:4px;font-family:'JetBrains Mono',monospace;font-size:11px;color:#a1a1aa;
 
376
  flex-shrink:0;width:220px;background:#09090b;border:1px solid #27272a;
377
  border-radius:10px;overflow:hidden;cursor:pointer;transition:all .2s ease;
378
  }
379
+ .example-card:hover{border-color:#ADFF2F;transform:translateY(-2px);box-shadow:0 4px 12px rgba(173,255,47,.12)}
380
  .example-card.loading{opacity:.5;pointer-events:none}
381
  .example-thumb-wrap{height:120px;overflow:hidden;background:#18181b}
382
  .example-thumb-wrap img{width:100%;height:100%;object-fit:cover}
 
387
  .example-meta-row{padding:6px 10px;display:flex;align-items:center;gap:6px}
388
  .example-badge{
389
  display:inline-flex;padding:2px 7px;background:rgba(173,255,47,.12);border-radius:4px;
390
+ font-size:10px;font-weight:600;color:#D6FF8C;font-family:'JetBrains Mono',monospace;white-space:nowrap;
391
  }
392
  .example-prompt-text{
393
  padding:0 10px 8px;font-size:11px;color:#a1a1aa;line-height:1.4;
 
406
  padding:10px 14px;font-family:'Inter',sans-serif;font-size:14px;color:#e4e4e7;
407
  resize:none;outline:none;min-height:100px;transition:border-color .2s;
408
  }
409
+ .modern-textarea:focus{border-color:#ADFF2F;box-shadow:0 0 0 3px rgba(173,255,47,.14)}
410
  .modern-textarea::placeholder{color:#3f3f46}
411
  .modern-textarea.error-flash{
412
  border-color:#ef4444!important;box-shadow:0 0 0 3px rgba(239,68,68,.2)!important;animation:shake .4s ease;
 
422
  }
423
  .toast-notification.visible{transform:translateX(-50%) translateY(0);opacity:1;pointer-events:auto}
424
  .toast-notification.error{background:linear-gradient(135deg,#dc2626,#b91c1c);color:#fff;border:1px solid rgba(255,255,255,.15)}
425
+ .toast-notification.warning{background:linear-gradient(135deg,#84cc16,#65a30d);color:#111;border:1px solid rgba(255,255,255,.08)}
426
+ .toast-notification.info{background:linear-gradient(135deg,#a3e635,#84cc16);color:#111;border:1px solid rgba(255,255,255,.08)}
427
  .toast-notification .toast-icon{font-size:16px;line-height:1}
428
  .toast-notification .toast-text{line-height:1.3}
429
 
430
  .btn-run{
431
  display:flex;align-items:center;justify-content:center;gap:8px;width:100%;
432
+ background:linear-gradient(135deg,#ADFF2F,#8FD61F);border:none;border-radius:10px;
433
  padding:12px 24px;cursor:pointer;font-size:15px;font-weight:700;font-family:'Inter',sans-serif;
434
  color:#111!important;transition:all .2s ease;letter-spacing:-.2px;
435
+ box-shadow:0 4px 16px rgba(173,255,47,.25),inset 0 1px 0 rgba(255,255,255,.25);
436
  }
437
  .btn-run:hover{
438
+ background:linear-gradient(135deg,#C6FF66,#ADFF2F);transform:translateY(-1px);
439
+ box-shadow:0 6px 24px rgba(173,255,47,.35),inset 0 1px 0 rgba(255,255,255,.25);
440
  }
441
+ .btn-run:active{transform:translateY(0);box-shadow:0 2px 8px rgba(173,255,47,.25)}
442
  .btn-run svg{width:18px;height:18px;fill:#111!important}
443
  .btn-run svg path{fill:#111!important}
444
 
 
450
  }
451
  .out-title-right{display:flex;gap:8px;align-items:center}
452
  .out-action-btn{
453
+ display:inline-flex;align-items:center;justify-content:center;background:rgba(173,255,47,.10);
454
  border:1px solid rgba(173,255,47,.2);border-radius:6px;cursor:pointer;padding:3px 10px;
455
+ font-size:11px;font-weight:500;color:#D6FF8C!important;gap:4px;height:24px;transition:all .15s;
456
  }
457
  .out-action-btn:hover{background:rgba(173,255,47,.2);border-color:rgba(173,255,47,.35);color:#111!important}
458
+ .out-action-btn svg{width:12px;height:12px;fill:#D6FF8C}
459
  .output-frame .out-body{
460
  flex:1;background:#09090b;display:flex;align-items:stretch;justify-content:stretch;
461
  overflow:hidden;min-height:320px;position:relative;
 
485
  .modern-loader .loader-text{font-size:13px;color:#a1a1aa;font-weight:500}
486
  .loader-bar-track{width:200px;height:4px;background:#27272a;border-radius:2px;overflow:hidden}
487
  .loader-bar-fill{
488
+ height:100%;background:linear-gradient(90deg,#ADFF2F,#C6FF66,#ADFF2F);
489
  background-size:200% 100%;animation:shimmer 1.5s ease-in-out infinite;border-radius:2px;
490
  }
491
  @keyframes shimmer{0%{background-position:200% 0}100%{background-position:-200% 0}}
 
503
  border-radius:3px;outline:none;min-width:0;
504
  }
505
  .slider-row input[type="range"]::-webkit-slider-thumb{
506
+ -webkit-appearance:none;width:16px;height:16px;background:linear-gradient(135deg,#ADFF2F,#8FD61F);
507
  border-radius:50%;cursor:pointer;box-shadow:0 2px 6px rgba(173,255,47,.35);transition:transform .15s;
508
  }
509
  .slider-row input[type="range"]::-webkit-slider-thumb:hover{transform:scale(1.2)}
510
  .slider-row input[type="range"]::-moz-range-thumb{
511
+ width:16px;height:16px;background:linear-gradient(135deg,#ADFF2F,#8FD61F);
512
  border-radius:50%;cursor:pointer;border:none;box-shadow:0 2px 6px rgba(173,255,47,.35);
513
  }
514
  .slider-row .slider-val{
 
527
  }
528
  .app-statusbar .sb-section.sb-fixed{
529
  flex:0 0 auto;min-width:110px;text-align:center;justify-content:center;
530
+ padding:3px 12px;background:rgba(173,255,47,.08);border-radius:6px;color:#D6FF8C;font-weight:500;
531
  }
532
 
533
  .exp-note{padding:10px 20px;font-size:12px;color:#52525b;border-top:1px solid #27272a;text-align:center}
534
+ .exp-note a{color:#D6FF8C;text-decoration:none}
535
  .exp-note a:hover{text-decoration:underline}
536
 
537
  ::-webkit-scrollbar{width:8px;height:8px}
 
549
  gallery_js = r"""
550
  () => {
551
  function init() {
552
+ if (window.__ocr2GreenInitDone) return;
553
 
554
  const dropZone = document.getElementById('image-drop-zone');
555
  const uploadPrompt = document.getElementById('upload-prompt');
 
570
  return;
571
  }
572
 
573
+ window.__ocr2GreenInitDone = true;
574
  let imageState = null;
575
  let toastTimer = null;
576
 
 
857
  if (!exampleResultContainer) return;
858
  const el = exampleResultContainer.querySelector('textarea') || exampleResultContainer.querySelector('input');
859
  if (!el || !el.value) return;
860
+ if (window.__lastExampleVal2 === el.value) return;
861
  try {
862
  const data = JSON.parse(el.value);
863
  if (data.status === 'ok') {
864
+ window.__lastExampleVal2 = el.value;
865
  if (data.image) setPreview(data.image, data.name || 'example.jpg');
866
  if (data.query) {
867
  promptInput.value = data.query;
 
935
  <svg viewBox="0 0 80 80" fill="none" xmlns="http://www.w3.org/2000/svg">
936
  <rect x="8" y="14" width="64" height="52" rx="6" fill="none" stroke="#ADFF2F" stroke-width="2" stroke-dasharray="4 3"/>
937
  <polygon points="12,62 30,40 42,50 54,34 68,62" fill="rgba(173,255,47,0.15)" stroke="#ADFF2F" stroke-width="1.5"/>
938
+ <circle cx="28" cy="30" r="6" fill="rgba(173,255,47,0.22)" stroke="#ADFF2F" stroke-width="1.5"/>
939
  </svg>
940
  """
941
 
 
1024
  <div class="panel-card-title">OCR / Vision Instruction</div>
1025
  <div class="panel-card-body">
1026
  <label class="modern-label" for="custom-query-input">Query Input</label>
1027
+ <textarea id="custom-query-input" class="modern-textarea" rows="4" placeholder="e.g., perform OCR on the image precisely, extract all text, describe the visual scene, summarize visible content..."></textarea>
1028
  </div>
1029
  </div>
1030
 
 
1094
  </div>
1095
 
1096
  <div class="exp-note">
1097
+ Experimental OCR Suite &middot; Open on <a href="https://github.com/PRITHIVSAKTHIUR/Multimodal-OCR2" target="_blank">GitHub</a>
1098
  </div>
1099
 
1100
  <div class="app-statusbar">
 
1106
 
1107
  run_btn = gr.Button("Run", elem_id="gradio-run-btn")
1108
 
1109
+ def b64_to_pil(b64_str):
1110
+ if not b64_str:
1111
+ return None
1112
+ try:
1113
+ if b64_str.startswith("data:image"):
1114
+ _, data = b64_str.split(",", 1)
1115
+ else:
1116
+ data = b64_str
1117
+ image_data = base64.b64decode(data)
1118
+ return Image.open(BytesIO(image_data)).convert("RGB")
1119
+ except Exception:
1120
+ return None
1121
+
1122
+ def run_ocr(model_name, text, image_b64, max_new_tokens_v, temperature_v, top_p_v, top_k_v, repetition_penalty_v, gpu_timeout_v):
1123
+ image = b64_to_pil(image_b64)
1124
+ yield from generate_image(
1125
+ model_name=model_name,
1126
+ text=text,
1127
+ image=image,
1128
+ max_new_tokens=max_new_tokens_v,
1129
+ temperature=temperature_v,
1130
+ top_p=top_p_v,
1131
+ top_k=top_k_v,
1132
+ repetition_penalty=repetition_penalty_v,
1133
+ gpu_timeout=gpu_timeout_v,
1134
+ )
1135
+
1136
  demo.load(fn=noop, inputs=None, outputs=None, js=gallery_js)
1137
  demo.load(fn=noop, inputs=None, outputs=None, js=wire_outputs_js)
1138