prithivMLmods committed on
Commit
ad26ac1
·
verified ·
1 Parent(s): 6a21b3f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +671 -554
app.py CHANGED
@@ -5,732 +5,849 @@ import os
5
  import tempfile
6
  from PIL import Image, ImageOps
7
  from transformers import AutoProcessor, AutoModelForImageTextToText
 
 
8
 
9
- # Model configuration
10
  MODEL_PATH = "zai-org/GLM-OCR"
11
 
12
  # Load model and processor
13
  processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
14
  model = AutoModelForImageTextToText.from_pretrained(
15
  pretrained_model_name_or_path=MODEL_PATH,
16
- torch_dtype="auto",
17
  device_map="auto",
18
  trust_remote_code=True
19
  )
20
 
21
- # Task prompts
22
  TASK_PROMPTS = {
23
  "Text": "Text Recognition:",
24
  "Formula": "Formula Recognition:",
25
- "Table": "Table Recognition:"
26
  }
27
 
28
- # Custom CSS with browser-inspired dark theme
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  css = """
30
- /* Reset and base styles */
31
- * {
32
- box-sizing: border-box;
 
 
 
 
 
 
33
  }
34
 
35
- .gradio-container {
36
- max-width: 1000px !important;
37
- margin: auto !important;
38
- padding: 0 !important;
39
- background: transparent !important;
40
  }
41
 
42
- body {
43
- background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%) !important;
44
  min-height: 100vh;
45
  }
46
 
47
- /* Browser window container */
48
- .browser {
49
- width: 100%;
50
- background: #2d2d2d;
51
- border-radius: 12px;
52
- display: flex;
53
- flex-direction: column;
54
- overflow: hidden;
55
- box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.5);
56
- margin: 20px 0;
57
  }
58
 
59
- /* Tabs head */
60
- .tabs-head {
61
- background-color: #353535;
62
- height: 44px;
63
- display: flex;
64
- justify-content: space-between;
65
- align-items: flex-end;
66
- padding-left: 20px;
67
- user-select: none;
68
  }
69
 
70
- .tabs-head .tab-open {
71
- width: 140px;
72
- height: 36px;
73
- border-radius: 10px 10px 0 0;
74
- background-color: #515151;
75
- display: flex;
76
- gap: 8px;
77
- align-items: center;
78
- justify-content: center;
79
- padding: 6px 12px;
80
- position: relative;
81
- }
82
-
83
- .tabs-head .tab-open .close-tab {
84
- color: #aaa;
85
- font-size: 10px;
86
- padding: 2px 5px;
87
- border-radius: 50%;
88
- cursor: pointer;
89
- transition: 0.2s;
90
- position: absolute;
91
- right: 8px;
92
  }
93
 
94
- .tabs-head .tab-open .close-tab:hover {
95
- background-color: #666;
96
- color: #fff;
 
 
97
  }
98
 
99
- .tabs-head .tab-open .rounded-l {
100
- position: absolute;
101
- background-color: #515151;
102
- width: 20px;
103
- height: 26px;
104
- top: 10px;
105
- right: -20px;
106
- overflow: hidden;
107
  }
108
 
109
- .tabs-head .tab-open .rounded-l .mask-round {
110
- width: 100%;
111
- height: 100%;
112
- background-color: #353535;
113
- border-radius: 0 0 0 10px;
 
 
 
 
 
 
 
 
 
 
 
 
114
  }
115
 
116
- .tabs-head .tab-open .rounded-r {
117
- position: absolute;
118
- background-color: #515151;
119
- width: 20px;
120
- height: 26px;
121
- top: 10px;
122
- left: -20px;
123
- overflow: hidden;
124
  }
125
 
126
- .tabs-head .tab-open .rounded-r .mask-round {
127
- width: 100%;
128
- height: 100%;
129
- background-color: #353535;
130
- border-radius: 0 0 10px 0;
 
 
131
  }
132
 
133
- .tabs-head .tab-open .tab-icon {
134
- font-size: 14px;
135
  }
136
 
137
- .tabs-head .tab-open span {
138
- color: #fff;
139
- font-size: 12px;
140
- font-weight: 500;
 
141
  }
142
 
143
- .tabs-head .window-opt {
144
  display: flex;
145
- padding-right: 10px;
146
- padding-bottom: 14px;
147
- gap: 8px;
148
  }
149
 
150
- .tabs-head .window-opt .dot {
151
- height: 13px;
152
- width: 13px;
153
- border-radius: 50%;
 
 
 
 
 
 
 
154
  cursor: pointer;
155
- transition: 0.15s;
156
  }
157
 
158
- .tabs-head .window-opt .dot:hover {
159
- filter: brightness(0.8);
160
  }
161
 
162
- .tabs-head .window-opt .dot.red { background-color: #ff5f56; }
163
- .tabs-head .window-opt .dot.yellow { background-color: #ffbd2e; }
164
- .tabs-head .window-opt .dot.green { background-color: #27ca40; }
 
 
165
 
166
- /* Head browser / address bar */
167
- .head-browser {
168
  width: 100%;
169
- height: 48px;
170
- background-color: #515151;
171
- padding: 8px 12px;
172
- display: flex;
173
- align-items: center;
174
- gap: 8px;
175
- }
176
-
177
- .head-browser .nav-btn {
178
- width: 30px;
179
- height: 30px;
180
- border: none;
181
- background-color: transparent;
182
- color: #aaa;
183
- border-radius: 50%;
184
- transition: 0.2s;
185
  cursor: pointer;
186
- font-size: 16px;
187
- display: flex;
188
- align-items: center;
189
- justify-content: center;
190
  }
191
 
192
- .head-browser .nav-btn:hover {
193
- background-color: #666;
194
- color: #fff;
195
  }
196
 
197
- .head-browser .nav-btn:disabled {
198
- opacity: 0.4;
199
- cursor: not-allowed;
200
  }
201
 
202
- .head-browser .nav-btn:disabled:hover {
203
- background-color: transparent;
204
- color: #aaa;
 
 
 
 
 
205
  }
206
 
207
- .head-browser .address-bar {
208
- background-color: #3b3b3b;
209
- border: 2px solid transparent;
210
- height: 32px;
211
- border-radius: 20px;
212
- outline: none;
213
- color: #fff;
214
- padding: 0 16px;
215
- flex: 1;
216
- font-size: 13px;
217
- transition: 0.2s;
218
- display: flex;
219
- align-items: center;
220
- gap: 8px;
221
  }
222
 
223
- .head-browser .address-bar:hover {
224
- background-color: #484848;
 
 
 
225
  }
226
 
227
- .head-browser .address-bar .lock {
228
- color: #27ca40;
229
- font-size: 12px;
230
  }
231
 
232
- .head-browser .address-bar .url {
233
- color: #ccc;
234
- font-size: 13px;
 
 
 
 
235
  }
236
 
237
- .head-browser .star {
238
- color: #888;
239
- font-size: 18px;
 
 
 
240
  }
241
 
242
- /* Browser content */
243
- .browser-content {
244
- background: linear-gradient(180deg, #1e1e1e 0%, #252525 100%);
245
- padding: 30px;
246
- min-height: 500px;
247
  }
248
 
249
- /* Title section */
250
- .title-section {
251
  text-align: center;
252
- margin-bottom: 30px;
 
 
 
 
 
 
253
  }
254
 
255
- .title-section h1 {
256
- font-size: 32px;
257
- font-weight: 700;
258
- background: linear-gradient(135deg, #667eea 0%, #764ba2 50%, #f093fb 100%);
259
- -webkit-background-clip: text;
260
- -webkit-text-fill-color: transparent;
261
- background-clip: text;
262
- margin: 0;
263
- letter-spacing: -0.5px;
264
  }
265
 
266
- .title-section .subtitle {
267
- color: #888;
268
- font-size: 14px;
269
- margin-top: 8px;
270
- display: flex;
271
- align-items: center;
272
- justify-content: center;
273
- gap: 8px;
274
  }
275
 
276
- .status-dot {
277
- width: 8px;
278
- height: 8px;
279
- border-radius: 50%;
280
- background: #27ca40;
281
- animation: pulse 2s infinite;
282
  }
283
 
284
- @keyframes pulse {
285
- 0%, 100% { opacity: 1; }
286
- 50% { opacity: 0.5; }
 
 
287
  }
288
 
289
- /* Main content grid */
290
- .main-grid {
291
- display: grid;
292
- grid-template-columns: 1fr 1fr;
293
- gap: 25px;
294
  }
 
295
 
296
- /* Card styling */
297
- .card {
298
- background: #2a2a2a;
299
- border-radius: 16px;
300
- padding: 20px;
301
- border: 1px solid #3a3a3a;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
  }
303
 
304
- .card-title {
305
- color: #fff;
306
- font-size: 14px;
307
- font-weight: 600;
308
- margin-bottom: 15px;
309
- display: flex;
310
- align-items: center;
311
- gap: 8px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  }
313
 
314
- /* Upload area */
315
- .upload-zone {
316
- border: 2px dashed #4a4a4a !important;
317
- border-radius: 12px !important;
318
- background: #222 !important;
319
- transition: all 0.3s !important;
320
- min-height: 200px !important;
321
  }
322
 
323
- .upload-zone:hover {
324
- border-color: #667eea !important;
325
- background: #2a2a2a !important;
326
  }
327
 
328
- /* Task buttons */
329
- .task-buttons {
330
- display: flex;
331
- gap: 8px;
332
- margin-bottom: 20px;
333
  }
334
 
335
- .task-btn {
336
- flex: 1;
337
- padding: 12px 16px !important;
338
- border-radius: 10px !important;
339
- border: 2px solid #3a3a3a !important;
340
- background: #2a2a2a !important;
341
- color: #aaa !important;
342
- font-size: 13px !important;
343
- font-weight: 500 !important;
344
- cursor: pointer !important;
345
- transition: all 0.2s !important;
346
  }
347
 
348
- .task-btn:hover {
349
- border-color: #667eea !important;
350
- color: #fff !important;
 
 
 
351
  }
352
 
353
- .task-btn.selected {
354
- border-color: #667eea !important;
355
- background: linear-gradient(135deg, rgba(102, 126, 234, 0.2), rgba(118, 75, 162, 0.2)) !important;
356
- color: #fff !important;
 
357
  }
358
 
359
- /* Primary button */
360
- .primary-btn {
361
- width: 100%;
362
- padding: 14px 24px !important;
363
- border-radius: 12px !important;
364
- border: none !important;
365
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
366
- color: #fff !important;
367
- font-size: 15px !important;
368
- font-weight: 600 !important;
369
- cursor: pointer !important;
370
- transition: all 0.3s !important;
371
- box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
372
  }
373
 
374
- .primary-btn:hover {
375
- transform: translateY(-2px) !important;
376
- box-shadow: 0 8px 25px rgba(102, 126, 234, 0.5) !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
  }
378
 
379
- .primary-btn:active {
380
- transform: translateY(0) !important;
 
381
  }
382
 
383
- /* Output area */
384
- .output-area {
385
- background: #1a1a1a !important;
386
- border: 1px solid #333 !important;
387
- border-radius: 12px !important;
388
- color: #e0e0e0 !important;
389
- font-family: 'SF Mono', 'Monaco', 'Consolas', monospace !important;
390
- font-size: 13px !important;
391
- line-height: 1.6 !important;
392
- min-height: 300px !important;
393
  }
394
 
395
- .output-area textarea {
396
- background: transparent !important;
397
- color: #e0e0e0 !important;
398
  }
399
 
400
- /* Tabs */
401
- .output-tabs .tab-nav {
402
- background: #2a2a2a !important;
403
- border-radius: 10px !important;
404
- padding: 4px !important;
405
- margin-bottom: 15px !important;
406
- border: 1px solid #3a3a3a !important;
407
  }
408
 
409
- .output-tabs .tab-nav button {
410
- background: transparent !important;
411
- border: none !important;
412
- border-radius: 8px !important;
413
- color: #888 !important;
414
- padding: 10px 20px !important;
415
- font-size: 13px !important;
416
- font-weight: 500 !important;
417
- transition: all 0.2s !important;
418
  }
419
 
420
- .output-tabs .tab-nav button.selected {
421
- background: #515151 !important;
422
- color: #fff !important;
 
 
 
 
 
 
 
 
 
 
423
  }
424
 
425
- .output-tabs .tab-nav button:hover:not(.selected) {
426
- background: #3a3a3a !important;
427
- color: #ccc !important;
428
  }
429
 
430
- /* Footer */
431
- .footer {
432
- text-align: center;
433
- padding: 20px;
434
- border-top: 1px solid #333;
435
- margin-top: 25px;
436
  }
437
 
438
- .footer p {
439
- color: #666;
440
- font-size: 12px;
441
- margin: 0;
 
 
 
 
 
 
 
 
 
442
  }
443
 
444
- .footer a {
445
- color: #667eea;
446
- text-decoration: none;
447
- transition: 0.2s;
448
  }
449
 
450
- .footer a:hover {
451
- color: #764ba2;
 
452
  }
453
 
454
- /* Dropdown custom style */
455
- .custom-dropdown {
456
- background: #2a2a2a !important;
457
- border: 2px solid #3a3a3a !important;
458
- border-radius: 10px !important;
459
- color: #fff !important;
 
 
460
  }
461
 
462
- .custom-dropdown:focus {
463
- border-color: #667eea !important;
 
 
 
464
  }
465
 
466
- /* Hide default elements */
467
- footer.svelte-1rjryqp { display: none !important; }
468
- .built-with { display: none !important; }
469
 
470
- /* Label styling */
471
- .label-wrap {
472
- color: #aaa !important;
 
 
473
  }
474
 
475
- /* Image component */
476
- .image-container {
477
- border-radius: 12px !important;
478
- overflow: hidden !important;
479
  }
480
 
481
- /* Markdown output */
482
- .markdown-output {
483
- background: #1a1a1a !important;
 
 
484
  padding: 20px !important;
485
- border-radius: 12px !important;
486
- border: 1px solid #333 !important;
487
- color: #e0e0e0 !important;
488
- min-height: 300px !important;
489
  }
490
 
491
- .markdown-output code {
492
- background: #2a2a2a !important;
493
- padding: 2px 8px !important;
494
- border-radius: 4px !important;
495
- font-size: 12px !important;
 
496
  }
497
 
498
- .markdown-output pre {
499
- background: #0d0d0d !important;
500
- padding: 15px !important;
501
- border-radius: 8px !important;
502
- overflow-x: auto !important;
503
  }
504
 
505
- /* Examples */
506
- .examples-section {
507
  margin-top: 20px;
 
 
 
 
 
 
508
  }
509
 
510
- .examples-section .label-wrap {
511
  display: none !important;
512
  }
513
 
514
- /* Responsive */
515
- @media (max-width: 768px) {
516
- .main-grid {
517
- grid-template-columns: 1fr;
518
- }
519
-
520
- .browser-content {
521
- padding: 20px;
522
- }
523
-
524
- .title-section h1 {
525
- font-size: 24px;
526
- }
527
  }
528
- """
529
 
530
- # Browser header HTML
531
- browser_header = """
532
- <div class="browser">
533
- <div class="tabs-head">
534
- <div class="tab-open">
535
- <div class="rounded-r"><div class="mask-round"></div></div>
536
- <span class="tab-icon">πŸ”</span>
537
- <span>GLM-OCR</span>
538
- <div class="close-tab">βœ•</div>
539
- <div class="rounded-l"><div class="mask-round"></div></div>
540
- </div>
541
- <div class="window-opt">
542
- <div class="dot red"></div>
543
- <div class="dot yellow"></div>
544
- <div class="dot green"></div>
545
- </div>
546
- </div>
547
- <div class="head-browser">
548
- <button class="nav-btn">←</button>
549
- <button class="nav-btn" disabled>β†’</button>
550
- <div class="address-bar">
551
- <span class="lock">πŸ”’</span>
552
- <span class="url">huggingface.co/spaces/zai-org/GLM-OCR</span>
553
- </div>
554
- <button class="nav-btn">↻</button>
555
- <span class="star">β˜†</span>
556
- </div>
557
- </div>
558
- """
559
 
560
- title_html = """
561
- <div class="title-section">
562
- <h1>GLM-OCR</h1>
563
- <p class="subtitle">
564
- <span class="status-dot"></span>
565
- Document & Formula Recognition
566
- </p>
567
- </div>
568
- """
569
 
570
- footer_html = """
571
- <div class="footer">
572
- <p>
573
- Powered by <a href="https://huggingface.co/zai-org/GLM-OCR" target="_blank">zai-org/GLM-OCR</a>
574
- Β· Text Β· Formula Β· Table Recognition
575
- </p>
576
- </div>
577
  """
578
 
579
- @spaces.GPU
580
- def process_image(image, task):
581
- """Process image with GLM-OCR model"""
582
- if image is None:
583
- return "⚠️ Please upload an image first.", ""
584
-
585
- # Convert image if needed
586
- if image.mode in ('RGBA', 'LA', 'P'):
587
- image = image.convert('RGB')
588
- image = ImageOps.exif_transpose(image)
589
 
590
- # Save image temporarily
591
- tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
592
- image.save(tmp.name, 'PNG')
593
- tmp.close()
 
 
 
 
594
 
595
- try:
596
- # Prepare messages
597
- prompt_text = TASK_PROMPTS[task]
598
- messages = [
599
- {
600
- "role": "user",
601
- "content": [
602
- {
603
- "type": "image",
604
- "url": tmp.name
605
- },
606
- {
607
- "type": "text",
608
- "text": prompt_text
609
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
610
  ],
611
- }
612
- ]
613
-
614
- # Process input
615
- inputs = processor.apply_chat_template(
616
- messages,
617
- tokenize=True,
618
- add_generation_prompt=True,
619
- return_dict=True,
620
- return_tensors="pt"
621
- ).to(model.device)
622
- inputs.pop("token_type_ids", None)
623
-
624
- # Generate output
625
- generated_ids = model.generate(**inputs, max_new_tokens=8192)
626
- output_text = processor.decode(
627
- generated_ids[0][inputs["input_ids"].shape[1]:],
628
- skip_special_tokens=True
629
- )
630
-
631
- # Clean up temp file
632
- os.unlink(tmp.name)
633
-
634
- result = output_text.strip()
635
- return result, result
636
-
637
- except Exception as e:
638
- if os.path.exists(tmp.name):
639
- os.unlink(tmp.name)
640
- error_msg = f"❌ Error: {str(e)}"
641
- return error_msg, error_msg
642
-
643
- # Build Gradio interface
644
- with gr.Blocks(title="GLM-OCR") as demo:
645
-
646
- # Browser-style header
647
- gr.HTML(browser_header)
648
-
649
- # Main content wrapper
650
- with gr.Column(elem_classes=["browser-content"]):
651
 
652
- # Title
653
- gr.HTML(title_html)
654
-
655
- # Main grid
656
- with gr.Row():
657
- # Left column - Input
658
- with gr.Column(scale=1):
659
- gr.HTML('<div class="card-title">πŸ“· Upload Image</div>')
660
-
661
- image_input = gr.Image(
662
- type="pil",
663
- label="",
664
- sources=["upload", "clipboard"],
665
- elem_classes=["upload-zone"]
666
- )
667
-
668
- gr.HTML('<div class="card-title" style="margin-top: 20px;">🎯 Recognition Task</div>')
669
-
670
- task = gr.Radio(
671
- choices=list(TASK_PROMPTS.keys()),
672
- value="Text",
673
- label="",
674
- elem_classes=["task-buttons"]
675
- )
676
-
677
- btn = gr.Button(
678
- "πŸš€ Recognize",
679
- variant="primary",
680
- elem_classes=["primary-btn"]
681
- )
682
-
683
- # Examples section
684
- if os.path.exists("examples"):
685
- gr.HTML('<div class="card-title" style="margin-top: 20px;">πŸ“‚ Examples</div>')
686
- gr.Examples(
687
- examples=[
688
- f"examples/{f}" for f in os.listdir("examples")
689
- if f.endswith(('.jpg', '.jpeg', '.png', '.webp'))
690
- ][:3] if os.path.exists("examples") else [],
691
- inputs=image_input,
692
- label=""
693
- )
694
 
695
- # Right column - Output
696
- with gr.Column(scale=1):
697
- gr.HTML('<div class="card-title">πŸ“ Recognition Result</div>')
 
 
 
 
 
698
 
699
- with gr.Tabs(elem_classes=["output-tabs"]):
700
- with gr.Tab("Plain Text"):
701
- text_output = gr.Textbox(
702
- lines=15,
703
- show_label=False,
704
- elem_classes=["output-area"],
705
- placeholder="Recognition results will appear here..."
706
- )
707
-
708
- with gr.Tab("Markdown"):
709
- md_output = gr.Markdown(
710
- value="",
711
- elem_classes=["markdown-output"]
712
- )
713
-
714
- # Footer
715
- gr.HTML(footer_html)
 
716
 
717
- # Event handlers
718
  btn.click(
719
- process_image,
720
- inputs=[image_input, task],
721
- outputs=[text_output, md_output]
722
  )
723
 
724
- # Also trigger on image upload with enter key
725
  image_input.change(
726
- lambda: None,
727
- inputs=[],
728
- outputs=[]
729
  )
730
 
731
  if __name__ == "__main__":
732
  demo.queue(max_size=50).launch(
733
- css=css,
734
  show_error=True,
735
- share=False
736
  )
 
5
  import tempfile
6
  from PIL import Image, ImageOps
7
  from transformers import AutoProcessor, AutoModelForImageTextToText
8
+ from io import BytesIO
9
+ import base64
10
 
 
11
# Hugging Face model id shared by the processor and the model loaded below.
MODEL_PATH = "zai-org/GLM-OCR"

# Load model and processor
# Both loaders are called with trust_remote_code=True, which lets code shipped
# in the model repository run locally -- keep MODEL_PATH on a trusted repo.
processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
model = AutoModelForImageTextToText.from_pretrained(
    pretrained_model_name_or_path=MODEL_PATH,
    torch_dtype=torch.bfloat16,  # request bfloat16 weights
    device_map="auto",
    trust_remote_code=True
)

# Task name (offered as the Radio choices in the UI) -> prompt text that
# process_image sends to the model together with the image.
TASK_PROMPTS = {
    "Text": "Text Recognition:",
    "Formula": "Formula Recognition:",
    "Table": "Table Recognition:",
}
27
 
28
@spaces.GPU
def process_image(image, task):
    """Run the OCR model on one image and return the recognized text.

    Args:
        image: PIL image from the Gradio input, or None when nothing has
            been uploaded.
        task: Key into TASK_PROMPTS; an unknown key falls back to
            "Text Recognition:".

    Returns:
        The decoded model output with surrounding whitespace stripped, or a
        warning message when image is None.
    """
    if image is None:
        # U+26A0 U+FE0F is the warning-sign emoji; written as escapes so the
        # literal cannot be mangled by a wrong file encoding again.
        return "\u26a0\ufe0f Please upload an image first"

    if image.mode in ('RGBA', 'LA', 'P'):
        image = image.convert('RGB')
    image = ImageOps.exif_transpose(image)

    # The chat template takes the image by path, so it goes through a temp
    # file. Close our handle before writing by name (an open handle blocks
    # that on some platforms) and always remove the file afterwards.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
    tmp.close()
    try:
        image.save(tmp.name, 'PNG')

        prompt = TASK_PROMPTS.get(task, "Text Recognition:")

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "url": tmp.name},
                    {"type": "text", "text": prompt}
                ],
            }
        ]

        inputs = processor.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt"
        ).to(model.device)

        # Dropped if present; presumably generate() does not accept it for
        # this model -- TODO confirm.
        inputs.pop("token_type_ids", None)

        generated_ids = model.generate(**inputs, max_new_tokens=8192)
        # Slice off the prompt tokens so only newly generated text is decoded.
        output_text = processor.decode(
            generated_ids[0][inputs["input_ids"].shape[1]:],
            skip_special_tokens=True
        )

        return output_text.strip()
    finally:
        # Runs on success and on any exception, so the temp file never leaks.
        if os.path.exists(tmp.name):
            os.unlink(tmp.name)
72
+
73
  css = """
74
+ @import url('https://fonts.googleapis.com/css2?family=Outfit:wght@400;600;900&family=IBM+Plex+Mono:wght@400;500&display=swap');
75
+
76
+ :root {
77
+ --input-focus: #2d8cf0;
78
+ --font-color: #323232;
79
+ --font-color-sub: #666;
80
+ --bg-color: beige;
81
+ --main-color: black;
82
+ --accent-bg: lightblue;
83
  }
84
 
85
+ * {
86
+ font-family: 'Outfit', Arial, sans-serif !important;
 
 
 
87
  }
88
 
89
+ .gradio-container {
90
+ background: linear-gradient(135deg, #e8f5e9, #e3f2fd, #fce4ec) !important;
91
  min-height: 100vh;
92
  }
93
 
94
+ .main-wrapper {
95
+ max-width: 1000px;
96
+ margin: 0 auto;
97
+ padding: 30px 20px;
 
 
 
 
 
 
98
  }
99
 
100
+ .header-card {
101
+ background: var(--accent-bg);
102
+ padding: 25px 30px;
103
+ border-radius: 8px;
104
+ border: 3px solid var(--main-color);
105
+ box-shadow: 6px 6px var(--main-color);
106
+ margin-bottom: 25px;
107
+ text-align: center;
 
108
  }
109
 
110
+ .header-title {
111
+ font-size: 2.5em;
112
+ font-weight: 900;
113
+ color: var(--font-color);
114
+ margin: 0;
115
+ letter-spacing: -1px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  }
117
 
118
+ .header-subtitle {
119
+ font-size: 1.1em;
120
+ font-weight: 600;
121
+ color: var(--font-color-sub);
122
+ margin-top: 8px;
123
  }
124
 
125
+ .content-card {
126
+ background: var(--accent-bg);
127
+ padding: 25px;
128
+ border-radius: 8px;
129
+ border: 3px solid var(--main-color);
130
+ box-shadow: 6px 6px var(--main-color);
 
 
131
  }
132
 
133
+ .section-label {
134
+ font-weight: 700;
135
+ font-size: 0.95em;
136
+ color: var(--font-color);
137
+ margin-bottom: 10px;
138
+ text-transform: uppercase;
139
+ letter-spacing: 0.5px;
140
+ }
141
+
142
+ .input-field, .input-field textarea, .input-field input {
143
+ border-radius: 6px !important;
144
+ border: 2px solid var(--main-color) !important;
145
+ background-color: var(--bg-color) !important;
146
+ box-shadow: 4px 4px var(--main-color) !important;
147
+ font-size: 15px !important;
148
+ font-weight: 500 !important;
149
+ color: var(--font-color) !important;
150
  }
151
 
152
+ .input-field textarea:focus, .input-field input:focus {
153
+ border-color: var(--input-focus) !important;
154
+ outline: none !important;
 
 
 
 
 
155
  }
156
 
157
+ .upload-box {
158
+ border: 3px dashed var(--main-color) !important;
159
+ border-radius: 8px !important;
160
+ background-color: var(--bg-color) !important;
161
+ box-shadow: 4px 4px var(--main-color) !important;
162
+ min-height: 200px !important;
163
+ transition: all 0.2s ease;
164
  }
165
 
166
+ .upload-box:hover {
167
+ background-color: #f5f5dc !important;
168
  }
169
 
170
+ .dropdown-field {
171
+ border-radius: 6px !important;
172
+ border: 2px solid var(--main-color) !important;
173
+ background-color: var(--bg-color) !important;
174
+ box-shadow: 4px 4px var(--main-color) !important;
175
  }
176
 
177
+ .task-buttons {
178
  display: flex;
179
+ gap: 12px;
180
+ flex-wrap: wrap;
 
181
  }
182
 
183
+ .task-btn {
184
+ flex: 1;
185
+ min-width: 100px;
186
+ height: 45px;
187
+ border-radius: 6px !important;
188
+ border: 2px solid var(--main-color) !important;
189
+ background-color: var(--bg-color) !important;
190
+ box-shadow: 4px 4px var(--main-color) !important;
191
+ font-size: 15px !important;
192
+ font-weight: 600 !important;
193
+ color: var(--font-color) !important;
194
  cursor: pointer;
195
+ transition: all 0.1s ease;
196
  }
197
 
198
+ .task-btn:hover {
199
+ background-color: #f5f5dc !important;
200
  }
201
 
202
+ .task-btn.selected, .task-btn:active {
203
+ box-shadow: 0px 0px var(--main-color) !important;
204
+ transform: translate(3px, 3px);
205
+ background-color: #ffd54f !important;
206
+ }
207
 
208
+ .go-button {
 
209
  width: 100%;
210
+ height: 55px;
211
+ border-radius: 8px !important;
212
+ border: 3px solid var(--main-color) !important;
213
+ background: linear-gradient(135deg, #ffd54f, #ffb300) !important;
214
+ box-shadow: 5px 5px var(--main-color) !important;
215
+ font-size: 1.2em !important;
216
+ font-weight: 700 !important;
217
+ color: var(--font-color) !important;
 
 
 
 
 
 
 
 
218
  cursor: pointer;
219
+ transition: all 0.15s ease;
220
+ margin-top: 15px;
 
 
221
  }
222
 
223
+ .go-button:hover {
224
+ background: linear-gradient(135deg, #ffca28, #ffa000) !important;
 
225
  }
226
 
227
+ .go-button:active {
228
+ box-shadow: 0px 0px var(--main-color) !important;
229
+ transform: translate(4px, 4px);
230
  }
231
 
232
+ .output-area {
233
+ background-color: var(--bg-color) !important;
234
+ border: 2px solid var(--main-color) !important;
235
+ border-radius: 8px !important;
236
+ box-shadow: 4px 4px var(--main-color) !important;
237
+ font-family: 'IBM Plex Mono', monospace !important;
238
+ min-height: 300px !important;
239
+ padding: 15px !important;
240
  }
241
 
242
+ .output-area textarea {
243
+ font-family: 'IBM Plex Mono', monospace !important;
244
+ background-color: transparent !important;
245
+ border: none !important;
246
+ box-shadow: none !important;
247
+ }
248
+
249
+ .example-gallery {
250
+ margin-top: 15px;
 
 
 
 
 
251
  }
252
 
253
+ .example-gallery img {
254
+ border: 2px solid var(--main-color) !important;
255
+ border-radius: 6px !important;
256
+ box-shadow: 3px 3px var(--main-color) !important;
257
+ transition: all 0.15s ease;
258
  }
259
 
260
+ .example-gallery img:hover {
261
+ transform: translate(-2px, -2px);
262
+ box-shadow: 5px 5px var(--main-color) !important;
263
  }
264
 
265
+ .md-preview {
266
+ background-color: var(--bg-color) !important;
267
+ border: 2px solid var(--main-color) !important;
268
+ border-radius: 8px !important;
269
+ box-shadow: 4px 4px var(--main-color) !important;
270
+ padding: 20px !important;
271
+ min-height: 300px;
272
  }
273
 
274
+ .tabs-container button {
275
+ border: 2px solid var(--main-color) !important;
276
+ background-color: var(--bg-color) !important;
277
+ font-weight: 600 !important;
278
+ border-radius: 6px 6px 0 0 !important;
279
+ margin-right: 5px;
280
  }
281
 
282
+ .tabs-container button.selected {
283
+ background-color: #ffd54f !important;
284
+ box-shadow: none !important;
 
 
285
  }
286
 
287
+ .footer-note {
 
288
  text-align: center;
289
+ margin-top: 20px;
290
+ padding: 15px;
291
+ background: rgba(255,255,255,0.5);
292
+ border-radius: 8px;
293
+ border: 2px solid var(--main-color);
294
+ font-size: 0.9em;
295
+ color: var(--font-color-sub);
296
  }
297
 
298
+ footer {
299
+ display: none !important;
 
 
 
 
 
 
 
300
  }
301
 
302
+ .gr-box, .gr-form, .gr-panel {
303
+ border: none !important;
304
+ background: transparent !important;
305
+ box-shadow: none !important;
 
 
 
 
306
  }
307
 
308
+ label {
309
+ font-weight: 700 !important;
310
+ font-size: 0.9em !important;
311
+ color: var(--font-color) !important;
312
+ text-transform: uppercase !important;
313
+ letter-spacing: 0.5px !important;
314
  }
315
 
316
+ .copy-btn {
317
+ border: 2px solid var(--main-color) !important;
318
+ background-color: var(--bg-color) !important;
319
+ box-shadow: 2px 2px var(--main-color) !important;
320
+ border-radius: 4px !important;
321
  }
322
 
323
+ .copy-btn:active {
324
+ box-shadow: 0px 0px var(--main-color) !important;
325
+ transform: translate(2px, 2px);
 
 
326
  }
327
+ """
328
 
329
# Two-column Gradio UI: image upload, task selection and examples on the left;
# recognized text (raw and rendered as Markdown) on the right.
with gr.Blocks(css=css, title="GLM-OCR", theme=gr.themes.Soft()) as demo:

    gr.HTML("""
    <div class="main-wrapper">
        <div class="header-card">
            <h1 class="header-title">✦ GLM-OCR ✦</h1>
            <p class="header-subtitle">Document parsing & text recognition powered by AI</p>
        </div>
    </div>
    """)

    with gr.Row(elem_classes=["main-wrapper"]):
        with gr.Column(scale=1, elem_classes=["content-card"]):
            gr.HTML('<div class="section-label">📷 Upload Image</div>')

            image_input = gr.Image(
                type="pil",
                label="",
                sources=["upload", "clipboard"],
                elem_classes=["upload-box"],
                height=250
            )

            gr.HTML('<div class="section-label" style="margin-top: 20px;">🎯 Recognition Type</div>')

            task = gr.Radio(
                choices=list(TASK_PROMPTS.keys()),
                value="Text",
                label="",
                elem_classes=["task-buttons"]
            )

            btn = gr.Button(
                "Recognize →",
                variant="primary",
                elem_classes=["go-button"]
            )

            # Only hand gr.Examples the example images that exist on disk;
            # when none exist the whole examples section is left out.
            example_paths = [
                path
                for path in ("examples/1.jpg", "examples/2.jpg", "examples/3.jpg")
                if os.path.exists(path)
            ]
            examples = None
            if example_paths:
                # NOTE(review): the folder glyph here and the memo glyph on the
                # "Output" label were reconstructed from truncated byte
                # sequences -- confirm they are the intended icons.
                gr.HTML('<div class="section-label" style="margin-top: 20px;">📁 Examples</div>')

                # NOTE(review): confirm gr.Examples accepts elem_classes in the
                # pinned Gradio version.
                examples = gr.Examples(
                    examples=example_paths,
                    inputs=image_input,
                    label="",
                    elem_classes=["example-gallery"]
                )

        with gr.Column(scale=1, elem_classes=["content-card"]):
            gr.HTML('<div class="section-label">📝 Output</div>')

            with gr.Tabs(elem_classes=["tabs-container"]):
                with gr.Tab("Text"):
                    output_text = gr.Textbox(
                        label="",
                        lines=18,
                        elem_classes=["output-area"],
                        show_copy_button=True
                    )

                with gr.Tab("Markdown"):
                    output_md = gr.Markdown(
                        value="",
                        elem_classes=["md-preview"]
                    )

    gr.HTML("""
    <div class="main-wrapper">
        <div class="footer-note">
            🚀 Powered by <strong>zai-org/GLM-OCR</strong> •
            Supports text, formula, and table recognition
        </div>
    </div>
    """)

    def run_ocr(image, task):
        """Run process_image once and return its result for both output tabs."""
        result = process_image(image, task)
        return result, result

    btn.click(
        run_ocr,
        [image_input, task],
        [output_text, output_md]
    )

    # Clear both outputs whenever the input image changes, so a stale result
    # is not shown next to a new image.
    image_input.change(
        lambda: ("", ""),
        None,
        [output_text, output_md]
    )
422
+
423
# Script entry point: enable the request queue (at most 50 waiting requests)
# and start the Gradio server.
if __name__ == "__main__":
    demo.queue(max_size=50).launch(
        show_error=True,
        ssr_mode=False
    )

# NOTE(review): the closing parenthesis above was fused with the import below
# (")import gradio as gr"), which is a SyntaxError. What follows repeats the
# module from its imports onward and looks like an accidental second paste --
# confirm and delete the duplicate.
import gradio as gr
428
+ import torch
429
+ import spaces
430
+ import os
431
+ import tempfile
432
+ from PIL import Image, ImageOps
433
+ from transformers import AutoProcessor, AutoModelForImageTextToText
434
+ from io import BytesIO
435
+ import base64
436
+
437
+ MODEL_PATH = "zai-org/GLM-OCR"
438
+
439
+ # Load model and processor
440
+ processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
441
+ model = AutoModelForImageTextToText.from_pretrained(
442
+ pretrained_model_name_or_path=MODEL_PATH,
443
+ torch_dtype=torch.bfloat16,
444
+ device_map="auto",
445
+ trust_remote_code=True
446
+ )
447
+
448
+ TASK_PROMPTS = {
449
+ "Text": "Text Recognition:",
450
+ "Formula": "Formula Recognition:",
451
+ "Table": "Table Recognition:",
452
  }
453
 
454
@spaces.GPU
def process_image(image, task):
    """Run the OCR model on one image and return the recognized text.

    Args:
        image: PIL image from the Gradio input, or None when nothing has
            been uploaded.
        task: Key into TASK_PROMPTS; an unknown key falls back to
            "Text Recognition:".

    Returns:
        The decoded model output with surrounding whitespace stripped, or a
        warning message when image is None.
    """
    if image is None:
        # U+26A0 U+FE0F is the warning-sign emoji; written as escapes so the
        # literal cannot be mangled by a wrong file encoding again.
        return "\u26a0\ufe0f Please upload an image first"

    if image.mode in ('RGBA', 'LA', 'P'):
        image = image.convert('RGB')
    image = ImageOps.exif_transpose(image)

    # The chat template takes the image by path, so it goes through a temp
    # file. Close our handle before writing by name (an open handle blocks
    # that on some platforms) and always remove the file afterwards.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
    tmp.close()
    try:
        image.save(tmp.name, 'PNG')

        prompt = TASK_PROMPTS.get(task, "Text Recognition:")

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "url": tmp.name},
                    {"type": "text", "text": prompt}
                ],
            }
        ]

        inputs = processor.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt"
        ).to(model.device)

        # Dropped if present; presumably generate() does not accept it for
        # this model -- TODO confirm.
        inputs.pop("token_type_ids", None)

        generated_ids = model.generate(**inputs, max_new_tokens=8192)
        # Slice off the prompt tokens so only newly generated text is decoded.
        output_text = processor.decode(
            generated_ids[0][inputs["input_ids"].shape[1]:],
            skip_special_tokens=True
        )

        return output_text.strip()
    finally:
        # Runs on success and on any exception, so the temp file never leaks.
        if os.path.exists(tmp.name):
            os.unlink(tmp.name)
498
+
499
+ css = """
500
+ @import url('https://fonts.googleapis.com/css2?family=Outfit:wght@400;600;900&family=IBM+Plex+Mono:wght@400;500&display=swap');
501
+
502
+ :root {
503
+ --input-focus: #2d8cf0;
504
+ --font-color: #323232;
505
+ --font-color-sub: #666;
506
+ --bg-color: beige;
507
+ --main-color: black;
508
+ --accent-bg: lightblue;
509
  }
510
 
511
+ * {
512
+ font-family: 'Outfit', Arial, sans-serif !important;
 
 
 
 
 
513
  }
514
 
515
+ .gradio-container {
516
+ background: linear-gradient(135deg, #e8f5e9, #e3f2fd, #fce4ec) !important;
517
+ min-height: 100vh;
518
  }
519
 
520
+ .main-wrapper {
521
+ max-width: 1000px;
522
+ margin: 0 auto;
523
+ padding: 30px 20px;
 
524
  }
525
 
526
+ .header-card {
527
+ background: var(--accent-bg);
528
+ padding: 25px 30px;
529
+ border-radius: 8px;
530
+ border: 3px solid var(--main-color);
531
+ box-shadow: 6px 6px var(--main-color);
532
+ margin-bottom: 25px;
533
+ text-align: center;
 
 
 
534
  }
535
 
536
+ .header-title {
537
+ font-size: 2.5em;
538
+ font-weight: 900;
539
+ color: var(--font-color);
540
+ margin: 0;
541
+ letter-spacing: -1px;
542
  }
543
 
544
+ .header-subtitle {
545
+ font-size: 1.1em;
546
+ font-weight: 600;
547
+ color: var(--font-color-sub);
548
+ margin-top: 8px;
549
  }
550
 
551
+ .content-card {
552
+ background: var(--accent-bg);
553
+ padding: 25px;
554
+ border-radius: 8px;
555
+ border: 3px solid var(--main-color);
556
+ box-shadow: 6px 6px var(--main-color);
 
 
 
 
 
 
 
557
  }
558
 
559
+ .section-label {
560
+ font-weight: 700;
561
+ font-size: 0.95em;
562
+ color: var(--font-color);
563
+ margin-bottom: 10px;
564
+ text-transform: uppercase;
565
+ letter-spacing: 0.5px;
566
+ }
567
+
568
+ .input-field, .input-field textarea, .input-field input {
569
+ border-radius: 6px !important;
570
+ border: 2px solid var(--main-color) !important;
571
+ background-color: var(--bg-color) !important;
572
+ box-shadow: 4px 4px var(--main-color) !important;
573
+ font-size: 15px !important;
574
+ font-weight: 500 !important;
575
+ color: var(--font-color) !important;
576
  }
577
 
578
+ .input-field textarea:focus, .input-field input:focus {
579
+ border-color: var(--input-focus) !important;
580
+ outline: none !important;
581
  }
582
 
583
+ .upload-box {
584
+ border: 3px dashed var(--main-color) !important;
585
+ border-radius: 8px !important;
586
+ background-color: var(--bg-color) !important;
587
+ box-shadow: 4px 4px var(--main-color) !important;
588
+ min-height: 200px !important;
589
+ transition: all 0.2s ease;
 
 
 
590
  }
591
 
592
+ .upload-box:hover {
593
+ background-color: #f5f5dc !important;
 
594
  }
595
 
596
+ .dropdown-field {
597
+ border-radius: 6px !important;
598
+ border: 2px solid var(--main-color) !important;
599
+ background-color: var(--bg-color) !important;
600
+ box-shadow: 4px 4px var(--main-color) !important;
 
 
601
  }
602
 
603
+ .task-buttons {
604
+ display: flex;
605
+ gap: 12px;
606
+ flex-wrap: wrap;
 
 
 
 
 
607
  }
608
 
609
+ .task-btn {
610
+ flex: 1;
611
+ min-width: 100px;
612
+ height: 45px;
613
+ border-radius: 6px !important;
614
+ border: 2px solid var(--main-color) !important;
615
+ background-color: var(--bg-color) !important;
616
+ box-shadow: 4px 4px var(--main-color) !important;
617
+ font-size: 15px !important;
618
+ font-weight: 600 !important;
619
+ color: var(--font-color) !important;
620
+ cursor: pointer;
621
+ transition: all 0.1s ease;
622
  }
623
 
624
+ .task-btn:hover {
625
+ background-color: #f5f5dc !important;
 
626
  }
627
 
628
+ .task-btn.selected, .task-btn:active {
629
+ box-shadow: 0px 0px var(--main-color) !important;
630
+ transform: translate(3px, 3px);
631
+ background-color: #ffd54f !important;
 
 
632
  }
633
 
634
+ .go-button {
635
+ width: 100%;
636
+ height: 55px;
637
+ border-radius: 8px !important;
638
+ border: 3px solid var(--main-color) !important;
639
+ background: linear-gradient(135deg, #ffd54f, #ffb300) !important;
640
+ box-shadow: 5px 5px var(--main-color) !important;
641
+ font-size: 1.2em !important;
642
+ font-weight: 700 !important;
643
+ color: var(--font-color) !important;
644
+ cursor: pointer;
645
+ transition: all 0.15s ease;
646
+ margin-top: 15px;
647
  }
648
 
649
+ .go-button:hover {
650
+ background: linear-gradient(135deg, #ffca28, #ffa000) !important;
 
 
651
  }
652
 
653
+ .go-button:active {
654
+ box-shadow: 0px 0px var(--main-color) !important;
655
+ transform: translate(4px, 4px);
656
  }
657
 
658
+ .output-area {
659
+ background-color: var(--bg-color) !important;
660
+ border: 2px solid var(--main-color) !important;
661
+ border-radius: 8px !important;
662
+ box-shadow: 4px 4px var(--main-color) !important;
663
+ font-family: 'IBM Plex Mono', monospace !important;
664
+ min-height: 300px !important;
665
+ padding: 15px !important;
666
  }
667
 
668
+ .output-area textarea {
669
+ font-family: 'IBM Plex Mono', monospace !important;
670
+ background-color: transparent !important;
671
+ border: none !important;
672
+ box-shadow: none !important;
673
  }
674
 
675
+ .example-gallery {
676
+ margin-top: 15px;
677
+ }
678
 
679
+ .example-gallery img {
680
+ border: 2px solid var(--main-color) !important;
681
+ border-radius: 6px !important;
682
+ box-shadow: 3px 3px var(--main-color) !important;
683
+ transition: all 0.15s ease;
684
  }
685
 
686
+ .example-gallery img:hover {
687
+ transform: translate(-2px, -2px);
688
+ box-shadow: 5px 5px var(--main-color) !important;
 
689
  }
690
 
691
+ .md-preview {
692
+ background-color: var(--bg-color) !important;
693
+ border: 2px solid var(--main-color) !important;
694
+ border-radius: 8px !important;
695
+ box-shadow: 4px 4px var(--main-color) !important;
696
  padding: 20px !important;
697
+ min-height: 300px;
 
 
 
698
  }
699
 
700
+ .tabs-container button {
701
+ border: 2px solid var(--main-color) !important;
702
+ background-color: var(--bg-color) !important;
703
+ font-weight: 600 !important;
704
+ border-radius: 6px 6px 0 0 !important;
705
+ margin-right: 5px;
706
  }
707
 
708
+ .tabs-container button.selected {
709
+ background-color: #ffd54f !important;
710
+ box-shadow: none !important;
 
 
711
  }
712
 
713
+ .footer-note {
714
+ text-align: center;
715
  margin-top: 20px;
716
+ padding: 15px;
717
+ background: rgba(255,255,255,0.5);
718
+ border-radius: 8px;
719
+ border: 2px solid var(--main-color);
720
+ font-size: 0.9em;
721
+ color: var(--font-color-sub);
722
  }
723
 
724
+ footer {
725
  display: none !important;
726
  }
727
 
728
+ .gr-box, .gr-form, .gr-panel {
729
+ border: none !important;
730
+ background: transparent !important;
731
+ box-shadow: none !important;
 
 
 
 
 
 
 
 
 
732
  }
 
733
 
734
+ label {
735
+ font-weight: 700 !important;
736
+ font-size: 0.9em !important;
737
+ color: var(--font-color) !important;
738
+ text-transform: uppercase !important;
739
+ letter-spacing: 0.5px !important;
740
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
741
 
742
+ .copy-btn {
743
+ border: 2px solid var(--main-color) !important;
744
+ background-color: var(--bg-color) !important;
745
+ box-shadow: 2px 2px var(--main-color) !important;
746
+ border-radius: 4px !important;
747
+ }
 
 
 
748
 
749
+ .copy-btn:active {
750
+ box-shadow: 0px 0px var(--main-color) !important;
751
+ transform: translate(2px, 2px);
752
+ }
 
 
 
753
  """
754
 
755
# Build the Gradio UI: header, an input column (image, task, examples), an
# output column (raw text and rendered Markdown tabs) and a footer.
with gr.Blocks(css=css, title="GLM-OCR", theme=gr.themes.Soft()) as demo:

    gr.HTML("""
    <div class="main-wrapper">
        <div class="header-card">
            <h1 class="header-title">✦ GLM-OCR ✦</h1>
            <p class="header-subtitle">Document parsing &amp; text recognition powered by AI</p>
        </div>
    </div>
    """)

    with gr.Row(elem_classes=["main-wrapper"]):
        with gr.Column(scale=1, elem_classes=["content-card"]):
            gr.HTML('<div class="section-label">📷 Upload Image</div>')

            image_input = gr.Image(
                type="pil",
                label="",
                sources=["upload", "clipboard"],
                elem_classes=["upload-box"],
                height=250
            )

            gr.HTML('<div class="section-label" style="margin-top: 20px;">🎯 Recognition Type</div>')

            task = gr.Radio(
                choices=list(TASK_PROMPTS.keys()),
                value="Text",
                label="",
                elem_classes=["task-buttons"]
            )

            btn = gr.Button(
                "Recognize →",
                variant="primary",
                elem_classes=["go-button"]
            )

            gr.HTML('<div class="section-label" style="margin-top: 20px;">📁 Examples</div>')

            # NOTE(review): gr.Examples does not appear to accept
            # ``elem_classes`` (only ``elem_id``) -- confirm against the
            # installed Gradio version. The CSS class is attached to a
            # wrapping column so the ".example-gallery" rules still apply.
            with gr.Column(elem_classes=["example-gallery"]):
                examples = gr.Examples(
                    examples=[
                        "examples/1.jpg",
                        "examples/2.jpg",
                        "examples/3.jpg"
                    ],
                    inputs=image_input,
                    label=""
                )

        with gr.Column(scale=1, elem_classes=["content-card"]):
            gr.HTML('<div class="section-label">📝 Output</div>')

            with gr.Tabs(elem_classes=["tabs-container"]):
                with gr.Tab("Text"):
                    output_text = gr.Textbox(
                        label="",
                        lines=18,
                        elem_classes=["output-area"],
                        show_copy_button=True
                    )

                with gr.Tab("Markdown"):
                    output_md = gr.Markdown(
                        value="",
                        elem_classes=["md-preview"]
                    )

    gr.HTML("""
    <div class="main-wrapper">
        <div class="footer-note">
            🚀 Powered by <strong>zai-org/GLM-OCR</strong> •
            Supports text, formula, and table recognition
        </div>
    </div>
    """)

    def run_ocr(image, task):
        """Run OCR once and return the result for both output tabs."""
        result = process_image(image, task)
        return result, result

    # The button fills the Text and Markdown tabs with the same result.
    btn.click(
        run_ocr,
        [image_input, task],
        [output_text, output_md]
    )

    # Changing the image clears both outputs.
    image_input.change(
        lambda: ("", ""),
        None,
        [output_text, output_md]
    )
848
 
849
# Script entry point: queue requests, then launch the app.
if __name__ == "__main__":
    queued_app = demo.queue(max_size=50)
    launch_options = {
        "show_error": True,
        "ssr_mode": False,
    }
    queued_app.launch(**launch_options)