prithivMLmods committed on
Commit
4697f26
·
verified ·
1 Parent(s): da50847

update app

Browse files
Files changed (1) hide show
  1. app.py +105 -305
app.py CHANGED
@@ -4,10 +4,82 @@ import spaces
4
  import os
5
  import tempfile
6
  from PIL import Image, ImageOps
 
 
7
  from transformers import AutoProcessor, AutoModelForImageTextToText
 
 
8
  from gradio.themes import Soft
9
  from gradio.themes.utils import colors, fonts, sizes
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  MODEL_PATH = "zai-org/GLM-OCR"
12
 
13
  processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
@@ -69,314 +141,47 @@ def process_image(image, task):
69
 
70
  return output_text.strip()
71
 
72
- # --- Custom Theme: Tech Blue/Cyan ---
73
- colors.tech_blue = colors.Color(
74
- name="tech_blue",
75
- c50="#f3f7fb",
76
- c100="#e6eef7",
77
- c200="#c6d9ee",
78
- c300="#9bbbe0",
79
- c400="#6a96cf",
80
- c500="#3f6fb8",
81
- c600="#2f569d",
82
- c700="#26457f",
83
- c800="#213b69",
84
- c900="#1d3258",
85
- c950="#0f1b34",
86
- )
87
-
88
- colors.electric = colors.Color(
89
- name="electric",
90
- c50="#f6f4fb",
91
- c100="#ede9f8",
92
- c200="#d9d1f0",
93
- c300="#bba9e3",
94
- c400="#9a7ed3",
95
- c500="#7a56c2",
96
- c600="#6541aa",
97
- c700="#52348c",
98
- c800="#442c72",
99
- c900="#38265d",
100
- c950="#1e1436",
101
- )
102
-
103
-
104
- class TechTheme(Soft):
105
- def __init__(self):
106
- super().__init__(
107
- primary_hue=colors.tech_blue,
108
- secondary_hue=colors.electric,
109
- neutral_hue=colors.slate,
110
- text_size=sizes.text_md,
111
- font=(fonts.GoogleFont("Inter"), "system-ui", "sans-serif"),
112
- font_mono=(fonts.GoogleFont("JetBrains Mono"), "monospace"),
113
- )
114
- super().set(
115
- # Background - clean tech gradient
116
- body_background_fill="linear-gradient(135deg, #ecfeff 0%, #ffffff 35%, #eff6ff 65%, #f0f9ff 100%)",
117
- body_background_fill_dark="linear-gradient(135deg, #0f172a 0%, #020617 50%, #0c1929 100%)",
118
- # Buttons - cyan to blue gradient
119
- button_primary_background_fill="linear-gradient(135deg, #06b6d4 0%, #3b82f6 100%)",
120
- button_primary_background_fill_hover="linear-gradient(135deg, #0891b2 0%, #2563eb 100%)",
121
- button_primary_text_color="white",
122
- button_primary_shadow="0 4px 20px rgba(6, 182, 212, 0.35)",
123
- button_primary_shadow_hover="0 6px 24px rgba(6, 182, 212, 0.45)",
124
- # Blocks - glassmorphism
125
- block_background_fill="rgba(255, 255, 255, 0.85)",
126
- block_background_fill_dark="rgba(15, 23, 42, 0.7)",
127
- block_shadow="0 4px 24px rgba(6, 182, 212, 0.1)",
128
- block_border_width="1px",
129
- block_border_color="rgba(6, 182, 212, 0.12)",
130
- block_border_color_dark="rgba(6, 182, 212, 0.25)",
131
- block_title_text_weight="600",
132
- block_label_text_weight="500",
133
- # Inputs
134
- input_background_fill="rgba(255, 255, 255, 0.95)",
135
- input_background_fill_dark="rgba(15, 23, 42, 0.8)",
136
- input_border_color="rgba(6, 182, 212, 0.2)",
137
- input_border_color_focus="rgba(6, 182, 212, 0.5)",
138
- input_shadow="0 2px 8px rgba(6, 182, 212, 0.06)",
139
- input_shadow_focus="0 0 0 3px rgba(6, 182, 212, 0.12)",
140
- # Tabs
141
- border_color_accent="*primary_500",
142
- )
143
-
144
-
145
- theme = TechTheme()
146
-
147
- # --- Custom CSS ---
148
- css = """
149
- /* Main Title with tech gradient */
150
- .main-title {
151
- text-align: center;
152
- padding: 1.5rem 0 1rem;
153
- }
154
- .main-title h1 {
155
- background: linear-gradient(135deg, #06b6d4 0%, #3b82f6 50%, #0ea5e9 100%);
156
- background-size: 200% 200%;
157
- -webkit-background-clip: text;
158
- -webkit-text-fill-color: transparent;
159
- background-clip: text;
160
- font-size: 2.4rem !important;
161
- font-weight: 700 !important;
162
- margin: 0 !important;
163
- letter-spacing: -0.02em;
164
- animation: gradient-shift 6s ease infinite;
165
- }
166
- @keyframes gradient-shift {
167
- 0%, 100% { background-position: 0% 50%; }
168
- 50% { background-position: 100% 50%; }
169
- }
170
- .subtitle {
171
- color: #64748b;
172
- font-size: 0.95rem;
173
- margin-top: 0.4rem;
174
- font-weight: 400;
175
- }
176
- /* Extract Button */
177
- #extract-btn {
178
- font-size: 1rem !important;
179
- font-weight: 600 !important;
180
- padding: 12px 28px !important;
181
- border-radius: 10px !important;
182
- transition: all 0.3s ease !important;
183
- }
184
- #extract-btn:hover {
185
- transform: translateY(-2px);
186
- }
187
- /* Output text styling */
188
- .output-text textarea {
189
- font-family: 'JetBrains Mono', 'SF Mono', 'Monaco', 'Inconsolata', 'Roboto Mono', monospace !important;
190
- font-size: 0.875rem !important;
191
- line-height: 1.6 !important;
192
- font-variant-ligatures: none !important;
193
- }
194
- /* Tab styling */
195
- .tab-nav {
196
- border-bottom: 2px solid rgba(6, 182, 212, 0.1) !important;
197
- }
198
- .tab-nav button {
199
- font-weight: 500 !important;
200
- padding: 10px 18px !important;
201
- border-radius: 8px 8px 0 0 !important;
202
- transition: all 0.2s ease !important;
203
- }
204
- .tab-nav button.selected {
205
- background: linear-gradient(135deg, rgba(6, 182, 212, 0.08), rgba(59, 130, 246, 0.08)) !important;
206
- border-bottom: 2px solid #06b6d4 !important;
207
- }
208
- /* Card hover effects */
209
- .gradio-group {
210
- transition: all 0.3s ease;
211
- border-radius: 14px !important;
212
- }
213
- .gradio-group:hover {
214
- box-shadow: 0 8px 32px rgba(6, 182, 212, 0.12);
215
- }
216
- /* Dropdown styling */
217
- .gradio-dropdown {
218
- border-radius: 8px !important;
219
- }
220
- /* Image container */
221
- .gradio-image {
222
- border-radius: 10px !important;
223
- overflow: hidden;
224
- }
225
- /* Gallery styling */
226
- .gradio-gallery {
227
- border-radius: 10px !important;
228
- }
229
- .gradio-gallery .gallery-item {
230
- border-radius: 8px !important;
231
- border: 1px solid rgba(6, 182, 212, 0.15) !important;
232
- }
233
- /* Accordion styling */
234
- .gradio-accordion {
235
- border-radius: 10px !important;
236
- border: 1px solid rgba(6, 182, 212, 0.12) !important;
237
- }
238
- .gradio-accordion > .label-wrap {
239
- background: linear-gradient(135deg, rgba(6, 182, 212, 0.04), rgba(59, 130, 246, 0.04)) !important;
240
- }
241
- /* Scrollbar styling */
242
- ::-webkit-scrollbar {
243
- width: 6px;
244
- height: 6px;
245
- }
246
- ::-webkit-scrollbar-track {
247
- background: rgba(6, 182, 212, 0.05);
248
- border-radius: 3px;
249
- }
250
- ::-webkit-scrollbar-thumb {
251
- background: linear-gradient(135deg, #06b6d4, #3b82f6);
252
- border-radius: 3px;
253
- }
254
- ::-webkit-scrollbar-thumb:hover {
255
- background: linear-gradient(135deg, #0891b2, #2563eb);
256
- }
257
- /* File upload area */
258
- .gradio-file {
259
- border: 2px dashed rgba(6, 182, 212, 0.25) !important;
260
- border-radius: 10px !important;
261
- transition: all 0.3s ease;
262
- }
263
- .gradio-file:hover {
264
- border-color: rgba(6, 182, 212, 0.45) !important;
265
- background: rgba(6, 182, 212, 0.02) !important;
266
- }
267
- /* Examples section */
268
- .gradio-examples {
269
- border-radius: 10px !important;
270
- }
271
- /* Radio group styling */
272
- .gradio-radio {
273
- border-radius: 10px !important;
274
- }
275
- .gradio-radio label {
276
- border-radius: 8px !important;
277
- transition: all 0.2s ease !important;
278
- }
279
- .gradio-radio label.selected {
280
- background: linear-gradient(135deg, rgba(6, 182, 212, 0.1), rgba(59, 130, 246, 0.1)) !important;
281
- border-color: rgba(6, 182, 212, 0.3) !important;
282
- }
283
- /* Markdown output */
284
- .markdown-output {
285
- font-family: 'Inter', system-ui, sans-serif !important;
286
- line-height: 1.7 !important;
287
- }
288
- .markdown-output code {
289
- font-family: 'JetBrains Mono', monospace !important;
290
- background: rgba(6, 182, 212, 0.08) !important;
291
- padding: 2px 6px !important;
292
- border-radius: 4px !important;
293
- }
294
- .markdown-output pre {
295
- background: rgba(6, 182, 212, 0.05) !important;
296
- border: 1px solid rgba(6, 182, 212, 0.15) !important;
297
- border-radius: 8px !important;
298
- padding: 1rem !important;
299
- }
300
- /* Footer */
301
- .footer-text {
302
- text-align: center;
303
- padding: 1rem;
304
- color: #64748b;
305
- font-size: 0.85rem;
306
- }
307
- .footer-text strong {
308
- background: linear-gradient(135deg, #06b6d4, #3b82f6);
309
- -webkit-background-clip: text;
310
- -webkit-text-fill-color: transparent;
311
- background-clip: text;
312
- }
313
- """
314
-
315
  with gr.Blocks(title="GLM-OCR") as demo:
316
 
317
- # Header
318
- gr.HTML("""
319
- <div class="main-title">
320
- <h1>GLM-OCR</h1>
321
- <p class="subtitle">Document parsing and text recognition powered by AI</p>
322
- </div>
323
- """)
324
 
325
  with gr.Row():
326
 
327
- # Input Column
328
  with gr.Column(scale=1):
329
- with gr.Group():
330
- gr.Markdown("### Upload Image")
331
- image_input = gr.Image(
332
- type="pil",
333
- label="",
334
- sources=["upload", "clipboard"],
335
- height=280,
336
- show_label=False
337
- )
338
-
339
- gr.Markdown("### Recognition Type")
340
- task = gr.Radio(
341
- choices=list(TASK_PROMPTS.keys()),
342
- value="Text",
343
- label="",
344
- show_label=False
345
- )
346
-
347
- btn = gr.Button(
348
- "Recognize",
349
- variant="primary",
350
- elem_id="extract-btn"
351
- )
352
 
353
- with gr.Accordion("Examples", open=False):
354
- examples = gr.Examples(
355
- examples=["examples/1.jpg", "examples/2.jpg", "examples/3.jpg"],
356
- inputs=image_input,
357
- label=""
358
- )
 
 
 
 
 
 
 
359
 
360
- # Output Column
361
  with gr.Column(scale=1):
362
- with gr.Group():
363
- gr.Markdown("### Result")
364
- with gr.Tabs():
365
- with gr.Tab("Text"):
366
- output_text = gr.Textbox(
367
- label="",
368
- lines=16,
369
- show_label=False,
370
- #show_copy_button=True,
371
- elem_classes=["output-text"]
372
- )
373
-
374
- with gr.Tab("Markdown"):
375
- output_md = gr.Markdown(
376
- value="",
377
- elem_classes=["markdown-output"]
378
- )
379
- # Event handlers
380
  def run_ocr(image, task):
381
  result = process_image(image, task)
382
  return result, result
@@ -394,9 +199,4 @@ with gr.Blocks(title="GLM-OCR") as demo:
394
  )
395
 
396
  if __name__ == "__main__":
397
- demo.queue(max_size=50).launch(
398
- css=css,
399
- theme=theme,
400
- show_error=True,
401
- ssr_mode=False
402
- )
 
4
  import os
5
  import tempfile
6
  from PIL import Image, ImageOps
7
+ from threading import Thread
8
+ from typing import Iterable
9
  from transformers import AutoProcessor, AutoModelForImageTextToText
10
+
11
+ from transformers.image_utils import load_image
12
  from gradio.themes import Soft
13
  from gradio.themes.utils import colors, fonts, sizes
14
 
15
# Attach a custom "steel_blue" palette to the gradio `colors` module so it can
# be referenced as `colors.steel_blue`, the same way the built-in hues are.
# Shades are listed from lightest (c50) to darkest (c950).
colors.steel_blue = colors.Color(
    name="steel_blue",
    **{
        "c50": "#EBF3F8",
        "c100": "#D3E5F0",
        "c200": "#A8CCE1",
        "c300": "#7DB3D2",
        "c400": "#529AC3",
        "c500": "#4682B4",
        "c600": "#3E72A0",
        "c700": "#36638C",
        "c800": "#2E5378",
        "c900": "#264364",
        "c950": "#1E3450",
    },
)
29
+
30
class SteelBlueTheme(Soft):
    """Variant of gradio's ``Soft`` theme with gray surfaces and steel-blue accents.

    All constructor arguments are keyword-only and are forwarded unchanged to
    ``Soft.__init__``; a fixed set of style-variable overrides is applied
    afterwards through ``set()``.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.gray,
        secondary_hue: colors.Color | str = colors.steel_blue,
        neutral_hue: colors.Color | str = colors.slate,
        text_size: sizes.Size | str = sizes.text_lg,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
        ),
    ):
        base_options = {
            "primary_hue": primary_hue,
            "secondary_hue": secondary_hue,
            "neutral_hue": neutral_hue,
            "text_size": text_size,
            "font": font,
            "font_mono": font_mono,
        }
        super().__init__(**base_options)

        # Style-variable overrides; "*name" values refer to other theme variables.
        overrides = {
            # Page and container backgrounds (primary hue).
            "background_fill_primary": "*primary_50",
            "background_fill_primary_dark": "*primary_900",
            "body_background_fill": "linear-gradient(135deg, *primary_200, *primary_100)",
            "body_background_fill_dark": "linear-gradient(135deg, *primary_900, *primary_800)",
            # Primary buttons (secondary hue gradients).
            "button_primary_text_color": "white",
            "button_primary_text_color_hover": "white",
            "button_primary_background_fill": "linear-gradient(90deg, *secondary_500, *secondary_600)",
            "button_primary_background_fill_hover": "linear-gradient(90deg, *secondary_600, *secondary_700)",
            "button_primary_background_fill_dark": "linear-gradient(90deg, *secondary_600, *secondary_800)",
            "button_primary_background_fill_hover_dark": "linear-gradient(90deg, *secondary_500, *secondary_500)",
            # Secondary buttons (primary hue).
            "button_secondary_text_color": "black",
            "button_secondary_text_color_hover": "white",
            "button_secondary_background_fill": "linear-gradient(90deg, *primary_300, *primary_300)",
            "button_secondary_background_fill_hover": "linear-gradient(90deg, *primary_400, *primary_400)",
            "button_secondary_background_fill_dark": "linear-gradient(90deg, *primary_500, *primary_600)",
            "button_secondary_background_fill_hover_dark": "linear-gradient(90deg, *primary_500, *primary_500)",
            # Sliders.
            "slider_color": "*secondary_500",
            "slider_color_dark": "*secondary_600",
            # Blocks, shadows and spacing.
            "block_title_text_weight": "600",
            "block_border_width": "3px",
            "block_shadow": "*shadow_drop_lg",
            "button_primary_shadow": "*shadow_drop_lg",
            "button_large_padding": "11px",
            "color_accent_soft": "*primary_100",
            "block_label_background_fill": "*primary_200",
        }
        super().set(**overrides)


# Module-level theme instance built with the default arguments above.
steel_blue_theme = SteelBlueTheme()
82
+
83
  MODEL_PATH = "zai-org/GLM-OCR"
84
 
85
  processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
 
141
 
142
  return output_text.strip()
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  with gr.Blocks(title="GLM-OCR") as demo:
145
 
146
+ gr.Markdown("# GLM-OCR")
147
+ gr.Markdown("Document parsing and text recognition powered by AI")
 
 
 
 
 
148
 
149
  with gr.Row():
150
 
 
151
  with gr.Column(scale=1):
152
+ image_input = gr.Image(
153
+ type="pil",
154
+ label="Upload Image",
155
+ sources=["upload", "clipboard"],
156
+ height=300
157
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
+ task = gr.Radio(
160
+ choices=list(TASK_PROMPTS.keys()),
161
+ value="Text",
162
+ label="Recognition Type"
163
+ )
164
+
165
+ btn = gr.Button("Recognize", variant="primary")
166
+
167
+ gr.Examples(
168
+ examples=["examples/1.jpg", "examples/2.jpg", "examples/3.jpg"],
169
+ inputs=image_input,
170
+ label="Examples"
171
+ )
172
 
 
173
  with gr.Column(scale=1):
174
+ with gr.Tabs():
175
+ with gr.Tab("Text"):
176
+ output_text = gr.Textbox(
177
+ label="Output",
178
+ lines=18,
179
+ show_copy_button=True
180
+ )
181
+
182
+ with gr.Tab("Markdown"):
183
+ output_md = gr.Markdown(value="")
184
+
 
 
 
 
 
 
 
185
  def run_ocr(image, task):
186
  result = process_image(image, task)
187
  return result, result
 
199
  )
200
 
201
  if __name__ == "__main__":
202
+ demo.queue(max_size=50).launch(css=css, theme=steel_blue_theme, mcp_server=True, ssr_mode=False, show_error=True)