prithivMLmods committed on
Commit
b7ca1b2
·
verified ·
1 Parent(s): 93c4cc1

update app

Browse files
Files changed (1) hide show
  1. app.py +19 -129
app.py CHANGED
@@ -1,100 +1,22 @@
1
  import os
2
  import sys
3
- import random
4
- import uuid
5
- import json
6
  import time
7
  from threading import Thread
8
- from typing import Iterable
9
- from huggingface_hub import snapshot_download
10
 
11
  import gradio as gr
12
  import spaces
13
  import torch
14
- import numpy as np
15
  from PIL import Image
16
- import cv2
17
 
18
  from transformers import (
19
  Qwen2_5_VLForConditionalGeneration,
20
  Qwen3VLForConditionalGeneration,
21
- AutoModelForImageTextToText,
22
  AutoModelForCausalLM,
23
  AutoProcessor,
24
  TextIteratorStreamer,
25
  )
26
 
27
- from transformers.image_utils import load_image
28
- from gradio.themes import Soft
29
- from gradio.themes.utils import colors, fonts, sizes
30
-
31
- colors.steel_blue = colors.Color(
32
- name="steel_blue",
33
- c50="#EBF3F8",
34
- c100="#D3E5F0",
35
- c200="#A8CCE1",
36
- c300="#7DB3D2",
37
- c400="#529AC3",
38
- c500="#4682B4",
39
- c600="#3E72A0",
40
- c700="#36638C",
41
- c800="#2E5378",
42
- c900="#264364",
43
- c950="#1E3450",
44
- )
45
-
46
- class SteelBlueTheme(Soft):
47
- def __init__(
48
- self,
49
- *,
50
- primary_hue: colors.Color | str = colors.gray,
51
- secondary_hue: colors.Color | str = colors.steel_blue,
52
- neutral_hue: colors.Color | str = colors.slate,
53
- text_size: sizes.Size | str = sizes.text_lg,
54
- font: fonts.Font | str | Iterable[fonts.Font | str] = (
55
- fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
56
- ),
57
- font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
58
- fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
59
- ),
60
- ):
61
- super().__init__(
62
- primary_hue=primary_hue,
63
- secondary_hue=secondary_hue,
64
- neutral_hue=neutral_hue,
65
- text_size=text_size,
66
- font=font,
67
- font_mono=font_mono,
68
- )
69
- super().set(
70
- background_fill_primary="*primary_50",
71
- background_fill_primary_dark="*primary_900",
72
- body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
73
- body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
74
- button_primary_text_color="white",
75
- button_primary_text_color_hover="white",
76
- button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
77
- button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
78
- button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_800)",
79
- button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_500)",
80
- button_secondary_text_color="black",
81
- button_secondary_text_color_hover="white",
82
- button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
83
- button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
84
- button_secondary_background_fill_dark="linear-gradient(90deg, *primary_500, *primary_600)",
85
- button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_500, *primary_500)",
86
- slider_color="*secondary_500",
87
- slider_color_dark="*secondary_600",
88
- block_title_text_weight="600",
89
- block_border_width="3px",
90
- block_shadow="*shadow_drop_lg",
91
- button_primary_shadow="*shadow_drop_lg",
92
- button_large_padding="11px",
93
- color_accent_soft="*primary_100",
94
- block_label_background_fill="*primary_200",
95
- )
96
-
97
- steel_blue_theme = SteelBlueTheme()
98
 
99
  css = """
100
  #main-title h1 {
@@ -122,43 +44,6 @@ if torch.cuda.is_available():
122
 
123
  print("Using device:", device)
124
 
125
- # CACHE_PATH = "./model_cache"
126
- # if not os.path.exists(CACHE_PATH):
127
- # os.makedirs(CACHE_PATH)
128
- #
129
- # model_path_d_local = snapshot_download(
130
- # repo_id='rednote-hilab/dots.ocr',
131
- # local_dir=os.path.join(CACHE_PATH, 'dots.ocr'),
132
- # max_workers=20,
133
- # local_dir_use_symlinks=False
134
- # )
135
- #
136
- # config_file_path = os.path.join(model_path_d_local, "configuration_dots.py")
137
- #
138
- # if os.path.exists(config_file_path):
139
- # with open(config_file_path, 'r') as f:
140
- # input_code = f.read()
141
- #
142
- # lines = input_code.splitlines()
143
- # if "class DotsVLProcessor" in input_code and not any("attributes = " in line for line in lines):
144
- # output_lines = []
145
- # for line in lines:
146
- # output_lines.append(line)
147
- # if line.strip().startswith("class DotsVLProcessor"):
148
- # output_lines.append(" attributes = [\"image_processor\", \"tokenizer\"]")
149
- #
150
- # with open(config_file_path, 'w') as f:
151
- # f.write('\n'.join(output_lines))
152
- # print("Patched configuration_dots.py successfully.")
153
- #
154
- #sys.path.append(model_path_d_local)
155
-
156
- MAX_MAX_NEW_TOKENS = 4096
157
- DEFAULT_MAX_NEW_TOKENS = 2048
158
- MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
159
-
160
- device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
161
-
162
  # Load Chandra-OCR
163
  MODEL_ID_V = "datalab-to/chandra"
164
  processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
@@ -197,6 +82,7 @@ model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
197
  torch_dtype=torch.float16
198
  ).to(device).eval()
199
 
 
200
  @spaces.GPU
201
  def generate_image(model_name: str, text: str, image: Image.Image,
202
  max_new_tokens: int, temperature: float, top_p: float,
@@ -260,6 +146,7 @@ def generate_image(model_name: str, text: str, image: Image.Image,
260
  time.sleep(0.01)
261
  yield buffer, buffer
262
 
 
263
  image_examples = [
264
  ["OCR the content perfectly.", "examples/3.jpg"],
265
  ["Perform OCR on the image.", "examples/1.jpg"],
@@ -280,27 +167,30 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
280
  )
281
 
282
  with gr.Accordion("Advanced options", open=False):
283
- max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
 
284
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7)
285
  top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
286
  top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
287
- repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.1)
 
288
 
289
  with gr.Column(scale=3):
290
- gr.Markdown("## Output", elem_id="output-title")
291
- output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=11, show_copy_button=True)
292
- with gr.Accordion("(Result.md)", open=False):
293
- markdown_output = gr.Markdown(label="(Result.Md)")
294
-
295
- model_choice = gr.Radio(
296
- choices=["Nanonets-OCR2-3B", "Chandra-OCR", "Dots.OCR", "olmOCR-2-7B-1025"],
297
- label="Select Model",
298
- value="Nanonets-OCR2-3B"
299
- )
300
 
301
  image_submit.click(
302
  fn=generate_image,
303
- inputs=[model_choice, image_query, image_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
 
304
  outputs=[output, markdown_output]
305
  )
306
 
 
1
  import os
2
  import sys
 
 
 
3
  import time
4
  from threading import Thread
 
 
5
 
6
  import gradio as gr
7
  import spaces
8
  import torch
 
9
  from PIL import Image
 
10
 
11
  from transformers import (
12
  Qwen2_5_VLForConditionalGeneration,
13
  Qwen3VLForConditionalGeneration,
 
14
  AutoModelForCausalLM,
15
  AutoProcessor,
16
  TextIteratorStreamer,
17
  )
18
 
19
+ from theme import steel_blue_theme
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  css = """
22
  #main-title h1 {
 
44
 
45
  print("Using device:", device)
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  # Load Chandra-OCR
48
  MODEL_ID_V = "datalab-to/chandra"
49
  processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
 
82
  torch_dtype=torch.float16
83
  ).to(device).eval()
84
 
85
+
86
  @spaces.GPU
87
  def generate_image(model_name: str, text: str, image: Image.Image,
88
  max_new_tokens: int, temperature: float, top_p: float,
 
146
  time.sleep(0.01)
147
  yield buffer, buffer
148
 
149
+
150
  image_examples = [
151
  ["OCR the content perfectly.", "examples/3.jpg"],
152
  ["Perform OCR on the image.", "examples/1.jpg"],
 
167
  )
168
 
169
  with gr.Accordion("Advanced options", open=False):
170
+ max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1,
171
+ value=DEFAULT_MAX_NEW_TOKENS)
172
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7)
173
  top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
174
  top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
175
+ repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05,
176
+ value=1.1)
177
 
178
  with gr.Column(scale=3):
179
+ gr.Markdown("## Output", elem_id="output-title")
180
+ output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=11, show_copy_button=True)
181
+ with gr.Accordion("(Result.md)", open=False):
182
+ markdown_output = gr.Markdown(label="(Result.Md)")
183
+
184
+ model_choice = gr.Radio(
185
+ choices=["Nanonets-OCR2-3B", "Chandra-OCR", "Dots.OCR", "olmOCR-2-7B-1025"],
186
+ label="Select Model",
187
+ value="Nanonets-OCR2-3B"
188
+ )
189
 
190
  image_submit.click(
191
  fn=generate_image,
192
+ inputs=[model_choice, image_query, image_upload, max_new_tokens, temperature, top_p, top_k,
193
+ repetition_penalty],
194
  outputs=[output, markdown_output]
195
  )
196