prithivMLmods committed on
Commit
a4fae6f
·
verified ·
1 Parent(s): c4a3c6a

update app

Browse files
Files changed (1) hide show
  1. app.py +92 -17
app.py CHANGED
@@ -6,6 +6,96 @@ import os
6
  import tempfile
7
  from PIL import Image, ImageDraw
8
  import re
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  # --- 1. Load Model and Tokenizer directly to the correct device ---
11
  print("Determining device...")
@@ -130,23 +220,8 @@ def process_ocr_task(image, model_size, task_type, ref_text):
130
 
131
 
132
  # --- 3. Build the Gradio Interface ---
133
- with gr.Blocks(title="🐳DeepSeek-OCR🐳", theme=gr.themes.Soft()) as demo:
134
- gr.Markdown(
135
- """
136
- # 🐳 Full Demo of DeepSeek-OCR 🐳
137
-
138
- **💡 How to use:**
139
- 1. **Upload an image** using the upload box.
140
- 2. Select a **Resolution**. `Gundam` is recommended for most documents.
141
- 3. Choose a **Task Type**:
142
- - **📝 Free OCR**: Extracts raw text from the image.
143
- - **📄 Convert to Markdown**: Converts the document into Markdown, preserving structure.
144
- - **📈 Parse Figure**: Extracts structured data from charts and figures.
145
- - **🔍 Locate Object by Reference**: Finds a specific object/text.
146
- 4. If this helpful, please give it a like! 🙏 ❤️
147
- """
148
- )
149
-
150
  with gr.Row():
151
  with gr.Column(scale=1):
152
  image_input = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard"])
 
6
  import tempfile
7
  from PIL import Image, ImageDraw
8
  import re
9
+ from gradio.themes import Soft
10
+ from gradio.themes.utils import colors, fonts, sizes
11
+ from docling_core.types.doc import DoclingDocument, DocTagsDocument
12
+
13
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
14
+
15
+ # --- Device and CUDA Setup Check ---
16
+ print("CUDA_VISIBLE_DEVICES=", os.environ.get("CUDA_VISIBLE_DEVICES"))
17
+ print("torch.__version__ =", torch.__version__)
18
+ print("torch.version.cuda =", torch.version.cuda)
19
+ print("cuda available:", torch.cuda.is_available())
20
+ print("cuda device count:", torch.cuda.device_count())
21
+ if torch.cuda.is_available():
22
+ print("current device:", torch.cuda.current_device())
23
+ print("device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
24
+
25
+ print("Using device:", device)
26
+
27
+
28
+ colors.steel_blue = colors.Color(
29
+ name="steel_blue",
30
+ c50="#EBF3F8",
31
+ c100="#D3E5F0",
32
+ c200="#A8CCE1",
33
+ c300="#7DB3D2",
34
+ c400="#529AC3",
35
+ c500="#4682B4", # SteelBlue base color
36
+ c600="#3E72A0",
37
+ c700="#36638C",
38
+ c800="#2E5378",
39
+ c900="#264364",
40
+ c950="#1E3450",
41
+ )
42
+
43
+ class SteelBlueTheme(Soft):
44
+ def __init__(
45
+ self,
46
+ *,
47
+ primary_hue: colors.Color | str = colors.gray,
48
+ secondary_hue: colors.Color | str = colors.steel_blue,
49
+ neutral_hue: colors.Color | str = colors.slate,
50
+ text_size: sizes.Size | str = sizes.text_lg,
51
+ font: fonts.Font | str | Iterable[fonts.Font | str] = (
52
+ fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
53
+ ),
54
+ font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
55
+ fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
56
+ ),
57
+ ):
58
+ super().__init__(
59
+ primary_hue=primary_hue,
60
+ secondary_hue=secondary_hue,
61
+ neutral_hue=neutral_hue,
62
+ text_size=text_size,
63
+ font=font,
64
+ font_mono=font_mono,
65
+ )
66
+ super().set(
67
+ background_fill_primary="*primary_50",
68
+ background_fill_primary_dark="*primary_900",
69
+ body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
70
+ body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
71
+ button_primary_text_color="white",
72
+ button_primary_text_color_hover="white",
73
+ button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
74
+ button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
75
+ button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_700)",
76
+ button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_600)",
77
+ slider_color="*secondary_500",
78
+ slider_color_dark="*secondary_600",
79
+ block_title_text_weight="600",
80
+ block_border_width="3px",
81
+ block_shadow="*shadow_drop_lg",
82
+ button_primary_shadow="*shadow_drop_lg",
83
+ button_large_padding="11px",
84
+ color_accent_soft="*primary_100",
85
+ block_label_background_fill="*primary_200",
86
+ )
87
+
88
+ steel_blue_theme = SteelBlueTheme()
89
+
90
+ css = """
91
+ #main-title h1 {
92
+ font-size: 2.3em !important;
93
+ }
94
+ #output-title h2 {
95
+ font-size: 2.1em !important;
96
+ }
97
+ """
98
+
99
 
100
  # --- 1. Load Model and Tokenizer directly to the correct device ---
101
  print("Determining device...")
 
220
 
221
 
222
  # --- 3. Build the Gradio Interface ---
223
+ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
224
+ gr.Markdown("# **DeepSeek OCR [exp]**", elem_id="main-title")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  with gr.Row():
226
  with gr.Column(scale=1):
227
  image_input = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard"])