DeepSeek-OCR-experimental

Running on Zero

App Files Files Community

prithivMLmods committed on Oct 29

Commit

a4fae6f

verified ·

1 Parent(s): c4a3c6a

update app

Browse files

Files changed (1) hide show

app.py +92 -17

app.py CHANGED Viewed

@@ -6,6 +6,96 @@ import os
 import tempfile
 from PIL import Image, ImageDraw
 import re
 # --- 1. Load Model and Tokenizer directly to the correct device ---
 print("Determining device...")
@@ -130,23 +220,8 @@ def process_ocr_task(image, model_size, task_type, ref_text):
 # --- 3. Build the Gradio Interface ---
-with gr.Blocks(title="🐳DeepSeek-OCR🐳", theme=gr.themes.Soft()) as demo:
-    gr.Markdown(
-        """
-        # 🐳 Full Demo of DeepSeek-OCR 🐳
-        **💡 How to use:**
-        1.  **Upload an image** using the upload box.
-        2.  Select a **Resolution**. `Gundam` is recommended for most documents.
-        3.  Choose a **Task Type**:
-            - **📝 Free OCR**: Extracts raw text from the image.
-            - **📄 Convert to Markdown**: Converts the document into Markdown, preserving structure.
-            - **📈 Parse Figure**: Extracts structured data from charts and figures.
-            - **🔍 Locate Object by Reference**: Finds a specific object/text.
-        4. If this helpful, please give it a like! 🙏 ❤️
-        """
-    )
     with gr.Row():
         with gr.Column(scale=1):
             image_input = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard"])

 import tempfile
 from PIL import Image, ImageDraw
 import re
+from gradio.themes import Soft
+from gradio.themes.utils import colors, fonts, sizes
+from docling_core.types.doc import DoclingDocument, DocTagsDocument
+# Use the CUDA device when torch reports one is available, otherwise the CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# --- Device and CUDA Setup Check ---
+# Diagnostic output only: prints the CUDA_VISIBLE_DEVICES environment
+# variable, the torch / CUDA versions, and what torch can see.
+print("CUDA_VISIBLE_DEVICES=", os.environ.get("CUDA_VISIBLE_DEVICES"))
+print("torch.__version__ =", torch.__version__)
+print("torch.version.cuda =", torch.version.cuda)
+print("cuda available:", torch.cuda.is_available())
+print("cuda device count:", torch.cuda.device_count())
+if torch.cuda.is_available():
+    # Current-device details are queried only when CUDA is available.
+    print("current device:", torch.cuda.current_device())
+    print("device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
+print("Using device:", device)
+# Attach a custom "steel_blue" palette to the imported gradio `colors` module
+# so it can be referenced as `colors.steel_blue` (used as the default
+# secondary hue of SteelBlueTheme). Shades run from lightest (c50) to
+# darkest (c950).
+colors.steel_blue = colors.Color(
+    name="steel_blue",
+    c50="#EBF3F8",
+    c100="#D3E5F0",
+    c200="#A8CCE1",
+    c300="#7DB3D2",
+    c400="#529AC3",
+    c500="#4682B4",  # SteelBlue base color
+    c600="#3E72A0",
+    c700="#36638C",
+    c800="#2E5378",
+    c900="#264364",
+    c950="#1E3450",
+)
+class SteelBlueTheme(Soft):
+    """Gradio theme derived from ``Soft`` with a steel-blue secondary hue.
+
+    The constructor forwards its hue, text-size and font choices to
+    ``Soft.__init__`` and then overrides a fixed set of theme variables
+    (page/background fills, primary-button colors, slider color and block
+    styling) through ``set``.
+    """
+    # NOTE(review): `Iterable` is used in the annotations below but is not
+    # imported in the visible part of this diff -- confirm that
+    # `from typing import Iterable` exists earlier in app.py.
+    def __init__(
+        self,
+        *,
+        primary_hue: colors.Color | str = colors.gray,
+        secondary_hue: colors.Color | str = colors.steel_blue,
+        neutral_hue: colors.Color | str = colors.slate,
+        text_size: sizes.Size | str = sizes.text_lg,
+        font: fonts.Font | str | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
+        ),
+        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
+        ),
+    ):
+        """Build the theme; every argument is keyword-only.
+
+        Args:
+            primary_hue: Primary palette; defaults to ``colors.gray``.
+            secondary_hue: Secondary palette; defaults to
+                ``colors.steel_blue``.
+            neutral_hue: Neutral palette; defaults to ``colors.slate``.
+            text_size: Base text size; defaults to ``sizes.text_lg``.
+            font: Font, or iterable of fonts tried in order; defaults to
+                the "Outfit" Google font, then "Arial", then "sans-serif".
+            font_mono: Monospace font(s); defaults to the "IBM Plex Mono"
+                Google font, then "ui-monospace", then "monospace".
+        """
+        # Pass the caller's choices straight through to the Soft base theme.
+        super().__init__(
+            primary_hue=primary_hue,
+            secondary_hue=secondary_hue,
+            neutral_hue=neutral_hue,
+            text_size=text_size,
+            font=font,
+            font_mono=font_mono,
+        )
+        # Override individual theme variables. Values beginning with "*"
+        # refer to other theme variables (Gradio theming convention), e.g.
+        # "*secondary_500" is the c500 shade of the secondary hue.
+        # Keys ending in "_dark" presumably apply in dark mode -- see the
+        # Gradio theming guide.
+        super().set(
+            background_fill_primary="*primary_50",
+            background_fill_primary_dark="*primary_900",
+            body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
+            body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
+            button_primary_text_color="white",
+            button_primary_text_color_hover="white",
+            button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
+            button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
+            button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_700)",
+            button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_600)",
+            slider_color="*secondary_500",
+            slider_color_dark="*secondary_600",
+            block_title_text_weight="600",
+            block_border_width="3px",
+            block_shadow="*shadow_drop_lg",
+            button_primary_shadow="*shadow_drop_lg",
+            button_large_padding="11px",
+            color_accent_soft="*primary_100",
+            block_label_background_fill="*primary_200",
+        )
+# Theme instance with all defaults; passed as `theme=` to gr.Blocks below.
+steel_blue_theme = SteelBlueTheme()
+# Custom CSS passed as `css=` to gr.Blocks: enlarges the <h1> inside the
+# element with id "main-title" and the <h2> inside "output-title".
+# "main-title" is the elem_id of the title Markdown; "output-title" is
+# presumably assigned to a heading elsewhere in the file -- verify.
+css = """
+#main-title h1 {
+    font-size: 2.3em !important;
+}
+#output-title h2 {
+    font-size: 2.1em !important;
+}
+"""
 # --- 1. Load Model and Tokenizer directly to the correct device ---
 print("Determining device...")
 # --- 3. Build the Gradio Interface ---
+with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
+    gr.Markdown("# **DeepSeek OCR [exp]**", elem_id="main-title")
     with gr.Row():
         with gr.Column(scale=1):
             image_input = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard"])