sarashina2.2-ocr

Paused

App Files Files Community

hayas committed on Mar 31

Commit

caef7ce

1 Parent(s): aac8486

Add files

Browse files

Files changed (6) hide show

.python-version +1 -0
README.md +4 -3
app.py +113 -0
pyproject.toml +62 -0
requirements.txt +319 -0
uv.lock +0 -0

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.12.12

README.md CHANGED Viewed

@@ -1,10 +1,11 @@
 ---
-title: Sarashina2.2 Ocr
-emoji: 💻
 colorFrom: blue
-colorTo: red
 sdk: gradio
 sdk_version: 6.10.0
 app_file: app.py
 pinned: false
 ---

 ---
+title: Sarashina2.2-OCR
+emoji: 📄
 colorFrom: blue
+colorTo: indigo
 sdk: gradio
 sdk_version: 6.10.0
+python_version: "3.12.12"
 app_file: app.py
 pinned: false
 ---

app.py ADDED Viewed

	@@ -0,0 +1,113 @@

+from collections.abc import Generator
+from threading import Thread
+import gradio as gr
+import spaces
+import torch
+from PIL import Image
+from transformers import AutoModelForCausalLM, AutoProcessor, TextIteratorStreamer
+# Hugging Face Hub repository id of the OCR model served by this demo.
+MODEL_ID = "sbintuitions/sarashina2.2-ocr"
+# The processor and model are created once, at module import time, and are
+# shared by every call to run_ocr below.
+# trust_remote_code=True lets transformers use code shipped in the model repo.
+processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True, use_fast=False)
+# Weights are loaded in bfloat16 and placed on the CUDA device.
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    device_map="cuda",
+    dtype=torch.bfloat16,
+    trust_remote_code=True,
+)
+@spaces.GPU(duration=90)
+def run_ocr(image: Image.Image | None) -> Generator[tuple[str, str], None, None]:
+    if image is None:
+        yield "", ""
+        return
+    message = [{"role": "user", "content": [{"type": "image", "image": image}]}]
+    inputs = processor.apply_chat_template(
+        message,
+        tokenize=True,
+        add_generation_prompt=True,
+        return_dict=True,
+        return_tensors="pt",
+    ).to(model.device)
+    streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True, timeout=20.0)
+    generate_kwargs = dict(
+        **inputs,
+        max_new_tokens=6000,
+        temperature=0.0,
+        top_p=0.95,
+        repetition_penalty=1.2,
+        use_cache=True,
+        streamer=streamer,
+    )
+    exception_holder: list[Exception] = []
+    def _generate() -> None:
+        try:
+            model.generate(**generate_kwargs)
+        except Exception as e:  # noqa: BLE001
+            exception_holder.append(e)
+    thread = Thread(target=_generate)
+    thread.start()
+    result = ""
+    for text in streamer:
+        result += text
+        yield result, result
+    thread.join()
+    if exception_holder:
+        msg = f"Generation failed: {exception_holder[0]}"
+        raise gr.Error(msg)
+# UI definition: image input and button in the first column, OCR result in the
+# second column.
+with gr.Blocks() as demo:
+    gr.Markdown("# Sarashina2.2-OCR Demo")
+    gr.Markdown(
+        "Upload a document image to extract text using "
+        "[sbintuitions/sarashina2.2-ocr](https://huggingface.co/sbintuitions/sarashina2.2-ocr)."
+    )
+    with gr.Row():
+        with gr.Column():
+            # type="pil" so run_ocr receives a PIL image.
+            image_input = gr.Image(label="Document Image", type="pil")
+            run_btn = gr.Button("Run OCR")
+        with gr.Column():
+            # The same OCR text is shown twice: rendered as Markdown, and raw.
+            with gr.Tab("Rendered"):
+                output_md = gr.Markdown(
+                    label="Result",
+                    # Math delimiters recognized when rendering the result.
+                    latex_delimiters=[
+                        {"left": "$$", "right": "$$", "display": True},
+                        {"left": "$", "right": "$", "display": False},
+                        {"left": "\\(", "right": "\\)", "display": False},
+                        {"left": "\\[", "right": "\\]", "display": True},
+                    ],
+                )
+            with gr.Tab("Raw"):
+                output_text = gr.Textbox(label="Raw Markdown", lines=20)
+    # Run OCR both when the button is clicked and when an image is uploaded.
+    gr.on(
+        triggers=[run_btn.click, image_input.upload],
+        fn=run_ocr,
+        inputs=image_input,
+        outputs=[output_md, output_text],
+    )
+    # Example images referenced by URL from the model repository.
+    gr.Examples(
+        examples=[
+            ["https://huggingface.co/sbintuitions/sarashina2.2-ocr/resolve/main/assets/sample1.jpeg"],
+            ["https://huggingface.co/sbintuitions/sarashina2.2-ocr/resolve/main/assets/sample2.jpeg"],
+            ["https://huggingface.co/sbintuitions/sarashina2.2-ocr/resolve/main/assets/sample3.jpeg"],
+            ["https://huggingface.co/sbintuitions/sarashina2.2-ocr/resolve/main/assets/sample4.jpeg"],
+        ],
+        inputs=image_input,
+        fn=run_ocr,
+        outputs=[output_md, output_text],
+    )
+# Start the app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()

pyproject.toml ADDED Viewed

	@@ -0,0 +1,62 @@

+[project]
+name = "sarashina2-2-ocr"
+version = "0.1.0"
+description = "Gradio demo for sbintuitions/sarashina2.2-ocr"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "accelerate>=1.13.0",
+    "gradio>=6.10.0",
+    "pillow>=12.1.1",
+    "protobuf>=7.34.1",
+    "sentencepiece>=0.2.1",
+    "spaces>=0.48.1",
+    "torch==2.9.1",
+    "torchvision>=0.24.1",
+    "transformers==4.57.1",
+]
+[tool.ruff]
+line-length = 119
+[tool.ruff.lint]
+select = ["ALL"]
+ignore = [
+    "COM812", # missing-trailing-comma
+    "D203",   # one-blank-line-before-class
+    "D213",   # multi-line-summary-second-line
+    "E501",   # line-too-long
+    "SIM117", # multiple-with-statements
+    #
+    "D100",    # undocumented-public-module
+    "D101",    # undocumented-public-class
+    "D102",    # undocumented-public-method
+    "D103",    # undocumented-public-function
+    "D104",    # undocumented-public-package
+    "D105",    # undocumented-magic-method
+    "D107",    # undocumented-public-init
+    "EM101",   # raw-string-in-exception
+    "FBT001",  # boolean-type-hint-positional-argument
+    "FBT002",  # boolean-default-value-positional-argument
+    "ISC001",  # single-line-implicit-string-concatenation
+    "PGH003",  # blanket-type-ignore
+    "PLR0913", # too-many-arguments
+    "PLR0915", # too-many-statements
+    "TRY003",  # raise-vanilla-args
+]
+unfixable = [
+    "F401", # unused-import
+]
+[tool.ruff.lint.pydocstyle]
+convention = "google"
+[tool.ruff.lint.per-file-ignores]
+"app.py" = ["INP001"]
+[tool.ruff.format]
+docstring-code-format = true
+[dependency-groups]
+dev = ["ruff>=0.15.8"]
+hf-spaces = ["datasets>=4.8.4"]

requirements.txt ADDED Viewed

	@@ -0,0 +1,319 @@

+# This file was autogenerated by uv via the following command:
+#    uv export --no-hashes --no-dev --group hf-spaces --no-emit-package typer-slim --no-emit-package spaces -o requirements.txt
+accelerate==1.13.0
+    # via sarashina2-2-ocr
+aiofiles==24.1.0
+    # via gradio
+aiohappyeyeballs==2.6.1
+    # via aiohttp
+aiohttp==3.13.4
+    # via fsspec
+aiosignal==1.4.0
+    # via aiohttp
+annotated-doc==0.0.4
+    # via
+    #   fastapi
+    #   typer
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.13.0
+    # via
+    #   gradio
+    #   httpx
+    #   starlette
+attrs==26.1.0
+    # via aiohttp
+audioop-lts==0.2.2 ; python_full_version >= '3.13'
+    # via gradio
+brotli==1.2.0
+    # via gradio
+certifi==2026.2.25
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
+charset-normalizer==3.4.6
+    # via requests
+click==8.3.1
+    # via
+    #   typer
+    #   uvicorn
+colorama==0.4.6 ; sys_platform == 'win32'
+    # via
+    #   click
+    #   tqdm
+datasets==4.8.4
+dill==0.4.1
+    # via
+    #   datasets
+    #   multiprocess
+fastapi==0.135.2
+    # via gradio
+ffmpy==1.0.0
+    # via gradio
+filelock==3.25.2
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   torch
+    #   transformers
+frozenlist==1.8.0
+    # via
+    #   aiohttp
+    #   aiosignal
+fsspec==2026.2.0
+    # via
+    #   datasets
+    #   gradio-client
+    #   huggingface-hub
+    #   torch
+gradio==6.10.0
+    # via
+    #   sarashina2-2-ocr
+    #   spaces
+gradio-client==2.4.0
+    # via
+    #   gradio
+    #   hf-gradio
+groovy==0.1.2
+    # via gradio
+h11==0.16.0
+    # via
+    #   httpcore
+    #   uvicorn
+hf-gradio==0.3.0
+    # via gradio
+hf-xet==1.4.2 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
+    # via huggingface-hub
+httpcore==1.0.9
+    # via httpx
+httpx==0.28.1
+    # via
+    #   datasets
+    #   gradio
+    #   gradio-client
+    #   safehttpx
+    #   spaces
+huggingface-hub==0.36.2
+    # via
+    #   accelerate
+    #   datasets
+    #   gradio
+    #   gradio-client
+    #   tokenizers
+    #   transformers
+idna==3.11
+    # via
+    #   anyio
+    #   httpx
+    #   requests
+    #   yarl
+jinja2==3.1.6
+    # via
+    #   gradio
+    #   torch
+markdown-it-py==4.0.0
+    # via rich
+markupsafe==3.0.3
+    # via
+    #   gradio
+    #   jinja2
+mdurl==0.1.2
+    # via markdown-it-py
+mpmath==1.3.0
+    # via sympy
+multidict==6.7.1
+    # via
+    #   aiohttp
+    #   yarl
+multiprocess==0.70.19
+    # via datasets
+networkx==3.6.1
+    # via torch
+numpy==2.4.4
+    # via
+    #   accelerate
+    #   datasets
+    #   gradio
+    #   pandas
+    #   torchvision
+    #   transformers
+nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cudnn-cu12
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cufft-cu12
+    #   nvidia-cusolver-cu12
+    #   nvidia-cusparse-cu12
+    #   torch
+nvidia-nvshmem-cu12==3.3.20 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+orjson==3.11.7
+    # via gradio
+packaging==26.0
+    # via
+    #   accelerate
+    #   datasets
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   spaces
+    #   transformers
+pandas==3.0.2
+    # via
+    #   datasets
+    #   gradio
+pillow==12.1.1
+    # via
+    #   gradio
+    #   sarashina2-2-ocr
+    #   torchvision
+propcache==0.4.1
+    # via
+    #   aiohttp
+    #   yarl
+protobuf==7.34.1
+    # via sarashina2-2-ocr
+psutil==5.9.8
+    # via
+    #   accelerate
+    #   spaces
+pyarrow==23.0.1
+    # via datasets
+pydantic==2.12.5
+    # via
+    #   fastapi
+    #   gradio
+    #   spaces
+pydantic-core==2.41.5
+    # via pydantic
+pydub==0.25.1
+    # via gradio
+pygments==2.20.0
+    # via rich
+python-dateutil==2.9.0.post0
+    # via pandas
+python-multipart==0.0.22
+    # via gradio
+pytz==2026.1.post1
+    # via gradio
+pyyaml==6.0.3
+    # via
+    #   accelerate
+    #   datasets
+    #   gradio
+    #   huggingface-hub
+    #   transformers
+regex==2026.3.32
+    # via transformers
+requests==2.33.1
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   spaces
+    #   transformers
+rich==14.3.3
+    # via typer
+safehttpx==0.1.7
+    # via gradio
+safetensors==0.7.0
+    # via
+    #   accelerate
+    #   transformers
+semantic-version==2.10.0
+    # via gradio
+sentencepiece==0.2.1
+    # via sarashina2-2-ocr
+setuptools==82.0.1
+    # via torch
+shellingham==1.5.4
+    # via typer
+six==1.17.0
+    # via python-dateutil
+starlette==0.52.1
+    # via
+    #   fastapi
+    #   gradio
+sympy==1.14.0
+    # via torch
+tokenizers==0.22.2
+    # via transformers
+tomlkit==0.13.3
+    # via gradio
+torch==2.9.1
+    # via
+    #   accelerate
+    #   sarashina2-2-ocr
+    #   torchvision
+torchvision==0.24.1
+    # via sarashina2-2-ocr
+tqdm==4.67.3
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   transformers
+transformers==4.57.1
+    # via sarashina2-2-ocr
+triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+typer==0.24.1
+    # via
+    #   gradio
+    #   hf-gradio
+typing-extensions==4.15.0
+    # via
+    #   aiosignal
+    #   anyio
+    #   fastapi
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   pydantic
+    #   pydantic-core
+    #   spaces
+    #   starlette
+    #   torch
+    #   typing-inspection
+typing-inspection==0.4.2
+    # via
+    #   fastapi
+    #   pydantic
+tzdata==2025.3 ; sys_platform == 'emscripten' or sys_platform == 'win32'
+    # via pandas
+urllib3==2.6.3
+    # via requests
+uvicorn==0.42.0
+    # via gradio
+xxhash==3.6.0
+    # via datasets
+yarl==1.23.0
+    # via aiohttp

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff