Spaces:

BlackSpire
/

testOcr1

Running

App Files Files Community

BlackSpire committed on Nov 26, 2025

Commit

020cf60

verified ·

1 Parent(s): b2dcaa8

Update app.py

Browse files

Files changed (1) hide show

app.py +112 -4

app.py CHANGED Viewed

@@ -1,7 +1,115 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

 import gradio as gr
+from transformers import AutoProcessor, HunYuanVLForConditionalGeneration
+from PIL import Image
+import torch
# -------------------------
# Clean repeating substrings
# -------------------------
def clean_repeated_substrings(
    text: str, *, min_length: int = 8000, min_repeats: int = 10
) -> str:
    """Trim a runaway repetition from the end of ``text``.

    Looks for a unit of two or more characters that is repeated back to
    back at the very end of ``text``. Units are tried from shortest to
    longest; as soon as one occurs at least ``min_repeats`` times in a
    row, all but one copy of it are removed and the result is returned.

    Args:
        text: String to clean.
        min_length: Strings shorter than this are returned unchanged
            without being scanned.
        min_repeats: Number of consecutive copies of a unit required
            before the tail is collapsed. Must be at least 2.

    Returns:
        ``text`` with the trailing repetition collapsed to a single copy,
        or ``text`` unchanged when no qualifying repetition is found.

    Raises:
        ValueError: If ``min_repeats`` is smaller than 2.
    """
    if min_repeats < 2:
        raise ValueError("min_repeats must be at least 2")
    n = len(text)
    if n < min_length:
        return text
    # A unit repeated min_repeats times cannot be longer than n // min_repeats.
    for length in range(2, n // min_repeats + 1):
        candidate = text[-length:]
        count = 0
        i = n - length
        # Walk backwards one unit at a time while the tail keeps repeating.
        while i >= 0 and text[i:i + length] == candidate:
            count += 1
            i -= length
        if count >= min_repeats:
            # Keep everything before the run plus one copy of the unit.
            return text[:n - length * (count - 1)]
    return text
# --------------------------------------------------
# Load model + processor
# --------------------------------------------------
# Both objects are created once, when this module is executed, and are
# read as module-level globals by the OCR function.
model_name = "tencent/HunyuanOCR"
processor = AutoProcessor.from_pretrained(model_name, use_fast=False)

_model_load_options = {
    "attn_implementation": "eager",
    "dtype": torch.bfloat16,
    # Let the library decide where the weights are placed instead of
    # pinning a specific device here.
    "device_map": "auto",
}
model = HunYuanVLForConditionalGeneration.from_pretrained(
    model_name, **_model_load_options
)
# --------------------------------------------------
# OCR function
# --------------------------------------------------
def run_ocr(image):
    """Run the OCR model on one uploaded image and return the decoded text.

    Args:
        image: The image handed over by the UI, or ``None`` when nothing
            was uploaded.

    Returns:
        A warning string when ``image`` is ``None``; otherwise the text
        generated by the model, passed through
        ``clean_repeated_substrings``.
    """
    if image is None:
        return "⚠ Please upload an image."

    # Instruction sent with the image. Rough translation: "Detect and
    # recognise the text in the image and output the text coordinates in
    # a formatted way."
    instruction = {
        "type": "text",
        "text": "检测并识别图片中的文字，将文本坐标格式化输出。"
    }
    conversation = [
        {"role": "system", "content": ""},
        {
            "role": "user",
            "content": [{"type": "image", "image": image}, instruction],
        },
    ]

    # The processor is called with a batch, so wrap the single rendered
    # prompt in a list.
    prompts = [
        processor.apply_chat_template(
            conversation, tokenize=False, add_generation_prompt=True
        )
    ]
    batch = processor(
        text=prompts,
        images=image,
        padding=True,
        return_tensors="pt",
    )

    with torch.no_grad():
        # Put the inputs on the device of the model's first parameter.
        target_device = next(model.parameters()).device
        batch = batch.to(target_device)
        # do_sample=False: no sampling during generation.
        sequences = model.generate(
            **batch,
            max_new_tokens=16384,
            do_sample=False
        )

    # generate() returns prompt + continuation; drop the prompt part of
    # every row so only newly generated tokens are decoded.
    new_tokens = []
    for prompt_ids, full_ids in zip(batch.input_ids, sequences):
        new_tokens.append(full_ids[len(prompt_ids):])

    decoded = processor.batch_decode(
        new_tokens,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )
    first_result = decoded[0]
    return clean_repeated_substrings(first_result)
# --------------------------------------------------
# Gradio UI
# --------------------------------------------------
# Input: a single image, delivered to run_ocr as a PIL image.
_image_input = gr.Image(type="pil", label="Upload Image")
# Output: a 20-line text box holding the recognised text.
_text_output = gr.Textbox(lines=20, label="OCR Output")

app = gr.Interface(
    fn=run_ocr,
    inputs=_image_input,
    outputs=_text_output,
    title="HunYuanOCR - Tencent OCR",
    description="Upload an image to extract Chinese/English text using Tencent HunYuanOCR.",
)

# Launch the interface as soon as this module is executed.
app.launch()