BlackSpire committed on
Commit
020cf60
·
verified ·
1 Parent(s): b2dcaa8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -4
app.py CHANGED
@@ -1,7 +1,115 @@
1
  import gradio as gr
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
 
 
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import AutoProcessor, HunYuanVLForConditionalGeneration
3
+ from PIL import Image
4
+ import torch
5
 
6
+ # -------------------------
7
+ # Clean Repeating Substrings (from your script)
8
+ # -------------------------
9
def clean_repeated_substrings(text, *, min_length=8000, min_repeats=10):
    """Collapse a substring that repeats back-to-back at the very end of *text*.

    The tail of ``text`` is examined for a unit of 2 or more characters that
    occurs at least ``min_repeats`` times in a row, ending exactly at the end
    of the string. Candidate unit lengths are tried from shortest to longest;
    for the first one that qualifies, all but one copy of the unit are
    removed and the shortened string is returned.

    Args:
        text: The string to inspect.
        min_length: Strings shorter than this are returned untouched.
        min_repeats: Minimum number of consecutive copies of the unit that
            must be present before the tail is trimmed. Must be >= 2.

    Returns:
        ``text`` with the trailing repetition reduced to a single copy, or
        ``text`` unchanged when it is too short or no qualifying repetition
        is found.

    Raises:
        ValueError: If ``min_repeats`` is smaller than 2.
    """
    if min_repeats < 2:
        raise ValueError("min_repeats must be at least 2")

    n = len(text)
    if n < min_length:
        return text

    # A unit that repeats `min_repeats` times cannot be longer than
    # n // min_repeats characters, so longer candidates are never tried.
    for length in range(2, n // min_repeats + 1):
        candidate = text[-length:]
        count = 0
        i = n - length

        # Walk backwards one unit at a time while each chunk equals the tail.
        while i >= 0 and text[i:i + length] == candidate:
            count += 1
            i -= length

        if count >= min_repeats:
            # Drop (count - 1) copies so exactly one copy of the unit remains.
            return text[:n - length * (count - 1)]

    return text
27
+
28
+
29
+ # --------------------------------------------------
30
+ # Load Model + Processor (cached by Hugging Face)
31
+ # --------------------------------------------------
32
# Checkpoint identifier passed to both the processor and the model loader.
model_name = "tencent/HunyuanOCR"

# The processor is used below to render the chat template, build the model
# inputs and decode generated ids; the non-fast variant is requested.
processor = AutoProcessor.from_pretrained(
    model_name,
    use_fast=False,
)

# Load the weights in bfloat16 with eager attention. device_map="auto"
# leaves device placement to the loader (GPU/CPU auto-selected on HF Spaces).
model = HunYuanVLForConditionalGeneration.from_pretrained(
    model_name,
    device_map="auto",
    dtype=torch.bfloat16,
    attn_implementation="eager",
)
42
+
43
+ # --------------------------------------------------
44
+ # OCR Function
45
+ # --------------------------------------------------
46
def run_ocr(image):
    """Run the OCR model on one image and return the recognised text.

    Args:
        image: The image to read, forwarded as-is to the chat template and
            to the processor. ``None`` means nothing was uploaded.

    Returns:
        A warning string when ``image`` is ``None``; otherwise the first
        decoded generation after ``clean_repeated_substrings`` has been
        applied to it.
    """
    # Guard clause: nothing to process.
    if image is None:
        return "⚠ Please upload an image."

    # Prompt (Chinese): "Detect and recognise the text in the image and
    # output the text coordinates in a formatted way."
    user_turn = {
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {
                "type": "text",
                "text": "检测并识别图片中的文字,将文本坐标格式化输出。",
            },
        ],
    }
    system_turn = {"role": "system", "content": ""}

    # A batch holding a single conversation.
    conversations = [[system_turn, user_turn]]

    # Render each conversation to prompt text (no tokenisation yet).
    prompts = []
    for conversation in conversations:
        rendered = processor.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=True,
        )
        prompts.append(rendered)

    inputs = processor(
        text=prompts,
        images=image,
        padding=True,
        return_tensors="pt",
    )

    with torch.no_grad():
        # Move the inputs to whichever device holds the model's parameters.
        target_device = next(model.parameters()).device
        inputs = inputs.to(target_device)

        # Greedy decoding (do_sample=False) with a large token budget.
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=16384,
            do_sample=False,
        )

    # Each output row starts with its prompt ids; keep only what follows them.
    completions = []
    for prompt_ids, full_ids in zip(inputs.input_ids, generated_ids):
        completions.append(full_ids[len(prompt_ids):])

    decoded = processor.batch_decode(
        completions,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )

    # Only one conversation was submitted, so only the first result is used.
    return clean_repeated_substrings(decoded[0])
102
+
103
+
104
+ # --------------------------------------------------
105
+ # Gradio UI
106
+ # --------------------------------------------------
107
# Single-function UI: one PIL image in, one multi-line text box out,
# with run_ocr as the callback.
app = gr.Interface(
    title="HunYuanOCR - Tencent OCR",
    description="Upload an image to extract Chinese/English text using Tencent HunYuanOCR.",
    fn=run_ocr,
    inputs=gr.Image(label="Upload Image", type="pil"),
    outputs=gr.Textbox(label="OCR Output", lines=20),
)

# Start serving the interface.
app.launch()