ZennyKenny committed
Commit e3493fe · verified · 1 Parent(s): fdb3d4e

Update app.py

Files changed (1)
  1. app.py +194 -153
app.py CHANGED
@@ -1,167 +1,208 @@
  import os
- import random
- import uuid
- import json
- import time
- import asyncio
- from threading import Thread
 
- import gradio as gr
- import spaces
  import torch
- import numpy as np
- from PIL import Image, ImageOps
- # import cv2  # not needed anymore
 
- from transformers import (
-     Qwen2_5_VLForConditionalGeneration,
-     AutoProcessor,
-     TextIteratorStreamer,
- )
- from transformers.image_utils import load_image
 
- # Optional docling imports (unused now but kept for easy re-enable)
- # from docling_core.types.doc import DoclingDocument, DocTagsDocument
 
- import re
- import ast
- import html
-
- # ---------------------------
- # Constants & device
- # ---------------------------
- MAX_MAX_NEW_TOKENS = 2048
- DEFAULT_MAX_NEW_TOKENS = 1024
- MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
-
- device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-
- # ---------------------------
- # Load ONLY Typhoon OCR 20B
- # ---------------------------
- MODEL_ID = "scb10x/typhoon-ocr-20b"  # <- 20B model
- processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
- model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-     MODEL_ID,
-     trust_remote_code=True,
-     torch_dtype=torch.float16
- ).to(device).eval()
-
- # ---------------------------
- # (Optional) image helpers
- # ---------------------------
- def add_random_padding(image, min_percent=0.1, max_percent=0.10):
-     image = image.convert("RGB")
-     width, height = image.size
-     pad_w_percent = random.uniform(min_percent, max_percent)
-     pad_h_percent = random.uniform(min_percent, max_percent)
-     pad_w = int(width * pad_w_percent)
-     pad_h = int(height * pad_h_percent)
-     corner_pixel = image.getpixel((0, 0))
-     padded_image = ImageOps.expand(image, border=(pad_w, pad_h, pad_w, pad_h), fill=corner_pixel)
-     return padded_image
-
- def normalize_values(text, target_max=500):
-     def normalize_list(values):
-         max_value = max(values) if values else 1
-         return [round((v / max_value) * target_max) for v in values]
-
-     def process_match(match):
-         num_list = ast.literal_eval(match.group(0))
-         normalized = normalize_list(num_list)
-         return "".join([f"<loc_{num}>" for num in normalized])
-
-     pattern = r"\[([\d\.\s,]+)\]"
-     return re.sub(pattern, process_match, text)
-
- # ---------------------------
- # Image generation only
- # ---------------------------
- @spaces.GPU
- def generate_image(
      text: str,
-     image: Image.Image,
-     max_new_tokens: int = 2048,
-     temperature: float = 0.1,
-     top_p: float = 0.9,
-     top_k: int = 50,
-     repetition_penalty: float = 1.2,
- ):
-     """Generate OCR/vision response for a single image with Typhoon OCR 20B."""
-     if image is None:
-         yield "Please upload an image."
-         return
 
-     images = [image]
 
-     messages = [
-         {
-             "role": "user",
-             "content": [{"type": "image"} for _ in images] + [
-                 {"type": "text", "text": text}
-             ]
-         }
      ]
-     prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
-     inputs = processor(text=prompt, images=images, return_tensors="pt").to(device)
-
-     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-     generation_kwargs = {
-         **inputs,
-         "streamer": streamer,
-         "max_new_tokens": max_new_tokens,
-         "temperature": temperature,
-         "top_p": top_p,
-         "top_k": top_k,
-         "repetition_penalty": repetition_penalty,
-     }
-
-     thread = Thread(target=model.generate, kwargs=generation_kwargs)
-     thread.start()
-
-     buffer = ""
-     for new_text in streamer:
-         buffer += new_text.replace("<|im_end|>", "")
-         yield buffer
-
- # ---------------------------
- # Minimal UI (Image only)
- # ---------------------------
- css = """
- .submit-btn {
-     background-color: #2980b9 !important;
-     color: white !important;
- }
- .submit-btn:hover {
-     background-color: #3498db !important;
- }
- """
-
- with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
-     gr.Markdown("# **Typhoon OCR 20B**")
 
-     with gr.Row():
-         with gr.Column():
-             image_query = gr.Textbox(label="Query Input", placeholder="e.g., \"OCR the image\" or task instruction…")
-             image_upload = gr.Image(type="pil", label="Image")
-             image_submit = gr.Button("Submit", elem_classes="submit-btn")
-
-             with gr.Accordion("Advanced options", open=False):
-                 max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
-                 temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.1)
-                 top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
-                 top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
-                 repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2)
-
-         # Right column: ONLY output (no model info, no radios)
-         with gr.Column():
-             output = gr.Textbox(label="Output", interactive=False, lines=12, scale=2)
-
-     image_submit.click(
-         fn=generate_image,
-         inputs=[image_query, image_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
-         outputs=output
      )
 
  if __name__ == "__main__":
-     demo.queue(max_size=30).launch(share=True, mcp_server=True, ssr_mode=False, show_error=True)
  import os
+ import re
+ import importlib.util
+ from pathlib import Path
 
  import torch
+ import gradio as gr
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
+
+ MODEL_ID = "ZennyKenny/oss-20b-prereform-to-modern-ru-merged"
+
+ # ----------------- Load SYSTEM_PROMPT from 'text-prompt.py' -----------------
+ def _load_system_prompt():
+     prompt_path = Path(__file__).with_name("text-prompt.py")
+     default_prompt = (
+         "Ты компетентный редактор русского языка. "
+         "Преобразуй дореформенную русскую орфографию (до 1918 года) "
+         "в современную орфографию. Сохраняй смысл, пунктуацию и регистр. "
+         "Не добавляй комментариев. Верни только преобразованный текст."
+     )
+     try:
+         if not prompt_path.exists():
+             return default_prompt
+         spec = importlib.util.spec_from_file_location("text_prompt_mod", str(prompt_path))
+         mod = importlib.util.module_from_spec(spec)
+         assert spec and spec.loader, "Cannot load spec for text-prompt.py"
+         spec.loader.exec_module(mod)  # type: ignore[attr-defined]
+         return getattr(mod, "SYSTEM_PROMPT", default_prompt)
+     except Exception:
+         return default_prompt
+
+ SYSTEM_PROMPT = _load_system_prompt()
+
+ # ----------------- Fallback: rule-based converter (no ML needed) -----------------
+ REPLACEMENTS = [
+     ("Ѣ", "Е"), ("ѣ", "е"),
+     ("І", "И"), ("і", "и"),
+     ("Ѳ", "Ф"), ("ѳ", "ф"),
+     ("Ѵ", "И"), ("ѵ", "и"),
+ ]
+ TERMINAL_HARD_SIGN = re.compile(r"(?i)ъ\b")  # remove word-final hard sign
+ MULTI_SPACES = re.compile(r"[ \t]{2,}")
+
+ def rule_based_convert(text: str) -> str:
+     if not text:
+         return ""
+     out = text
+     for old, new in REPLACEMENTS:
+         out = out.replace(old, new)
+     out = TERMINAL_HARD_SIGN.sub("", out)
+     out = MULTI_SPACES.sub(" ", out)
+     return out
+
+ # ----------------- Model state (CPU-only) -----------------
+ _tokenizer = None
+ _model = None
+ _streamer = None
+ _MODEL_READY = False
+ _MODEL_ERROR = None
+
+ def build_prompt(text: str) -> str:
+     return (
+         f"{SYSTEM_PROMPT}\n\n"
+         f"Текст (дореформ.):\n{text.strip()}\n\n"
+         f"Текст (современная орфография):"
+     )
 
+ def load_model_cpu():
+     """Force CPU load. Gracefully degrade if loading fails."""
+     global _tokenizer, _model, _streamer, _MODEL_READY, _MODEL_ERROR
+     if _MODEL_READY or _MODEL_ERROR:
+         return
 
+     if os.getenv("DISABLE_MODEL", "0") == "1":
+         _MODEL_ERROR = "Model disabled via DISABLE_MODEL=1."
+         return
 
+     try:
+         os.environ["CUDA_VISIBLE_DEVICES"] = ""
+         os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
+
+         _tokenizer = AutoTokenizer.from_pretrained(
+             MODEL_ID, use_fast=True, trust_remote_code=True
+         )
+         _model = AutoModelForCausalLM.from_pretrained(
+             MODEL_ID,
+             trust_remote_code=True,
+             torch_dtype=torch.float32,  # CPU dtype
+             low_cpu_mem_usage=True,
+             device_map=None,  # ensure CPU
+         ).to("cpu")
+         _streamer = TextStreamer(_tokenizer, skip_prompt=True, skip_special_tokens=True)
+         _MODEL_READY = True
+     except Exception as e:
+         _MODEL_ERROR = f"{type(e).__name__}: {e}"
+
+ def convert_with_model(
      text: str,
+     max_new_tokens: int,
+     temperature: float,
+     top_p: float,
+     top_k: int,
+     repetition_penalty: float,
+     do_stream: bool
+ ) -> str:
+     prompt = build_prompt(text)
+     inputs = _tokenizer(prompt, return_tensors="pt")
+     input_ids = inputs.input_ids.to("cpu")
+
+     gen_kwargs = dict(
+         max_new_tokens=int(max_new_tokens),
+         temperature=float(temperature),
+         top_p=float(top_p),
+         top_k=int(top_k),
+         repetition_penalty=float(repetition_penalty),
+         do_sample=True,
+     )
 
+     if do_stream:
+         chunks = []
+
+         class _BufStreamer(TextStreamer):
+             def on_finalized_text(self, text, stream_end=False):
+                 chunks.append(text)
+
+         buf_streamer = _BufStreamer(_tokenizer, skip_prompt=True, skip_special_tokens=True)
+         _ = _model.generate(input_ids=input_ids, streamer=buf_streamer, **gen_kwargs)
+         out = "".join(chunks)
+     else:
+         with torch.no_grad():
+             output_ids = _model.generate(input_ids=input_ids, **gen_kwargs)
+         out = _tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+     marker = "Текст (современная орфография):"
+     return out.split(marker, 1)[-1].strip() if marker in out else out.strip()
+
+ def convert(text, max_new_tokens, temperature, top_p, top_k, repetition_penalty, do_stream):
+     if not text or not text.strip():
+         return ""
+
+     load_model_cpu()
+
+     if _MODEL_READY:
+         try:
+             return convert_with_model(
+                 text, max_new_tokens, temperature, top_p, top_k, repetition_penalty, do_stream
+             )
+         except Exception:
+             return rule_based_convert(text) + "\n\n[Примечание: использовано правило-базовое преобразование из-за ошибки генерации на CPU.]"
+     else:
+         note = "\n\n[Примечание: используется правило-базовое преобразование"
+         if _MODEL_ERROR:
+             note += f" (модель недоступна: {_MODEL_ERROR})"
+         note += ".]"
+         return rule_based_convert(text) + note
+
+ # ----------------- UI -----------------
+ with gr.Blocks(title="Pre-reform → Modern Russian (CPU-only)") as demo:
+     gr.Markdown(
+         """
+         # Преобразование дореформенной орфографии → современная (CPU-only)
+         Вставьте дореформенный русский текст — получите современную орфографию.
+         Модель: `ZennyKenny/oss-20b-prereform-to-modern-ru-merged`
+
+         *Подсказка:* На CPU загрузка большой модели может быть недоступна; в таком случае
+         автоматически используется быстрый правило-базовый конвертер (ѣ→е, і→и, ѳ→ф, ѵ→и, удаление конечного ъ).
+         """
+     )
 
+     with gr.Row():
+         with gr.Column(scale=1):
+             inp = gr.Textbox(
+                 label="Ввод: дореформенный текст",
+                 placeholder="Например: \"въ мирѣ сёмъ многа есть...\"",
+                 lines=10
+             )
+             with gr.Accordion("Параметры генерации (медленно на CPU)", open=False):
+                 max_new_tokens = gr.Slider(8, 256, value=128, step=8, label="max_new_tokens")
+                 temperature = gr.Slider(0.0, 1.2, value=0.2, step=0.05, label="temperature")
+                 top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="top_p")
+                 top_k = gr.Slider(0, 100, value=40, step=1, label="top_k")
+                 repetition_penalty = gr.Slider(1.0, 2.0, value=1.05, step=0.01, label="repetition_penalty")
+                 do_stream = gr.Checkbox(value=False, label="Стриминг вывода")
+
+             btn = gr.Button("Преобразовать", variant="primary")
+
+         with gr.Column(scale=1):
+             out = gr.Textbox(label="Вывод: современная орфография", lines=12)
+
+     examples = [
+         ["въ семъ домѣ обитало три семейства, и каждое имѣло свои обыкновенія."],
+         ["Онъ шёлъ по узкой улѣцѣ, разсматривая вывѣски лавокъ и фонари."],
+         ["въ мирѣ сёмъ многа есть, чего мудрецу и не снилось."]
      ]
+     gr.Examples(examples=examples, inputs=[inp])
 
+     def _on_click(text, a, b, c, d, e, f):
+         return convert(text, a, b, c, d, e, f)
+
+     btn.click(
+         _on_click,
+         inputs=[inp, max_new_tokens, temperature, top_p, top_k, repetition_penalty, do_stream],
+         outputs=[out]
      )
 
  if __name__ == "__main__":
+     os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")
+     demo.queue().launch()
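
A quick local sanity check of the new fallback path, as a minimal sketch (assuming app.py is importable from the repo root; importing it builds the Gradio Blocks but neither launches the server nor loads the 20B model, since loading is deferred to load_model_cpu()):

    # Illustrative smoke test for the rule-based fallback and the prompt plumbing.
    from app import rule_based_convert, build_prompt

    sample = "въ семъ домѣ обитало три семейства, и каждое имѣло свои обыкновенія."

    # Fallback rules: ѣ→е, і→и, ѳ→ф, ѵ→и, word-final ъ dropped, multi-spaces collapsed.
    print(rule_based_convert(sample))
    # -> в сем доме обитало три семейства, и каждое имело свои обыкновения.

    # The model path wraps the input via build_prompt(); convert_with_model()
    # later splits the generation on this final marker line to isolate the answer.
    print(build_prompt(sample).splitlines()[-1])
    # -> Текст (современная орфография):

The marker split mirrors what convert_with_model() does to strip the echoed prompt from a CPU generation; the fallback output above exercises only the regex rules, not the model.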