ZennyKenny committed on
Commit
e090e43
·
verified ·
1 Parent(s): 56d2d66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -99
app.py CHANGED
@@ -2,83 +2,111 @@ import os
2
  import re
3
  from pathlib import Path
4
 
5
- import torch
6
  import gradio as gr
 
7
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
8
- from peft import PeftModel # NEW
 
 
9
 
10
- MODEL_ID_BASE = "openai/gpt-oss-20b" # base model
 
 
11
  ADAPTER_REPO = "ZennyKenny/oss-20b-prereform-to-modern-ru-merged"
12
- ADAPTER_SUBFOLDER = "checkpoint-60" # where adapter lives in your repo
13
 
# ---- load SYSTEM_PROMPT from text-prompt.py ----
def _load_system_prompt(prompt_path=None):
    """Return the system prompt, preferring an override file over the default.

    The override is a Python file that assigns a module-level
    ``SYSTEM_PROMPT`` string. By default it is ``text-prompt.py`` located
    next to this module; ``prompt_path`` selects a different file.

    Args:
        prompt_path: Optional path (``str`` or ``Path``) of the override file.

    Returns:
        The override's ``SYSTEM_PROMPT`` when the file can be read and
        executed and the value is a non-blank string; otherwise the
        built-in default prompt.
    """
    default = (
        "Ты компетентный редактор русского языка. "
        "Преобразуй дореформенную русскую орфографию (до 1918 года) "
        "в современную орфографию. Сохраняй смысл, пунктуацию и регистр. "
        "Не добавляй комментариев. Верни только преобразованный текст."
    )
    if prompt_path is None:
        prompt_path = Path(__file__).with_name("text-prompt.py")
    try:
        source = Path(prompt_path).read_text(encoding="utf-8")
    except (OSError, UnicodeError):
        # Missing or unreadable override: the default is the intended result.
        return default

    namespace = {}
    try:
        # NOTE(security): exec runs the override file as arbitrary Python.
        # Only acceptable while that file ships with the app itself.
        exec(source, namespace)
    except Exception:
        # A broken override must never prevent the app from starting.
        return default

    prompt = namespace.get("SYSTEM_PROMPT")
    # Reject non-string or blank values so prompt building always gets text.
    if isinstance(prompt, str) and prompt.strip():
        return prompt
    return default


SYSTEM_PROMPT = _load_system_prompt()
29
 
# ---- simple rule-based fallback ----
# Pre-reform letters and their modern replacements (one character each).
REPLACEMENTS = [
    ("Ѣ", "Е"), ("ѣ", "е"),
    ("І", "И"), ("і", "и"),
    ("Ѳ", "Ф"), ("ѳ", "ф"),
    ("Ѵ", "И"), ("ѵ", "и"),
]
# Single-pass translation table derived from REPLACEMENTS.
_LETTER_TABLE = str.maketrans(dict(REPLACEMENTS))
# A hard sign (either case) at the end of a word.
TERMINAL_HARD_SIGN = re.compile(r"(?i)ъ\b")


def rule_based_convert(t):
    """Convert pre-reform Russian spelling with fixed letter rules.

    Replaces the letters listed in ``REPLACEMENTS`` and drops a hard sign
    at the end of a word. Hard signs inside a word are kept.

    Args:
        t: Text to convert; ``None`` and ``""`` are accepted.

    Returns:
        The converted text, or ``""`` for empty input.
    """
    if not t:
        return ""
    # One translate() pass instead of one replace() pass per letter.
    return TERMINAL_HARD_SIGN.sub("", t.translate(_LETTER_TABLE))
37
-
38
- # ---- model state (CPU only) ----
39
- _tokenizer = None
40
- _model = None
41
- _streamer = None
42
- _MODEL_READY = False
43
- _MODEL_ERROR = None
44
 
def build_prompt(text: str) -> str:
    """Assemble the generation prompt for one conversion request.

    The prompt is the system prompt, the stripped input under a
    "pre-reform text" heading, and a trailing "modern spelling" heading
    that the model is expected to continue.
    """
    parts = [
        f"{SYSTEM_PROMPT}",
        f"Текст (дореформ.):\n{text.strip()}",
        "Текст (современная орфография):",
    ]
    return "\n\n".join(parts)
47
-
def load_model_cpu():
    """Load the base model on CPU and apply the LoRA adapter, at most once.

    The outcome is recorded in module globals: on success ``_tokenizer``,
    ``_model`` and ``_streamer`` are set and ``_MODEL_READY`` becomes True;
    on failure ``_MODEL_ERROR`` holds a ``"Type: message"`` description.
    Once either flag is set, later calls return immediately, so a failed
    load is not retried. Setting ``DISABLE_MODEL=1`` in the environment
    skips loading and records that as the error.
    """
    global _tokenizer, _model, _streamer, _MODEL_READY, _MODEL_ERROR
    if _MODEL_READY or _MODEL_ERROR:
        return
    if os.getenv("DISABLE_MODEL", "0") == "1":
        _MODEL_ERROR = "Model disabled via DISABLE_MODEL=1."
        return
    try:
        # Hide any GPUs so everything below stays on CPU.
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        _tokenizer = AutoTokenizer.from_pretrained(
            ADAPTER_REPO, use_fast=True, trust_remote_code=True
        )
        base = AutoModelForCausalLM.from_pretrained(
            MODEL_ID_BASE,
            trust_remote_code=True,
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
            device_map=None,
        ).to("cpu")
        # Apply the LoRA adapter stored in the repo subfolder.
        _model = PeftModel.from_pretrained(base, ADAPTER_REPO, subfolder=ADAPTER_SUBFOLDER)
        # Merging is an optional speed-up: keep the un-merged model if it
        # fails, but leave a trace instead of hiding the failure.
        try:
            _model = _model.merge_and_unload()
        except Exception:
            import logging

            logging.getLogger(__name__).warning(
                "merge_and_unload() failed; using the un-merged adapter model",
                exc_info=True,
            )
        _streamer = TextStreamer(_tokenizer, skip_prompt=True, skip_special_tokens=True)
        _MODEL_READY = True
    except Exception as e:
        _MODEL_ERROR = f"{type(e).__name__}: {e}"
77
-
78
- def convert_with_model(text, max_new_tokens, temperature, top_p, top_k, repetition_penalty, do_stream):
79
  prompt = build_prompt(text)
80
- inputs = _tokenizer(prompt, return_tensors="pt")
81
- input_ids = inputs.input_ids.to("cpu")
82
  gen_kwargs = dict(
83
  max_new_tokens=int(max_new_tokens),
84
  temperature=float(temperature),
@@ -87,64 +115,57 @@ def convert_with_model(text, max_new_tokens, temperature, top_p, top_k, repetiti
87
  repetition_penalty=float(repetition_penalty),
88
  do_sample=True,
89
  )
90
- if do_stream:
91
- chunks = []
92
- class _Buf(TextStreamer):
93
- def on_finalized_text(self, txt, stream_end=False):
94
- chunks.append(txt)
95
- buf = _Buf(_tokenizer, skip_prompt=True, skip_special_tokens=True)
96
- _ = _model.generate(input_ids=input_ids, streamer=buf, **gen_kwargs)
97
- out = "".join(chunks)
98
- else:
99
- with torch.no_grad():
100
- out_ids = _model.generate(input_ids=input_ids, **gen_kwargs)
101
- out = _tokenizer.decode(out_ids[0], skip_special_tokens=True)
102
- marker = "Текст (современная орфография):"
103
- return out.split(marker, 1)[-1].strip() if marker in out else out.strip()
104
 
def convert(text, max_new_tokens, temperature, top_p, top_k, repetition_penalty, do_stream):
    """Convert pre-reform text, using the model when it is available.

    Blank input yields ``""``. Otherwise the model is loaded on demand; if
    it is not ready, or generation raises, the rule-based converter's
    output is returned with a bracketed note explaining the fallback.
    """
    if not (text and text.strip()):
        return ""

    load_model_cpu()

    if not _MODEL_READY:
        suffix = "\n\n[Примечание: используется правило-базовое преобразование"
        if _MODEL_ERROR:
            suffix += f" (модель недоступна: {_MODEL_ERROR})"
        suffix += ".]"
        return rule_based_convert(text) + suffix

    try:
        return convert_with_model(
            text, max_new_tokens, temperature, top_p, top_k, repetition_penalty, do_stream
        )
    except Exception:
        return (
            rule_based_convert(text)
            + "\n\n[Примечание: использовано правило-базовое преобразование из-за ошибки генерации на CPU.]"
        )
118
-
# ---- Gradio UI ----
# NOTE(review): widget nesting is reconstructed from syntax because the
# source view lost its indentation; confirm against the real app.py.
with gr.Blocks(title="Pre-reform Modern Russian (CPU-only)") as demo:
    gr.Markdown(
        """
        # Преобразование дореформенной орфографии современная (CPU-only)
        Модель: LoRA-адаптер к `openai/gpt-oss-20b` из `ZennyKenny/oss-20b-prereform-to-modern-ru-merged`.
        При недоступности модели используется правило-базовый конвертер (ѣ→е, і→и, ѳ→ф, ѵ→и, удаление конечного ъ).
        """
    )

    with gr.Row():
        # Left column: input text, generation settings and the run button.
        with gr.Column():
            inp = gr.Textbox(label="Ввод: дореформенный текст", lines=10)
            with gr.Accordion("Параметры генерации (медленно на CPU)", open=False):
                max_new_tokens = gr.Slider(8, 256, value=128, step=8, label="max_new_tokens")
                temperature = gr.Slider(0.0, 1.2, value=0.2, step=0.05, label="temperature")
                top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="top_p")
                top_k = gr.Slider(0, 100, value=40, step=1, label="top_k")
                repetition_penalty = gr.Slider(1.0, 2.0, value=1.05, step=0.01, label="repetition_penalty")
                do_stream = gr.Checkbox(value=False, label="Стриминг вывода")
            btn = gr.Button("Преобразовать", variant="primary")
        # Right column: converted text.
        with gr.Column():
            out = gr.Textbox(label="Вывод: современная орфография", lines=12)

    gr.Examples(
        examples=[["въ семъ домѣ обитало три семейства, и каждое имѣло свои обыкновенія."]],
        inputs=[inp],
    )

    # The seven inputs are passed positionally, in convert()'s parameter order.
    def _on_click(text, max_new, temp, nucleus_p, k, rep_penalty, stream):
        return convert(text, max_new, temp, nucleus_p, k, rep_penalty, stream)

    btn.click(
        _on_click,
        inputs=[inp, max_new_tokens, temperature, top_p, top_k, repetition_penalty, do_stream],
        outputs=[out],
    )
150
 
 
2
  import re
3
  from pathlib import Path
4
 
 
5
  import gradio as gr
6
+ import torch
7
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
8
+ from peft import PeftModel
9
+ import spaces # ZeroGPU
10
+
11
 
# ========= Config =========
# LoRA repo and base model.
MODEL_ID_BASE = "openai/gpt-oss-20b"  # base architecture
ADAPTER_REPO = "ZennyKenny/oss-20b-prereform-to-modern-ru-merged"
ADAPTER_SUBFOLDER = "checkpoint-60"  # subfolder of ADAPTER_REPO holding the LoRA


def _env_flag(name, default=True):
    """Read a boolean switch from the environment.

    Args:
        name: Environment variable name.
        default: Value returned when the variable is unset.

    Returns:
        ``default`` if the variable is unset; otherwise True when its
        value, trimmed and lower-cased, is one of ``1``, ``true``, ``yes``
        or ``on``, and False for anything else.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() in {"1", "true", "yes", "on"}


# ZeroGPU toggle; read once at import time from the environment.
USE_ZEROGPU = _env_flag("USE_ZEROGPU", default=True)
20
+
# ========= Load external prompt =========
def _load_system_prompt(prompt_path=None):
    """Return the system prompt, preferring an override file over the default.

    The override is a Python file that assigns a module-level
    ``SYSTEM_PROMPT`` string. By default it is ``text-prompt.py`` located
    next to this module; ``prompt_path`` selects a different file.

    Args:
        prompt_path: Optional path (``str`` or ``Path``) of the override file.

    Returns:
        The override's ``SYSTEM_PROMPT`` when the file can be read and
        executed and the value is a non-blank string; otherwise the
        built-in default prompt.
    """
    default = (
        "Ты компетентный редактор русского языка. "
        "Преобразуй дореформенную русскую орфографию (до 1918 года) "
        "в современную орфографию. Сохраняй смысл, пунктуацию и регистр. "
        "Не добавляй комментариев. Верни только преобразованный текст."
    )
    if prompt_path is None:
        prompt_path = Path(__file__).with_name("text-prompt.py")
    try:
        source = Path(prompt_path).read_text(encoding="utf-8")
    except (OSError, UnicodeError):
        # Missing or unreadable override: the default is the intended result.
        return default

    namespace = {}
    try:
        # NOTE(security): exec runs the override file as arbitrary Python.
        # Only acceptable while that file ships with the app itself.
        exec(source, namespace)
    except Exception:
        # A broken override must never prevent the app from starting.
        return default

    prompt = namespace.get("SYSTEM_PROMPT")
    # Reject non-string or blank values so prompt building always gets text.
    if isinstance(prompt, str) and prompt.strip():
        return prompt
    return default


SYSTEM_PROMPT = _load_system_prompt()
39
 
def build_prompt(text: str) -> str:
    """Assemble the generation prompt for one conversion request.

    The prompt is the system prompt, the stripped input under a
    "pre-reform text" heading, and a trailing "modern spelling" heading
    that the model is expected to continue.
    """
    header = f"{SYSTEM_PROMPT}"
    source_section = f"Текст (дореформ.):\n{text.strip()}"
    target_heading = "Текст (современная орфография):"
    return "\n\n".join((header, source_section, target_heading))
46
+
# ========= Rule-based CPU fallback =========
# Pre-reform letters and their modern replacements (one character each).
REPLACEMENTS = [
    ("Ѣ", "Е"), ("ѣ", "е"),
    ("І", "И"), ("і", "и"),
    ("Ѳ", "Ф"), ("ѳ", "ф"),
    ("Ѵ", "И"), ("ѵ", "и"),
]
# Single-pass translation table derived from REPLACEMENTS.
_LETTER_TABLE = str.maketrans(dict(REPLACEMENTS))
# A hard sign (either case) at the end of a word.
TERMINAL_HARD_SIGN = re.compile(r"(?i)ъ\b")


def rule_based_convert(text: str) -> str:
    """Convert pre-reform Russian spelling with fixed letter rules.

    Replaces the letters listed in ``REPLACEMENTS`` and drops a hard sign
    at the end of a word. Hard signs inside a word are kept.

    Args:
        text: Text to convert; ``None`` and ``""`` are accepted.

    Returns:
        The converted text, or ``""`` for empty input.
    """
    if not text:
        return ""
    # One translate() pass instead of one replace() pass per letter.
    return TERMINAL_HARD_SIGN.sub("", text.translate(_LETTER_TABLE))
63
+
64
+
# ========= ZeroGPU path (model loads INSIDE the GPU-decorated function) =========
def _sanitize_gen_kwargs(gen_kwargs):
    """Return a copy of ``gen_kwargs`` that is safe to pass to ``generate()``.

    The caller's dict is never modified. Any ``streamer`` entry is dropped
    because the result is returned as one string. When sampling is
    requested with a non-positive temperature, the copy switches to greedy
    decoding and drops the sampling-only options, since ``generate()``
    rejects a non-positive temperature while sampling.
    """
    kwargs = dict(gen_kwargs or {})
    kwargs.pop("streamer", None)
    temperature = kwargs.get("temperature")
    if kwargs.get("do_sample") and temperature is not None and float(temperature) <= 0.0:
        kwargs["do_sample"] = False
        for sampling_only in ("temperature", "top_p", "top_k"):
            kwargs.pop(sampling_only, None)
    return kwargs


@spaces.GPU(duration=180)  # extend duration if longer runs are expected
def _infer_zerogpu(prompt: str, gen_kwargs: dict) -> str:
    """Load tokenizer, base model and LoRA adapter, then generate once.

    Everything is loaded inside this call, so each call pays the full
    model-loading cost.

    Args:
        prompt: Complete prompt text, ending with the "modern spelling"
            heading the model should continue.
        gen_kwargs: Keyword arguments for ``model.generate``; not mutated.

    Returns:
        The generated continuation with surrounding whitespace removed.

    Raises:
        Whatever the loading or generation calls raise; the caller is
        expected to handle failures.
    """
    import logging

    # The tokenizer is loaded from the adapter repo.
    tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, use_fast=True, trust_remote_code=True)

    # Prefer bf16 when the device supports it, otherwise fp16.
    bf16_ok = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
    torch_dtype = torch.bfloat16 if bf16_ok else torch.float16
    base = AutoModelForCausalLM.from_pretrained(
        MODEL_ID_BASE,
        trust_remote_code=True,
        torch_dtype=torch_dtype,
        device_map="auto",
    )

    # Apply the LoRA adapter stored in the repo subfolder.
    model = PeftModel.from_pretrained(base, ADAPTER_REPO, subfolder=ADAPTER_SUBFOLDER)

    # Merging is an optional speed-up: keep the un-merged model if it
    # fails, but leave a trace instead of hiding the failure.
    try:
        model = model.merge_and_unload()
    except Exception:
        logging.getLogger(__name__).warning(
            "merge_and_unload() failed; using the un-merged adapter model",
            exc_info=True,
        )

    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    input_ids = encoded["input_ids"]
    kwargs = _sanitize_gen_kwargs(gen_kwargs)
    with torch.no_grad():
        out_ids = model.generate(
            input_ids=input_ids,
            attention_mask=encoded.get("attention_mask"),
            **kwargs,
        )

    # Decode only the newly generated tokens; the output sequence starts
    # with the prompt tokens.
    prompt_len = input_ids.shape[1]
    out = tokenizer.decode(out_ids[0][prompt_len:], skip_special_tokens=True)

    # Safety net: if the model repeats the heading, keep what follows it.
    marker = "Текст (современная орфография):"
    return out.split(marker, 1)[-1].strip() if marker in out else out.strip()
102
+
103
+
104
+ # ========= Orchestrator =========
105
+ def convert(text, max_new_tokens, temperature, top_p, top_k, repetition_penalty, do_stream):
106
+ if not text or not text.strip():
107
+ return ""
108
+
 
109
  prompt = build_prompt(text)
 
 
110
  gen_kwargs = dict(
111
  max_new_tokens=int(max_new_tokens),
112
  temperature=float(temperature),
 
115
  repetition_penalty=float(repetition_penalty),
116
  do_sample=True,
117
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
+ # Prefer ZeroGPU if enabled; otherwise CPU fallback
120
+ if USE_ZEROGPU:
 
 
 
121
  try:
122
+ return _infer_zerogpu(prompt, gen_kwargs)
123
+ except Exception as e:
124
+ # If ZeroGPU is unavailable/rate limited/errored, gracefully fall back.
125
+ return rule_based_convert(text) + f"\n\n[Примечание: ZeroGPU недоступен или ошибка: {type(e).__name__}: {e}]"
126
+ else:
127
+ # Explicit CPU-only mode (fast fallback)
128
+ return rule_based_convert(text) + "\n\n[Примечание: используется правило-базовое преобразование (ZeroGPU отключён).]"
129
+
130
+
# ========= UI =========
# NOTE(review): widget nesting is reconstructed from syntax because the
# source view lost its indentation; confirm against the real app.py.
with gr.Blocks(title="Pre-reform → Modern Russian (ZeroGPU)") as demo:
    gr.Markdown(
        """
        # Преобразование дореформенной → современной орфографии
        По умолчанию генерация выполняется на **ZeroGPU** (GPU выделяется на время запроса).
        Если ZeroGPU временно недоступен, используется надёжный **правило-базовый** конвертер.
        """
    )

    with gr.Row():
        # Left column: input text, generation settings and the run button.
        with gr.Column():
            inp = gr.Textbox(
                label="Ввод: дореформенный текст",
                placeholder="Например: \"въ мирѣ сёмъ многа есть...\"",
                lines=10,
            )
            with gr.Accordion("Параметры генерации", open=False):
                max_new_tokens = gr.Slider(16, 512, value=192, step=8, label="max_new_tokens")
                temperature = gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="temperature")
                top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="top_p")
                top_k = gr.Slider(0, 100, value=40, step=1, label="top_k")
                repetition_penalty = gr.Slider(1.0, 2.0, value=1.05, step=0.01, label="repetition_penalty")
            btn = gr.Button("Преобразовать", variant="primary")
        # Right column: converted text.
        with gr.Column():
            out = gr.Textbox(label="Вывод: современная орфография", lines=12)

    gr.Examples(
        examples=[
            ["въ семъ домѣ обитало три семейства, и каждое имѣло свои обыкновенія."],
            ["Онъ шёлъ по узкой улѣцѣ, разсматривая вывѣски лавокъ и фонари."],
        ],
        inputs=[inp],
    )

    # There is no streaming control in this UI, so do_stream is always False.
    def _on_click(text, max_new, temp, nucleus_p, k, rep_penalty):
        return convert(text, max_new, temp, nucleus_p, k, rep_penalty, False)

    btn.click(
        _on_click,
        inputs=[inp, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
        outputs=[out],
    )
171