ZennyKenny committed on
Commit
581828a
·
verified ·
1 Parent(s): 6704877

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -62
app.py CHANGED
@@ -1,24 +1,20 @@
1
  import os
2
- import re
3
  from pathlib import Path
4
 
5
  import gradio as gr
6
  import torch
7
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
8
  from peft import PeftModel
9
  import spaces # ZeroGPU
10
 
11
 
12
  # ========= Config =========
13
- # Your LoRA repo and base model:
14
- MODEL_ID_BASE = "openai/gpt-oss-20b" # base architecture
15
- ADAPTER_REPO = "ZennyKenny/oss-20b-prereform-to-modern-ru-merged"
16
- ADAPTER_SUBFOLDER = "checkpoint-60" # LoRA lives here in your repo
17
 
18
- # ZeroGPU toggle (you can also set in Space Secrets):
19
- USE_ZEROGPU = os.getenv("USE_ZEROGPU", "1") == "1"
20
-
21
- # ========= Load external prompt =========
22
  def _load_system_prompt():
23
  path = Path(__file__).with_name("text-prompt.py")
24
  default = (
@@ -30,7 +26,7 @@ def _load_system_prompt():
30
  try:
31
  ns = {}
32
  if path.exists():
33
- exec(path.read_text(encoding="utf-8"), ns)
34
  return ns.get("SYSTEM_PROMPT", default)
35
  except Exception:
36
  return default
@@ -44,34 +40,17 @@ def build_prompt(text: str) -> str:
44
  f"Текст (современная орфография):"
45
  )
46
 
47
- # ========= Rule-based CPU fallback =========
48
- REPLACEMENTS = [
49
- ("Ѣ", "Е"), ("ѣ", "е"),
50
- ("І", "И"), ("і", "и"),
51
- ("Ѳ", "Ф"), ("ѳ", "ф"),
52
- ("Ѵ", "И"), ("ѵ", "и"),
53
- ]
54
- TERMINAL_HARD_SIGN = re.compile(r"(?i)ъ\b")
55
-
56
- def rule_based_convert(text: str) -> str:
57
- if not text:
58
- return ""
59
- for old, new in REPLACEMENTS:
60
- text = text.replace(old, new)
61
- text = TERMINAL_HARD_SIGN.sub("", text)
62
- return text
63
-
64
-
65
- # ========= ZeroGPU path (model loads INSIDE the GPU-decorated function) =========
66
- # Note: Gradio/Spaces allocate the GPU ONLY during the call to this function.
67
- # Keep everything self-contained here: tokenizer, model, generate, return.
68
- @spaces.GPU(duration=180) # allocate GPU just for this call (extend duration if you expect long runs)
69
  def _infer_zerogpu(prompt: str, gen_kwargs: dict) -> str:
70
- # Load tokenizer from your adapter repo (it contains tokenizer files)
71
  tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, use_fast=True, trust_remote_code=True)
72
 
73
- # Load base model on GPU (ZeroGPU provides an H200/A100-like device)
74
- # Use bf16 if available, fallback fp16.
 
 
 
75
  torch_dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16
76
  base = AutoModelForCausalLM.from_pretrained(
77
  MODEL_ID_BASE,
@@ -83,26 +62,49 @@ def _infer_zerogpu(prompt: str, gen_kwargs: dict) -> str:
83
  # Apply LoRA adapter from your repo/subfolder
84
  model = PeftModel.from_pretrained(base, ADAPTER_REPO, subfolder=ADAPTER_SUBFOLDER)
85
 
86
- # (Optional) Merge LoRA for faster generation and less VRAM fragmentation
87
  try:
88
  model = model.merge_and_unload()
89
  except Exception:
90
  pass
91
 
92
- # Generate on GPU
93
- input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  with torch.no_grad():
95
- if "streamer" in gen_kwargs:
96
- gen_kwargs.pop("streamer", None)
97
- out_ids = model.generate(input_ids=input_ids, **gen_kwargs)
98
- out = tokenizer.decode(out_ids[0], skip_special_tokens=True)
 
99
 
100
- marker = "Текст (современная орфография):"
101
- return out.split(marker, 1)[-1].strip() if marker in out else out.strip()
 
102
 
 
 
 
 
 
 
 
103
 
104
  # ========= Orchestrator =========
105
- def convert(text, max_new_tokens, temperature, top_p, top_k, repetition_penalty, do_stream):
106
  if not text or not text.strip():
107
  return ""
108
 
@@ -116,25 +118,18 @@ def convert(text, max_new_tokens, temperature, top_p, top_k, repetition_penalty,
116
  do_sample=True,
117
  )
118
 
119
- # Prefer ZeroGPU if enabled; otherwise CPU fallback
120
- if USE_ZEROGPU:
121
- try:
122
- return _infer_zerogpu(prompt, gen_kwargs)
123
- except Exception as e:
124
- # If ZeroGPU is unavailable/rate limited/errored, gracefully fall back.
125
- return rule_based_convert(text) + f"\n\n[Примечание: ZeroGPU недоступен или ошибка: {type(e).__name__}: {e}]"
126
- else:
127
- # Explicit CPU-only mode (fast fallback)
128
- return rule_based_convert(text) + "\n\n[Примечание: используется правило-базовое преобразование (ZeroGPU отключён).]"
129
-
130
 
131
  # ========= UI =========
132
  with gr.Blocks(title="Pre-reform → Modern Russian (ZeroGPU)") as demo:
133
  gr.Markdown(
134
  """
135
  # Преобразование дореформенной → современной орфографии
136
- По умолчанию генерация выполняется на **ZeroGPU** (GPU выделяется на время запроса).
137
- Если ZeroGPU временно недоступен, используется надёжный **правило-базовый** конвертер.
138
  """
139
  )
140
 
@@ -153,18 +148,26 @@ with gr.Blocks(title="Pre-reform → Modern Russian (ZeroGPU)") as demo:
153
  repetition_penalty = gr.Slider(1.0, 2.0, value=1.05, step=0.01, label="repetition_penalty")
154
  btn = gr.Button("Преобразовать", variant="primary")
155
  with gr.Column():
156
- out = gr.Textbox(label="Вывод: современная орфография", lines=12)
157
 
158
  gr.Examples(
159
  examples=[
 
160
  ["въ семъ домѣ обитало три семейства, и каждое имѣло свои обыкновенія."],
161
- ["Онъ шёлъ по узкой улѣцѣ, разсматривая вывѣски лавокъ и фонари."]
 
 
 
 
 
 
 
162
  ],
163
  inputs=[inp],
164
  )
165
 
166
  btn.click(
167
- lambda t,a,b,c,d,e: convert(t, a, b, c, d, e, False),
168
  inputs=[inp, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
169
  outputs=[out],
170
  )
 
1
  import os
 
2
  from pathlib import Path
3
 
4
  import gradio as gr
5
  import torch
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM
7
  from peft import PeftModel
8
  import spaces # ZeroGPU
9
 
10
 
11
  # ========= Config =========
12
+ # Base model + your LoRA adapter (override via Space Secrets if needed)
13
+ MODEL_ID_BASE = os.getenv("BASE_MODEL_ID", "openai/gpt-oss-20b")
14
+ ADAPTER_REPO = os.getenv("ADAPTER_REPO", "ZennyKenny/oss-20b-prereform-to-modern-ru-merged")
15
+ ADAPTER_SUBFOLDER = os.getenv("ADAPTER_SUBFOLDER", "checkpoint-60") # change if your adapter folder differs
16
 
17
+ # ========= Load external system prompt =========
 
 
 
18
  def _load_system_prompt():
19
  path = Path(__file__).with_name("text-prompt.py")
20
  default = (
 
26
  try:
27
  ns = {}
28
  if path.exists():
29
+ exec(path.read_text(encoding='utf-8'), ns)
30
  return ns.get("SYSTEM_PROMPT", default)
31
  except Exception:
32
  return default
 
40
  f"Текст (современная орфография):"
41
  )
42
 
43
+ # ========= ZeroGPU inference =========
44
+ @spaces.GPU(duration=180) # GPU is leased only while this function runs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  def _infer_zerogpu(prompt: str, gen_kwargs: dict) -> str:
46
+ # Tokenizer from adapter repo (it contains tokenizer files)
47
  tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, use_fast=True, trust_remote_code=True)
48
 
49
+ # Ensure pad token exists; if not, align it with EOS (common for GPT-like)
50
+ if tokenizer.pad_token_id is None:
51
+ tokenizer.pad_token = tokenizer.eos_token
52
+
53
+ # Load base model on GPU with appropriate dtype
54
  torch_dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16
55
  base = AutoModelForCausalLM.from_pretrained(
56
  MODEL_ID_BASE,
 
62
  # Apply LoRA adapter from your repo/subfolder
63
  model = PeftModel.from_pretrained(base, ADAPTER_REPO, subfolder=ADAPTER_SUBFOLDER)
64
 
65
+ # Optional: merge LoRA for faster generation
66
  try:
67
  model = model.merge_and_unload()
68
  except Exception:
69
  pass
70
 
71
+ # Sync pad_token_id to model config to avoid warnings
72
+ try:
73
+ model.config.pad_token_id = tokenizer.pad_token_id
74
+ except Exception:
75
+ pass
76
+
77
+ # ----- Tokenize & always pass attention_mask -----
78
+ enc = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
79
+ input_ids = enc["input_ids"].to(model.device)
80
+ attention_mask = enc.get("attention_mask", torch.ones_like(input_ids)).to(model.device)
81
+
82
+ # Reasonable defaults
83
+ gen_kwargs = dict(gen_kwargs or {})
84
+ gen_kwargs.setdefault("use_cache", True)
85
+
86
+ # ----- Generate -----
87
  with torch.no_grad():
88
+ out_ids = model.generate(
89
+ input_ids=input_ids,
90
+ attention_mask=attention_mask, # Key fix for pad==eos
91
+ **gen_kwargs,
92
+ )
93
 
94
+ # Decode ONLY the continuation (exclude prompt tokens)
95
+ continuation = out_ids[0, input_ids.shape[1]:]
96
+ out = tokenizer.decode(continuation, skip_special_tokens=True).strip()
97
 
98
+ # Fallback to full decode if continuation is empty (still no letter-replacement fallback)
99
+ if not out:
100
+ full = tokenizer.decode(out_ids[0], skip_special_tokens=True).strip()
101
+ marker = "Текст (современная орфография):"
102
+ out = full.split(marker, 1)[-1].strip() if marker in full else full
103
+
104
+ return out
105
 
106
  # ========= Orchestrator =========
107
+ def convert(text, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
108
  if not text or not text.strip():
109
  return ""
110
 
 
118
  do_sample=True,
119
  )
120
 
121
+ # ZeroGPU-only path; if it fails, show an informative message (no rule-based output)
122
+ try:
123
+ return _infer_zerogpu(prompt, gen_kwargs)
124
+ except Exception as e:
125
+ return f"[Ошибка ZeroGPU: {type(e).__name__}: {e}]"
 
 
 
 
 
 
126
 
127
  # ========= UI =========
128
  with gr.Blocks(title="Pre-reform → Modern Russian (ZeroGPU)") as demo:
129
  gr.Markdown(
130
  """
131
  # Преобразование дореформенной → современной орфографии
132
+ Запросы выполняются на **ZeroGPU** (GPU выделяется только на время генерации).
 
133
  """
134
  )
135
 
 
148
  repetition_penalty = gr.Slider(1.0, 2.0, value=1.05, step=0.01, label="repetition_penalty")
149
  btn = gr.Button("Преобразовать", variant="primary")
150
  with gr.Column():
151
+ out = gr.Textbox(label="Вывод: современная орфография", lines=14)
152
 
153
  gr.Examples(
154
  examples=[
155
+ # Classic prose examples
156
  ["въ семъ домѣ обитало три семейства, и каждое имѣло свои обыкновенія."],
157
+ ["Онъ шёлъ по узкой улѣцѣ, разсматривая вывѣски лавокъ и фонари."],
158
+ ["въ мирѣ сёмъ многа есть, чего мудрецу и не снилось."],
159
+ # Orthography stress tests
160
+ ["Сей образъ мыслей былъ въ обычаѣ: въслѣдствіе того, что ѣще не наступило прояснѣніе."],
161
+ ["Именіе его находилось на уѣздной окраинѣ; крестьяне имѣли обыкновеніе собираться къ вечеру."],
162
+ ["Лѣтописи глаголютъ, яко многа бывало чудесъ на рѣкѣ сей."],
163
+ ["Оный человѣкъ писалъ послѣднія строки при свѣтѣ фонаря, на улицѣ безлюдной."],
164
+ ["Въ семъ письмѣ обрѣтёте вы извѣстія, коихъ до нынѣ не имѣли."],
165
  ],
166
  inputs=[inp],
167
  )
168
 
169
  btn.click(
170
+ lambda t,a,b,c,d,e: convert(t, a, b, c, d, e),
171
  inputs=[inp, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
172
  outputs=[out],
173
  )