# reverse-text / app.py — Hugging Face Space by loocorez
# (page header retained as a comment: "Update app.py", commit 8f9beef, verified)
# app.py
import re, spaces, gradio as gr, torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import re, types
# Optional dependency: use the real ``verifiers.XMLParser`` when available.
# The package may be missing entirely, or so old (v0.0.0) that the attribute
# does not exist — in either case fall back to a minimal drop-in shim.
try:
    import verifiers as vf
    _ = vf.XMLParser  # raises AttributeError on v0.0.0
except (ImportError, AttributeError):

    class _XMLParser:
        """Minimal stand-in for verifiers.XMLParser: format string + tag extraction."""

        def __init__(self, tags):
            self.tags = tags

        def get_format_str(self):
            # One "<tag>…</tag>" line per tag, joined by newlines.
            return "\n".join(f"<{t}>…</{t}>" for t in self.tags)

        def extract(self, text):
            # Non-greedy search with DOTALL so tag bodies may span lines;
            # a missing tag yields an empty string rather than an error.
            found = {}
            for tag in self.tags:
                match = re.search(fr"<{tag}>(.*?)</{tag}>", text, re.S)
                found[tag] = match.group(1).strip() if match else ""
            return found

    vf = types.SimpleNamespace(XMLParser=_XMLParser)  # drop-in shim
# Hub repo id of the fine-tuned model this demo serves.
MODEL_NAME = "loocorez/reverse-text-warmup"
# ---- prompt helpers --------------------------------------------------------
# Parser defines the expected reply layout: a <think> block then an <answer> block.
parser = vf.XMLParser(["think", "answer"])  # <think> … </think>\n<answer> … </answer>
# System message shown to the model; embeds the parser's format template so the
# instruction and the extraction format can never drift apart.
SYSTEM_MSG = f"""Reverse the given text.
Respond in the following format:
{parser.get_format_str()}"""
def build_prompt(user_msg: str, tok) -> str:
    """Wrap *user_msg* in the model's native chat template (system + user turns),
    so every special token lands exactly where the model expects it."""
    messages = [
        {"role": "system", "content": SYSTEM_MSG},
        {"role": "user", "content": user_msg},
    ]
    return tok.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
# ---- lazy-load model the first time a GPU is granted -----------------------
# Tokenizer is cheap and CPU-only, so it loads eagerly at import time.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
model = None  # brought into scope & moved to CUDA inside @spaces.GPU
@spaces.GPU(duration=60)  # ZeroGPU: a GPU is attached for up to 60 s per call
def reverse(user_msg: str) -> str:
    """Generate the model's reply to *user_msg* and return only the new text.

    The model is loaded lazily on the first request (cold start), inside the
    GPU-granted context, so the Space boots without holding a GPU.
    """
    global model
    if model is None:  # cold-start: happens on the first request
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
        )
    prompt = build_prompt(user_msg, tokenizer)
    with torch.inference_mode():
        encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
        out = model.generate(
            **encoded,  # pass input_ids / attention_mask as keyword args
            max_new_tokens=1024,
            do_sample=False,  # greedy decoding; temperature is irrelevant
        )
    # BUG FIX: the original did `full[len(prompt):]` on text decoded with
    # skip_special_tokens=True. That decode drops the chat-template special
    # tokens, so the decoded string is SHORTER than `prompt` and the slice
    # swallowed the beginning of the reply. Slice the generated token ids
    # instead — everything past the prompt's token length is new output.
    prompt_len = encoded["input_ids"].shape[1]
    return tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)
# ---- Gradio UI -------------------------------------------------------------
# A single-column demo: one input box, one Run button, one output box.
with gr.Blocks() as demo:
    gr.Markdown("### Reverse-Text demo (ZeroGPU)")
    input_box = gr.Textbox(label="Input")
    output_box = gr.Textbox(label="Model reply")
    run_btn = gr.Button("Run")
    run_btn.click(reverse, input_box, output_box)

# queue() serializes requests so concurrent users share the single GPU slot.
demo.queue().launch()