# app.py
import re, spaces, gradio as gr, torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import types
try:
    import verifiers as vf
    _ = vf.XMLParser  # raises AttributeError on v0.0.0
except (ImportError, AttributeError):
    class _XMLParser:
        def __init__(self, tags):
            self.tags = tags
        def get_format_str(self):
            return "\n".join(f"<{t}>…</{t}>" for t in self.tags)
        def extract(self, text):
            out = {}
            for tag in self.tags:
                m = re.search(fr"<{tag}>(.*?)</{tag}>", text, re.S)
                out[tag] = m.group(1).strip() if m else ""
            return out
    vf = types.SimpleNamespace(XMLParser=_XMLParser)  # drop-in shim
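# For reference, the shim behaves like this on a well-formed completion:
#   _XMLParser(["think", "answer"]).extract("<think>t</think>\n<answer>olleh</answer>")
#   returns {"think": "t", "answer": "olleh"}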
MODEL_NAME = "loocorez/reverse-text-warmup"
# ---- prompt helpers --------------------------------------------------------
parser = vf.XMLParser(["think", "answer"]) # <think> … </think>\n<answer> … </answer>
SYSTEM_MSG = f"""Reverse the given text.
Respond in the following format:
{parser.get_format_str()}"""
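# With the shim above, SYSTEM_MSG expands to:
#   Reverse the given text.
#   Respond in the following format:
#   <think>…</think>
#   <answer>…</answer>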
def build_prompt(user_msg: str, tok) -> str:
"""Use the model’s native chat template so all special tokens are right."""
return tok.apply_chat_template(
[{"role": "system", "content": SYSTEM_MSG},
{"role": "user", "content": user_msg}],
tokenize=False,
add_generation_prompt=True
)
# ---- lazy-load model the first time a GPU is granted -----------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
model = None # brought into scope & moved to CUDA inside @spaces.GPU
@spaces.GPU(duration=60) # ← the “proper annotation” for ZeroGPU
def reverse(user_msg: str) -> str:
    global model
    if model is None:  # cold start: happens on the first request
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
        )
    prompt = build_prompt(user_msg, tokenizer)
    with torch.inference_mode():
        encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
        out = model.generate(
            **encoded,  # pass input_ids / attention_mask as keyword args
            max_new_tokens=1024,
            do_sample=False,  # greedy decoding, so temperature is irrelevant
        )
    # Decode only the newly generated tokens: slicing the decoded string by
    # len(prompt) is unreliable once skip_special_tokens drops template tokens.
    reply_ids = out[0][encoded["input_ids"].shape[-1]:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True)
# ---- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("### Reverse-Text demo (ZeroGPU)")
    txt_in = gr.Textbox(label="Input")
    txt_out = gr.Textbox(label="Model reply")
    btn = gr.Button("Run")
    btn.click(reverse, txt_in, txt_out)
demo.queue().launch()
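# Optional smoke test from another machine. A sketch only: the Space id
# placeholder and the default fn_index=0 endpoint are assumptions, so adjust
# them for the deployed Space.
#   from gradio_client import Client
#   print(Client("<user>/<space-id>").predict("hello world", fn_index=0))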