```python
# app.py
import re
import types

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# verifiers is optional: fall back to a minimal shim if XMLParser is missing.
try:
    import verifiers as vf
    _ = vf.XMLParser  # raises AttributeError on v0.0.0
except (ImportError, AttributeError):
    class _XMLParser:
        def __init__(self, tags):
            self.tags = tags

        def get_format_str(self):
            return "\n".join(f"<{t}>…</{t}>" for t in self.tags)

        def extract(self, text):
            out = {}
            for tag in self.tags:
                m = re.search(fr"<{tag}>(.*?)</{tag}>", text, re.S)
                out[tag] = m.group(1).strip() if m else ""
            return out

    vf = types.SimpleNamespace(XMLParser=_XMLParser)  # drop-in shim

MODEL_NAME = "loocorez/reverse-text-warmup"

# ---- prompt helpers --------------------------------------------------------
parser = vf.XMLParser(["think", "answer"])  # <think> … </think>\n<answer> … </answer>

SYSTEM_MSG = f"""Reverse the given text.
Respond in the following format:
{parser.get_format_str()}"""


def build_prompt(user_msg: str, tok) -> str:
    """Use the model's native chat template so all special tokens are right."""
    return tok.apply_chat_template(
        [{"role": "system", "content": SYSTEM_MSG},
         {"role": "user", "content": user_msg}],
        tokenize=False,
        add_generation_prompt=True,
    )


# ---- lazy-load model the first time a GPU is granted -----------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
model = None  # brought into scope & moved to CUDA inside @spaces.GPU


@spaces.GPU  # ← the proper annotation for ZeroGPU
def reverse(user_msg: str) -> str:
    global model
    if model is None:  # cold start: happens on the first request
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
        )
    prompt = build_prompt(user_msg, tokenizer)
    with torch.inference_mode():
        encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
        out = model.generate(
            **encoded,  # <- pass as keyword args
            max_new_tokens=1024,
            do_sample=False,  # temperature becomes irrelevant
        )
    # Decode only the newly generated tokens; slicing the decoded string by
    # len(prompt) is unreliable once skip_special_tokens has altered its length.
    reply_ids = out[0][encoded["input_ids"].shape[1]:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True)


# ---- Gradio UI --------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("### Reverse-Text demo (ZeroGPU)")
    txt_in = gr.Textbox(label="Input")
    txt_out = gr.Textbox(label="Model reply")
    btn = gr.Button("Run")
    btn.click(reverse, txt_in, txt_out)

demo.queue().launch()
```
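The raw reply still contains the `<think>…</think>` scaffolding. A minimal post-processing sketch using the same `parser` object (the helper name `reverse_answer_only` is hypothetical, assuming you only want the `<answer>` body shown in the UI):

```python
# Hypothetical wrapper: surface only the <answer> body in the textbox.
# `reverse` and `parser` are the objects defined in app.py above.
def reverse_answer_only(user_msg: str) -> str:
    reply = reverse(user_msg)
    fields = parser.extract(reply)   # e.g. {"think": "...", "answer": "..."}
    return fields["answer"] or reply  # fall back to the raw reply if no tag

# btn.click(reverse_answer_only, txt_in, txt_out)  # swap in if desired
```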