# app.py
import re, spaces, gradio as gr, torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import types
try:
    import verifiers as vf
    _ = vf.XMLParser  # raises AttributeError on v0.0.0
except (ImportError, AttributeError):
    class _XMLParser:
        def __init__(self, tags):
            self.tags = tags
        def get_format_str(self):
            return "\n".join(f"<{t}>…</{t}>" for t in self.tags)
        def extract(self, text):
            out = {}
            for tag in self.tags:
                m = re.search(fr"<{tag}>(.*?)</{tag}>", text, re.S)
                out[tag] = m.group(1).strip() if m else ""
            return out
    vf = types.SimpleNamespace(XMLParser=_XMLParser)  # drop-in shim
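# For reference, the shim behaves like this on a well-formed completion:
#   _XMLParser(["think", "answer"]).extract("<think>t</think>\n<answer>olleh</answer>")
#   returns {"think": "t", "answer": "olleh"}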
MODEL_NAME = "loocorez/reverse-text-warmup"
# ---- prompt helpers --------------------------------------------------------
parser = vf.XMLParser(["think", "answer"]) # <think> … </think>\n<answer> … </answer>
SYSTEM_MSG = f"""Reverse the given text.
Respond in the following format:
{parser.get_format_str()}"""
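# With the shim above, SYSTEM_MSG expands to:
#   Reverse the given text.
#   Respond in the following format:
#   <think>…</think>
#   <answer>…</answer>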
def build_prompt(user_msg: str, tok) -> str:
"""Use the model’s native chat template so all special tokens are right."""
return tok.apply_chat_template(
[{"role": "system", "content": SYSTEM_MSG},
{"role": "user", "content": user_msg}],
tokenize=False,
add_generation_prompt=True
)
# ---- lazy-load model the first time a GPU is granted -----------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
model = None # brought into scope & moved to CUDA inside @spaces.GPU
@spaces.GPU(duration=60) # ← the “proper annotation” for ZeroGPU
def reverse(user_msg: str) -> str:
    global model
    if model is None:  # cold start: happens on the first request
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
        )
    prompt = build_prompt(user_msg, tokenizer)
    with torch.inference_mode():
        encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
        out = model.generate(
            **encoded,  # pass input_ids / attention_mask as keyword args
            max_new_tokens=1024,
            do_sample=False,  # greedy decoding, so temperature is irrelevant
        )
    # Decode only the newly generated tokens: slicing the decoded string by
    # len(prompt) is unreliable once skip_special_tokens drops template tokens.
    reply_ids = out[0][encoded["input_ids"].shape[-1]:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True)
# ---- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("### Reverse-Text demo (ZeroGPU)")
    txt_in = gr.Textbox(label="Input")
    txt_out = gr.Textbox(label="Model reply")
    btn = gr.Button("Run")
    btn.click(reverse, txt_in, txt_out)
demo.queue().launch()
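# Optional smoke test from another machine. A sketch only: the Space id
# placeholder and the default fn_index=0 endpoint are assumptions, so adjust
# them for the deployed Space.
#   from gradio_client import Client
#   print(Client("<user>/<space-id>").predict("hello world", fn_index=0))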