# app.py
import re
import types

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

try:
    import verifiers as vf
    _ = vf.XMLParser  # raises AttributeError on v0.0.0
except (ImportError, AttributeError):
    class _XMLParser:
        def __init__(self, tags):
            self.tags = tags
        def get_format_str(self):
            return "\n".join(f"<{t}>…</{t}>" for t in self.tags)
        def extract(self, text):
            out = {}
            for tag in self.tags:
                m = re.search(fr"<{tag}>(.*?)</{tag}>", text, re.S)
                out[tag] = m.group(1).strip() if m else ""
            return out
    vf = types.SimpleNamespace(XMLParser=_XMLParser)  # drop-in shim
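
# Illustrative round trip through the shim path (relies on the fallback
# _XMLParser above; an installed verifiers package may expose a different
# surface):
#
#   p = vf.XMLParser(["think", "answer"])
#   p.extract("<think>ok</think>\n<answer>olleh</answer>")
#   # -> {'think': 'ok', 'answer': 'olleh'}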

MODEL_NAME = "loocorez/reverse-text-warmup"


# ----  prompt helpers --------------------------------------------------------
parser = vf.XMLParser(["think", "answer"])          # <think> … </think>\n<answer> … </answer>
SYSTEM_MSG = f"""Reverse the given text.

Respond in the following format:
{parser.get_format_str()}"""
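
# With the shim's get_format_str(), SYSTEM_MSG renders as:
#
#   Reverse the given text.
#
#   Respond in the following format:
#   <think>…</think>
#   <answer>…</answer>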

def build_prompt(user_msg: str, tok) -> str:
    """Use the model’s native chat template so all special tokens are right."""
    return tok.apply_chat_template(
        [{"role": "system", "content": SYSTEM_MSG},
         {"role": "user",   "content": user_msg}],
        tokenize=False,
        add_generation_prompt=True
    )
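
# For orientation only: with a ChatML-style template (common, but the real
# layout comes from this model's tokenizer config) the rendered prompt looks
# roughly like:
#
#   <|im_start|>system
#   Reverse the given text.
#   ...<|im_end|>
#   <|im_start|>user
#   hello<|im_end|>
#   <|im_start|>assistant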

# ----  lazy-load model the first time a GPU is granted -----------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
model = None                        # brought into scope & moved to CUDA inside @spaces.GPU

@spaces.GPU(duration=60)            # ← the “proper annotation” for ZeroGPU
def reverse(user_msg: str) -> str:
    global model
    if model is None:               # cold-start: happens on the first request
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto"
        )
    prompt = build_prompt(user_msg, tokenizer)
    with torch.inference_mode():
        encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
        out = model.generate(
            **encoded,                # <- pass as keyword args
            max_new_tokens=1024,
            do_sample=False           # temperature becomes irrelevant
        )
    # Decode only the newly generated tokens: slicing the full decode by
    # len(prompt) is unreliable because skip_special_tokens=True drops the
    # chat template's special tokens, so string offsets no longer line up.
    new_tokens = out[0][encoded["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
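
# Optional post-processing sketch (hypothetical helper, not wired into the
# UI below). The model is asked to reply in <think>/<answer> tags, so the
# parser defined above can pull out just the answer. This assumes extract()
# behaves like the shim's version and returns a dict of tag -> text.
def extract_answer(reply: str) -> str:
    fields = parser.extract(reply)
    return fields.get("answer") or reply  # fall back to the raw reply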

# ----  Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("### Reverse-Text demo (ZeroGPU)")
    txt_in = gr.Textbox(label="Input")
    txt_out = gr.Textbox(label="Model reply")
    btn = gr.Button("Run")
    btn.click(reverse, txt_in, txt_out)

demo.queue().launch()