```python
# app.py
import re
import types

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# verifiers is optional: fall back to a minimal shim if XMLParser is missing.
try:
    import verifiers as vf
    _ = vf.XMLParser  # raises AttributeError on v0.0.0
except (ImportError, AttributeError):
    class _XMLParser:
        def __init__(self, tags):
            self.tags = tags

        def get_format_str(self):
            return "\n".join(f"<{t}>…</{t}>" for t in self.tags)

        def extract(self, text):
            out = {}
            for tag in self.tags:
                m = re.search(fr"<{tag}>(.*?)</{tag}>", text, re.S)
                out[tag] = m.group(1).strip() if m else ""
            return out

    vf = types.SimpleNamespace(XMLParser=_XMLParser)  # drop-in shim

MODEL_NAME = "loocorez/reverse-text-warmup"

# ---- prompt helpers --------------------------------------------------------
parser = vf.XMLParser(["think", "answer"])  # <think> … </think>\n<answer> … </answer>

SYSTEM_MSG = f"""Reverse the given text.
Respond in the following format:
{parser.get_format_str()}"""


def build_prompt(user_msg: str, tok) -> str:
    """Use the model's native chat template so all special tokens are right."""
    return tok.apply_chat_template(
        [{"role": "system", "content": SYSTEM_MSG},
         {"role": "user", "content": user_msg}],
        tokenize=False,
        add_generation_prompt=True,
    )


# ---- lazy-load model the first time a GPU is granted -----------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
model = None  # brought into scope & moved to CUDA inside @spaces.GPU


@spaces.GPU  # ← the proper annotation for ZeroGPU
def reverse(user_msg: str) -> str:
    global model
    if model is None:  # cold start: happens on the first request
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
        )
    prompt = build_prompt(user_msg, tokenizer)
    with torch.inference_mode():
        encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
        out = model.generate(
            **encoded,  # <- pass as keyword args
            max_new_tokens=1024,
            do_sample=False,  # temperature becomes irrelevant
        )
    # Decode only the newly generated tokens; slicing the decoded string by
    # len(prompt) is unreliable once skip_special_tokens has altered its length.
    reply_ids = out[0][encoded["input_ids"].shape[1]:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True)


# ---- Gradio UI --------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("### Reverse-Text demo (ZeroGPU)")
    txt_in = gr.Textbox(label="Input")
    txt_out = gr.Textbox(label="Model reply")
    btn = gr.Button("Run")
    btn.click(reverse, txt_in, txt_out)

demo.queue().launch()
```
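The raw reply still contains the `<think>…</think>` scaffolding. A minimal post-processing sketch using the same `parser` object (the helper name `reverse_answer_only` is hypothetical, assuming you only want the `<answer>` body shown in the UI):

```python
# Hypothetical wrapper: surface only the <answer> body in the textbox.
# `reverse` and `parser` are the objects defined in app.py above.
def reverse_answer_only(user_msg: str) -> str:
    reply = reverse(user_msg)
    fields = parser.extract(reply)   # e.g. {"think": "...", "answer": "..."}
    return fields["answer"] or reply  # fall back to the raw reply if no tag

# btn.click(reverse_answer_only, txt_in, txt_out)  # swap in if desired
```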