# labor-LLM / app.py — Hugging Face Space entry point
# Author: Indraos — "Added decorators" (commit e75f521)
import torch
import gradio as gr
import spaces # required for ZeroGPU
from transformers import AutoTokenizer, AutoModelForCausalLM
# Hub repository and the checkpoint subfolder (model variant) to load.
MODEL_ID = "tianyudu/LABOR_LLM"
VARIANT = "ft_7b_NLSY79_with_birth_year_ckpt_bo5"

# Tokenizer is CPU-safe, load at startup
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    subfolder=VARIANT
)

# Model loaded lazily inside GPU context
# (ZeroGPU grants a device only inside @spaces.GPU-decorated calls,
# so the weights must not be placed on CUDA at import time.)
model = None
def load_model():
    """Return the process-wide causal LM, instantiating it on first use.

    The model is built lazily because ZeroGPU only attaches a CUDA device
    inside ``@spaces.GPU``-decorated calls; constructing it at import time
    would fail or land on CPU.
    """
    global model
    if model is not None:
        return model
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        subfolder=VARIANT,
        torch_dtype=torch.float16,
        device_map="cuda",  # explicit, not conditional
    )
    return model
@spaces.GPU  # acquires GPU for the duration of this call
def generate(prompt: str) -> str:
    """Sample a completion for *prompt* on the GPU.

    Returns only the newly generated text — the prompt is sliced off before
    decoding, so the output matches the UI's "Completion" label instead of
    echoing the input back.
    """
    m = load_model()
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = m.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,  # required when temperature != 1.0
        # Silence the open-ended-generation warning for models without a
        # dedicated pad token by padding with EOS.
        pad_token_id=tokenizer.eos_token_id,
    )
    # generate() returns prompt tokens + continuation; keep only the new part.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
# Minimal Gradio UI: one prompt textbox in, one completion textbox out.
# The module-level name `demo` is the conventional entry point HF Spaces expects.
demo = gr.Interface(
    fn=generate,
    inputs=gr.Textbox(lines=4, label="Prompt"),
    outputs=gr.Textbox(label="Completion"),
    title="Labor LLM"
)

# Spaces executes this module directly; launch() starts the web server.
demo.launch()