# labor-LLM / app.py — Hugging Face Space entry point
# Author: Indraos — "Added decorators" (commit e75f521)
import torch
import gradio as gr
import spaces # required for ZeroGPU
from transformers import AutoTokenizer, AutoModelForCausalLM
# Hub repository and the checkpoint subfolder (model variant) to load.
MODEL_ID = "tianyudu/LABOR_LLM"
VARIANT = "ft_7b_NLSY79_with_birth_year_ckpt_bo5"

# Tokenizer is CPU-safe, load at startup
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    subfolder=VARIANT
)

# Model loaded lazily inside GPU context
# (ZeroGPU grants a device only inside @spaces.GPU-decorated calls,
# so the weights must not be placed on CUDA at import time.)
model = None
def load_model():
    """Return the process-wide causal LM, instantiating it on first use.

    The model is built lazily because ZeroGPU only attaches a CUDA device
    inside ``@spaces.GPU``-decorated calls; constructing it at import time
    would fail or land on CPU.
    """
    global model
    if model is not None:
        return model
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        subfolder=VARIANT,
        torch_dtype=torch.float16,
        device_map="cuda",  # explicit, not conditional
    )
    return model
@spaces.GPU  # acquires GPU for the duration of this call
def generate(prompt: str) -> str:
    """Sample a completion for *prompt* on the GPU.

    Returns only the newly generated text — the prompt is sliced off before
    decoding, so the output matches the UI's "Completion" label instead of
    echoing the input back.
    """
    m = load_model()
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = m.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,  # required when temperature != 1.0
        # Silence the open-ended-generation warning for models without a
        # dedicated pad token by padding with EOS.
        pad_token_id=tokenizer.eos_token_id,
    )
    # generate() returns prompt tokens + continuation; keep only the new part.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
# Minimal Gradio UI: one prompt textbox in, one completion textbox out.
# The module-level name `demo` is the conventional entry point HF Spaces expects.
demo = gr.Interface(
    fn=generate,
    inputs=gr.Textbox(lines=4, label="Prompt"),
    outputs=gr.Textbox(label="Completion"),
    title="Labor LLM"
)

# Spaces executes this module directly; launch() starts the web server.
demo.launch()