| import torch |
| import gradio as gr |
| import spaces |
| from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
# Hugging Face Hub repo that hosts all fine-tuned LABOR_LLM checkpoints.
MODEL_ID = "tianyudu/LABOR_LLM"
# Checkpoint variant to serve; stored as a subfolder inside MODEL_ID.
VARIANT = "ft_7b_NLSY79_with_birth_year_ckpt_bo5"



# Tokenizer is small, so it is loaded eagerly at import time (downloads from
# the Hub on first run; cached afterwards).
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    subfolder=VARIANT
)



# The model weights are heavy, so loading is deferred until the first request
# (see load_model below). This global acts as the lazy-singleton cache slot.
model = None
|
|
def load_model():
    """Lazily load the causal LM on first use and cache it.

    The weights are fetched from MODEL_ID/VARIANT, cast to fp16, and placed
    directly on the GPU. The instance is stored in the module-level ``model``
    global so subsequent calls return the cached object immediately.
    """
    global model
    if model is not None:
        return model
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        subfolder=VARIANT,
        torch_dtype=torch.float16,
        device_map="cuda"
    )
    return model
|
|
|
|
@spaces.GPU
def generate(prompt: str) -> str:
    """Generate a sampled completion for *prompt* on the GPU.

    Args:
        prompt: Free-form text used to condition the model.

    Returns:
        The decoded sequence — the prompt followed by up to 200 newly
        sampled tokens — with special tokens stripped.
    """
    lm = load_model()
    encoded = tokenizer(prompt, return_tensors="pt").to("cuda")
    generated = lm.generate(
        **encoded,
        do_sample=True,           # sample rather than greedy-decode
        temperature=0.7,
        max_new_tokens=200
    )
    return tokenizer.decode(generated[0], skip_special_tokens=True)
|
|
|
|
# Minimal single-turn UI: one prompt textbox in, one completion textbox out.
prompt_box = gr.Textbox(lines=4, label="Prompt")
completion_box = gr.Textbox(label="Completion")

demo = gr.Interface(
    fn=generate,
    inputs=prompt_box,
    outputs=completion_box,
    title="Labor LLM"
)


demo.launch()