Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig | |
| from peft import PeftModel | |
# Base checkpoint identifier and the location of the LoRA adapter files.
model_name = "HuggingFaceH4/zephyr-7b-beta"
adapter_path = "zephyr_lora_adapter"

# The tokenizer is read from the adapter location, not from the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(adapter_path)

# 4-bit NF4 quantization with double quantization; compute dtype is float16.
# NOTE(review): bitsandbytes 4-bit loading presumably requires a CUDA GPU on
# the host -- confirm the deployment hardware provides one.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint
# repository to run locally -- verify it is actually needed for this model.
_base_load_options = {
    "quantization_config": bnb_config,
    "device_map": "auto",
    "trust_remote_code": True,
}
base_model = AutoModelForCausalLM.from_pretrained(model_name, **_base_load_options)

# Attach the adapter to the quantized base model and switch to eval mode
# (eval() returns the module itself, so the chained call keeps the same object).
model = PeftModel.from_pretrained(base_model, adapter_path).eval()
def solve_math(question, max_tokens=512):
    """Generate a solution for a math problem with the fine-tuned model.

    Args:
        question: Problem statement entered by the user.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The generated answer with surrounding whitespace stripped, or a short
        hint when no question was provided.
    """
    # Skip the (expensive) generation pass entirely for blank input.
    if not question or not question.strip():
        return "Please enter a math problem."

    prompt = f"<|user|>\n{question}\n<|assistant|>\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Token id 0 is a legitimate pad id, so test for None explicitly instead of
    # relying on truthiness (`pad or eos` would silently discard a 0).
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),  # UI controls may deliver a float
            do_sample=False,
            pad_token_id=pad_token_id,
        )

    # The returned sequence starts with the prompt tokens. Drop them by length
    # rather than splitting the decoded text on "<|assistant|>": the text split
    # echoes the prompt if the marker is stripped as a special token and
    # truncates the answer if the model emits the marker again.
    prompt_length = inputs["input_ids"].shape[-1]
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()
# One multi-line textbox for the problem, one textbox for the model's answer.
_problem_box = gr.Textbox(lines=5, label="Enter math problem")
_solution_box = gr.Textbox(label="Solution")

# Only the question is exposed in the UI, so solve_math runs with its default
# max_tokens.
demo = gr.Interface(
    fn=solve_math,
    inputs=_problem_box,
    outputs=_solution_box,
    title="Math Solver (Zephyr Fine-Tuned)",
    description="This app uses a fine-tuned LLM to solve school-level math problems step by step.",
)

# Launched unconditionally at module level, exactly as the script did before.
demo.launch()