Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig | |
| from peft import PeftModel, LoraConfig | |
# Hub repository that holds the fine-tuned checkpoint.
model_path = "netmouse/Llama-3-Taiwan-8B-Instruct-finetuning-by-promisedchat"  # Update this to your model path
# The LoRA adapter is assumed to be published in the same repository.
adapter_path = "netmouse/Llama-3-Taiwan-8B-Instruct-finetuning-by-promisedchat"  # Assuming adapter is stored in the same path

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load the base model config
config = AutoConfig.from_pretrained(model_path)

# Load the base model without quantization (no bitsandbytes 4-bit / 8-bit).
# torch_dtype="auto" keeps the dtype recorded for the checkpoint; when it is
# omitted, Transformers 4.x materialises the weights as float32, which roughly
# doubles the memory needed for a half-precision 8B checkpoint.
# NOTE(review): ignore_mismatched_sizes=True makes from_pretrained skip (and
# freshly initialise) any weight whose shape differs from the checkpoint
# instead of raising -- confirm this is really wanted for this repository.
base_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    config=config,
    torch_dtype="auto",
    ignore_mismatched_sizes=True,
)

# Load the LoRA adapter on top of the base model
model = PeftModel.from_pretrained(base_model, adapter_path)
# This app only runs inference, so make sure dropout layers are disabled.
model.eval()
def generate_text(input_text: str) -> str:
    """Generate text from *input_text* using the module-level model.

    Args:
        input_text: Prompt to feed to the model.

    Returns:
        The first generated sequence decoded with special tokens removed.
        For a causal LM this is the prompt followed by its continuation.
    """
    # Calling the tokenizer (instead of ``encode``) also returns the attention
    # mask, so ``generate`` does not have to guess which positions are padding.
    # The tensors are moved to the model's device so this also works when the
    # model is not on the CPU.
    encoded = tokenizer(input_text, return_tensors="pt").to(model.device)
    # ``max_new_tokens`` bounds only the generated part. The previous
    # ``max_length=50`` counted the prompt as well, so a prompt of about 50
    # tokens or more left no room for any output.
    # Inputs are passed by keyword because some PEFT wrappers only accept
    # keyword arguments in ``generate``.
    outputs = model.generate(
        **encoded,
        max_new_tokens=50,
        num_return_sequences=1,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Wrap generate_text in a single text-in / text-out Gradio interface and
# start serving it.
iface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
)
iface.launch()