Spaces:
Runtime error
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
# Base model and fine-tuned LoRA adapter
base_model_name = "mistralai/Mistral-7B-Instruct-v0.2"
adapter_model_name = "TymofiiNas/results"

# 4-bit NF4 quantization so the 7B model fits in GPU memory
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

# Load the quantized base model on GPU 0, attach the adapter, and load the tokenizer
model = AutoModelForCausalLM.from_pretrained(
    base_model_name, quantization_config=bnb_config, device_map={"": 0}
)
model = PeftModel.from_pretrained(model, adapter_model_name)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
def generate_response(text):
    # Wrap the prompt in Mistral's [INST] instruction format
    text = "<s> [INST]" + text + "[/INST]"
    encoded_input = tokenizer(text, return_tensors="pt", add_special_tokens=False)
    model_inputs = encoded_input.to("cuda")
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=400,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode and strip the echoed prompt from the front of the output
    decoded_output = tokenizer.batch_decode(generated_ids)
    return decoded_output[0][len(text):]
# Simple text-in / text-out Gradio app
demo = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
)
gr.TabbedInterface([demo]).queue().launch()
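
For reference, a minimal sketch of an alternative to generate_response that builds the prompt with the tokenizer's built-in chat template and trims the echoed prompt at the token level rather than by character count. The function name generate_response_via_chat_template is hypothetical; the sketch assumes the same model and tokenizer objects defined above and that the tokenizer ships a chat template, as the Mistral-7B-Instruct-v0.2 tokenizer does.

# Sketch: same idea as generate_response above, but using the tokenizer's
# chat template and token-level slicing. Assumes the `model` and `tokenizer`
# objects loaded earlier in this file (hypothetical helper, not part of the Space).
def generate_response_via_chat_template(text):
    messages = [{"role": "user", "content": text}]
    # apply_chat_template adds the [INST] ... [/INST] wrapping for Mistral
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to("cuda")
    generated_ids = model.generate(
        input_ids,
        max_new_tokens=400,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Keep only the newly generated tokens, then decode without special tokens
    new_tokens = generated_ids[0][input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)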