import torch
import gradio as gr
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, GenerationConfig, GPTQConfig
# Exllama kernels are CUDA-only, so they are disabled here for CPU inference.
gptq_config = GPTQConfig(bits=4, disable_exllama=True)
model = AutoPeftModelForCausalLM.from_pretrained(
    "Aneeth/zephyr_10k",
    return_dict=True,
    torch_dtype=torch.float32,
    trust_remote_code=True,
    quantization_config=gptq_config,
)
tokenizer = AutoTokenizer.from_pretrained("Aneeth/zephyr_10k")
generation_config = GenerationConfig(
    do_sample=True,
    top_k=1,  # with top_k=1 only the most likely token survives, so sampling is effectively greedy
    temperature=0.5,
    max_new_tokens=5000,
    pad_token_id=tokenizer.eos_token_id,
)
def process_data_sample(example):
    """Wrap the raw instruction in the job-description prompt template."""
    processed_example = (
        "\n Generate an authentic job description using the given input.\n\n"
        + example["instruction"]
        + "\n\n"
    )
    return processed_example
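# For illustration (the input string is a made-up example):
# process_data_sample({"instruction": "Senior Python developer"}) returns
#   "\n Generate an authentic job description using the given input.\n\nSenior Python developer\n\n"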
def generate_text(prompt):
    inp_str = process_data_sample({"instruction": prompt})
    inputs = tokenizer(inp_str, return_tensors="pt").to("cpu")
    outputs = model.generate(**inputs, generation_config=generation_config)
    # Note: decoding outputs[0] returns the prompt text plus the completion.
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
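# Optional smoke test (the prompt is an illustrative placeholder, not from the
# original script). Uncomment to verify generation end-to-end before launching
# the web UI:
# print(generate_text("Senior data engineer, remote, fintech"))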
# live=True re-runs generate_text on every input change; with a slow CPU model,
# a plain submit button (live=False, the default) may be preferable.
iface = gr.Interface(fn=generate_text, inputs="text", outputs="text", live=True)
iface.launch()