"""Gradio app serving a PEFT/GPTQ-quantized Zephyr model that generates job descriptions.

Loads the "Aneeth/zephyr_10k" adapter model (4-bit GPTQ, CPU, float32), wraps user
input in a fixed prompt template, and exposes a simple text-to-text Gradio interface.
"""

import gradio as gr
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, GenerationConfig, GPTQConfig

# 4-bit GPTQ quantization; exllama kernels disabled (required for CPU inference).
gptq_config = GPTQConfig(bits=4, disable_exllama=True)

# Model + tokenizer load at import time (network download on first run).
model = AutoPeftModelForCausalLM.from_pretrained(
    "Aneeth/zephyr_10k",
    return_dict=True,
    torch_dtype=torch.float32,
    trust_remote_code=True,
    quantization_config=gptq_config,
)
tokenizer = AutoTokenizer.from_pretrained("Aneeth/zephyr_10k")

generation_config = GenerationConfig(
    do_sample=True,
    # NOTE(review): top_k=1 with do_sample=True is effectively greedy decoding;
    # temperature has no practical effect here — confirm this is intended.
    top_k=1,
    temperature=0.5,
    max_new_tokens=5000,
    pad_token_id=tokenizer.eos_token_id,
)


def process_data_sample(example):
    """Wrap the instruction in the job-description prompt template.

    Args:
        example: Mapping with an "instruction" key holding the user's raw input.

    Returns:
        The templated prompt string fed to the tokenizer.
    """
    processed_example = (
        "\n Generate an authentic job description using the given input.\n\n"
        + example["instruction"]
        + "\n\n"
    )
    return processed_example


def generate_text(prompt):
    """Generate a job description for *prompt* and return the decoded text.

    Args:
        prompt: Free-text instruction from the Gradio textbox.

    Returns:
        The full decoded model output (includes the prompt, since the
        generated sequence is decoded from token 0 with special tokens skipped).
    """
    inp_str = process_data_sample({"instruction": prompt})
    inputs = tokenizer(inp_str, return_tensors="pt").to("cpu")
    outputs = model.generate(**inputs, generation_config=generation_config)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response


# NOTE(review): live=True re-runs generation on every keystroke, which is very
# expensive with max_new_tokens=5000 on CPU — consider a submit button instead.
iface = gr.Interface(fn=generate_text, inputs="text", outputs="text", live=True)
iface.launch()