"""Gradio app that serves a fine-tuned causal LM as a call-center response generator."""

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the fine-tuned model and its tokenizer from the Hugging Face Hub.
tokenizer = AutoTokenizer.from_pretrained("sammanamgain/callcenter_response")
model = AutoModelForCausalLM.from_pretrained("sammanamgain/callcenter_response")

# Causal LMs often ship without a pad token; reuse EOS so padded batches
# and generate() don't warn or misbehave.
model.config.pad_token_id = model.config.eos_token_id


def generate_response(prompt: str) -> str:
    """Generate a call-center style reply for *prompt*.

    Args:
        prompt: The customer's message / question.

    Returns:
        The model's completion, excluding the echoed prompt.
    """
    # Tokenize with padding/truncation so we also get a correct attention mask.
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)

    generated_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=200,          # cap response length
        do_sample=True,              # sampling (not greedy) for varied replies
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        pad_token_id=model.config.eos_token_id,  # silence missing-pad warning
    )

    # generate() returns prompt tokens + new tokens. The original code tried to
    # strip the prompt by splitting on '[SEP]', but the input never contains
    # that token, so the full echoed prompt leaked into the response. Decoding
    # only the tokens past the input length removes the prompt reliably.
    prompt_length = inputs["input_ids"].shape[1]
    new_tokens = generated_ids[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


# Simple text-in / text-out web UI around the generator.
interface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Call Center Response Generator",
)

if __name__ == "__main__":
    interface.launch()