Spaces:
Sleeping
Sleeping
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hub repo id for the fine-tuned call-center model (single source of truth
# so the tokenizer and model can never drift apart).
MODEL_ID = "sammanamgain/callcenter_response"

# Load the tokenizer and causal LM once at import time.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

# GPT-style models ship without a pad token; reuse EOS so generate()
# can pad batches without warning.
model.config.pad_token_id = model.config.eos_token_id
def generate_response(prompt):
    """Generate a call-center reply for *prompt* with the loaded causal LM.

    Args:
        prompt: The user's question or request as plain text.

    Returns:
        The sampled model output, decoded without special tokens. Any text
        up to the last ``[SEP]`` marker is stripped; when no marker is
        present the full decoded text is returned.
    """
    # Tokenize the input; padding/truncation also yields the attention mask.
    # (Removed: a dead `instruction` local and a redundant f"{prompt}" wrapper
    # that never affected the input.)
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)

    # Sample a response. pad_token_id is passed explicitly because GPT-style
    # models have no pad token of their own.
    generated_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=200,  # cap on reply length
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        pad_token_id=model.config.eos_token_id,
    )

    # Decode the whole sequence, then drop everything before the last
    # '[SEP]' separator.
    # NOTE(review): nothing in this function inserts '[SEP]' into the
    # prompt, so this only strips the echoed input if the fine-tuned model
    # itself emits the marker — confirm against its training format.
    result = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return result.split('[SEP]')[-1].strip()
# Wire the generator into a minimal Gradio UI: one text box in, one out.
interface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Call Center Response Generator",
)

# Launch the app only when executed as a script (not on import).
if __name__ == "__main__":
    interface.launch()