import gradio as gr
import difflib
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the model and tokenizer from Hugging Face.
# Note: zephyr-7b-beta has ~7B parameters, so loading it locally needs
# substantial memory (roughly 28 GB of RAM in float32, ~14 GB in float16).
model_name = "HuggingFaceH4/zephyr-7b-beta"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Define the SevaAI context with FAQ data
context = {
    "SevaAI FAQ": {
        "General": {
            "What is SevaAI?": "SevaAI is an AI-powered assistant for health insurance.",
            "How does SevaAI work?": "SevaAI helps you navigate the health insurance process in India.",
        },
        "Technical Support": {
            "How can I reset my password?": "To reset your password, click on 'Forgot Password' on the login page.",
        },
    },
}
# Flatten the nested context into a list of (question, answer) pairs
def extract_faqs(context):
    faq_list = []
    for _category, faq_dict in context["SevaAI FAQ"].items():
        for question, answer in faq_dict.items():
            faq_list.append((question, answer))
    return faq_list

faqs = extract_faqs(context)
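# faqs is now a flat list of (question, answer) tuples, e.g.
# [("What is SevaAI?", "SevaAI is an AI-powered assistant for health insurance."), ...]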
# Find the best matching FAQ for the user's input
def find_best_match(user_input, faq_list):
    questions = [question for question, _ in faq_list]
    best_match = difflib.get_close_matches(user_input, questions, n=1, cutoff=0.6)
    if best_match:
        for question, answer in faq_list:
            if question == best_match[0]:
                return answer  # Return the answer to the matched question
    return None
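# Illustrative behavior (an assumption, not tested output): with cutoff=0.6,
# a close variant such as "what is sevaai?" should still match "What is SevaAI?",
# since difflib scores raw character overlap. Matching is case-sensitive, though,
# so heavily reworded questions can fall below the cutoff and return None.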
# Use the FAQ to guide the model's response
def generate_response(user_input, system_message, max_tokens, temperature, top_p):
    # First, check whether the user input matches any FAQ
    faq_answer = find_best_match(user_input, faqs)
    if faq_answer:
        # If an FAQ answer is found, prepend it to guide the model
        user_input = f"Here is the information I found: {faq_answer}. Now, how can I help you further?"
    # Build a chat-formatted prompt (zephyr-7b-beta is a chat-tuned model)
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_input},
    ]
    inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
    outputs = model.generate(
        inputs,
        max_new_tokens=int(max_tokens),
        do_sample=True,  # required for temperature/top_p to take effect
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens, not the echoed prompt
    response = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
    return response
# Bridge Gradio's chat callback to the generation function, forwarding the
# UI controls so the sliders actually affect generation
def respond(message, history, system_message, max_tokens, temperature, top_p):
    return generate_response(message, system_message, max_tokens, temperature, top_p)
# Set up the Gradio chat interface; gr.ChatInterface supplies (message, history)
# and passes the additional inputs as extra arguments to respond()
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()
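# To run locally (an assumed minimal dependency set): gradio, transformers, torch —
#   pip install gradio transformers torch
#   python app.py   # app.py is the usual Spaces entry point; the filename is an assumption
# Example: asking "What is SevaAI?" should hit the FAQ match, so the model is
# steered with "Here is the information I found: SevaAI is an AI-powered
# assistant for health insurance. ..." before it generates a reply.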