import gradio as gr
import difflib
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load the model and tokenizer from Hugging Face
model_name = "HuggingFaceH4/zephyr-7b-beta"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
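# Note: zephyr-7b-beta has ~7B parameters; loading it in full fp32 precision needs
# roughly 28 GB of RAM. If memory is tight, something like
#     AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")
# can help (device_map="auto" assumes the accelerate package is installed).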
# Define the SevaAI context with FAQ data
context = {
    "SevaAI FAQ": {
        "General": {
            "What is SevaAI?": "SevaAI is an AI-powered assistant for health insurance.",
            "How does SevaAI work?": "SevaAI helps you navigate the health insurance process in India."
        },
        "Technical Support": {
            "How can I reset my password?": "To reset your password, click on 'Forgot Password' on the login page."
        }
    }
}
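# New FAQ entries can be added under an existing or new category here;
# extract_faqs below picks them up automatically.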
# Extract FAQ pairs from the context
def extract_faqs(context):
    faq_list = []
    for faq_dict in context["SevaAI FAQ"].values():
        for question, answer in faq_dict.items():
            faq_list.append((question, answer))
    return faq_list
faqs = extract_faqs(context)
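# faqs is now a flat list of (question, answer) tuples, e.g.
# [("What is SevaAI?", "SevaAI is an AI-powered assistant for health insurance."), ...]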
# Find the best matching FAQ for the user's input
def find_best_match(user_input, faq_list):
    questions = [faq[0] for faq in faq_list]
    best_match = difflib.get_close_matches(user_input, questions, n=1, cutoff=0.6)
    if best_match:
        for question, answer in faq_list:
            if question == best_match[0]:
                return answer  # Return the answer to the matched question
    return None
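# difflib scores string similarity, so near-miss phrasings still match:
#     find_best_match("What is SevaAI", faqs)
#         -> "SevaAI is an AI-powered assistant for health insurance."
#     find_best_match("tell me about the weather", faqs)
#         -> None (similarity falls below the 0.6 cutoff)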
# Use the FAQ to guide the model's response
def generate_response(user_input, max_new_tokens=150, temperature=0.7, top_p=0.95):
    # First, check whether the user input matches any FAQ
    faq_answer = find_best_match(user_input, faqs)
    if faq_answer:
        # If an FAQ answer is found, prepend it to guide the model
        user_input = f"Here is the information I found: {faq_answer}. Now, how can I help you further?"
    # Tokenize the input and generate a continuation from the model
    inputs = tokenizer(user_input, return_tensors="pt")
    outputs = model.generate(
        **inputs,  # passes input_ids and attention_mask
        max_new_tokens=max_new_tokens,
        do_sample=True,  # sampling must be enabled for temperature/top_p to take effect
        temperature=temperature,
        top_p=top_p,
    )
    # Decode only the newly generated tokens, skipping the echoed prompt
    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    return response
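# Note: zephyr-7b-beta is a chat-tuned model, so wrapping the input in its chat
# template usually yields better answers than feeding raw text. A minimal sketch
# (assumes the tokenizer ships a chat template, which zephyr's does):
#     messages = [{"role": "user", "content": user_input}]
#     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
#     inputs = tokenizer(prompt, return_tensors="pt")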
# Function to handle Gradio input and output
def respond(message, system_message, max_tokens, temperature, top_p):
    # gr.Interface calls this with the five inputs defined below, in order.
    # system_message is currently not used by the model; the FAQ lookup steers the reply.
    return generate_response(message, max_new_tokens=int(max_tokens), temperature=temperature, top_p=top_p)
# Set up the Gradio interface
demo = gr.Interface(
    fn=respond,
    inputs=[
        gr.Textbox(label="Your message"),
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
    ],
    outputs="text"
)
if __name__ == "__main__":
    demo.launch()
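# Running `python app.py` starts a local Gradio server (by default at
# http://127.0.0.1:7860); passing share=True to demo.launch() creates a public link.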