import gradio as gr
import faiss
import json
import numpy as np
from sentence_transformers import SentenceTransformer
from huggingface_hub import InferenceClient

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# Load Uber FAQ Data
with open("uber_faqs.json", "r") as f:
    faq_data = json.load(f)

faq_questions = [item["question"] for item in faq_data]
faq_answers = {item["question"]: item["answer"] for item in faq_data}
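# A minimal sketch of the layout uber_faqs.json is expected to have
# (illustrative entries, not the real file):
# [
#   {"question": "How do I request a ride?", "answer": "Open the app and ..."},
#   {"question": "How is my fare calculated?", "answer": "Fares depend on ..."}
# ]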

# Load Sentence Transformer Model for Embeddings
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
faq_embeddings = embedding_model.encode(faq_questions, convert_to_numpy=True)

# Create FAISS index (exact brute-force search over L2 distance)
index = faiss.IndexFlatL2(faq_embeddings.shape[1])
index.add(faq_embeddings)
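# Optional sanity check (illustrative, assumes no duplicate questions):
# re-embedding an indexed question should return it as its own nearest neighbour.
# _, nearest = index.search(embedding_model.encode([faq_questions[0]], convert_to_numpy=True), k=1)
# assert nearest[0][0] == 0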



def retrieve_uber_info(query):
    """Retrieve the most relevant Uber FAQ answer for the given query."""
    query_embedding = embedding_model.encode([query], convert_to_numpy=True)
    distances, indices = index.search(query_embedding, k=1)  # closest match by L2 distance
    retrieved_question = faq_questions[indices[0][0]]
    return faq_answers[retrieved_question]
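# Example usage (illustrative query; the output depends on the FAQ data):
#   retrieve_uber_info("How do I get a fare estimate?")
#   -> answer text paired with the FAQ question nearest in embedding space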


# Zephyr-7B-beta served through the Hugging Face Inference API
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate a response using Zephyr 7B while integrating retrieved Uber knowledge."""
    
    # Ground the reply: append the closest FAQ answer to the system prompt.
    retrieved_answer = retrieve_uber_info(message)
    system_message += f"\n\nUber FAQ Context: {retrieved_answer}"

    messages = [{"role": "system", "content": system_message}]
    
    # Replay prior turns as alternating user/assistant messages
    # (ChatInterface passes history as a list of (user, assistant) pairs here).
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    
    messages.append({"role": "user", "content": message})

    response = ""
    # Stream tokens; the loop variable is named `chunk` so it does not
    # shadow the `message` argument above.
    for chunk in client.chat_completion(
        messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p
    ):
        token = chunk.choices[0].delta.content or ""  # delta content can be None on some chunks
        response += token
        yield response
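    # A non-streaming variant is also possible (sketch, assuming the same
    # huggingface_hub client; stream defaults to False):
    #   result = client.chat_completion(
    #       messages, max_tokens=max_tokens, temperature=temperature, top_p=top_p
    #   )
    #   return result.choices[0].message.content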


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are an Uber AI assistant. Only answer questions about Uber services, policies, pricing, and support. If a question is unrelated to Uber, say 'I can only help with Uber-related topics.'", label="System Instruction"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()