# WhatsApp Chat Q&A — Gradio app (author: pratikshahp, commit 6bffce1).
import gradio as gr
import os
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from langchain_community.document_loaders import WhatsAppChatLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Load environment variables (e.g. credentials) from a local .env file, if present.
load_dotenv()
# Module-level vector store shared by both functions below: default
# HuggingFace sentence embeddings backed by a Chroma collection that is
# persisted on disk so indexed chats survive restarts.
# NOTE(review): HuggingFaceEmbeddings() with no arguments downloads its default
# model on first use — confirm startup network access is acceptable.
embeddings = HuggingFaceEmbeddings()
persist_directory = 'whatsapp_embeddings'
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
def load_chat_content(file) -> str:
    """Load a WhatsApp chat export, chunk it, and index it in ChromaDB.

    Args:
        file: Uploaded file object (Gradio ``gr.File``); only ``file.name``
            (the temp-file path on disk) is read.

    Returns:
        The full chat transcript as a single newline-joined string, or ""
        when the file contains no parseable messages.
    """
    loader = WhatsAppChatLoader(path=file.name)
    # load() materializes all messages directly — no need for lazy_load() + list().
    messages = loader.load()
    chat_content = "\n".join(doc.page_content for doc in messages)
    # Guard: nothing parsed — skip indexing so we don't store empty chunks.
    if not chat_content:
        return ""
    # Split the transcript into overlapping chunks sized for retrieval.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=200,
        length_function=len,
        is_separator_regex=False,
    )
    chunks = text_splitter.create_documents([chat_content])
    # NOTE(review): documents are appended on every call, so re-uploading the
    # same chat duplicates entries in the collection — consider deduplication.
    vectordb.add_documents(documents=chunks)
    # Persist to disk. NOTE(review): deprecated no-op on chromadb >= 0.4,
    # where collections with a persist_directory auto-persist — confirm the
    # installed version before removing.
    vectordb.persist()
    return chat_content
def answer_question(openai_api_key, file, question: str) -> str:
    """Answer a question about an uploaded WhatsApp chat via retrieval + LLM.

    Args:
        openai_api_key: User-supplied OpenAI API key.
        file: Uploaded WhatsApp chat export (Gradio file object).
        question: Natural-language question about the chat content.

    Returns:
        The model's answer, or a human-readable validation/fallback message.
    """
    # Fail fast with clear messages instead of opaque downstream errors
    # (OpenAI auth failure, AttributeError on file.name, empty search).
    if not openai_api_key:
        return "Please provide an OpenAI API key."
    if file is None:
        return "Please upload a WhatsApp chat file."
    if not question or not question.strip():
        return "Please enter a question."
    llm = ChatOpenAI(
        openai_api_key=openai_api_key,
        temperature=0.1,
        max_tokens=100,
        model="gpt-4o-mini",
    )
    # Index the uploaded chat into the shared vector store.
    load_chat_content(file)
    # Retrieve the chunks most similar to the question.
    docs = vectordb.similarity_search(question)
    if not docs:
        return "No relevant documents found."
    context = " ".join(doc.page_content for doc in docs)
    # Frame the retrieved chunks explicitly as grounding context rather than
    # bare-concatenating them with the question.
    prompt = (
        "Answer the question using only the WhatsApp chat excerpts below.\n\n"
        f"Chat excerpts:\n{context}\n\nQuestion: {question}"
    )
    response = llm.invoke(prompt)
    return response.content
# Gradio front-end: API key + chat file + question in, answer text out,
# all routed through answer_question.
_widgets = [
    gr.Textbox(label="Enter OpenAI API Key", type="password"),
    gr.File(label="Upload WhatsApp Chat File"),
    gr.Textbox(label="Ask a Question", placeholder="Enter your question here..."),
]
interface = gr.Interface(
    fn=answer_question,
    inputs=_widgets,
    outputs="text",
    title="WhatsApp Chat Q&A",
    description="Upload a WhatsApp chat file and ask questions related to the chat content.",
)
# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    interface.launch()