import gradio as gr
from dotenv import load_dotenv
from langchain_community.document_loaders import WhatsAppChatLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Load environment variables (e.g. a default OPENAI_API_KEY) from a .env file
load_dotenv()

# Initialize the embedding model and a persistent ChromaDB vector store
embeddings = HuggingFaceEmbeddings()
persist_directory = "whatsapp_embeddings"
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
def load_chat_content(file) -> str:
    """Load chat content from the uploaded file and store it in ChromaDB."""
    # Depending on the Gradio version, gr.File passes either a file path (str)
    # or a tempfile-like object with a .name attribute; handle both
    path = file if isinstance(file, str) else file.name

    # Parse the WhatsApp export into Documents
    loader = WhatsAppChatLoader(path=path)
    messages = list(loader.lazy_load())

    # Combine all messages into a single string
    chat_content = "\n".join(doc.page_content for doc in messages)

    # Split the text into overlapping chunks for embedding
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=200,
        length_function=len,
        is_separator_regex=False,
    )
    chunks = text_splitter.create_documents([chat_content])

    # Store the chunks in ChromaDB; note that re-uploading the same file
    # adds duplicate chunks to the collection
    vectordb.add_documents(documents=chunks)
    vectordb.persist()  # Persist to disk (automatic in newer Chroma versions)
    return chat_content
def answer_question(openai_api_key, file, question: str) -> str:
    """Generate an answer based on chat content and a question."""
    # Initialize the OpenAI chat model with the user's API key
    llm = ChatOpenAI(
        openai_api_key=openai_api_key,
        temperature=0.1,
        max_tokens=100,
        model="gpt-4o-mini",
    )

    # Load the uploaded chat and index it in ChromaDB
    load_chat_content(file)

    # Retrieve the chunks most relevant to the question
    docs = vectordb.similarity_search(question)
    if not docs:
        return "No relevant documents found."

    # Combine the retrieved chunks into a single context string
    chat_content = " ".join(doc.page_content for doc in docs)

    # Generate a response grounded in the retrieved context
    response = llm.invoke(chat_content + "\n\n" + question)
    return response.content
# Define the Gradio interface
interface = gr.Interface(
    fn=answer_question,
    inputs=[
        gr.Textbox(label="Enter OpenAI API Key", type="password"),
        gr.File(label="Upload WhatsApp Chat File"),
        gr.Textbox(label="Ask a Question", placeholder="Enter your question here..."),
    ],
    outputs="text",
    title="WhatsApp Chat Q&A",
    description="Upload a WhatsApp chat file and ask questions related to the chat content.",
)
if __name__ == "__main__":
    interface.launch()
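
For reference, here is a minimal smoke test that can be run as a separate script to check the two moving parts in isolation: that WhatsAppChatLoader parses an export, and that the persisted Chroma store answers similarity queries. The sample chat text, the sample_chat.txt filename, and the query are illustrative assumptions, not part of the app above; the message format shown is one common WhatsApp export layout.

# Minimal sketch, assuming the app above has run at least once so that the
# 'whatsapp_embeddings' directory exists. Sample text and file name are
# illustrative assumptions only.
from langchain_community.document_loaders import WhatsAppChatLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# A tiny export in a common WhatsApp format: "date, time - sender: message"
sample = (
    "1/23/23, 3:19 PM - Alice: The meeting moved to Friday at noon\n"
    "1/23/23, 3:20 PM - Bob: Thanks, see you then\n"
)
with open("sample_chat.txt", "w") as f:
    f.write(sample)

# The document loader returns Documents containing the formatted messages
docs = WhatsAppChatLoader(path="sample_chat.txt").load()
print(docs[0].page_content)

# Query the store the app persists to
vectordb = Chroma(
    persist_directory="whatsapp_embeddings",
    embedding_function=HuggingFaceEmbeddings(),
)
for doc in vectordb.similarity_search("When is the meeting?", k=2):
    print(doc.page_content)

If the loader parsed the sample and the store was previously populated, both print statements should show message text; an empty result from the second loop usually just means no chat has been indexed yet.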