Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import chainlit as cl | |
| import os | |
| from langchain_openai import ChatOpenAI | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import Chroma | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain.chains import ConversationalRetrievalChain | |
| from langchain_community.chat_message_histories import ChatMessageHistory | |
| from langchain.memory import ConversationBufferMemory | |
| from langchain_core.prompts import PromptTemplate | |
# Read the OpenAI credential from the environment (None when it is unset).
open_ai_key = os.environ.get("OPENAI_API_KEY")

# Chat model used to generate the answers.
llm = ChatOpenAI(api_key=open_ai_key)

# Answering prompt: {context} and {question} are the two placeholders
# declared as input variables on the PromptTemplate below.
template = (
    "Use the following pieces of information to answer the user's question.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "Only return the helpful answer below and nothing else.\n"
    "Helpful answer:\n"
)

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)
def process_pdf_and_ask_question(pdf_file, question):
    """Answer ``question`` from the contents of an uploaded PDF.

    The PDF is loaded, split into overlapping chunks, embedded into a
    throwaway Chroma collection and queried through a
    ``ConversationalRetrievalChain`` built on the module-level ``llm`` and
    ``prompt``.

    Args:
        pdf_file: Filesystem path to the PDF (``str`` or ``os.PathLike``), or
            an object that exposes the path as its ``.name`` attribute.
        question: The user's question about the document.

    Returns:
        The value stored under ``"answer"`` in the chain's result.

    Raises:
        gr.Error: If no PDF or no question was supplied, or if the PDF
            produced no text chunks to index.
    """
    import uuid

    # Validate up front so the UI shows a readable message instead of an
    # AttributeError / vector-store failure from deep inside the pipeline.
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file.")
    if not question or not question.strip():
        raise gr.Error("Please enter a question.")

    # Path-like inputs are used directly (``Path.name`` would be only the
    # basename); anything else is expected to carry the path in ``.name``.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    # Load the PDF and split its text into overlapping chunks.
    pages = PyPDFLoader(pdf_path).load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = text_splitter.split_documents(pages)
    if not docs:
        raise gr.Error("No extractable text was found in this PDF.")

    # Index the chunks. A unique collection name keeps chunks from earlier
    # uploads out of this query; without it every call would reuse the same
    # default-named collection within the process.
    embeddings = HuggingFaceEmbeddings(
        model_name="embaas/sentence-transformers-multilingual-e5-base"
    )
    db = Chroma.from_documents(
        docs,
        embeddings,
        collection_name=f"pdf-{uuid.uuid4().hex}",
    )

    try:
        # The memory is created fresh on every call, so it starts empty;
        # it is supplied because the chain is configured with ``memory=``.
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            output_key="answer",
            chat_memory=ChatMessageHistory(),
            return_messages=True,
        )

        # Retrieval chain that stuffs the retrieved chunks into ``prompt``.
        chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            chain_type="stuff",
            retriever=db.as_retriever(),
            memory=memory,
            return_source_documents=False,
            combine_docs_chain_kwargs={"prompt": prompt},
        )

        # ``invoke`` replaces the deprecated ``chain(...)`` call form.
        res = chain.invoke({"question": question})
        return res["answer"]
    finally:
        # Drop the per-call collection so indexed chunks do not accumulate.
        db.delete_collection()
def gradio_interface(pdf, question):
    """Gradio callback: forward the uploaded PDF and the question to the QA pipeline.

    Returns whatever ``process_pdf_and_ask_question`` returns for the same
    arguments.
    """
    answer = process_pdf_and_ask_question(pdf, question)
    return answer
# Gradio interface: one PDF upload plus a two-line question box in,
# plain text out.
pdf_input = gr.File(file_count="single", type="filepath")
question_input = gr.Textbox(lines=2, placeholder="Ask a question...")

demo = gr.Interface(
    fn=gradio_interface,
    inputs=[pdf_input, question_input],
    outputs="text",
    title="PDF Q&A",
    description="Upload a PDF and ask questions about it.",
)

# Start the web app as soon as the module is executed.
demo.launch()