Spaces: Build error
import streamlit as st
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI
import os
# Set up the directories for data and vector DB
DATA_DIR = "MyData"
DB_DIR = "MyData"

# Initialize the embeddings model
embeddings_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Load and process PDF documents
def load_data():
    loader = PyPDFDirectoryLoader(DATA_DIR)
    data_on_pdf = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", ". ", " ", ""],
        chunk_size=2000,
        chunk_overlap=200
    )
    splits = text_splitter.split_documents(data_on_pdf)
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=embeddings_model,
        persist_directory=DB_DIR
    )
    return vectorstore
# Set up the generative AI model (read the Google API key from the environment
# instead of hard-coding it in the source)
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=os.environ["GOOGLE_API_KEY"]
)
# Load vector store
vectorstore = load_data()

# Streamlit interface
st.title("RAG App: Question-Answering with PDFs")

# User input for question
question = st.text_input("Ask a question about the documents:")

if st.button("Submit"):
    if question:
        retriever = vectorstore.as_retriever()
        prompt = hub.pull("rlm/rag-prompt")

        def format_docs(docs):
            return "\n\n".join(doc.page_content for doc in docs)

        rag_chain = (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )
        response = rag_chain.invoke(question)
        st.markdown(response)
    else:
        st.warning("Please enter a question.")
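
One structural detail worth noting: Streamlit reruns the whole script on every interaction, so the code above reloads and re-embeds every PDF each time the user clicks Submit. A minimal sketch of how the indexing step could be cached, assuming a recent Streamlit release that provides st.cache_resource and reusing the load_data function defined above (the wrapper name get_vectorstore is hypothetical):

# Hypothetical caching wrapper: build the Chroma store once per process
# instead of on every Streamlit rerun.
@st.cache_resource
def get_vectorstore():
    # load_data() is the same function defined above: it loads the PDFs,
    # splits them, and persists the Chroma vector store.
    return load_data()

vectorstore = get_vectorstore()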