"""Streamlit RAG demo: upload a PDF, index it in FAISS, and answer questions about it."""

import os
import tempfile

import streamlit as st
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Directory name handed to FAISS.save_local for the persisted index.
FAISS_FILE = "vector_store.faiss"

# Load API key from Hugging Face secrets (load_dotenv also picks up a local .env).
load_dotenv()
# NOTE(review): the key is read from GROQ_API_KEY but exported as the OpenAI
# key below — confirm the configured key is actually valid for the OpenAI
# embeddings/LLM classes used in this script.
OPENAI_API_KEY = os.getenv("GROQ_API_KEY")
if not OPENAI_API_KEY:
    st.error("API key is not set. Please set GROQ_API_KEY in Hugging Face secrets.")
    # Halt this run: assigning None into os.environ raises TypeError, and
    # nothing below can work without a key anyway.
    st.stop()

# Configure OpenAI API key
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY


def _load_pdf_documents(uploaded_file):
    """Return the documents PyPDFLoader extracts from a Streamlit upload.

    PyPDFLoader takes a file path, not a file-like object, so the uploaded
    bytes are written to a temporary file that is removed once loading ends.
    """
    # delete=False so the file can be reopened by path on every platform;
    # it is removed explicitly in the finally clause.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(uploaded_file.getvalue())
        tmp_path = tmp.name
    try:
        return PyPDFLoader(tmp_path).load()
    finally:
        os.remove(tmp_path)


def _build_vector_db(uploaded_file):
    """Chunk and embed the uploaded PDF, save the FAISS index, and return it.

    Returns None when the PDF yields no chunks (for example, no extractable
    text), because an index cannot be built from an empty document list.
    """
    documents = _load_pdf_documents(uploaded_file)

    # Split the text into overlapping chunks.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_documents(documents)
    if not chunks:
        return None

    # Embed the chunks and store them in a FAISS vector database.
    embeddings = OpenAIEmbeddings()
    vector_db = FAISS.from_documents(chunks, embeddings)
    vector_db.save_local(FAISS_FILE)
    return vector_db


# Streamlit app UI
st.title("RAG-based Application")
st.write("Upload a PDF, ask questions, and get answers based on the document content.")

# Upload PDF file
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file:
    # Streamlit reruns the whole script on every interaction, so the index is
    # kept in session state and rebuilt only when a different file is uploaded.
    cache_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("vector_db_key") != cache_key:
        st.write("Processing the document and creating vector database...")
        st.session_state["vector_db"] = _build_vector_db(uploaded_file)
        st.session_state["vector_db_key"] = cache_key
    vector_db = st.session_state["vector_db"]

    if vector_db is None:
        st.error("No text could be extracted from the uploaded PDF.")
    else:
        st.success(f"Vector database saved as {FAISS_FILE}.")

        # Question-Answer Retrieval
        st.write("You can now ask questions about the document.")
        query = st.text_input("Enter your question:")

        if query:
            # Initialize QA Chain
            retriever = vector_db.as_retriever()
            # NOTE(review): text-davinci-003 is a legacy completion model —
            # confirm it is still available for the configured key.
            llm = OpenAI(model="text-davinci-003", temperature=0.7)
            # RetrievalQA is built through its factory, which wires the LLM
            # into a document-combining chain for the retriever.
            qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

            # Get the answer
            with st.spinner("Generating answer..."):
                answer = qa_chain.run(query)
            st.success("Answer:")
            st.write(answer)

# Deployment instructions
st.write("To deploy this app on Hugging Face, use the following command:")
st.code("huggingface-cli login && huggingface-cli deploy --app-dir ")