import streamlit as st
import PyPDF2
import faiss
import numpy as np
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA

# Name of the sentence-transformers model used for all embeddings.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"


def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in *pdf_file*.

    Args:
        pdf_file: A file-like object (e.g. Streamlit's UploadedFile) readable
            by PyPDF2.

    Returns:
        str: All extracted page text joined together. Pages with no
        extractable text (e.g. scanned images) contribute an empty string.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # BUG FIX: extract_text() may return None for image-only pages; the
    # original `text += page.extract_text()` would raise TypeError there.
    return "".join((page.extract_text() or "") for page in reader.pages)


def split_text(text, chunk_size=1000, overlap=100):
    """Split *text* into overlapping fixed-size chunks for retrieval.

    Embedding the whole document as a single vector (as the original code
    did) makes similarity search useless — every query matches the one and
    only vector. Chunking gives the retriever something to rank.

    Args:
        text: The source text.
        chunk_size: Maximum characters per chunk.
        overlap: Characters shared between consecutive chunks, so sentences
            cut at a boundary still appear whole in one chunk.

    Returns:
        list[str]: Non-empty chunks; empty list for empty input.

    Raises:
        ValueError: If overlap >= chunk_size (the loop would not advance).
    """
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    chunks = []
    step = chunk_size - overlap
    for start in range(0, len(text), step):
        chunk = text[start:start + chunk_size]
        if chunk:
            chunks.append(chunk)
    return chunks


def create_embeddings(text):
    """Embed *text* as a single document vector.

    Kept for backward compatibility with the original module interface.

    Returns:
        list[list[float]]: One embedding vector per input document (here: one).
    """
    embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
    return embedding_model.embed_documents([text])


def create_faiss_index(embeddings):
    """Build a raw faiss L2 index over *embeddings*.

    Kept for backward compatibility with the original module interface.

    Args:
        embeddings: Non-empty sequence of equal-length float vectors.

    Returns:
        faiss.IndexFlatL2: Index with all vectors added.
    """
    dim = len(embeddings[0])
    index = faiss.IndexFlatL2(dim)
    index.add(np.asarray(embeddings, dtype="float32"))
    return index


def create_vector_store(chunks):
    """Build a LangChain FAISS vector store from text *chunks*.

    BUG FIX: the original code did `FAISS(index)`, which is not a valid
    construction — LangChain's FAISS wrapper needs an embedding function,
    a docstore, and an index-to-docstore mapping. `FAISS.from_texts` is
    the supported factory and builds all of that.
    """
    embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
    return FAISS.from_texts(chunks, embedding_model)


def create_retrieval_chain(vector_store):
    """Wire an LLM and *vector_store* into a RetrievalQA chain.

    BUG FIX: the original `RetrievalQA(combine_docs_chain=llm,
    vectorstore=...)` is not a valid constructor call — a bare LLM is not a
    combine-documents chain. `RetrievalQA.from_chain_type` is the documented
    factory for this.

    Args:
        vector_store: A LangChain vector store exposing ``as_retriever()``.

    Returns:
        RetrievalQA: A runnable question-answering chain.
    """
    # NOTE(review): "gpt-3.5-turbo" is a chat model; the completion-style
    # OpenAI class normally pairs with ChatOpenAI — confirm against the
    # installed langchain version before changing.
    llm = OpenAI(model="gpt-3.5-turbo")
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vector_store.as_retriever(),
    )


def retrieve_and_generate(query, retrieval_qa):
    """Run *query* through the RetrievalQA chain and return its answer."""
    return retrieval_qa.run(query)


def main():
    """Streamlit entry point: upload a PDF, index it, answer queries."""
    st.title("RAG Application with FAISS & PDF")
    pdf_file = st.file_uploader("Upload your PDF document", type="pdf")
    if pdf_file is None:
        return

    text = extract_text_from_pdf(pdf_file)
    st.subheader("Extracted Text from PDF")
    st.write(text[:1000])  # Display first 1000 characters for preview

    # Streamlit reruns this script on every widget interaction; cache the
    # expensive embedding/index build per uploaded file so typing a query
    # does not re-embed the whole document each keystroke.
    cache_key = (pdf_file.name, pdf_file.size)
    if st.session_state.get("rag_cache_key") != cache_key:
        chunks = split_text(text)
        st.session_state["rag_chain"] = create_retrieval_chain(
            create_vector_store(chunks)
        )
        st.session_state["rag_cache_key"] = cache_key
    retrieval_qa = st.session_state["rag_chain"]

    query = st.text_input("Enter your query:")
    if query:
        response = retrieve_and_generate(query, retrieval_qa)
        st.subheader("Answer from RAG Model:")
        st.write(response)


if __name__ == "__main__":
    main()