# Hugging Face Spaces page-header residue from scraping ("Spaces: Sleeping");
# kept as a comment so the module remains valid Python.
import faiss
import numpy as np
import PyPDF2
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.docstore.in_memory import InMemoryDocstore
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import FAISS
def extract_text_from_pdf(pdf_file):
    """Extract the concatenated text of every page of a PDF.

    Args:
        pdf_file: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader`` (e.g. a Streamlit ``UploadedFile``).

    Returns:
        str: Text of all pages joined together. Pages with no extractable
        text (scanned/image-only pages) contribute an empty string.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # extract_text() may return None for image-only pages; the original
    # ``text += page.extract_text()`` raised TypeError in that case.
    # str.join also avoids quadratic string concatenation on large PDFs.
    return "".join(page.extract_text() or "" for page in reader.pages)
def create_embeddings(text):
    """Embed *text* as a single document with a MiniLM sentence encoder.

    Args:
        text: The full document text to embed.

    Returns:
        A list containing one embedding vector (list of floats).
    """
    encoder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return encoder.embed_documents([text])
def create_faiss_index(embeddings):
    """Build a flat (exact, L2-distance) FAISS index over *embeddings*.

    Args:
        embeddings: Sequence of equal-length embedding vectors.

    Returns:
        faiss.IndexFlatL2: Index populated with the given vectors.
    """
    # FAISS requires float32 input; dimensionality comes from the vectors.
    vectors = np.array(embeddings).astype('float32')
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index
def create_retrieval_chain(index, texts=None, embedding_model=None):
    """Wrap a raw FAISS index in a LangChain RetrievalQA chain.

    Fixes three API misuses in the original:
    - ``OpenAI(model="gpt-3.5-turbo")``: the ``OpenAI`` class is the
      *completions* wrapper and ``gpt-3.5-turbo`` is a chat-only model;
      the completion-compatible variant is ``gpt-3.5-turbo-instruct``.
    - ``FAISS(index)``: the FAISS vectorstore constructor requires an
      embedding function, a docstore, and an id mapping, not a bare index.
    - ``RetrievalQA(combine_docs_chain=llm, ...)``: the supported
      construction path is ``RetrievalQA.from_chain_type`` with a retriever.

    Args:
        index: A populated ``faiss`` index (e.g. from create_faiss_index).
        texts: Optional list of source text chunks, in the same order as
            the vectors in *index*; used to build the docstore so retrieved
            ids can be resolved back to documents. Defaults to None.
        embedding_model: Optional embeddings object providing
            ``embed_query``; a MiniLM encoder is created when omitted.

    Returns:
        RetrievalQA: A chain whose ``run(query)`` answers from the index.
    """
    llm = OpenAI(model="gpt-3.5-turbo-instruct")
    if embedding_model is None:
        embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    chunks = texts or []
    # Map FAISS row ids -> docstore ids -> Documents so retrieval can
    # return the original text for each matched vector.
    docstore = InMemoryDocstore(
        {str(i): Document(page_content=t) for i, t in enumerate(chunks)}
    )
    vector_store = FAISS(
        embedding_function=embedding_model.embed_query,
        index=index,
        docstore=docstore,
        index_to_docstore_id={i: str(i) for i in range(len(chunks))},
    )
    return RetrievalQA.from_chain_type(
        llm=llm, retriever=vector_store.as_retriever()
    )
def retrieve_and_generate(query, retrieval_qa):
    """Run *query* through the RetrievalQA chain and return its answer.

    Args:
        query: The user's natural-language question.
        retrieval_qa: Any object exposing ``run(query) -> str``.

    Returns:
        The chain's generated answer.
    """
    return retrieval_qa.run(query)
def main():
    """Streamlit entry point: upload a PDF, index it, answer queries.

    Flow: file upload -> text extraction -> embedding -> FAISS index ->
    RetrievalQA chain -> interactive query box.
    """
    st.title("RAG Application with FAISS & PDF")
    uploaded = st.file_uploader("Upload your PDF document", type="pdf")
    if uploaded is None:
        # Nothing to do until the user provides a document.
        return
    document_text = extract_text_from_pdf(uploaded)
    st.subheader("Extracted Text from PDF")
    # Preview only the first 1000 characters to keep the page light.
    st.write(document_text[:1000])
    qa_chain = create_retrieval_chain(
        create_faiss_index(create_embeddings(document_text))
    )
    query = st.text_input("Enter your query:")
    if query:
        answer = retrieve_and_generate(query, qa_chain)
        st.subheader("Answer from RAG Model:")
        st.write(answer)


if __name__ == "__main__":
    main()