# SmartPDF_Rag / app.py
# Author: kartik2627
# Commit message: Update app.py
# Commit: 84d3174 (verified)
import streamlit as st
import PyPDF2
import faiss
import numpy as np
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts; ``main`` passes the
            object returned by Streamlit's file uploader.

    Returns:
        A single string with the pages' text joined in page order, with no
        separator inserted between pages. Pages that yield no text
        contribute an empty string.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # `or ""` guards against a page whose extraction yields None (e.g. an
    # image-only page), which would otherwise break string concatenation.
    # join() also avoids rebuilding the string once per page.
    return "".join(page.extract_text() or "" for page in reader.pages)
def create_embeddings(text):
    """Embed ``text`` as one document with the all-MiniLM-L6-v2 model.

    Args:
        text: The full string to embed; it is submitted as a single
            document, not split into chunks.

    Returns:
        The result of ``embed_documents`` for a one-element input, i.e. a
        list expected to hold exactly one embedding vector.
    """
    # A fresh embedding model is constructed on every call.
    encoder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return encoder.embed_documents([text])
def create_faiss_index(embeddings):
    """Build a flat L2 FAISS index holding the given embedding vectors.

    Args:
        embeddings: Non-empty sequence of equal-length numeric vectors.

    Returns:
        A ``faiss.IndexFlatL2`` with every vector added in input order.
    """
    # The dimensionality is taken from the first vector.
    vector_dim = len(embeddings[0])
    flat_index = faiss.IndexFlatL2(vector_dim)
    # The vectors are converted to a float32 array before being added.
    matrix = np.array(embeddings).astype('float32')
    flat_index.add(matrix)
    return flat_index
def create_retrieval_chain(index, texts=None, embedding_model=None):
    """Build a RetrievalQA chain on top of an already-populated FAISS index.

    A raw FAISS index stores only vectors, so the text behind each vector
    has to be supplied separately for retrieval to return any content.

    Args:
        index: A faiss index whose i-th vector is the embedding of
            ``texts[i]``.
        texts: The source strings, in the order their vectors were added
            to ``index``. Required.
        embedding_model: Embeddings object used to embed queries. It must
            be the same model that produced the vectors in ``index``.
            Defaults to the all-MiniLM-L6-v2 HuggingFace model.

    Returns:
        A ``RetrievalQA`` chain ("stuff" strategy) backed by a chat model.

    Raises:
        ValueError: If ``texts`` is missing/empty or its length does not
            match the number of vectors stored in ``index``.
    """
    # Validate before touching any heavy dependency.
    documents = list(texts) if texts is not None else []
    if not documents:
        raise ValueError(
            "texts is required: a raw FAISS index holds vectors only, "
            "not the text needed to answer queries."
        )
    if len(documents) != index.ntotal:
        raise ValueError(
            f"texts has {len(documents)} entries but the index holds "
            f"{index.ntotal} vectors; they must correspond one-to-one."
        )

    # Imported here so the file's top-level import block stays untouched.
    from langchain.chat_models import ChatOpenAI
    from langchain.docstore.document import Document
    from langchain.docstore.in_memory import InMemoryDocstore

    if embedding_model is None:
        embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

    # The LangChain FAISS wrapper needs a docstore plus a mapping from each
    # FAISS row number to a docstore id in order to return documents.
    doc_ids = [str(position) for position in range(len(documents))]
    docstore = InMemoryDocstore(
        {
            doc_id: Document(page_content=content)
            for doc_id, content in zip(doc_ids, documents)
        }
    )
    vector_store = FAISS(
        embedding_function=embedding_model.embed_query,
        index=index,
        docstore=docstore,
        index_to_docstore_id=dict(enumerate(doc_ids)),
    )

    # Never request more neighbours than there are stored documents.
    retriever = vector_store.as_retriever(
        search_kwargs={"k": min(4, len(documents))}
    )

    # gpt-3.5-turbo is a chat model, so it needs the chat wrapper rather
    # than the completion-style OpenAI class.
    llm = ChatOpenAI(model_name="gpt-3.5-turbo")
    return RetrievalQA.from_chain_type(
        llm=llm, chain_type="stuff", retriever=retriever
    )


def retrieve_and_generate(query, retrieval_qa):
    """Run ``query`` through the retrieval chain and return its answer."""
    return retrieval_qa.run(query)


def main():
    """Streamlit page: upload a PDF, preview its text, then answer queries."""
    st.title("RAG Application with FAISS & PDF")
    pdf_file = st.file_uploader("Upload your PDF document", type="pdf")
    if pdf_file is None:
        return

    text = extract_text_from_pdf(pdf_file)
    st.subheader("Extracted Text from PDF")
    st.write(text[:1000])  # Display first 1000 characters for preview

    # Without extractable text there is nothing to index or answer from.
    if not text.strip():
        st.warning("No extractable text was found in this PDF.")
        return

    embeddings = create_embeddings(text)
    index = create_faiss_index(embeddings)
    # The whole document is embedded as one vector, so it is also the one
    # and only retrievable text.
    # TODO(review): chunk the text so long PDFs fit the model's context.
    retrieval_qa = create_retrieval_chain(index, texts=[text])

    query = st.text_input("Enter your query:")
    if query:
        response = retrieve_and_generate(query, retrieval_qa)
        st.subheader("Answer from RAG Model:")
        st.write(response)
# Start the app only when this file is executed as the main script.
if __name__ == "__main__":
    main()