# app.py
"""Streamlit front end for a PDF retrieval-augmented generation (RAG) demo.

The user uploads a PDF, its text is chunked and embedded, the embeddings are
inserted into a FAISS index, and free-text queries are then searched against
that index.
"""
import streamlit as st

from utils.pdf_processing import extract_text_from_pdf, split_into_chunks
from utils.embeddings import generate_embeddings
from utils.database import initialize_faiss, insert_embeddings, search_embeddings

# Keys used to persist state across Streamlit reruns of this script.
_INDEX_STATE_KEY = "rag_faiss_index"
_INDEXED_FILE_STATE_KEY = "rag_indexed_file"


def _index_for_upload(uploaded_file):
    """Return the FAISS index for the current upload, rebuilding only on change.

    Streamlit re-executes the whole script on every widget interaction
    (including each submitted query).  Without caching, every rerun would
    re-initialize the index and re-extract, re-embed and re-insert the same
    PDF.  The index is therefore kept in ``st.session_state`` together with
    an identity key for the file it was built from.

    Args:
        uploaded_file: The object returned by ``st.file_uploader`` or
            ``None`` when nothing is uploaded.

    Returns:
        The index returned by ``initialize_faiss()``, populated with the
        embeddings of ``uploaded_file`` when one is present.
    """
    # NOTE: name + size identifies the upload; a different file with the same
    # name and byte size would be treated as already indexed.
    if uploaded_file is None:
        file_key = None
    else:
        file_key = (uploaded_file.name, uploaded_file.size)

    already_built = (
        _INDEX_STATE_KEY in st.session_state
        and _INDEXED_FILE_STATE_KEY in st.session_state
        and st.session_state[_INDEXED_FILE_STATE_KEY] == file_key
    )
    if already_built:
        return st.session_state[_INDEX_STATE_KEY]

    # The upload changed (or this is the first run): start from a fresh index
    # so the result matches what a single run of the script would produce.
    faiss_index = initialize_faiss()

    if uploaded_file is not None:
        # Extract text from the uploaded PDF
        with st.spinner("Processing PDF..."):
            text = extract_text_from_pdf(uploaded_file)
            chunks = split_into_chunks(text)

        # Generate embeddings for text chunks
        with st.spinner("Generating embeddings..."):
            embeddings = generate_embeddings(chunks)

        # Insert embeddings into FAISS index
        with st.spinner("Inserting embeddings into FAISS..."):
            insert_embeddings(faiss_index, embeddings, chunks)

    # Record the result only after every step succeeded, so a failure above
    # is retried on the next rerun instead of caching a half-built index.
    st.session_state[_INDEX_STATE_KEY] = faiss_index
    st.session_state[_INDEXED_FILE_STATE_KEY] = file_key
    return faiss_index


def main():
    """Render the app: PDF upload, index construction, and query search."""
    st.title("PDF Retrieval-Augmented Generation (RAG) Application")

    # Upload PDF file
    uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

    # Build the index for this upload, or reuse the one from a previous rerun
    faiss_index = _index_for_upload(uploaded_file)

    if uploaded_file is not None:
        st.success("PDF processed and embeddings stored successfully!")

    # Search functionality
    query = st.text_input("Enter a query to search:")
    if query:
        with st.spinner("Searching..."):
            query_embedding = generate_embeddings([query])[0]
            results = search_embeddings(faiss_index, query_embedding)
        st.write("Results:", results)


if __name__ == "__main__":
    main()