import os
import requests
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from groq import Groq


# Helper function to download the PDF from Google Drive
def load_pdf_from_drive(output_path="downloaded_document.pdf"):
    drive_link = "https://drive.google.com/file/d/1SzVEuEdKi4dHeKgDrUbmoq1MShB-hyG4/view?usp=drive_link"
    file_id = drive_link.split("/d/")[1].split("/")[0]
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    response = requests.get(download_url)
    response.raise_for_status()
    with open(output_path, "wb") as f:
        f.write(response.content)
    return output_path


# Helper function to extract the text from the PDF
def load_pdf_content(pdf_path):
    reader = PdfReader(pdf_path)
    text = ""
    for page in reader.pages:
        # extract_text() can return None for pages without extractable text
        text += page.extract_text() or ""
    return text


# Define the Streamlit app
st.title("RAG-Based Application with Groq API")
st.write("Processing a predefined PDF document from Google Drive to create a vector database and interact with it.")
st.write("Downloading and processing the document...")

# Download and load content from the PDF
pdf_path = load_pdf_from_drive()
document_text = load_pdf_content(pdf_path)

# Split the text into manageable chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
text_chunks = text_splitter.split_text(document_text)
st.write(f"Document split into {len(text_chunks)} chunks.")

# Initialize the embedding function
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create the FAISS vector database
faiss_index = FAISS.from_texts(text_chunks, embedding=embedding_function)
st.write("Vector database created successfully.")

# Save the FAISS index to disk
faiss_index.save_local("faiss_index")

# Initialize the Groq client for querying
# Prefer the GROQ_API_KEY environment variable; the hardcoded value is only a fallback
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "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")
client = Groq(api_key=GROQ_API_KEY)

# Chat interaction setup
st.write("Ask a question related to the document:")
user_query = st.text_input("Your question:")

if user_query:
    # Retrieve the most relevant chunks from the vector database
    relevant_docs = faiss_index.similarity_search(user_query, k=3)
    context = "\n\n".join(doc.page_content for doc in relevant_docs)

    # Ask the model to answer using the retrieved context
    query_response = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "Answer the question using only the provided document context."},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {user_query}"},
        ],
        model="llama-3.3-70b-versatile",
    )
    st.write("Response:")
    st.write(query_response.choices[0].message.content)
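
# Note: the script saves the FAISS index to "faiss_index" but never reads it back,
# so every rerun re-downloads the PDF and re-embeds the chunks. Below is a minimal,
# commented-out sketch of how a later run could reload the saved index instead;
# it assumes the same embedding_function defined above, and uses langchain's
# FAISS.load_local(folder_path, embeddings). Depending on the installed langchain
# version, load_local may additionally require allow_dangerous_deserialization=True.
#
# reloaded_index = FAISS.load_local("faiss_index", embedding_function)
# docs = reloaded_index.similarity_search("What is this document about?", k=3)
# for doc in docs:
#     print(doc.page_content)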