"""Streamlit RAG app: index a public Google Drive PDF and answer queries.

Flow: download the PDF -> split into sentence chunks -> embed into a
FAISS store -> retrieve top-k chunks for a user query -> answer via a
Groq-backed LLM through a RetrievalQA chain.
"""
import io
import os

import requests
import numpy as np
import faiss
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
# NOTE(review): langchain ships no `GroqLLM`; the maintained class is
# `langchain_groq.ChatGroq` — confirm this import actually resolves.
from langchain.llms import GroqLLM
import streamlit as st

# Initialize the Groq-backed LLM from the environment-provided API key.
llm = GroqLLM(api_key=os.getenv("GROQ_API_KEY"))


def extract_pdf_content(drive_url):
    """Download a publicly shared Google Drive PDF and return its text.

    Args:
        drive_url: Share link of the form
            ``https://drive.google.com/file/d/<FILE_ID>/...``.

    Returns:
        The concatenated text of every page, or ``None`` when the file id
        cannot be parsed or the download fails.
    """
    try:
        # Split on "/" rather than "/view" so share links without a
        # "/view" suffix still parse; guard against malformed URLs.
        file_id = drive_url.split("/d/")[1].split("/")[0]
    except IndexError:
        return None
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    response = requests.get(download_url)
    if response.status_code != 200:
        return None
    # Parse the response bytes in memory instead of writing a
    # "document.pdf" temp file — no working-directory litter, no
    # collisions between concurrent Streamlit sessions.
    reader = PdfReader(io.BytesIO(response.content))
    # extract_text() may return None for image-only pages; coalesce to ""
    # so the join never raises TypeError.
    return "".join(page.extract_text() or "" for page in reader.pages)


def create_vector_store(text):
    """Split *text* into sentence chunks and index them in FAISS.

    Args:
        text: The full document text.

    Returns:
        ``(vector_store, sentences)`` — the FAISS store and the raw chunks.
    """
    # Naive sentence split on ". "; drop empty/whitespace chunks so the
    # embedder never receives an empty string.
    sentences = [s for s in text.split(". ") if s.strip()]
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vector_store = FAISS.from_texts(sentences, embedding=embeddings)
    return vector_store, sentences


# ---- Streamlit app --------------------------------------------------------
st.title("RAG-based Application with Focused Context")

# Predefined Google Drive link
drive_url = "https://drive.google.com/file/d/1XvqA1OIssRs2gbmOtKFKj-02yQ5X2yg0/view?usp=sharing"

# Extract document content
st.write("Extracting content from the document...")
text = extract_pdf_content(drive_url)

if text:
    st.write("Document extracted successfully!")
    st.write("Creating vector store...")
    vector_store, sentences = create_vector_store(text)
    st.write("Vector store created successfully!")

    query = st.text_input("Enter your query:")
    if query:
        st.write("Retrieving relevant context from the document...")
        # Pass k at construction time; mutating retriever.search_kwargs
        # after as_retriever() is not part of the public API.
        retriever = vector_store.as_retriever(search_kwargs={"k": 3})

        # Prompt template guiding the LLM to answer from retrieved context.
        prompt_template = PromptTemplate(
            template="""
            Use the following context to answer the question:
            {context}

            Question: {question}
            Answer:""",
            input_variables=["context", "question"],
        )

        # RetrievalQA has no (retriever=, llm=, prompt=) constructor; the
        # documented way to wire a custom prompt is the from_chain_type
        # factory with chain_type_kwargs.
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=retriever,
            chain_type="stuff",
            chain_type_kwargs={"prompt": prompt_template},
        )

        # Run the query through the QA chain
        result = qa_chain.run(query)
        st.write("Answer:", result)
else:
    st.error("Failed to extract content from the document.")