# Streamlit RAG application: answers questions about a Google Drive PDF
# using FAISS retrieval and a Groq-hosted LLM.
import io
import os

import requests
import numpy as np
import faiss
import streamlit as st
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.llms import GroqLLM
# Initialize Groq API LLM; the key is read from the GROQ_API_KEY env var
# (getenv returns None if unset — the constructor decides how that fails).
# NOTE(review): `GroqLLM` is not a class shipped in `langchain.llms` in
# published releases (Groq support lives in `langchain_groq.ChatGroq`);
# confirm the import at the top of this file actually resolves.
llm = GroqLLM(api_key=os.getenv("GROQ_API_KEY"))
# Function to extract content from a public Google Drive PDF link
def extract_pdf_content(drive_url):
    """Download a publicly shared Google Drive PDF and return its text.

    Parameters
    ----------
    drive_url : str
        A Google Drive share link of the form
        ``https://drive.google.com/file/d/<FILE_ID>/view?...``.

    Returns
    -------
    str | None
        The concatenated text of every page, or ``None`` when the link is
        malformed or the download does not return HTTP 200.
    """
    # A link without the expected "/d/<id>/view" shape would raise
    # IndexError here; report failure instead of crashing the app.
    try:
        file_id = drive_url.split("/d/")[1].split("/view")[0]
    except IndexError:
        return None
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    # Timeout so a stalled Drive response cannot hang the Streamlit app.
    response = requests.get(download_url, timeout=30)
    if response.status_code != 200:
        return None
    # Parse the PDF directly from memory instead of round-tripping through
    # a "document.pdf" file in the working directory (no clobbered files,
    # no leftover temp file).
    reader = PdfReader(io.BytesIO(response.content))
    # extract_text() returns None for image-only pages — substitute "" so
    # the join cannot raise TypeError; join avoids quadratic `+=`.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Function to create a FAISS vector store from the document content
def create_vector_store(text):
    """Split *text* into sentence-ish chunks and index them with FAISS.

    Parameters
    ----------
    text : str
        Raw document text.

    Returns
    -------
    tuple[FAISS, list[str]]
        The populated vector store and the list of chunks it indexes.
    """
    # Crude segmentation on ". "; drop empty/whitespace-only chunks, which
    # would otherwise be embedded as meaningless vectors — and an all-empty
    # list would make FAISS.from_texts fail outright.
    sentences = [chunk.strip() for chunk in text.split(". ") if chunk.strip()]
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_texts(sentences, embedding=embeddings)
    return vector_store, sentences
# ---------------------------------------------------------------------------
# Streamlit app: download the PDF, index it, and answer user queries with
# retrieval-augmented generation over the indexed chunks.
# ---------------------------------------------------------------------------
st.title("RAG-based Application with Focused Context")

# Predefined Google Drive link
drive_url = "https://drive.google.com/file/d/1XvqA1OIssRs2gbmOtKFKj-02yQ5X2yg0/view?usp=sharing"

# Extract document content
st.write("Extracting content from the document...")
text = extract_pdf_content(drive_url)

if text:
    st.write("Document extracted successfully!")
    st.write("Creating vector store...")
    vector_store, sentences = create_vector_store(text)
    st.write("Vector store created successfully!")

    query = st.text_input("Enter your query:")
    if query:
        st.write("Retrieving relevant context from the document...")
        # Ask for the top 3 matches at construction time rather than
        # mutating `retriever.search_kwargs` after the fact.
        retriever = vector_store.as_retriever(search_kwargs={"k": 3})

        # Prompt that grounds the LLM's answer in the retrieved context.
        prompt_template = PromptTemplate(
            template="""
Use the following context to answer the question:
{context}
Question: {question}
Answer:""",
            input_variables=["context", "question"],
        )

        # BUG FIX: LangChain's RetrievalQA cannot be constructed directly
        # with `llm=`/`prompt=` keyword arguments — the original call raised
        # a validation error and silently discarded the prompt. Use the
        # documented factory and pass the prompt via `chain_type_kwargs`.
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=retriever,
            chain_type="stuff",
            chain_type_kwargs={"prompt": prompt_template},
        )

        # Run the query through the QA chain
        result = qa_chain.run(query)
        st.write("Answer:", result)
else:
    st.error("Failed to extract content from the document.")