import os
import io

import requests
import numpy as np
import faiss
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.llms import GroqLLM
import streamlit as st
# Initialize Groq API LLM using the key from the GROQ_API_KEY env var.
# NOTE(review): the langchain distribution does not ship a `GroqLLM` class
# (Groq support lives in the separate `langchain_groq` package as `ChatGroq`)
# — confirm this import and constructor against the installed versions.
llm = GroqLLM(api_key=os.getenv("GROQ_API_KEY"))
# Function to extract content from a public Google Drive PDF link
def extract_pdf_content(drive_url):
    """Download a publicly shared Google Drive PDF and return its text.

    Args:
        drive_url: A Google Drive share link of the form
            ``https://drive.google.com/file/d/<FILE_ID>/view...``.

    Returns:
        The concatenated text of all pages, or ``None`` when the link is
        malformed or the download fails.
    """
    # Guard the id extraction: a link without "/d/<id>/view" would
    # otherwise raise IndexError instead of failing gracefully.
    try:
        file_id = drive_url.split("/d/")[1].split("/view")[0]
    except IndexError:
        return None
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    response = requests.get(download_url)
    if response.status_code != 200:
        return None
    # Parse the PDF straight from memory — no need to persist a temp
    # "document.pdf" file in the working directory.
    reader = PdfReader(io.BytesIO(response.content))
    # extract_text() may return None for pages with no text layer; also
    # join instead of += to avoid quadratic string concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Function to create a FAISS vector store from the document content
def create_vector_store(text):
    """Split *text* into sentences and index them in a FAISS vector store.

    Args:
        text: Raw document text extracted from the PDF.

    Returns:
        A tuple ``(vector_store, sentences)`` — the FAISS store built over
        the sentence embeddings, and the list of sentence strings indexed.
    """
    # Naive sentence split on ". "; drop blank/whitespace-only fragments
    # so FAISS.from_texts never receives empty texts (and never an empty
    # list when the document has no usable text).
    sentences = [s.strip() for s in text.split(". ") if s.strip()]
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vector_store = FAISS.from_texts(sentences, embedding=embeddings)
    return vector_store, sentences
# --- Streamlit app ---------------------------------------------------------
st.title("RAG-based Application with Focused Context")

# Predefined Google Drive link to the source document.
drive_url = "https://drive.google.com/file/d/1XvqA1OIssRs2gbmOtKFKj-02yQ5X2yg0/view?usp=sharing"

# Extract document content.
st.write("Extracting content from the document...")
text = extract_pdf_content(drive_url)

if text:
    st.write("Document extracted successfully!")
    st.write("Creating vector store...")
    vector_store, sentences = create_vector_store(text)
    st.write("Vector store created successfully!")

    query = st.text_input("Enter your query:")
    if query:
        st.write("Retrieving relevant context from the document...")
        # Pass k at construction time; mutating `search_kwargs` on the
        # returned retriever after the fact is not a stable public API.
        retriever = vector_store.as_retriever(search_kwargs={"k": 3})

        # Prompt template that constrains the LLM to the retrieved context.
        prompt_template = PromptTemplate(
            template="""
Use the following context to answer the question:
{context}
Question: {question}
Answer:""",
            input_variables=["context", "question"],
        )

        # RetrievalQA cannot be built by passing llm/prompt directly to its
        # constructor (it expects a combine_documents_chain); use the
        # from_chain_type factory, which wires the prompt into a "stuff"
        # documents chain for us.
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=retriever,
            chain_type="stuff",
            chain_type_kwargs={"prompt": prompt_template},
        )

        # Run the query through the QA chain and show the answer.
        result = qa_chain.run(query)
        st.write("Answer:", result)
else:
    st.error("Failed to extract content from the document.")