File size: 2,831 Bytes
05b86d4
daa4a6a
d386915
daa4a6a
 
05b86d4
 
 
 
 
 
d386915
0ac9077
05b86d4
 
644455e
05b86d4
daa4a6a
 
 
 
 
 
644455e
daa4a6a
 
05b86d4
daa4a6a
 
 
 
 
644455e
05b86d4
 
 
 
 
 
fdf7122
daa4a6a
05b86d4
fdf7122
daa4a6a
 
fdf7122
daa4a6a
 
 
 
 
fdf7122
05b86d4
 
018761e
05b86d4
d285555
daa4a6a
 
05b86d4
 
 
 
 
 
 
 
 
 
 
 
 
 
daa4a6a
 
05b86d4
 
 
 
 
daa4a6a
05b86d4
 
 
 
daa4a6a
 
05b86d4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import io
import os

import faiss
import numpy as np
import requests
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import GroqLLM
from langchain.prompts import PromptTemplate
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer

# Initialize Groq API LLM
# Reads the API key from the GROQ_API_KEY environment variable; os.getenv returns
# None if it is unset, so a missing key surfaces only when the LLM is first used.
# NOTE(review): `GroqLLM` is imported from langchain.llms above — confirm this class
# exists in the installed langchain version (Groq support commonly ships as
# `langchain_groq.ChatGroq` instead).
llm = GroqLLM(api_key=os.getenv("GROQ_API_KEY"))

# Function to extract content from a public Google Drive PDF link
def extract_pdf_content(drive_url):
    """Download a publicly shared Google Drive PDF and return its text.

    Parameters
    ----------
    drive_url : str
        A Google Drive share link of the form
        ``https://drive.google.com/file/d/<FILE_ID>/view?...``.

    Returns
    -------
    str | None
        The concatenated text of all pages, or ``None`` if the link is
        malformed or the download fails.
    """
    # Parse the file id defensively: a link without "/d/<id>/view" previously
    # raised IndexError instead of signalling failure with None.
    try:
        file_id = drive_url.split("/d/")[1].split("/view")[0]
    except IndexError:
        return None

    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    try:
        # Timeout prevents the Streamlit app from hanging forever on a dead link.
        response = requests.get(download_url, timeout=30)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None

    # Parse the PDF in memory rather than writing a throwaway "document.pdf"
    # to the working directory (avoids stray files and concurrent-run races).
    reader = PdfReader(io.BytesIO(response.content))
    # extract_text() may return None for image-only pages; coalesce to "" so
    # the join never sees None (the old `text += ...` loop crashed on that,
    # and repeated += was quadratic besides).
    return "".join(page.extract_text() or "" for page in reader.pages)

# Function to create a FAISS vector store from the document content
def create_vector_store(text):
    """Split *text* into sentence-ish chunks and index them in FAISS.

    Parameters
    ----------
    text : str
        Raw document text.

    Returns
    -------
    tuple
        ``(vector_store, sentences)`` — the FAISS store and the list of
        chunks that were actually indexed.

    Raises
    ------
    ValueError
        If *text* yields no non-empty chunks (nothing to index).
    """
    # Naive sentence split on ". "; drop blank/whitespace-only fragments so we
    # never embed empty strings, and fail loudly on an empty document instead
    # of handing FAISS.from_texts an empty list.
    sentences = [chunk.strip() for chunk in text.split(". ") if chunk.strip()]
    if not sentences:
        raise ValueError("Document produced no text chunks to index.")
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_texts(sentences, embedding=embeddings)
    return vector_store, sentences

# ---------------------------------------------------------------------------
# Streamlit app: download a fixed Google Drive PDF, index it in FAISS, and
# answer user queries via retrieval-augmented generation.
# ---------------------------------------------------------------------------
st.title("RAG-based Application with Focused Context")

# Predefined Google Drive link
drive_url = "https://drive.google.com/file/d/1XvqA1OIssRs2gbmOtKFKj-02yQ5X2yg0/view?usp=sharing"

# Extract document content
st.write("Extracting content from the document...")
text = extract_pdf_content(drive_url)
if text:
    st.write("Document extracted successfully!")

    st.write("Creating vector store...")
    vector_store, sentences = create_vector_store(text)

    st.write("Vector store created successfully!")

    query = st.text_input("Enter your query:")
    if query:
        st.write("Retrieving relevant context from the document...")
        # Retrieve only the top 3 matches so the prompt stays focused; pass
        # search_kwargs at construction instead of mutating the retriever.
        retriever = vector_store.as_retriever(search_kwargs={"k": 3})

        # Prompt template that constrains the LLM to the retrieved context.
        prompt_template = PromptTemplate(
            template="""
            Use the following context to answer the question:
            
            {context}
            
            Question: {question}
            Answer:""",
            input_variables=["context", "question"]
        )

        # RetrievalQA cannot be built by passing `llm`/`prompt` directly to its
        # constructor (it expects a combine_documents_chain); the documented
        # path is the from_chain_type factory with a "stuff" chain and the
        # prompt forwarded via chain_type_kwargs.
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=retriever,
            chain_type="stuff",
            chain_type_kwargs={"prompt": prompt_template},
        )

        # Run the query through the QA chain
        result = qa_chain.run(query)
        st.write("Answer:", result)
else:
    st.error("Failed to extract content from the document.")