File size: 1,511 Bytes
77fa265
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# Standard library
import os
import tempfile

# Third-party
import PyPDF2  # NOTE(review): appears unused here — confirm before removing
import streamlit as st
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import Groq
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

# Load environment variables (expects GROQ_API_KEY in .env or the environment).
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Streamlit UI
st.title("📄 PDF Q&A Assistant")
st.write("Upload a PDF and ask questions about its content!")

# Fail fast with a clear message instead of an opaque auth error deep
# inside the LLM call.
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY is not set. Add it to your .env file.")
    st.stop()

# Upload PDF
uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])

if uploaded_file:
    # PyPDFLoader expects a filesystem path, not Streamlit's in-memory
    # UploadedFile, so spill the upload to a temporary file first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.getvalue())
        tmp_path = tmp.name

    try:
        # Extract text from the PDF (one Document per page).
        documents = PyPDFLoader(tmp_path).load()
    finally:
        # Remove the temp copy whether or not parsing succeeded.
        os.unlink(tmp_path)

    # Split text into overlapping chunks so retrieval granularity stays small.
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = text_splitter.split_documents(documents)

    # Embed the chunks and index them in an in-memory FAISS vector store.
    embeddings = HuggingFaceEmbeddings()
    vector_db = FAISS.from_documents(docs, embeddings)
    retriever = vector_db.as_retriever()

    # Load Groq API model for Q&A.
    # NOTE(review): langchain.llms exposes no `Groq` class — the maintained
    # integration is `langchain_groq.ChatGroq`. Confirm this import actually
    # resolves in the deployed environment.
    llm = Groq(api_key=GROQ_API_KEY, model_name="mixtral-8x7b-32768")  # Change model as needed

    # RetrievalQA must be built via its factory classmethod; __init__ does
    # not accept an `llm` keyword.
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

    # User input for questions
    query = st.text_input("Ask a question about the PDF:")
    if query:
        answer = qa_chain.run(query)
        st.write("**Answer:**", answer)