"""Streamlit app: upload a PDF and ask questions about its content.

Flow: the uploaded PDF is loaded page by page, split into overlapping
chunks, embedded into an in-memory FAISS index, and queried through a
RetrievalQA chain backed by a Groq-hosted model.
"""

import os
import tempfile

import PyPDF2  # kept from the original import list; not referenced directly here
import streamlit as st
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
# NOTE(review): `langchain.llms` may not export `Groq` in the installed
# LangChain version; the Groq integration is normally shipped in the separate
# `langchain-groq` package (`ChatGroq`). Confirm, and switch if this fails.
from langchain.llms import Groq
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

# Load environment variables
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")


def _load_pdf_documents(uploaded_file):
    """Return the uploaded PDF's content as a list of LangChain documents.

    PyPDFLoader takes a file path, while Streamlit provides the upload as an
    in-memory file object, so the bytes are written to a temporary ``.pdf``
    file first. The temporary file is removed once loading has finished,
    whether or not loading succeeded.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``.

    Returns:
        The documents produced by ``PyPDFLoader.load()``.
    """
    # delete=False so the file can be reopened by path after the handle is
    # closed (required on Windows); cleanup is done explicitly below.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(uploaded_file.getvalue())
        tmp_path = tmp.name
    try:
        return PyPDFLoader(tmp_path).load()
    finally:
        os.remove(tmp_path)


def _build_qa_chain(documents, api_key):
    """Build a RetrievalQA chain over ``documents``.

    Args:
        documents: Documents to index, as returned by the PDF loader.
        api_key: Groq API key passed to the LLM wrapper.

    Returns:
        A RetrievalQA chain, or ``None`` when splitting yields no chunks
        (for example, a PDF without extractable text).
    """
    # Split text into chunks for processing
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_documents(documents)
    if not chunks:
        # Nothing to embed; let the caller report this instead of handing an
        # empty list to the vector store.
        return None

    # Create embeddings and store in FAISS vector database
    embeddings = HuggingFaceEmbeddings()
    vector_db = FAISS.from_documents(chunks, embeddings)
    retriever = vector_db.as_retriever()

    # Load Groq API model for Q&A
    # NOTE(review): confirm "mixtral-8x7b" is a model id Groq accepts.
    llm = Groq(api_key=api_key, model_name="mixtral-8x7b")  # Change model as needed

    # RetrievalQA is assembled through its factory; the bare constructor
    # expects a combine-documents chain rather than an LLM.
    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever)


# Streamlit UI
st.title("📄 PDF Q&A Assistant")
st.write("Upload a PDF and ask questions about its content!")

# Upload PDF
uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])

# NOTE: Streamlit re-executes this script on every interaction, so the index
# below is rebuilt for each question. Consider caching if that becomes slow.
if uploaded_file:
    if not GROQ_API_KEY:
        st.error("GROQ_API_KEY is not set. Add it to your environment or .env file.")
        st.stop()

    # Extract text from PDF and prepare the retrieval chain
    documents = _load_pdf_documents(uploaded_file)
    qa_chain = _build_qa_chain(documents, GROQ_API_KEY)
    if qa_chain is None:
        st.error("No extractable text was found in this PDF.")
        st.stop()

    # User input for questions
    query = st.text_input("Ask a question about the PDF:")

    if query:
        answer = qa_chain.run(query)
        st.write("**Answer:**", answer)