# Author: snehakingrani
# Commit: "Create app.py" (77fa265, verified)
import os
import tempfile

import PyPDF2
import streamlit as st
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import Groq
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
# Load variables from a .env file into the process environment, then read
# the Groq credential from it (None when the variable is not defined).
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# Streamlit UI: page title and a one-line usage hint.
# The title's leading emoji was stored as mojibake; it is the "page facing
# up" emoji (U+1F4C4).
st.title("📄 PDF Q&A Assistant")
st.write("Upload a PDF and ask questions about its content!")
# File picker restricted to .pdf files; the result is falsy until the user
# has uploaded something.
uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
def _load_pdf_chunks(upload):
    """Return the uploaded PDF as a list of split LangChain documents.

    PyPDFLoader reads from a filesystem path, so the in-memory upload is
    first written to a temporary file, which is removed once loading is done.
    """
    # delete=False + explicit close: the file must be reopenable by the
    # loader, which is not possible on Windows while it is still open here.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_pdf:
        tmp_pdf.write(upload.getvalue())
        pdf_path = tmp_pdf.name
    try:
        pages = PyPDFLoader(pdf_path).load()
    finally:
        os.remove(pdf_path)

    splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    return splitter.split_documents(pages)


def _build_qa_chain(chunks):
    """Embed *chunks* into a FAISS index and wrap it in a RetrievalQA chain."""
    vector_db = FAISS.from_documents(chunks, HuggingFaceEmbeddings())
    # NOTE(review): langchain.llms does not appear to export a `Groq` class;
    # the maintained integration is `ChatGroq` from the separate
    # `langchain-groq` package. Confirm and switch the import, and confirm
    # that the model id below is one Groq currently serves.
    llm = Groq(api_key=GROQ_API_KEY, model_name="mixtral-8x7b")  # Change model as needed
    # RetrievalQA cannot be constructed directly from an LLM; the
    # from_chain_type factory builds the document-combining chain for us.
    return RetrievalQA.from_chain_type(llm=llm, retriever=vector_db.as_retriever())


if uploaded_file is not None:
    docs = _load_pdf_chunks(uploaded_file)
    if not GROQ_API_KEY:
        # Surface a readable message instead of failing inside the LLM client.
        st.error("GROQ_API_KEY is not set. Add it to the environment or a .env file.")
    elif not docs:
        # Scanned/image-only PDFs yield no text; an empty list cannot be indexed.
        st.warning("No extractable text was found in this PDF.")
    else:
        qa_chain = _build_qa_chain(docs)
        # User input for questions
        query = st.text_input("Ask a question about the PDF:")
        if query:
            answer = qa_chain.run(query)
            st.write("**Answer:**", answer)