Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import os | |
| import tempfile | |
| from dotenv import load_dotenv | |
| from langchain_groq import ChatGroq | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.vectorstores import FAISS | |
| from langchain.chains import RetrievalQA | |
| from Datapreprocessing import PreprocessingData | |
| from pdfparsing import ExtractDatafrompdf | |
| load_dotenv() | |
| GROQ_API_KEY = os.getenv("GROQ_API_KEY") | |
| st.set_page_config(page_title="π Chat with PDF", layout="wide") | |
| # Sidebar for PDF Upload | |
| st.sidebar.title("π Upload your PDF") | |
| uploaded_file = st.sidebar.file_uploader("Choose a PDF", type="pdf") | |
| # LLM and Embeddings - cached | |
| def get_embeddings(): | |
| return HuggingFaceEmbeddings() | |
| def get_llm(): | |
| return ChatGroq(api_key=GROQ_API_KEY, model="gemma2-9b-it", temperature=0.2) | |
| # Build Retrieval Chain | |
| def get_chain(retriever): | |
| llm = get_llm() | |
| return RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff") | |
| # PDF processing pipeline | |
| def process_pdf_and_create_chain(uploaded_file): | |
| with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp: | |
| tmp.write(uploaded_file.read()) | |
| tmp_path = tmp.name | |
| documents = ExtractDatafrompdf(tmp_path) | |
| chunks = PreprocessingData(documents) | |
| embedder = get_embeddings() | |
| retriever = FAISS.from_documents(chunks, embedder).as_retriever(search_type="similarity", search_kwargs={"k": 1}) | |
| return get_chain(retriever) | |
| # Main UI | |
| st.title("π Ask Questions About Your PDF") | |
| if uploaded_file: | |
| if "chain" not in st.session_state: | |
| st.success("PDF uploaded successfully! Processing...") | |
| with st.spinner("Extracting and chunking PDF..."): | |
| st.session_state.chain = process_pdf_and_create_chain(uploaded_file) | |
| st.success("Ready to chat with your PDF!") | |
| else: | |
| st.sidebar.info("Using cached PDF session.") | |
| user_query = st.text_input("Ask a question about your PDF:") | |
| submit = st.button("Search") | |
| if submit: | |
| if user_query: | |
| with st.spinner("Generating answer..."): | |
| result = st.session_state.chain.invoke({"query": user_query}) | |
| st.markdown("### π Answer:") | |
| st.write(result["result"]) | |
| else: | |
| st.info("π€ Upload a PDF from the sidebar to begin.") | |