Spaces:

chburhan64
/

PDF_Agent

Sleeping

App Files Files Community

chburhan64 committed on Jul 13, 2025

Commit

8db5210

verified ·

1 Parent(s): b2468ac

Create app.py

Browse files

Files changed (1) hide show

app.py +105 -0

app.py ADDED Viewed

	@@ -0,0 +1,105 @@

+import streamlit as st
+import os
+import time
+import tempfile
+from dotenv import load_dotenv
+from langchain_groq import ChatGroq
+from langchain_community.vectorstores import FAISS
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain.chains import create_retrieval_chain
+from langchain_core.prompts import ChatPromptTemplate
+from langchain.docstore.document import Document
+from sentence_transformers import SentenceTransformer
+import numpy as np
+import faiss
+import PyPDF2
+# Load environment variables
+load_dotenv()
+groq_api_key = os.getenv("GROQ_API_KEY")
+st.set_page_config(page_title="Document Q&A with Llama3")
+st.title("📄 Document Q&A with Llama3 (via Groq)")
+# Load the LLM
+llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")
+# Prompt template
+prompt = ChatPromptTemplate.from_template("""
+Answer the question based only on the provided context.
+<context>
+{context}
+</context>
+Question: {input}
+""")
+# Load sentence-transformers model
+embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
+# Function to extract and split text from uploaded PDFs
+def process_pdfs(uploaded_files):
+    docs = []
+    for file in uploaded_files:
+        reader = PyPDF2.PdfReader(file)
+        text = ""
+        for page in reader.pages:
+            text += page.extract_text() or ""
+        docs.append(Document(page_content=text, metadata={"source": file.name}))
+    # Split into chunks
+    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+    split_docs = splitter.split_documents(docs)
+    return split_docs
+# Create FAISS index
+def create_vector_store(documents):
+    texts = [doc.page_content for doc in documents]
+    embeddings = embedding_model.encode(texts)
+    index = faiss.IndexFlatL2(embeddings.shape[1])
+    index.add(np.array(embeddings))
+    vectorstore = FAISS(embedding_function=lambda x: embedding_model.encode([x])[0],
+                        index=index,
+                        documents=documents)
+    return vectorstore
+# File uploader
+uploaded_files = st.file_uploader("📁 Upload one or more PDF files", type=["pdf"], accept_multiple_files=True)
+# Button to process documents
+if uploaded_files and st.button("📚 Process Documents"):
+    with st.spinner("Processing PDFs and creating vector store..."):
+        documents = process_pdfs(uploaded_files)
+        st.session_state.vectors = create_vector_store(documents)
+        st.success("✅ Documents processed and vector store created!")
+# Question input
+query = st.text_input("💬 Ask a question about the uploaded documents")
+# Answering
+if query and "vectors" in st.session_state:
+    document_chain = create_stuff_documents_chain(llm, prompt)
+    retriever = st.session_state.vectors.as_retriever()
+    retrieval_chain = create_retrieval_chain(retriever, document_chain)
+    with st.spinner("Generating answer..."):
+        start = time.process_time()
+        response = retrieval_chain.invoke({'input': query})
+        end = time.process_time()
+    st.markdown("### ✅ Answer:")
+    st.write(response['answer'])
+    st.markdown(f"⏱️ Response time: {end - start:.2f} seconds")
+    with st.expander("🔍 Document Chunks Used"):
+        for i, doc in enumerate(response.get("context", [])):
+            st.write(doc.page_content)
+            st.write("---")
+elif query and "vectors" not in st.session_state:
+    st.warning("⚠️ Please upload and process PDFs first.")