# PDF-Assistant / app.py
# Header captured from the repository web view:
#   uploader: absiitr | revision 28a8426 ("Controlled Hallucination") | size 10.9 kB
import gc
import html
import logging
import os
import tempfile
import uuid

import streamlit as st
import torch
from groq import Groq, APIError
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
# ---------------- CONFIGURATION ----------------
logging.basicConfig(level=logging.INFO)

GROQ_MODEL = "openai/gpt-oss-120b"

# ---------------- STREAMLIT UI SETUP ----------------
# The page config is applied before anything else is drawn: the st.error() /
# st.warning() calls in the client set-up below are Streamlit commands too, and
# set_page_config() is documented as needing to be the first one on a page.
st.set_page_config(
    page_title="PDF Assistant",
    page_icon="📘",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Load the API key from the app secrets, falling back to the environment.
# Accessing st.secrets raises when no secrets file is available, which would
# otherwise prevent the environment-variable fallback from ever being used.
try:
    GROQ_API_KEY = st.secrets.get("GROQ_API_KEY", os.environ.get("GROQ_API_KEY"))
except Exception:  # boundary: any secrets-loading failure falls back to env
    logging.info("Streamlit secrets unavailable; falling back to environment.")
    GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Initialize the Groq client; `client` stays None when no key is configured or
# construction fails, and the rest of the script checks for that.
client = None
if GROQ_API_KEY:
    try:
        client = Groq(api_key=GROQ_API_KEY)
        logging.info("✅ Groq client initialized successfully.")
    except Exception as e:
        st.error(f"❌ Failed to initialize Groq client: {e}")
        client = None
else:
    st.warning("⚠️ GROQ_API_KEY not found. Please add it to Hugging Face secrets.")
# ---------------- CSS ----------------
# Stylesheet injected into the page on every run. It locks page-level
# scrolling, hides the default header/footer and sidebar toggles, pins the
# sidebar and the custom header, and styles the chat bubbles and input form.
_PAGE_STYLESHEET = """
<style>
/* 1. GLOBAL RESET & SCROLL LOCK */
html, body {
overflow: hidden;
height: 100%;
margin: 0;
}
/* 2. HIDE DEFAULT STREAMLIT ELEMENTS & SIDEBAR TOGGLES */
header[data-testid="stHeader"] {
display: none;
}
footer {
display: none;
}
/* Hide the 'Close Sidebar' (<<) button inside the sidebar */
section[data-testid="stSidebar"] > div > div:first-child {
display: none;
}
/* Hide the 'Open Sidebar' (>) button on main screen */
[data-testid="collapsedControl"] {
display: none;
}
/* 3. SIDEBAR STYLING (INDEPENDENT LEFT PANEL SCROLL) */
[data-testid="stSidebar"] {
position: fixed;
top: 0;
left: 0;
height: 100vh;
width: 20rem;
overflow-y: auto !important;
z-index: 99999;
}
[data-testid="stSidebar"]::-webkit-scrollbar {
width: 6px;
}
[data-testid="stSidebar"]::-webkit-scrollbar-thumb {
background: #2d3748;
border-radius: 3px;
}
/* 4. FIXED HEADER STYLING */
.fixed-header {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 6rem;
background-color: #0e1117; /* Hardcoded Dark Background */
z-index: 99998;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
border-bottom: 1px solid rgba(255, 255, 255, 0.1);
}
/* 5. MAIN CONTENT SCROLLING (INDEPENDENT RIGHT PANEL SCROLL) */
.main .block-container {
margin-top: 6rem;
height: calc(100vh - 6rem);
overflow-y: auto;
padding-top: 1rem;
padding-bottom: 5rem;
}
.main .block-container::-webkit-scrollbar {
width: 8px;
}
.main .block-container::-webkit-scrollbar-thumb {
background: #2d3748;
border-radius: 4px;
}
/* 6. SIDEBAR BUTTON STYLING */
[data-testid="stSidebar"] .stButton button {
width: 100%;
border-radius: 8px;
font-weight: 600;
margin-bottom: 6px;
}
/* 7. HIDE UPLOADED FILE LIST & NAME */
[data-testid='stFileUploaderFile'] {
display: none;
}
section[data-testid="stFileUploader"] ul {
display: none;
}
section[data-testid="stFileUploader"] small {
display: none;
}
/* 8. CHAT BUBBLES */
.chat-user {
background: #2d3748;
padding: 12px;
border-radius: 10px 10px 2px 10px;
margin: 6px 0 6px auto;
max-width: 85%;
text-align: right;
color: #f0f2f6;
}
.chat-bot {
background: #1e3a8a;
padding: 12px;
border-radius: 10px 10px 10px 2px;
margin: 6px auto 6px 0;
max-width: 85%;
text-align: left;
color: #ffffff;
}
.sources {
display: none;
}
/* 9. TITLE TEXT */
/* UPDATED: Added color: #ffffff to ensure visibility on the dark header in Light Mode */
.title-text {
font-size: 2.5rem;
font-weight: 800;
margin: 0;
line-height: 1.2;
color: #ffffff !important;
}
.creator-text {
font-size: 1rem;
font-weight: 500;
color: #cccccc;
}
.creator-text a {
color: #4da6ff;
text-decoration: none;
}
/* 10. INPUT FORM STYLING */
[data-testid="stForm"] {
border: none;
padding: 0;
}
/* --- NEW: FIX FOR CHAT BUTTON IN LIGHT MODE --- */
/* If browser is in light mode, force the chat button to look clean (White bg, Dark text) */
@media (prefers-color-scheme: light) {
[data-testid="stFormSubmitButton"] > button {
background-color: #ffffff !important;
color: #000000 !important;
border: 1px solid #e2e8f0 !important;
}
[data-testid="stFormSubmitButton"] > button:hover {
background-color: #f7fafc !important;
border-color: #cbd5e0 !important;
color: #000000 !important;
}
}
</style>
"""
st.markdown(_PAGE_STYLESHEET, unsafe_allow_html=True)
# ---------------- FIXED HEADER ----------------
# Title banner styled by the `.fixed-header` CSS rules. The title icon was
# stored as mis-decoded bytes ("πŸ“˜"); it is restored to the intended
# blue-book emoji, matching the page_icon.
st.markdown("""
<div class="fixed-header">
<div class="title-text">📘 PDF Assistant</div>
<div class="creator-text">
by <a href="https://www.linkedin.com/in/abhishek-iitr/" target="_blank">Abhishek Saxena</a>
</div>
</div>
""", unsafe_allow_html=True)
# ---------------- SESSION STATE ----------------
# Seed each per-session key only when it is missing, so values set on earlier
# runs of the script are kept. The dict is rebuilt on every run, so the empty
# chat list is always a fresh object.
_session_defaults = {
    "chat": [],
    "vectorstore": None,
    "retriever": None,
    "uploaded_file_name": None,
    "uploader_key": 0,
}
for _state_key, _initial_value in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# ---------------- FUNCTIONS ----------------
def clear_chat_history():
    """Replace the stored conversation with a new empty list."""
    st.session_state["chat"] = []
def clear_memory():
    """Forget the indexed PDF and release whatever memory can be reclaimed.

    Clears the vector store, retriever and uploaded file name held in the
    session, increments ``uploader_key`` (the value used as the file
    uploader's widget key), then runs the garbage collector and, when CUDA is
    available, empties the torch CUDA cache.
    """
    state = st.session_state
    for slot in ("vectorstore", "retriever", "uploaded_file_name"):
        state[slot] = None
    state["uploader_key"] = state["uploader_key"] + 1

    gc.collect()
    cuda_present = torch.cuda.is_available()
    if cuda_present:
        torch.cuda.empty_cache()
def process_pdf(uploaded_file):
    """Split an uploaded PDF into chunks and index them in a new Chroma store.

    The upload is written to a temporary file so ``PyPDFLoader`` can read it,
    split into 800-character chunks (60 overlap), embedded on CPU with
    ``all-MiniLM-L6-v2`` and stored in an in-memory Chroma collection. The
    resulting vector store and a top-5 retriever are saved in
    ``st.session_state``.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the PDF
            bytes (as returned by ``st.file_uploader``).

    Returns:
        The number of chunks indexed, or ``None`` when processing failed (the
        reason is shown with ``st.error``).
    """
    path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(uploaded_file.getvalue())
            path = tmp.name

        docs = PyPDFLoader(path).load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=60)
        chunks = splitter.split_documents(docs)
        if not chunks:
            # e.g. a scanned/image-only PDF: nothing to embed, so fail with a
            # clear message instead of an opaque vector-store error.
            st.error("Error processing PDF: no extractable text was found.")
            return None

        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True},
        )
        # A unique collection per upload: without an explicit name every call
        # writes to the same default in-memory collection, so chunks from
        # earlier PDFs would keep showing up in later retrievals.
        vectorstore = Chroma.from_documents(
            chunks,
            embeddings,
            collection_name=f"pdf-{uuid.uuid4().hex}",
        )
        retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

        previous_store = st.session_state.get("vectorstore")
        st.session_state.vectorstore = vectorstore
        st.session_state.retriever = retriever

        # Best effort: drop the collection of the PDF being replaced.
        if previous_store is not None:
            try:
                previous_store.delete_collection()
            except Exception:
                logging.warning("Could not delete previous collection.", exc_info=True)

        return len(chunks)
    except Exception as e:
        st.error(f"Error processing PDF: {str(e)}")
        return None
    finally:
        # Remove the temporary copy on success and on failure alike.
        if path and os.path.exists(path):
            os.unlink(path)
def ask_question(question):
    """Answer a question from the indexed PDF using the Groq chat model.

    Retrieves matching chunks through the session's retriever, builds a prompt
    that restricts the model to that context, and requests a completion at
    temperature 0.

    Args:
        question: The user's question text.

    Returns:
        A ``(answer, num_docs, error)`` tuple. On success ``answer`` is the
        stripped model reply, ``num_docs`` the number of retrieved chunks and
        ``error`` is ``None``. On failure ``answer`` is ``None`` and ``error``
        holds a message for the user.
    """
    if not client:
        return None, 0, "Groq client is not initialized."
    if not st.session_state.retriever:
        return None, 0, "Upload PDF first."
    try:
        docs = st.session_state.retriever.invoke(question)
        context = "\n\n".join(d.page_content for d in docs)
        prompt = f"""You are a strict RAG Q&A assistant who answers only from user's input PDF.
Use only below CONTEXT to answer the below mentioned QUESTION
If the answer is not found, reply: "I cannot find this in the PDF."
CONTEXT = {context}
QUESTION = {question}
Answer on your behalf, write answer in a presentable manner (proper formatting) like point-wise with numbering or bullet points accordingly!"""
        response = client.chat.completions.create(
            model=GROQ_MODEL,
            messages=[
                {"role": "system", "content": "Use only the PDF content."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.0,
        )
        # The reply text may be missing or blank; report that explicitly
        # rather than failing on None.strip() or returning an empty answer
        # that the caller would display as "Error: None".
        answer = (response.choices[0].message.content or "").strip()
        if not answer:
            return None, len(docs), "The model returned an empty response."
        return answer, len(docs), None
    except Exception as e:
        # UI boundary: keep the app running, but leave a traceback in the log.
        logging.exception("Question answering failed")
        return None, 0, f"Error: {str(e)}"
# ---------------- SIDEBAR ----------------
# Sidebar: reset buttons plus the PDF uploader. The emoji in the labels were
# stored as mis-decoded bytes and are restored to the intended characters.
with st.sidebar:
    st.write("")
    if st.button("🗑️ Clear Chat History", use_container_width=True):
        clear_chat_history()
    # clear_memory runs as a callback, i.e. before this rerun of the script,
    # so the uploader below is already created with the new key.
    if st.button("🔥 Clear PDF Memory", on_click=clear_memory, use_container_width=True):
        st.success("Memory Cleared!")
    st.markdown("---")
    upload_label = "✅ PDF Uploaded!" if st.session_state.uploaded_file_name else "Upload PDF"
    uploaded = st.file_uploader(
        upload_label, type=["pdf"], key=st.session_state.uploader_key, label_visibility="collapsed"
    )
    if uploaded:
        if uploaded.name != st.session_state.uploaded_file_name:
            # A different file name: drop the old conversation and index the
            # new PDF. The name is recorded only after indexing succeeds.
            st.session_state.uploaded_file_name = None
            st.session_state.chat = []
            with st.spinner(f"Processing '{uploaded.name}'..."):
                chunks = process_pdf(uploaded)
            if chunks:
                st.session_state.uploaded_file_name = uploaded.name
                st.success("✅ PDF Processed!")
            else:
                st.error("❌ Failed.")
        else:
            st.success(f"✅ **Active:** `{uploaded.name}`")
    else:
        st.warning("⬆️ Upload a PDF to start chatting!")
# ---------------- INPUT AREA ----------------
# The form is disabled until a PDF has been indexed and a Groq client exists.
disabled_input = st.session_state.uploaded_file_name is None or client is None
# Input Form
with st.form(key='chat_form', clear_on_submit=True):
    col_input, col_btn = st.columns([0.85, 0.15], gap="small")
    with col_input:
        user_question = st.text_input(
            "Ask a question",
            placeholder="Ask a question about the loaded PDF...",
            label_visibility="collapsed",
            disabled=disabled_input
        )
    with col_btn:
        submit_btn = st.form_submit_button("➤", disabled=disabled_input, use_container_width=True)

# Ignore submissions that contain only whitespace instead of sending an empty
# question to the model.
user_question = user_question.strip() if user_question else ""
if submit_btn and user_question:
    st.session_state.chat.append(("user", user_question))
    with st.spinner("Thinking..."):
        answer, _num_sources, error = ask_question(user_question)
    if answer:
        st.session_state.chat.append(("bot", answer))
    else:
        st.session_state.chat.append(("bot", f"🔴 **Error:** {error}"))
    # Rerun so the history section renders the updated conversation.
    st.rerun()
# ---------------- CHAT HISTORY (REVERSED) ----------------
# Newest message first. Messages come from the user and from the model (whose
# output is derived from the uploaded PDF), so the text is HTML-escaped before
# being placed inside the bubble markup that is rendered with
# unsafe_allow_html=True. Quotes are left alone since the text is element
# content, not an attribute value.
# NOTE(review): escaping also neutralises any literal HTML tags the model
# emits (e.g. <br>) — confirm that is acceptable for the expected answers.
if st.session_state.chat:
    st.markdown("---")
    for role, msg in reversed(st.session_state.chat):
        bubble_class = "chat-user" if role == "user" else "chat-bot"
        safe_msg = html.escape(str(msg), quote=False)
        st.markdown(f"<div class='{bubble_class}'>{safe_msg}</div>", unsafe_allow_html=True)