# pdf-chatter / src/streamlit_app.py
# Streamlit app: upload a PDF/DOCX, get a summary, and ask questions about it.
import streamlit as st
import pdfplumber
import docx
from langchain.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import LLMChain
import os
# Hugging Face Spaces only guarantees that /tmp is writable, so point
# Streamlit's home and cache directories there before the app starts.
_STREAMLIT_HOME = "/tmp/.streamlit"
_STREAMLIT_CACHE = os.path.join(_STREAMLIT_HOME, "cache")
os.environ["STREAMLIT_HOME"] = _STREAMLIT_HOME
os.environ["STREAMLIT_CACHE_DIR"] = _STREAMLIT_CACHE
# Create the cache directory (and its parent) if it is not already present.
os.makedirs(_STREAMLIT_CACHE, exist_ok=True)
def extract_text_from_docx(uploaded_file):
    """Return all paragraph text of a .docx file, joined with newlines.

    `uploaded_file` is a file-like object (Streamlit UploadedFile) that
    python-docx can open directly.
    """
    document = docx.Document(uploaded_file)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)
# Configure the browser tab and overall page layout before rendering anything.
st.set_page_config(
page_title="Chat with PDF",
page_icon="πŸ“„",
layout="centered",
initial_sidebar_state="expanded"
)
# Inject custom CSS: green-themed gradients, styled boxes for the summary,
# question and info panels, and restyled Streamlit inputs/buttons.
st.markdown("""
<style>
body, .main {
background: linear-gradient(135deg, #e0eafc 0%, #cfdef3 100%);
}
.stApp {
background: linear-gradient(135deg, #e8f5e9 0%, #d0f0d0 100%);
}
.custom-header {
font-size: 2.5em;
font-weight: bold;
color: #2d3a4a;
text-align: center;
margin-bottom: 0.2em;
letter-spacing: 2px;
text-shadow: 1px 1px 8px #b2bec3;
}
.custom-subtitle {
font-size: 1.2em;
color: #006400;
text-align: center;
margin-bottom: 2em;
}
.summary-box {
background: #f7faff;
border-left: 8px solid #006400;
border-radius: 12px;
padding: 1.2em 1.5em;
margin-bottom: 1.5em;
box-shadow: 0 2px 12px #dbeafe;
}
.question-box {
background: #e8f5e9;
border-left: 8px solid #2e7d32;
border-radius: 12px;
padding: 1.2em 1.5em;
margin-bottom: 1.5em;
box-shadow: 0 2px 12px #a5d6a7;
}
.custom-info-box {
background-color: #e8f5e9; /* Light green background */
color: #1b5e20; /* Dark green text */
border-left: 8px solid #1b5e20;
padding: 1em;
border-radius: 8px;
font-size: 1.1em;
font-weight: bold;
margin-top: 1em;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05);
}
.stTextInput > label {
font-size: 1.2em;
color: #2e7d32;
font-weight: bold;
}
.stButton > button {
background: linear-gradient(90deg, #2e7d32 0%, #e8f5e9 100%);
color: white;
font-size: 1.1em;
border-radius: 8px;
padding: 0.5em 2em;
border: none;
box-shadow: 0 2px 8px #dbeafe;
transition: background 0.3s;
}
.stButton > button:hover {
background: linear-gradient(270deg, #2e7d32 0%, #e8f5e9 100%);
}
.stMarkdown {
background-color: #e8f5e9; /* light green background */
color: #1b5e20; /* dark green text */
font-weight: bold;
font-size: 1.1em;
border-radius: 10px;
padding: 10px;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
}
</style>
""", unsafe_allow_html=True)
# Render the page title and subtitle using the CSS classes defined above.
st.markdown('<div class="custom-header">πŸ“„ Chat with your PDF/DOCX</div>', unsafe_allow_html=True)
st.markdown('<div class="custom-subtitle">Upload your document and instantly get a summary. Ask anything about its content!</div>', unsafe_allow_html=True)
uploaded_file = st.file_uploader("Choose a file (PDF or DOCX)", type=["pdf", "docx"])

if uploaded_file:
    # --- Text extraction -------------------------------------------------
    text = ""
    file_type = uploaded_file.name.split(".")[-1].lower()
    if file_type == "pdf":
        with pdfplumber.open(uploaded_file) as pdf:
            for page in pdf.pages:
                # extract_text() returns None for image-only pages.
                text += page.extract_text() or ""
    elif file_type == "docx":
        text = extract_text_from_docx(uploaded_file)
    else:
        # BUG FIX: previously execution fell through after this error and
        # the app summarized an empty string. Stop the rerun instead.
        st.error("Unsupported file type. Please upload a PDF or DOCX.")
        st.stop()

    if not text.strip():
        # e.g. a scanned/image-only PDF with no extractable text layer.
        st.error("No extractable text was found in this file.")
        st.stop()

    # --- LLM setup -------------------------------------------------------
    # SECURITY FIX: the API key was hard-coded in source (and therefore
    # leaked). Read it from the environment / Space secrets instead.
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        st.error("GOOGLE_API_KEY is not set. Add it as an environment variable or Space secret.")
        st.stop()

    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        api_key=api_key,
    )

    # BUG FIX: document text is interpolated into the prompt template, so
    # literal { } in the document would be treated as template variables
    # and crash formatting. Escape them first.
    safe_text = text.replace("{", "{{").replace("}", "}}")
    system_prompt = f"Here is the content of the PDF:\n{safe_text}\nAnswer the user's question based on this content."
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("human", "{user_query}"),
    ])
    chain = LLMChain(llm=llm, prompt=prompt)

    # --- Summary ---------------------------------------------------------
    # PERF FIX: Streamlit reruns this script on every interaction, so the
    # summary was regenerated (one LLM call) on every question. Cache it
    # in session_state, keyed by the uploaded file's name.
    if st.session_state.get("summary_file") != uploaded_file.name:
        summary_prompt = ChatPromptTemplate.from_messages([
            ("system", "Summarize the following document in a concise paragraph so the user can easily understand its main points."),
            ("human", "{user_query}"),
        ])
        summary_chain = LLMChain(llm=llm, prompt=summary_prompt)
        with st.spinner("Generating summary..."):
            summary_response = summary_chain.invoke({"user_query": text})
        summary = summary_response["text"] if "text" in summary_response else summary_response
        st.session_state["summary_file"] = uploaded_file.name
        st.session_state["summary_text"] = summary

    st.markdown(
        f'<div class="summary-box"><b>πŸ“‘ PDF/DOCX Summary</b><br>{st.session_state["summary_text"]}</div>',
        unsafe_allow_html=True,
    )
    st.success("File loaded successfully! You can now ask questions.")

    # --- Q&A -------------------------------------------------------------
    st.markdown('<div class="question-box"><b>Ask a question about your file:</b></div>', unsafe_allow_html=True)
    user_query = st.text_input("Type your question here...", "What is the main topic of the document?")
    if st.button("Get Answer") and user_query:
        with st.spinner("Thinking..."):
            response = chain.invoke({"user_query": user_query})
        answer = response["text"] if "text" in response else response
        st.markdown(f'<div class="question-box"><b>Answer:</b> {answer}</div>', unsafe_allow_html=True)
else:
    st.markdown('<div class="custom-info-box">Please upload a PDF to get started.</div>', unsafe_allow_html=True)