import streamlit as st
import pdfplumber
import docx
from langchain.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import LLMChain
import os
# Use /tmp (the only guaranteed writable location on Hugging Face)
# NOTE(review): `streamlit` is imported at the top of this file BEFORE these
# env vars are set — Streamlit may have already read its config paths at
# import time. Consider moving these lines above `import streamlit` — confirm.
os.environ["STREAMLIT_HOME"] = "/tmp/.streamlit"
os.environ["STREAMLIT_CACHE_DIR"] = "/tmp/.streamlit/cache"
# Make sure the directories exist
os.makedirs("/tmp/.streamlit/cache", exist_ok=True)
def extract_text_from_docx(uploaded_file):
    """Return the full text of a .docx file as newline-joined paragraphs.

    Parameters
    ----------
    uploaded_file : file-like object
        A binary stream readable by python-docx (e.g. a Streamlit
        UploadedFile).

    Returns
    -------
    str
        The text of every paragraph, joined with "\\n". Empty paragraphs
        are preserved as empty lines.
    """
    # python-docx accepts a file-like object directly; no temp file needed.
    doc = docx.Document(uploaded_file)
    return "\n".join(para.text for para in doc.paragraphs)
# Page-level Streamlit config; must be the first st.* call in the script.
st.set_page_config(
page_title="Chat with PDF",
page_icon="📄",
layout="centered",
initial_sidebar_state="expanded"
)
# Page header. The original inline HTML/CSS was lost in a whitespace-mangled
# paste (unterminated string literals); plain Markdown preserves the visible
# text without the broken markup.
st.markdown("# 📄 Chat with PDF", unsafe_allow_html=True)
st.markdown(
    "Upload your document and instantly get a summary. "
    "Ask anything about its content!",
    unsafe_allow_html=True,
)

# Accept only the two formats the extraction code below can handle.
uploaded_file = st.file_uploader("Choose a file (PDF or DOCX)", type=["pdf", "docx"])
text = ""
# --- Main flow: extract text, build chains, summarize, answer questions. ---
if uploaded_file:
    file_type = uploaded_file.name.split(".")[-1].lower()
    if file_type == "pdf":
        with pdfplumber.open(uploaded_file) as pdf:
            # extract_text() returns None for image-only pages; coerce to "".
            text = "".join(page.extract_text() or "" for page in pdf.pages)
    elif file_type == "docx":
        text = extract_text_from_docx(uploaded_file)
    else:
        st.error("Unsupported file type. Please upload a PDF or DOCX.")
        # BUGFIX: previously execution fell through and built prompts/chains
        # on an empty document; halt the script run instead.
        st.stop()

    # System prompt grounding every answer in the uploaded document.
    system_prompt = (
        f"Here is the content of the PDF:\n{text}\n"
        "Answer the user's question based on this content."
    )
    # BUGFIX: escape literal braces so document text is not interpreted as
    # template variables by ChatPromptTemplate (a '{' in the document would
    # otherwise raise a missing-variable error).
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt.replace("{", "{{").replace("}", "}}")),
        ("human", "{user_query}"),
    ])

    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        # SECURITY FIX: never commit API keys in source. Read from the
        # environment (set GOOGLE_API_KEY in the Space/host secrets).
        api_key=os.environ.get("GOOGLE_API_KEY"),
    )
    chain = LLMChain(llm=llm, prompt=prompt)

    # Generate and show a one-paragraph summary immediately after upload.
    summary_prompt = ChatPromptTemplate.from_messages([
        ("system", "Summarize the following document in a concise paragraph so the user can easily understand its main points."),
        ("human", "{user_query}"),
    ])
    summary_chain = LLMChain(llm=llm, prompt=summary_prompt)
    with st.spinner("Generating summary..."):
        summary_response = summary_chain.invoke({"user_query": text})
    # LLMChain.invoke returns {"text": ...}; fall back to the raw response.
    summary = summary_response["text"] if "text" in summary_response else summary_response
    st.markdown(f"### 📑 PDF/DOCX Summary\n\n{summary}", unsafe_allow_html=True)
    st.success("File loaded successfully! You can now ask questions.")

    st.markdown("#### Ask a question about your file:", unsafe_allow_html=True)
    user_query = st.text_input("Type your question here...", "What is the main topic of the document?")
    if st.button("Get Answer") and user_query:
        with st.spinner("Thinking..."):
            response = chain.invoke({"user_query": user_query})
        answer = response["text"] if "text" in response else response
        st.markdown(f"**Answer:** {answer}", unsafe_allow_html=True)

if not uploaded_file:
    st.markdown("Please upload a PDF to get started.", unsafe_allow_html=True)