"""Streamlit app: upload a PDF/DOCX, get an instant summary, then ask
questions about its content via Google Gemini (LangChain)."""

import os

# Streamlit must see these BEFORE it is imported — /tmp is the only
# guaranteed-writable location on Hugging Face Spaces.
os.environ["STREAMLIT_HOME"] = "/tmp/.streamlit"
os.environ["STREAMLIT_CACHE_DIR"] = "/tmp/.streamlit/cache"
os.makedirs("/tmp/.streamlit/cache", exist_ok=True)

import streamlit as st
import pdfplumber
import docx
from langchain.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import LLMChain


def extract_text_from_docx(uploaded_file):
    """Return the plain text of a .docx file, paragraphs joined by newlines."""
    doc = docx.Document(uploaded_file)
    return "\n".join(para.text for para in doc.paragraphs)


def _extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every PDF page (image-only pages yield "")."""
    with pdfplumber.open(uploaded_file) as pdf:
        return "".join(page.extract_text() or "" for page in pdf.pages)


def _chain_text(response):
    """Normalize an LLMChain result: dict with a 'text' key, or the raw value."""
    if isinstance(response, dict) and "text" in response:
        return response["text"]
    return response


st.set_page_config(
    page_title="Chat with PDF",
    page_icon="📄",
    layout="centered",
    initial_sidebar_state="expanded",
)

st.markdown("<h2>📄 Chat with your PDF/DOCX</h2>", unsafe_allow_html=True)
st.markdown(
    "<p>Upload your document and instantly get a summary. "
    "Ask anything about its content!</p>",
    unsafe_allow_html=True,
)

uploaded_file = st.file_uploader("Choose a file (PDF or DOCX)", type=["pdf", "docx"])

text = ""
if uploaded_file:
    file_type = uploaded_file.name.split(".")[-1].lower()
    if file_type == "pdf":
        text = _extract_text_from_pdf(uploaded_file)
    elif file_type == "docx":
        text = extract_text_from_docx(uploaded_file)
    else:
        st.error("Unsupported file type. Please upload a PDF or DOCX.")

if uploaded_file and text:
    # SECURITY: never commit API keys to source control. Read the key from
    # the environment (set it as a Space/host secret named GOOGLE_API_KEY).
    api_key = os.environ.get("GOOGLE_API_KEY", "")
    if not api_key:
        st.error("GOOGLE_API_KEY is not configured. Set it in your environment.")
        st.stop()

    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        api_key=api_key,
    )

    # Pass the document as a template VARIABLE, not an f-string: raw document
    # text may contain '{' / '}' which ChatPromptTemplate would otherwise
    # misparse as template placeholders and crash on at invoke time.
    qa_prompt = ChatPromptTemplate.from_messages([
        (
            "system",
            "Here is the content of the PDF:\n{document}\n"
            "Answer the user's question based on this content.",
        ),
        ("human", "{user_query}"),
    ])
    qa_chain = LLMChain(llm=llm, prompt=qa_prompt)

    # Cache the summary per uploaded file: Streamlit reruns this whole script
    # on every widget interaction, and without the cache each rerun would
    # trigger a fresh (slow, billable) LLM call.
    if st.session_state.get("summary_for") != uploaded_file.name:
        summary_prompt = ChatPromptTemplate.from_messages([
            (
                "system",
                "Summarize the following document in a concise paragraph "
                "so the user can easily understand its main points.",
            ),
            ("human", "{user_query}"),
        ])
        summary_chain = LLMChain(llm=llm, prompt=summary_prompt)
        with st.spinner("Generating summary..."):
            summary_response = summary_chain.invoke({"user_query": text})
        st.session_state["summary"] = _chain_text(summary_response)
        st.session_state["summary_for"] = uploaded_file.name

    summary = st.session_state["summary"]
    st.markdown(
        f"<h4>📑 PDF/DOCX Summary</h4><p>{summary}</p>",
        unsafe_allow_html=True,
    )
    st.success("File loaded successfully! You can now ask questions.")

    st.markdown("<p>Ask a question about your file:</p>", unsafe_allow_html=True)
    user_query = st.text_input(
        "Type your question here...", "What is the main topic of the document?"
    )
    if st.button("Get Answer") and user_query:
        with st.spinner("Thinking..."):
            response = qa_chain.invoke({"document": text, "user_query": user_query})
        answer = _chain_text(response)
        st.markdown(f"<p><b>Answer:</b> {answer}</p>", unsafe_allow_html=True)

if not uploaded_file:
    st.markdown("<p>Please upload a PDF to get started.</p>", unsafe_allow_html=True)