import streamlit as st
import pdfplumber
import docx
from langchain.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import LLMChain
import os
# Hugging Face Spaces only guarantees write access under /tmp, so point
# Streamlit's home and cache directories there before the app touches disk.
_STREAMLIT_HOME = "/tmp/.streamlit"
_STREAMLIT_CACHE = "/tmp/.streamlit/cache"
os.environ["STREAMLIT_HOME"] = _STREAMLIT_HOME
os.environ["STREAMLIT_CACHE_DIR"] = _STREAMLIT_CACHE
# Create the cache directory (and parents) if it does not already exist.
os.makedirs(_STREAMLIT_CACHE, exist_ok=True)
def extract_text_from_docx(uploaded_file):
    """Return the text of a .docx file, one paragraph per line.

    `uploaded_file` is a file-like object (e.g. a Streamlit upload buffer)
    that python-docx can open directly.
    """
    document = docx.Document(uploaded_file)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)
# Browser-tab title/icon and overall page layout; must run before any other
# Streamlit call in the script.
_PAGE_CONFIG = {
    "page_title": "Chat with PDF",
    "page_icon": "π",
    "layout": "centered",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_CONFIG)
# Inject app-wide CSS: green gradient background, styled header/subtitle,
# summary/question/info boxes, and restyled Streamlit text inputs and buttons.
# The class names below (custom-header, summary-box, question-box,
# custom-info-box, ...) are referenced by the HTML snippets rendered later.
st.markdown("""
<style>
body, .main {
background: linear-gradient(135deg, #e0eafc 0%, #cfdef3 100%);
}
.stApp {
background: linear-gradient(135deg, #e8f5e9 0%, #d0f0d0 100%);
}
.custom-header {
font-size: 2.5em;
font-weight: bold;
color: #2d3a4a;
text-align: center;
margin-bottom: 0.2em;
letter-spacing: 2px;
text-shadow: 1px 1px 8px #b2bec3;
}
.custom-subtitle {
font-size: 1.2em;
color: #006400;
text-align: center;
margin-bottom: 2em;
}
.summary-box {
background: #f7faff;
border-left: 8px solid #006400;
border-radius: 12px;
padding: 1.2em 1.5em;
margin-bottom: 1.5em;
box-shadow: 0 2px 12px #dbeafe;
}
.question-box {
background: #e8f5e9;
border-left: 8px solid #2e7d32;
border-radius: 12px;
padding: 1.2em 1.5em;
margin-bottom: 1.5em;
box-shadow: 0 2px 12px #a5d6a7;
}
.custom-info-box {
background-color: #e8f5e9; /* Light green background */
color: #1b5e20; /* Dark green text */
border-left: 8px solid #1b5e20;
padding: 1em;
border-radius: 8px;
font-size: 1.1em;
font-weight: bold;
margin-top: 1em;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05);
}
.stTextInput > label {
font-size: 1.2em;
color: #2e7d32;
font-weight: bold;
}
.stButton > button {
background: linear-gradient(90deg, #2e7d32 0%, #e8f5e9 100%);
color: white;
font-size: 1.1em;
border-radius: 8px;
padding: 0.5em 2em;
border: none;
box-shadow: 0 2px 8px #dbeafe;
transition: background 0.3s;
}
.stButton > button:hover {
background: linear-gradient(270deg, #2e7d32 0%, #e8f5e9 100%);
}
.stMarkdown {
background-color: #e8f5e9; /* light green background */
color: #1b5e20; /* dark green text */
font-weight: bold;
font-size: 1.1em;
border-radius: 10px;
padding: 10px;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
}
</style>
""", unsafe_allow_html=True)
# Page header and subtitle (HTML divs styled by the CSS classes injected earlier).
st.markdown('<div class="custom-header">π Chat with your PDF/DOCX</div>', unsafe_allow_html=True)
st.markdown('<div class="custom-subtitle">Upload your document and instantly get a summary. Ask anything about its content!</div>', unsafe_allow_html=True)
# Accept a single PDF or DOCX document; returns None until the user uploads one.
uploaded_file = st.file_uploader("Choose a file (PDF or DOCX)", type=["pdf", "docx"])
# --- Main application flow -------------------------------------------------
# Extract text from the uploaded document, show a one-off summary, then
# answer free-form questions about the content via a Gemini-backed chain.
text = ""
if uploaded_file:
    # Dispatch on the uploaded file's extension.
    file_type = uploaded_file.name.split(".")[-1].lower()
    if file_type == "pdf":
        with pdfplumber.open(uploaded_file) as pdf:
            for page in pdf.pages:
                # extract_text() returns None for image-only pages; coerce to "".
                text += page.extract_text() or ""
    elif file_type == "docx":
        text = extract_text_from_docx(uploaded_file)
    else:
        st.error("Unsupported file type. Please upload a PDF or DOCX.")
        # BUG FIX: previously execution fell through after the error and
        # built/invoked an LLM chain over empty text; halt this script run.
        st.stop()

    # NOTE(review): embedding raw document text in the template means any
    # literal '{' or '}' in the document is interpreted as a template
    # variable by ChatPromptTemplate — confirm inputs are brace-free or
    # escape braces before templating.
    system_prompt = f"Here is the content of the PDF:\n{text}\nAnswer the user's question based on this content."
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("human", "{user_query}")
    ])

    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        # SECURITY FIX: the Gemini API key was hard-coded in source (and
        # therefore leaked with the repository). Read it from the
        # environment instead; set GOOGLE_API_KEY in the Space's secrets.
        api_key=os.environ.get("GOOGLE_API_KEY"),
    )
    chain = LLMChain(llm=llm, prompt=prompt)

    # Generate and show a summary immediately after upload.
    summary_prompt = ChatPromptTemplate.from_messages([
        ("system", "Summarize the following document in a concise paragraph so the user can easily understand its main points."),
        ("human", "{user_query}")
    ])
    summary_chain = LLMChain(llm=llm, prompt=summary_prompt)
    with st.spinner("Generating summary..."):
        summary_response = summary_chain.invoke({"user_query": text})
        # LLMChain.invoke returns a dict with the completion under "text".
        summary = summary_response["text"] if "text" in summary_response else summary_response
    st.markdown(f'<div class="summary-box"><b>π PDF/DOCX Summary</b><br>{summary}</div>', unsafe_allow_html=True)
    st.success("File loaded successfully! You can now ask questions.")

    # Question-and-answer section over the extracted document text.
    st.markdown('<div class="question-box"><b>Ask a question about your file:</b></div>', unsafe_allow_html=True)
    user_query = st.text_input("Type your question here...", "What is the main topic of the document?")
    if st.button("Get Answer") and user_query:
        with st.spinner("Thinking..."):
            response = chain.invoke({"user_query": user_query})
            answer = response["text"] if "text" in response else response
        st.markdown(f'<div class="question-box"><b>Answer:</b> {answer}</div>', unsafe_allow_html=True)

if not uploaded_file:
    st.markdown('<div class="custom-info-box">Please upload a PDF to get started.</div>', unsafe_allow_html=True)