import streamlit as st
import pdfplumber
import docx
from langchain.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import LLMChain
import os
# Hugging Face Spaces only guarantees write access under /tmp, so point
# Streamlit's home and cache directories there before the app touches disk.
_STREAMLIT_HOME = "/tmp/.streamlit"
_STREAMLIT_CACHE = "/tmp/.streamlit/cache"
os.environ["STREAMLIT_HOME"] = _STREAMLIT_HOME
os.environ["STREAMLIT_CACHE_DIR"] = _STREAMLIT_CACHE
# Create the cache directory (and parents) if it does not already exist.
os.makedirs(_STREAMLIT_CACHE, exist_ok=True)
def extract_text_from_docx(uploaded_file):
    """Return the text of a .docx file, one paragraph per line.

    `uploaded_file` is a file-like object (e.g. a Streamlit upload buffer)
    that python-docx can open directly.
    """
    document = docx.Document(uploaded_file)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)
# Browser-tab title/icon and overall page layout; must run before any other
# Streamlit call in the script.
_PAGE_CONFIG = {
    "page_title": "Chat with PDF",
    "page_icon": "π",
    "layout": "centered",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_CONFIG)
# Inject app-wide CSS: green gradient background, styled header/subtitle,
# summary/question/info boxes, and restyled Streamlit text inputs and buttons.
# The class names below (custom-header, summary-box, question-box,
# custom-info-box, ...) are referenced by the HTML snippets rendered later.
st.markdown("""
<style>
body, .main {
background: linear-gradient(135deg, #e0eafc 0%, #cfdef3 100%);
}
.stApp {
background: linear-gradient(135deg, #e8f5e9 0%, #d0f0d0 100%);
}
.custom-header {
font-size: 2.5em;
font-weight: bold;
color: #2d3a4a;
text-align: center;
margin-bottom: 0.2em;
letter-spacing: 2px;
text-shadow: 1px 1px 8px #b2bec3;
}
.custom-subtitle {
font-size: 1.2em;
color: #006400;
text-align: center;
margin-bottom: 2em;
}
.summary-box {
background: #f7faff;
border-left: 8px solid #006400;
border-radius: 12px;
padding: 1.2em 1.5em;
margin-bottom: 1.5em;
box-shadow: 0 2px 12px #dbeafe;
}
.question-box {
background: #e8f5e9;
border-left: 8px solid #2e7d32;
border-radius: 12px;
padding: 1.2em 1.5em;
margin-bottom: 1.5em;
box-shadow: 0 2px 12px #a5d6a7;
}
.custom-info-box {
background-color: #e8f5e9; /* Light green background */
color: #1b5e20; /* Dark green text */
border-left: 8px solid #1b5e20;
padding: 1em;
border-radius: 8px;
font-size: 1.1em;
font-weight: bold;
margin-top: 1em;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05);
}
.stTextInput > label {
font-size: 1.2em;
color: #2e7d32;
font-weight: bold;
}
.stButton > button {
background: linear-gradient(90deg, #2e7d32 0%, #e8f5e9 100%);
color: white;
font-size: 1.1em;
border-radius: 8px;
padding: 0.5em 2em;
border: none;
box-shadow: 0 2px 8px #dbeafe;
transition: background 0.3s;
}
.stButton > button:hover {
background: linear-gradient(270deg, #2e7d32 0%, #e8f5e9 100%);
}
.stMarkdown {
background-color: #e8f5e9; /* light green background */
color: #1b5e20; /* dark green text */
font-weight: bold;
font-size: 1.1em;
border-radius: 10px;
padding: 10px;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
}
</style>
""", unsafe_allow_html=True)
# Page header and subtitle (HTML divs styled by the CSS classes injected earlier).
st.markdown('<div class="custom-header">π Chat with your PDF/DOCX</div>', unsafe_allow_html=True)
st.markdown('<div class="custom-subtitle">Upload your document and instantly get a summary. Ask anything about its content!</div>', unsafe_allow_html=True)
# Accept a single PDF or DOCX document; returns None until the user uploads one.
uploaded_file = st.file_uploader("Choose a file (PDF or DOCX)", type=["pdf", "docx"])
# --- Main application flow -------------------------------------------------
# Extract text from the uploaded document, show a one-off summary, then
# answer free-form questions about the content via a Gemini-backed chain.
text = ""
if uploaded_file:
    # Dispatch on the uploaded file's extension.
    file_type = uploaded_file.name.split(".")[-1].lower()
    if file_type == "pdf":
        with pdfplumber.open(uploaded_file) as pdf:
            for page in pdf.pages:
                # extract_text() returns None for image-only pages; coerce to "".
                text += page.extract_text() or ""
    elif file_type == "docx":
        text = extract_text_from_docx(uploaded_file)
    else:
        st.error("Unsupported file type. Please upload a PDF or DOCX.")
        # BUG FIX: previously execution fell through after the error and
        # built/invoked an LLM chain over empty text; halt this script run.
        st.stop()

    # NOTE(review): embedding raw document text in the template means any
    # literal '{' or '}' in the document is interpreted as a template
    # variable by ChatPromptTemplate — confirm inputs are brace-free or
    # escape braces before templating.
    system_prompt = f"Here is the content of the PDF:\n{text}\nAnswer the user's question based on this content."
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("human", "{user_query}")
    ])

    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        # SECURITY FIX: the Gemini API key was hard-coded in source (and
        # therefore leaked with the repository). Read it from the
        # environment instead; set GOOGLE_API_KEY in the Space's secrets.
        api_key=os.environ.get("GOOGLE_API_KEY"),
    )
    chain = LLMChain(llm=llm, prompt=prompt)

    # Generate and show a summary immediately after upload.
    summary_prompt = ChatPromptTemplate.from_messages([
        ("system", "Summarize the following document in a concise paragraph so the user can easily understand its main points."),
        ("human", "{user_query}")
    ])
    summary_chain = LLMChain(llm=llm, prompt=summary_prompt)
    with st.spinner("Generating summary..."):
        summary_response = summary_chain.invoke({"user_query": text})
        # LLMChain.invoke returns a dict with the completion under "text".
        summary = summary_response["text"] if "text" in summary_response else summary_response
    st.markdown(f'<div class="summary-box"><b>π PDF/DOCX Summary</b><br>{summary}</div>', unsafe_allow_html=True)
    st.success("File loaded successfully! You can now ask questions.")

    # Question-and-answer section over the extracted document text.
    st.markdown('<div class="question-box"><b>Ask a question about your file:</b></div>', unsafe_allow_html=True)
    user_query = st.text_input("Type your question here...", "What is the main topic of the document?")
    if st.button("Get Answer") and user_query:
        with st.spinner("Thinking..."):
            response = chain.invoke({"user_query": user_query})
            answer = response["text"] if "text" in response else response
        st.markdown(f'<div class="question-box"><b>Answer:</b> {answer}</div>', unsafe_allow_html=True)

if not uploaded_file:
    st.markdown('<div class="custom-info-box">Please upload a PDF to get started.</div>', unsafe_allow_html=True)