# pdf-chatter / src/streamlit_app.py
# Streamlit app: upload a PDF/DOCX, get a summary, and ask questions about it.
import streamlit as st
import pdfplumber
import docx
from langchain.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import LLMChain
import os
# Hugging Face Spaces only guarantees that /tmp is writable, so point
# Streamlit's home and cache directories there before the app starts.
_STREAMLIT_HOME = "/tmp/.streamlit"
_STREAMLIT_CACHE = os.path.join(_STREAMLIT_HOME, "cache")
os.environ["STREAMLIT_HOME"] = _STREAMLIT_HOME
os.environ["STREAMLIT_CACHE_DIR"] = _STREAMLIT_CACHE
# Create the cache directory (and its parent) if it is not already present.
os.makedirs(_STREAMLIT_CACHE, exist_ok=True)
def extract_text_from_docx(uploaded_file):
    """Return all paragraph text of a .docx file, joined with newlines.

    `uploaded_file` is a file-like object (Streamlit UploadedFile) that
    python-docx can open directly.
    """
    document = docx.Document(uploaded_file)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)
# Configure the browser tab and overall page layout before rendering anything.
st.set_page_config(
page_title="Chat with PDF",
page_icon="πŸ“„",
layout="centered",
initial_sidebar_state="expanded"
)
# Inject custom CSS: green-themed gradients, styled boxes for the summary,
# question and info panels, and restyled Streamlit inputs/buttons.
st.markdown("""
<style>
body, .main {
background: linear-gradient(135deg, #e0eafc 0%, #cfdef3 100%);
}
.stApp {
background: linear-gradient(135deg, #e8f5e9 0%, #d0f0d0 100%);
}
.custom-header {
font-size: 2.5em;
font-weight: bold;
color: #2d3a4a;
text-align: center;
margin-bottom: 0.2em;
letter-spacing: 2px;
text-shadow: 1px 1px 8px #b2bec3;
}
.custom-subtitle {
font-size: 1.2em;
color: #006400;
text-align: center;
margin-bottom: 2em;
}
.summary-box {
background: #f7faff;
border-left: 8px solid #006400;
border-radius: 12px;
padding: 1.2em 1.5em;
margin-bottom: 1.5em;
box-shadow: 0 2px 12px #dbeafe;
}
.question-box {
background: #e8f5e9;
border-left: 8px solid #2e7d32;
border-radius: 12px;
padding: 1.2em 1.5em;
margin-bottom: 1.5em;
box-shadow: 0 2px 12px #a5d6a7;
}
.custom-info-box {
background-color: #e8f5e9; /* Light green background */
color: #1b5e20; /* Dark green text */
border-left: 8px solid #1b5e20;
padding: 1em;
border-radius: 8px;
font-size: 1.1em;
font-weight: bold;
margin-top: 1em;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05);
}
.stTextInput > label {
font-size: 1.2em;
color: #2e7d32;
font-weight: bold;
}
.stButton > button {
background: linear-gradient(90deg, #2e7d32 0%, #e8f5e9 100%);
color: white;
font-size: 1.1em;
border-radius: 8px;
padding: 0.5em 2em;
border: none;
box-shadow: 0 2px 8px #dbeafe;
transition: background 0.3s;
}
.stButton > button:hover {
background: linear-gradient(270deg, #2e7d32 0%, #e8f5e9 100%);
}
.stMarkdown {
background-color: #e8f5e9; /* light green background */
color: #1b5e20; /* dark green text */
font-weight: bold;
font-size: 1.1em;
border-radius: 10px;
padding: 10px;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
}
</style>
""", unsafe_allow_html=True)
# Render the page title and subtitle using the CSS classes defined above.
st.markdown('<div class="custom-header">πŸ“„ Chat with your PDF/DOCX</div>', unsafe_allow_html=True)
st.markdown('<div class="custom-subtitle">Upload your document and instantly get a summary. Ask anything about its content!</div>', unsafe_allow_html=True)
uploaded_file = st.file_uploader("Choose a file (PDF or DOCX)", type=["pdf", "docx"])

if uploaded_file:
    # --- Text extraction -------------------------------------------------
    text = ""
    file_type = uploaded_file.name.split(".")[-1].lower()
    if file_type == "pdf":
        with pdfplumber.open(uploaded_file) as pdf:
            for page in pdf.pages:
                # extract_text() returns None for image-only pages.
                text += page.extract_text() or ""
    elif file_type == "docx":
        text = extract_text_from_docx(uploaded_file)
    else:
        # BUG FIX: previously execution fell through after this error and
        # the app summarized an empty string. Stop the rerun instead.
        st.error("Unsupported file type. Please upload a PDF or DOCX.")
        st.stop()

    if not text.strip():
        # e.g. a scanned/image-only PDF with no extractable text layer.
        st.error("No extractable text was found in this file.")
        st.stop()

    # --- LLM setup -------------------------------------------------------
    # SECURITY FIX: the API key was hard-coded in source (and therefore
    # leaked). Read it from the environment / Space secrets instead.
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        st.error("GOOGLE_API_KEY is not set. Add it as an environment variable or Space secret.")
        st.stop()

    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        api_key=api_key,
    )

    # BUG FIX: document text is interpolated into the prompt template, so
    # literal { } in the document would be treated as template variables
    # and crash formatting. Escape them first.
    safe_text = text.replace("{", "{{").replace("}", "}}")
    system_prompt = f"Here is the content of the PDF:\n{safe_text}\nAnswer the user's question based on this content."
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("human", "{user_query}"),
    ])
    chain = LLMChain(llm=llm, prompt=prompt)

    # --- Summary ---------------------------------------------------------
    # PERF FIX: Streamlit reruns this script on every interaction, so the
    # summary was regenerated (one LLM call) on every question. Cache it
    # in session_state, keyed by the uploaded file's name.
    if st.session_state.get("summary_file") != uploaded_file.name:
        summary_prompt = ChatPromptTemplate.from_messages([
            ("system", "Summarize the following document in a concise paragraph so the user can easily understand its main points."),
            ("human", "{user_query}"),
        ])
        summary_chain = LLMChain(llm=llm, prompt=summary_prompt)
        with st.spinner("Generating summary..."):
            summary_response = summary_chain.invoke({"user_query": text})
        summary = summary_response["text"] if "text" in summary_response else summary_response
        st.session_state["summary_file"] = uploaded_file.name
        st.session_state["summary_text"] = summary

    st.markdown(
        f'<div class="summary-box"><b>πŸ“‘ PDF/DOCX Summary</b><br>{st.session_state["summary_text"]}</div>',
        unsafe_allow_html=True,
    )
    st.success("File loaded successfully! You can now ask questions.")

    # --- Q&A -------------------------------------------------------------
    st.markdown('<div class="question-box"><b>Ask a question about your file:</b></div>', unsafe_allow_html=True)
    user_query = st.text_input("Type your question here...", "What is the main topic of the document?")
    if st.button("Get Answer") and user_query:
        with st.spinner("Thinking..."):
            response = chain.invoke({"user_query": user_query})
        answer = response["text"] if "text" in response else response
        st.markdown(f'<div class="question-box"><b>Answer:</b> {answer}</div>', unsafe_allow_html=True)
else:
    st.markdown('<div class="custom-info-box">Please upload a PDF to get started.</div>', unsafe_allow_html=True)