Spaces:

HammadKprojects
/

Climate_Change_Awareness_for_Policymakers

Sleeping

App Files Files Community

Climate_Change_Awareness_for_Policymakers / app.py

HammadKprojects

Update app.py

de8fb8e verified 10 months ago

raw

history blame contribute delete

5.06 kB

	import html
	import io
	import os
	import tempfile

	import faiss
	import numpy as np
	import PyPDF2
	import streamlit as st
	from groq import Groq
	from gtts import gTTS
	from sentence_transformers import SentenceTransformer

	# 🚨 Must be the first Streamlit command executed by the script: it sets the
	# page title and switches the app to the wide layout.
	st.set_page_config(page_title="🌍 Climate Companion", layout="wide")

	# Load model and Groq client once
	@st.cache_resource
	def load_model():
	    """Return the sentence-embedding model used for chunks and questions.

	    Decorated with ``st.cache_resource`` so the model is built once and the
	    same instance is handed back on later calls.
	    """
	    model_name = "all-MiniLM-L6-v2"
	    return SentenceTransformer(model_name)

	@st.cache_resource
	def load_groq_client():
	    """Return the Groq API client, configured from ``GROQ_API_KEY``.

	    The key is read from the environment at call time; the decorator caches
	    the resulting client so it is constructed only once.
	    """
	    api_key = os.getenv("GROQ_API_KEY")
	    return Groq(api_key=api_key)

	# Module-level handles used by the rest of the script: the embedding model
	# (for chunk and question vectors) and the Groq chat client.
	embed_model = load_model()
	client = load_groq_client()

	# Page header: centred title plus a one-line instruction, as inline HTML.
	header_html = (
	    "<h1 style='text-align: center; color: #2E8B57;'>🌿 Climate Companion</h1>"
	    "<p style='text-align: center; font-size: 18px;'>Upload a climate report and ask environment-related questions.</p>"
	)
	st.markdown(header_html, unsafe_allow_html=True)

	# Upload widget, restricted to PDF files; falsy until a file is chosen.
	uploaded_file = st.file_uploader(
	    "📄 Upload Climate Report (PDF)",
	    type="pdf",
	)

	# Text chunking
	def chunk_text(text, max_tokens=100, overlap=20):
	    """Split *text* into overlapping chunks of whitespace-separated words.

	    Consecutive chunks start ``max_tokens - overlap`` words apart, so each
	    chunk begins with the last ``overlap`` words of the one before it.

	    Args:
	        text: Text to split; words are delimited by any run of whitespace.
	        max_tokens: Maximum number of words per chunk. Must be positive.
	        overlap: Number of words shared by consecutive chunks. Must satisfy
	            ``0 <= overlap < max_tokens``.

	    Returns:
	        A list of chunk strings whose words are joined by single spaces.
	        Empty when *text* contains no words.

	    Raises:
	        ValueError: If ``max_tokens`` or ``overlap`` is out of range. Without
	            this check a zero stride fails with an opaque ``range()`` error
	            and a negative stride silently yields no chunks at all.
	    """
	    if max_tokens <= 0:
	        raise ValueError(f"max_tokens must be positive, got {max_tokens}")
	    if not 0 <= overlap < max_tokens:
	        raise ValueError(
	            f"overlap must satisfy 0 <= overlap < max_tokens, "
	            f"got overlap={overlap}, max_tokens={max_tokens}"
	        )

	    words = text.split()
	    stride = max_tokens - overlap
	    # Every slice starts at a valid index, so each chunk holds at least one
	    # word and no emptiness check is needed.
	    return [
	        " ".join(words[start:start + max_tokens])
	        for start in range(0, len(words), stride)
	    ]

	# Index the uploaded PDF once per session, then answer questions against it.
	if uploaded_file:
	    # Key the cache on name *and* size so that a different document which
	    # happens to share a filename is re-indexed instead of served stale.
	    file_key = f"{uploaded_file.name}:{uploaded_file.size}"

	    if st.session_state.get("processed_file") != file_key:
	        # Parse the PDF straight from memory: no temporary copy is written,
	        # so nothing is left behind on disk after the run.
	        try:
	            reader = PyPDF2.PdfReader(io.BytesIO(uploaded_file.getvalue()))
	            full_text = "\n".join(page.extract_text() or "" for page in reader.pages)
	        except Exception as e:
	            st.error(f"❌ Failed to read PDF: {e}")
	            st.stop()

	        if not full_text.strip():
	            st.error("❌ No extractable text found in the PDF.")
	            st.stop()

	        st.success("✅ Extracted text from PDF successfully.")

	        # Chunk + embed, then build an exact L2 index over the chunk vectors.
	        with st.spinner("🔄 Chunking and embedding text..."):
	            chunks = chunk_text(full_text)
	            embeddings = np.ascontiguousarray(
	                embed_model.encode(chunks, show_progress_bar=True),
	                dtype="float32",
	            )
	            dimension = embeddings.shape[1]

	            index = faiss.IndexFlatL2(dimension)
	            index.add(embeddings)

	        # Persist across Streamlit reruns so later questions skip re-embedding.
	        st.session_state.processed_file = file_key
	        st.session_state.chunks = chunks
	        st.session_state.index = index
	        st.session_state.dimension = dimension

	        st.success(f"📚 {len(chunks)} text chunks embedded and indexed.")

	    else:
	        chunks = st.session_state.chunks
	        index = st.session_state.index
	        st.success("✅ Using cached embeddings from this session.")

	    # Question and Answer section
	    st.markdown("---")
	    st.subheader("🌱 Ask a Climate-Related Question")
	    col1, col2 = st.columns([5, 1])
	    question = col1.text_input("Enter your question here")
	    submit = col2.button("🔍 Get Answer")

	    # Ignore whitespace-only questions.
	    if submit and question.strip():
	        with st.spinner("🧠 Generating response..."):
	            q_embed = np.ascontiguousarray(embed_model.encode([question]), dtype="float32")
	            # Never ask for more neighbours than there are chunks: FAISS pads
	            # missing results with -1, which would index the *last* chunk.
	            top_k = min(3, len(chunks))
	            _, indices = index.search(q_embed, top_k)
	            top_chunks = [chunks[i] for i in indices[0] if i >= 0]
	            context = "\n".join(top_chunks)

	            # The prompt literal is intentionally not indented with the code:
	            # whitespace inside it is sent to the model verbatim.
	            prompt = f"""
	You are a climate science expert. Use the context to answer the user's question concisely.

	Context:
	{context}

	Question:
	{question}
	"""

	            # Only the API round-trip is guarded by the Groq error message;
	            # rendering problems should not be reported as API failures.
	            try:
	                response = client.chat.completions.create(
	                    model="llama3-8b-8192",
	                    messages=[
	                        {"role": "system", "content": "You are a helpful environmental scientist."},
	                        {"role": "user", "content": prompt},
	                    ],
	                )
	                answer = response.choices[0].message.content.strip()
	            except Exception as e:
	                st.error(f"🚨 Error from Groq API: {e}")
	            else:
	                st.markdown("### ✅ Answer")
	                # The answer derives from uploaded-document text and model
	                # output, so escape it before embedding it in raw HTML.
	                st.markdown(
	                    "<div style='background-color:#f0f9f5;padding:15px;border-radius:10px;'>"
	                    f"{html.escape(answer)}</div>",
	                    unsafe_allow_html=True,
	                )
	                st.markdown("### ✅ Wanna Hear")
	                # Synthesize speech into memory rather than a fixed shared
	                # file, so concurrent sessions cannot overwrite each other.
	                try:
	                    audio_buffer = io.BytesIO()
	                    gTTS(text=answer).write_to_fp(audio_buffer)
	                    st.audio(audio_buffer.getvalue(), format="audio/mp3")
	                except Exception as audio_err:
	                    # Audio is best-effort; the text answer is already shown.
	                    st.warning(f"🎤 Text-to-Speech error: {audio_err}")

	                with st.expander("📖 Context Used"):
	                    st.code(context)
	else:
	    st.info("📤 Please upload a PDF to begin.")