# HammadKprojects's picture
# Update app.py
# de8fb8e verified
import os
import tempfile
import streamlit as st
import PyPDF2
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from groq import Groq
from gtts import gTTS
# 🚨 Must be the first Streamlit command executed in the script.
# The emoji in the title was mis-encoded (UTF-8 bytes shown as single-byte
# characters); it is restored here so the browser tab renders it correctly.
st.set_page_config(page_title="🌍 Climate Companion", layout="wide")
# Load model and Groq client once
@st.cache_resource
def load_model():
    """Return the ``all-MiniLM-L6-v2`` sentence-embedding model.

    Wrapped in ``st.cache_resource`` so the model object is built once and
    reused instead of being re-created on every script run.
    """
    model = SentenceTransformer("all-MiniLM-L6-v2")
    return model
@st.cache_resource
def load_groq_client():
    """Return a Groq client configured from the ``GROQ_API_KEY`` env variable.

    Wrapped in ``st.cache_resource`` so a single client instance is reused.
    """
    api_key = os.getenv("GROQ_API_KEY")
    return Groq(api_key=api_key)
# Shared resources used by the rest of the script.
embed_model = load_model()
client = load_groq_client()

# UI Header (raw HTML, so unsafe_allow_html is required for the styling).
# The leaf emoji was mis-encoded in the original literal and is restored here.
st.markdown(
    "<h1 style='text-align: center; color: #2E8B57;'>🌿 Climate Companion</h1>"
    "<p style='text-align: center; font-size: 18px;'>Upload a climate report and ask environment-related questions.</p>",
    unsafe_allow_html=True,
)

# PDF uploader (label emoji restored from its mis-encoded form).
uploaded_file = st.file_uploader("📄 Upload Climate Report (PDF)", type="pdf")
# Text chunking
def chunk_text(text, max_tokens=100, overlap=20):
    """Split *text* into overlapping chunks of whitespace-separated words.

    Despite the parameter name, "tokens" here are plain words as produced by
    ``str.split()``; no model tokenizer is involved.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk. Must be positive.
        overlap: Number of words shared between consecutive chunks. Must
            satisfy ``0 <= overlap < max_tokens`` so the window always
            advances.

    Returns:
        A list of chunk strings (words re-joined with single spaces). Empty
        or whitespace-only input yields an empty list.

    Raises:
        ValueError: If ``max_tokens`` or ``overlap`` is out of range.
    """
    if max_tokens <= 0:
        raise ValueError("max_tokens must be a positive integer")
    if not 0 <= overlap < max_tokens:
        raise ValueError("overlap must satisfy 0 <= overlap < max_tokens")

    words = text.split()
    step = max_tokens - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start:start + max_tokens]))
        if start + max_tokens >= len(words):
            # This window already reaches the last word; another window
            # would contain nothing but words repeated from this one.
            break
    return chunks
# Process file only once per session; later reruns reuse the stored index.
if uploaded_file:
    # Identify the upload by name *and* size so a different document that
    # happens to share a file name is not answered from a stale index.
    file_key = (uploaded_file.name, uploaded_file.size)

    if st.session_state.get("processed_file") != file_key:
        try:
            # The upload is an in-memory file-like object, so PyPDF2 can parse
            # it directly; no temporary copy on disk is needed (the previous
            # delete=False temp file was never removed).
            reader = PyPDF2.PdfReader(uploaded_file)
            full_text = "\n".join(page.extract_text() or "" for page in reader.pages)
        except Exception as e:
            st.error(f"❌ Failed to read PDF: {e}")
            st.stop()

        if not full_text.strip():
            st.error("❌ No extractable text found in the PDF.")
            st.stop()

        st.success("✅ Extracted text from PDF successfully.")

        # Chunk + Embed
        with st.spinner("🔄 Chunking and embedding text..."):
            chunks = chunk_text(full_text)
            # FAISS expects float32 vectors.
            embeddings = np.asarray(
                embed_model.encode(chunks, show_progress_bar=True),
                dtype="float32",
            )
            dimension = embeddings.shape[1]
            index = faiss.IndexFlatL2(dimension)
            index.add(embeddings)

        # Store in session_state so reruns triggered by widgets skip the work.
        st.session_state.processed_file = file_key
        st.session_state.chunks = chunks
        st.session_state.index = index
        st.session_state.dimension = dimension
        st.success(f"📚 {len(chunks)} text chunks embedded and indexed.")
    else:
        chunks = st.session_state.chunks
        index = st.session_state.index
        dimension = st.session_state.dimension
        st.success("✅ Using cached embeddings from this session.")

    # Question and Answer section
    st.markdown("---")
    st.subheader("🌱 Ask a Climate-Related Question")
    col1, col2 = st.columns([5, 1])
    question = col1.text_input("Enter your question here")
    submit = col2.button("🔍 Get Answer")

    if submit and question:
        with st.spinner("🧠 Generating response..."):
            q_embed = np.asarray(embed_model.encode([question]), dtype="float32")

            # Never ask for more neighbours than there are chunks: FAISS pads
            # missing results with -1, and chunks[-1] would silently repeat
            # the last chunk. The >= 0 filter guards against that as well.
            top_k = min(3, len(chunks))
            _, indices = index.search(q_embed, k=top_k)
            top_chunks = [chunks[i] for i in indices[0] if i >= 0]
            context = "\n".join(top_chunks)

            prompt = (
                "\n"
                "You are a climate science expert. Use the context to answer the user's question concisely.\n"
                "Context:\n"
                f"{context}\n"
                "Question:\n"
                f"{question}\n"
            )

            # Only the API call sits in the try body, so rendering problems
            # are not misreported as Groq errors.
            try:
                response = client.chat.completions.create(
                    model="llama3-8b-8192",
                    messages=[
                        {"role": "system", "content": "You are a helpful environmental scientist."},
                        {"role": "user", "content": prompt},
                    ],
                )
                answer = response.choices[0].message.content.strip()
            except Exception as e:
                st.error(f"🚨 Error from Groq API: {e}")
            else:
                st.markdown("### ✅ Answer")
                # NOTE(review): the model output is interpolated into raw HTML
                # without escaping; consider html.escape() if the uploaded
                # documents are not trusted.
                st.markdown(
                    f"<div style='background-color:#f0f9f5;padding:15px;border-radius:10px;'>{answer}</div>",
                    unsafe_allow_html=True,
                )

                st.markdown("### ✅ Wanna Hear")
                # Generate and play audio response. An anonymous temp file is
                # used instead of a fixed "answer.mp3" path so concurrent
                # sessions cannot overwrite each other's audio, and nothing is
                # left behind on disk.
                try:
                    tts = gTTS(text=answer)
                    with tempfile.TemporaryFile() as audio_file:
                        tts.write_to_fp(audio_file)
                        audio_file.seek(0)
                        st.audio(audio_file.read(), format="audio/mp3")
                except Exception as audio_err:
                    # Audio is best-effort; the text answer is already shown.
                    st.warning(f"🎤 Text-to-Speech error: {audio_err}")

                with st.expander("📖 Context Used"):
                    st.code(context)
    elif submit:
        # The button was pressed with an empty question box.
        st.warning("Please enter a question first.")
else:
    st.info("📤 Please upload a PDF to begin.")