import os
import tempfile

import streamlit as st
import PyPDF2
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from groq import Groq
from gtts import gTTS  # NOTE(review): used further down the file (truncated here) — keep import

# 🚨 Must be the first Streamlit command
st.set_page_config(page_title="🌍 Climate Companion", layout="wide")


@st.cache_resource
def load_model():
    """Return the cached all-MiniLM-L6-v2 sentence embedder (one per process)."""
    return SentenceTransformer("all-MiniLM-L6-v2")


@st.cache_resource
def load_groq_client():
    """Return a cached Groq client; reads GROQ_API_KEY from the environment."""
    return Groq(api_key=os.getenv("GROQ_API_KEY"))


embed_model = load_model()
client = load_groq_client()

# UI header.
# NOTE(review): the original rich HTML markup was lost in extraction; this
# keeps the same visible text in a minimal banner.
st.markdown(
    "<h1>🌍 Climate Companion</h1>"
    "<p>Upload a climate report and ask environment-related questions.</p>",
    unsafe_allow_html=True,
)

# PDF uploader
uploaded_file = st.file_uploader("📄 Upload Climate Report (PDF)", type="pdf")


def chunk_text(text, max_tokens=100, overlap=20):
    """Split *text* into overlapping word-window chunks.

    Args:
        text: Raw document text.
        max_tokens: Number of words per chunk.
        overlap: Number of words shared between consecutive chunks.

    Returns:
        List of non-empty chunk strings.
    """
    words = text.split()
    chunks = []
    # BUG FIX: step could be <= 0 when overlap >= max_tokens, making the
    # range() call raise (or loop forever with step 0); clamp to at least 1.
    step = max(1, max_tokens - overlap)
    for i in range(0, len(words), step):
        chunk = " ".join(words[i:i + max_tokens])
        if chunk.strip():
            chunks.append(chunk)
    return chunks


# Process the uploaded file only once per session (keyed by filename).
if uploaded_file:
    if ("processed_file" not in st.session_state
            or st.session_state.processed_file != uploaded_file.name):
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_file.write(uploaded_file.read())
            tmp_path = tmp_file.name

        try:
            with open(tmp_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                # extract_text() can return None for image-only pages.
                full_text = "\n".join(page.extract_text() or "" for page in reader.pages)
        except Exception as e:
            st.error(f"❌ Failed to read PDF: {e}")
            st.stop()
        finally:
            # BUG FIX: the delete=False temp file used to leak on every upload.
            try:
                os.unlink(tmp_path)
            except OSError:
                pass

        if not full_text.strip():
            st.error("❌ No extractable text found in the PDF.")
            st.stop()

        st.success("✅ Extracted text from PDF successfully.")

        # Chunk + embed + build the FAISS index.
        with st.spinner("🔄 Chunking and embedding text..."):
            chunks = chunk_text(full_text)
            embeddings = embed_model.encode(chunks, show_progress_bar=True)
            dimension = embeddings.shape[1]
            index = faiss.IndexFlatL2(dimension)
            index.add(np.array(embeddings).astype("float32"))

        # Cache everything in session_state so reruns skip re-embedding.
        st.session_state.processed_file = uploaded_file.name
        st.session_state.chunks = chunks
        st.session_state.index = index
        st.session_state.dimension = dimension
        st.success(f"📚 {len(chunks)} text chunks embedded and indexed.")
    else:
        chunks = st.session_state.chunks
        index = st.session_state.index
        dimension = st.session_state.dimension
        st.success("✅ Using cached embeddings from this session.")

# Question and Answer section
st.markdown("---")
st.subheader("🌱 Ask a Climate-Related Question")
col1, col2 = st.columns([5, 1])
question = col1.text_input("Enter your question here")
submit = col2.button("🔍 Get Answer")

if submit and question:
    # BUG FIX: the original referenced `index`/`chunks` unconditionally and
    # raised NameError when no PDF had been processed in this run; pull them
    # from session_state and guard.
    if "index" not in st.session_state:
        st.warning("⚠️ Please upload and process a PDF first.")
        st.stop()
    chunks = st.session_state.chunks
    index = st.session_state.index

    with st.spinner("🧠 Generating response..."):
        q_embed = embed_model.encode([question])
        # BUG FIX: a fixed k=3 on a corpus with fewer than 3 chunks makes
        # FAISS pad results with -1, which silently indexed chunks[-1].
        k = min(3, len(chunks))
        _, indices = index.search(np.array(q_embed).astype("float32"), k)
        top_chunks = [chunks[i] for i in indices[0] if i >= 0]
        context = "\n".join(top_chunks)

        prompt = f"""
You are a climate science expert. Use the context to answer the user's question concisely.

Context:
{context}

Question: {question}
"""
        try:
            response = client.chat.completions.create(
                model="llama3-8b-8192",
                messages=[
                    {"role": "system", "content": "You are a helpful environmental scientist."},
                    {"role": "user", "content": prompt},
                ],
            )
            answer = response.choices[0].message.content.strip()
            st.markdown("### ✅ Answer")
            # NOTE(review): the original rendered the answer inside a styled
            # HTML container whose markup was truncated in extraction; plain
            # markdown preserves the same information. Confirm against the
            # full file before shipping.
            st.markdown(answer)
        except Exception as e:
            st.error(f"❌ Failed to generate answer: {e}")