| | import os |
| | import tempfile |
| | import streamlit as st |
| | import PyPDF2 |
| | import faiss |
| | import numpy as np |
| | from sentence_transformers import SentenceTransformer |
| | from groq import Groq |
| | from gtts import gTTS |
| |
|
| | |
# Configure the Streamlit page (browser tab title + wide layout).
# Must be the first st.* call in the script.
st.set_page_config(page_title="π Climate Companion", layout="wide")
| |
|
| | |
@st.cache_resource
def load_model():
    """Load the sentence-embedding model once and cache it for the app's lifetime."""
    model = SentenceTransformer("all-MiniLM-L6-v2")
    return model
| |
|
@st.cache_resource
def load_groq_client():
    """Build a Groq API client (key from the GROQ_API_KEY env var), cached across reruns."""
    api_key = os.getenv("GROQ_API_KEY")
    return Groq(api_key=api_key)
| |
|
# Module-level handles; st.cache_resource makes these singletons shared
# across Streamlit reruns and sessions.
embed_model = load_model()
client = load_groq_client()
| |
|
| | |
# Render the page header (title + subtitle) as raw HTML.
st.markdown(
    "<h1 style='text-align: center; color: #2E8B57;'>πΏ Climate Companion</h1>"
    "<p style='text-align: center; font-size: 18px;'>Upload a climate report and ask environment-related questions.</p>",
    unsafe_allow_html=True
)
| |
|
| | |
# PDF upload widget; returns None until the user supplies a file.
uploaded_file = st.file_uploader("π Upload Climate Report (PDF)", type="pdf")
| |
|
| | |
def chunk_text(text, max_tokens=100, overlap=20):
    """Split *text* into overlapping word-based chunks for embedding.

    Args:
        text: Source text; split on whitespace.
        max_tokens: Maximum words per chunk. Must be positive.
        overlap: Words shared between consecutive chunks. Must satisfy
            0 <= overlap < max_tokens so the window always advances.

    Returns:
        A list of non-empty chunk strings; an empty list for blank input.

    Raises:
        ValueError: If max_tokens/overlap would produce a non-advancing
            window (previously this either raised an opaque ``range`` error
            for overlap == max_tokens or silently returned [] for
            overlap > max_tokens).
    """
    if max_tokens <= 0:
        raise ValueError("max_tokens must be positive")
    if overlap < 0 or overlap >= max_tokens:
        raise ValueError("overlap must satisfy 0 <= overlap < max_tokens")

    words = text.split()
    step = max_tokens - overlap  # guaranteed > 0 by the checks above
    chunks = []
    for start in range(0, len(words), step):
        chunk = " ".join(words[start:start + max_tokens])
        if chunk.strip():
            chunks.append(chunk)
    return chunks
| |
|
| | |
if uploaded_file:
    # Re-process only when a new file name arrives; otherwise reuse the
    # chunks/index cached in this session.
    if "processed_file" not in st.session_state or st.session_state.processed_file != uploaded_file.name:
        # Spool the upload to disk so PyPDF2 can read it from a real file.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_file.write(uploaded_file.read())
            tmp_path = tmp_file.name

        try:
            with open(tmp_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                # extract_text() may return None for image-only pages.
                full_text = "\n".join([page.extract_text() or "" for page in reader.pages])
        except Exception as e:
            st.error(f"β Failed to read PDF: {e}")
            st.stop()
        finally:
            # FIX: the file was created with delete=False and previously
            # leaked on every upload; remove it once parsing is done.
            # (st.stop() raises, so this runs on the error path too.)
            try:
                os.remove(tmp_path)
            except OSError:
                pass

        if not full_text.strip():
            st.error("β No extractable text found in the PDF.")
            st.stop()

        st.success("β Extracted text from PDF successfully.")

        with st.spinner("π Chunking and embedding text..."):
            chunks = chunk_text(full_text)
            embeddings = embed_model.encode(chunks, show_progress_bar=True)
            dimension = embeddings.shape[1]

            # Exact L2 index over the chunk embeddings (faiss wants float32).
            index = faiss.IndexFlatL2(dimension)
            index.add(np.array(embeddings).astype("float32"))

        # Cache everything needed to answer questions without re-embedding.
        st.session_state.processed_file = uploaded_file.name
        st.session_state.chunks = chunks
        st.session_state.index = index
        st.session_state.dimension = dimension

        st.success(f"π {len(chunks)} text chunks embedded and indexed.")
    else:
        chunks = st.session_state.chunks
        index = st.session_state.index
        dimension = st.session_state.dimension
        st.success("β Using cached embeddings from this session.")

    st.markdown("---")
    st.subheader("π± Ask a Climate-Related Question")
    col1, col2 = st.columns([5, 1])
    question = col1.text_input("Enter your question here")
    submit = col2.button("π Get Answer")

    if submit and question:
        with st.spinner("π§ Generating response..."):
            q_embed = embed_model.encode([question])
            # FIX: with fewer than 3 chunks, k=3 makes faiss pad the result
            # with -1 indices and chunks[-1] silently picked the wrong chunk.
            k = min(3, len(chunks))
            _, indices = index.search(np.array(q_embed).astype("float32"), k=k)
            top_chunks = [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
            context = "\n".join(top_chunks)

            prompt = f"""
You are a climate science expert. Use the context to answer the user's question concisely.

Context:
{context}

Question:
{question}
"""

            try:
                response = client.chat.completions.create(
                    model="llama3-8b-8192",
                    messages=[
                        {"role": "system", "content": "You are a helpful environmental scientist."},
                        {"role": "user", "content": prompt},
                    ],
                )
                answer = response.choices[0].message.content.strip()

                st.markdown("### β Answer")
                st.markdown(
                    f"<div style='background-color:#f0f9f5;padding:15px;border-radius:10px;'>{answer}</div>",
                    unsafe_allow_html=True,
                )
                st.markdown("### β Wanna Hear")

                # Best-effort text-to-speech; failure only warns, never blocks
                # the text answer above.
                try:
                    tts = gTTS(text=answer)
                    audio_path = os.path.join(tempfile.gettempdir(), "answer.mp3")
                    tts.save(audio_path)
                    st.audio(audio_path, format="audio/mp3")
                except Exception as audio_err:
                    st.warning(f"π€ Text-to-Speech error: {audio_err}")

                with st.expander("π Context Used"):
                    st.code(context)
            except Exception as e:
                st.error(f"π¨ Error from Groq API: {e}")
else:
    st.info("π€ Please upload a PDF to begin.")
| |
|
| |
|