"""HydroGen-AI: Streamlit app for techno-economic analysis of green hydrogen.

Pipeline: embed a user-built query with SentenceTransformers, retrieve the
top-k relevant snippets from a FAISS index (RAG), and generate the analysis
with Llama 3 via the Groq API.
"""

import os

import faiss
import numpy as np
import streamlit as st
from groq import Groq
from sentence_transformers import SentenceTransformer

# Sample knowledge base (replace with a preprocessed dataset from IEA, IRENA, etc.).
data = [
    "Alkaline electrolysis efficiency is 65-70%.",
    "PEM electrolysis has higher efficiency, around 75-80%, but is more expensive.",
    "SOEC electrolysis can reach efficiencies above 85% when using waste heat.",
    "The cost of electrolysis depends on electricity price, water source, and system efficiency.",
    "Using solar energy can lower hydrogen production costs in sunny regions.",
]

INDEX_PATH = "faiss_index.idx"

# Single embedding model instance, shared by index construction and query encoding.
# (The original constructed the model twice and read the index file unconditionally
# before the existence check, crashing on first run — both duplications removed.)
model = SentenceTransformer('all-MiniLM-L6-v2')

# Load the FAISS index from disk, or build and persist it on first run.
if os.path.exists(INDEX_PATH):
    index = faiss.read_index(INDEX_PATH)
else:
    embeddings = model.encode(data).astype('float32')
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    faiss.write_index(index, INDEX_PATH)

# SECURITY: the API key must be supplied via the environment
# (e.g. `export GROQ_API_KEY=...`) — never hard-code secrets in source control.
# The previously committed key should be considered compromised and rotated.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))


def rag_pipeline(query: str) -> str:
    """Answer *query* using retrieval-augmented generation.

    Embeds the query, retrieves the top-3 nearest documents from the FAISS
    index, injects them as context into the system prompt, and returns the
    Llama 3 completion text from the Groq API.
    """
    query_embedding = model.encode([query]).astype('float32')

    # FAISS pads missing results with index -1; `-1 < len(data)` is True in
    # Python and data[-1] would silently return the LAST document, so hits
    # must be bounds-checked on both sides.
    _, I = index.search(query_embedding, k=3)
    retrieved_docs = [data[i] for i in I[0] if 0 <= i < len(data)]
    context = " ".join(retrieved_docs)

    response = client.chat.completions.create(
        messages=[
            {"role": "system", "content": f"Context: {context}"},
            {"role": "user", "content": query},
        ],
        model="llama3-70b-8192",
    )
    return response.choices[0].message.content


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
st.set_page_config(page_title="HydroGen-AI", layout="wide")
st.title("🔬 HydroGen-AI: Green Hydrogen Techno-Economic Analyzer")
st.markdown(
    "🚀 **A Gen-AI powered tool for techno-economic analysis of green hydrogen production** "
    "using **retrieval-augmented generation (RAG)** and Llama 3 via **Groq API**."
)

# User input fields (sidebar).
st.sidebar.header("🔧 Input Parameters")
water_source = st.sidebar.selectbox("Water Source", ["Atmospheric", "Seawater", "Groundwater", "Municipal"])
water_cost = st.sidebar.number_input("Water Cost ($/m³)", min_value=0.0, value=0.5)
purification_cost = st.sidebar.number_input("Water Purification Cost ($/m³)", min_value=0.0, value=0.1)
water_quantity = st.sidebar.number_input("Water Quantity (m³)", min_value=0.1, value=10.0)
method = st.sidebar.selectbox("Electrolysis Method", ["Alkaline", "PEM", "SOEC"])
energy_input = st.sidebar.number_input("Energy Input (kWh)", min_value=0.0, value=100.0)
energy_source = st.sidebar.selectbox("Energy Source", ["Solar", "Wind", "Hydro", "Grid"])

# Run the analysis when requested and render the result.
if st.sidebar.button("Analyze"):
    query = (f"Analyze hydrogen production using {water_quantity} m³ of {water_source} water "
             f"with {method} electrolysis. Water cost: {water_cost}$, purification cost: {purification_cost}$, "
             f"energy input: {energy_input} kWh from {energy_source}. Provide cost breakdown, efficiency, "
             f"and best production method.")
    result = rag_pipeline(query)

    st.subheader("📊 Analysis Result")
    st.write(result)
    st.success("✅ Analysis completed successfully!")