# app.py
"""Streamlit RAG question-answering app.

Pipeline: upload a PDF -> extract text -> split into word chunks ->
embed with sentence-transformers -> index in FAISS -> retrieve top
chunks for a user query -> answer with a Groq-hosted LLM.
"""

import os

import faiss
import numpy as np
import streamlit as st
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer

# --- SETUP GROQ ---
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# --- LOAD EMBEDDING MODEL ---
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# --- STREAMLIT UI ---
st.set_page_config(page_title="RAG App with Groq", layout="wide")
st.title("📄🧠 RAG-Based Question Answering App")


def chunk_text(text, chunk_size=500):
    """Split *text* into chunks of at most *chunk_size* whitespace-separated words.

    Returns a list of strings; empty input yields an empty list.
    """
    words = text.split()
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]


# --- UPLOAD PDF ---
uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

if uploaded_file:
    reader = PdfReader(uploaded_file)
    # extract_text() may return None for image-only/scanned pages; the
    # original `text += page.extract_text()` crashed with TypeError there.
    text = "".join((page.extract_text() or "") for page in reader.pages)

    # --- CHUNKING ---
    chunks = chunk_text(text)
    if not chunks:
        # Guard: an empty chunk list would make embeddings[0] raise IndexError.
        st.warning("No extractable text found in this PDF.")
        st.stop()

    # --- VECTORIZE CHUNKS ---
    # FAISS requires float32 input; cast explicitly rather than relying on
    # the encoder's output dtype.
    embeddings = np.asarray(embedder.encode(chunks), dtype="float32")
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    st.success("✅ Document uploaded and indexed!")

    # --- USER QUERY ---
    user_query = st.text_input("Ask something about the document")
    if user_query:
        query_embedding = np.asarray(embedder.encode([user_query]), dtype="float32")
        # Clamp k to the number of stored vectors: asking FAISS for more
        # neighbors than exist returns -1 indices, which the original code
        # silently mapped to chunks[-1] (the wrong chunk).
        k = min(3, len(chunks))
        _, I = index.search(query_embedding, k=k)
        retrieved_chunks = "\n\n".join(chunks[i] for i in I[0] if i != -1)

        # --- GROQ COMPLETION ---
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {"role": "system", "content": "You are a helpful assistant who answers based on the provided context."},
                {"role": "user", "content": f"Context: {retrieved_chunks}\n\nQuestion: {user_query}"},
            ],
        )
        st.subheader("📢 Answer")
        st.write(response.choices[0].message.content)