"""Streamlit RAG (retrieval-augmented generation) app.

Upload PDFs, index their text in an in-memory Chroma vector store, and answer
questions about them with a Groq-hosted Llama 3.3 model.

Pipeline: PDF text extraction -> chunking -> sentence-transformer embeddings
-> Chroma similarity search -> LLM answer grounded in retrieved context.
"""

import streamlit as st
from PyPDF2 import PdfReader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
import os

# Page config
st.set_page_config(page_title="RAG Document Q&A", page_icon="📚", layout="wide")

# Title
st.title("📚 RAG Document Q&A System")
st.markdown("Upload PDFs and ask questions about them!")

# Sidebar for API key
with st.sidebar:
    st.header("⚙️ Configuration")
    api_key = st.text_input("Enter Groq API Key:", type="password")
    st.markdown("[Get free API key from Groq](https://console.groq.com/)")
    st.markdown("---")
    st.markdown("### About")
    st.markdown("This RAG system uses:")
    st.markdown("- 🤖 Groq (Llama 3.3)")
    st.markdown("- 🔍 Vector Search")
    st.markdown("- 📄 PDF Processing")

# Initialize session state (persists across Streamlit reruns)
if 'vectorstore' not in st.session_state:
    st.session_state.vectorstore = None
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []


def _extract_pdf_text(files) -> str:
    """Return the concatenated text of every page of every uploaded PDF.

    PyPDF2's ``extract_text()`` can return ``None`` for pages with no
    extractable text (e.g. scanned images); those are treated as empty
    strings instead of raising ``TypeError`` on concatenation. Pages are
    joined with newlines so words don't fuse across page boundaries, and
    ``str.join`` avoids quadratic ``+=`` accumulation.
    """
    pages = []
    for pdf_file in files:
        reader = PdfReader(pdf_file)
        for page in reader.pages:
            pages.append(page.extract_text() or "")
    return "\n".join(pages)


def _build_vectorstore(chunks):
    """Embed *chunks* with a MiniLM sentence-transformer and index them in Chroma."""
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return Chroma.from_texts(texts=chunks, embedding=embeddings)


# Main area
col1, col2 = st.columns([1, 2])

with col1:
    st.header("📤 Upload Documents")
    uploaded_files = st.file_uploader(
        "Upload PDF files",
        type=['pdf'],
        accept_multiple_files=True
    )

    if uploaded_files and api_key:
        if st.button("🔄 Process Documents", type="primary"):
            with st.spinner("Processing PDFs..."):
                try:
                    all_text = _extract_pdf_text(uploaded_files)

                    # Split into overlapping chunks for retrieval.
                    text_splitter = RecursiveCharacterTextSplitter(
                        chunk_size=1000,
                        chunk_overlap=200
                    )
                    chunks = text_splitter.split_text(all_text)

                    if not chunks:
                        # Guard: Chroma.from_texts fails confusingly on an
                        # empty corpus (e.g. all pages were scanned images).
                        st.warning("No extractable text found in the uploaded PDF(s).")
                    else:
                        st.session_state.vectorstore = _build_vectorstore(chunks)
                        st.success(f"✅ Processed {len(uploaded_files)} PDF(s) into {len(chunks)} chunks!")
                except Exception as e:
                    st.error(f"Error: {str(e)}")

with col2:
    st.header("💬 Ask Questions")

    if st.session_state.vectorstore and api_key:
        # Question input
        question = st.text_input("Ask a question about your documents:")

        if question:
            with st.spinner("Thinking..."):
                try:
                    # Setup LLM (ChatGroq reads the key from the environment)
                    os.environ["GROQ_API_KEY"] = api_key
                    llm = ChatGroq(
                        model="llama-3.3-70b-versatile",
                        temperature=0
                    )

                    # Retrieve the 3 most similar chunks as grounding context
                    docs = st.session_state.vectorstore.similarity_search(question, k=3)
                    context = "\n\n".join(doc.page_content for doc in docs)

                    # Create prompt
                    prompt = f"""Answer based only on this context:

{context}

Question: {question}

Answer:"""

                    # Get answer
                    answer = llm.invoke(prompt)

                    # Display answer
                    st.markdown("### 💡 Answer")
                    st.markdown(answer.content)

                    # Show sources
                    with st.expander("📚 View Sources"):
                        for i, doc in enumerate(docs, 1):
                            st.markdown(f"**Source {i}:**")
                            st.text(doc.page_content[:300] + "...")
                            st.markdown("---")

                    # Add to history
                    st.session_state.chat_history.append({
                        "question": question,
                        "answer": answer.content
                    })
                except Exception as e:
                    st.error(f"Error: {str(e)}")

        # Show chat history (most recent of the last 5 first)
        if st.session_state.chat_history:
            st.markdown("### 📜 Chat History")
            for i, chat in enumerate(reversed(st.session_state.chat_history[-5:]), 1):
                with st.expander(f"Q{i}: {chat['question'][:50]}..."):
                    st.markdown(f"**Q:** {chat['question']}")
                    st.markdown(f"**A:** {chat['answer']}")
    else:
        st.info("👈 Upload PDFs and enter API key to get started!")

# Footer
st.markdown("---")
st.markdown("Built with Streamlit, LangChain, and Groq 🚀")