import os

import requests
import streamlit as st
from PyPDF2 import PdfReader
from langchain_community.vectorstores import FAISS
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from groq import Groq

# Hardcoded Google Drive link to the source PDF document.
GOOGLE_DRIVE_LINK = "https://drive.google.com/file/d/1wv5gbGP0SA15BzoNUxprXhYx0jHhPgHl/view?usp=sharing"


def download_pdf():
    """Download the PDF behind GOOGLE_DRIVE_LINK to ./document.pdf.

    Returns:
        str: The local path of the downloaded file ("document.pdf").

    Raises:
        requests.exceptions.RequestException: On network failure or a
            non-2xx HTTP response.
    """
    # Extract the file id from the share URL and build a direct-download URL.
    file_id = GOOGLE_DRIVE_LINK.split("/d/")[1].split("/view")[0]
    url = f"https://drive.google.com/uc?id={file_id}&export=download"
    # NOTE(review): very large Drive files return a virus-scan confirmation
    # page instead of the file itself — confirm this file stays small enough.
    response = requests.get(url, timeout=60)
    # Fail loudly instead of silently saving an HTML error page as a "PDF".
    response.raise_for_status()
    with open("document.pdf", "wb") as f:
        f.write(response.content)
    return "document.pdf"


def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in *pdf_file*.

    Args:
        pdf_file: Path (or file-like object) of the PDF to read.

    Returns:
        str: All extracted page text joined together. Pages with no
        extractable text (e.g. scanned images) contribute nothing.
    """
    reader = PdfReader(pdf_file)
    text = ""
    for page in reader.pages:
        # extract_text() can return None for image-only pages; guard it
        # so concatenation never raises TypeError.
        text += page.extract_text() or ""
    return text


def create_vector_db(text):
    """Split *text* into chunks and index them in a FAISS vector store.

    Args:
        text: The full document text to index.

    Returns:
        FAISS: A vector store of 500-character chunks (50-char overlap)
        embedded with the all-MiniLM-L6-v2 sentence-transformer model.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_text(text)

    # Use Hugging Face embeddings (local sentence-transformer, no API key).
    model_name = "all-MiniLM-L6-v2"
    embeddings = HuggingFaceEmbeddings(model_name=model_name)

    vector_db = FAISS.from_texts(chunks, embeddings)
    return vector_db


def query_groq_api(query, context, model="llama-3.3-70b-versatile"):
    """Ask the Groq chat-completions API *query* grounded in *context*.

    Args:
        query: The user's question.
        context: Retrieved document text to ground the answer in.
        model: Groq model identifier (default "llama-3.3-70b-versatile").

    Returns:
        str: The model's answer, or a human-readable error string —
        network and HTTP errors are reported, not raised, so the
        Streamlit UI can display them directly.
    """
    # Read the key from the environment; never hardcode secrets in source.
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        # Matches the function's error-string convention below.
        return "Error: GROQ_API_KEY environment variable is not set."

    # Groq's OpenAI-compatible chat-completions endpoint.
    url = "https://api.groq.com/openai/v1/chat/completions"

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    data = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are an intelligent assistant."},
            {"role": "user", "content": f"Context: {context}\nQuestion: {query}"}
        ],
    }

    try:
        response = requests.post(url, headers=headers, json=data, timeout=60)
        response.raise_for_status()  # Raise an error for bad responses

        result = response.json()
        # Defensive extraction: fall back to a placeholder rather than
        # raising KeyError/IndexError on an unexpected response shape.
        return result.get("choices", [{}])[0].get("message", {}).get("content", "No response.")
    except requests.exceptions.RequestException as e:
        # Surface the error to the UI instead of crashing the app.
        return f"Error: {e}"


# ---------------------------------------------------------------------------
# Streamlit App
# ---------------------------------------------------------------------------
st.title("PDF Book Query and Response")

# Persistent state so the vector database survives Streamlit reruns.
if "vector_db" not in st.session_state:
    st.session_state.vector_db = None

# Process the hardcoded PDF link.
if st.button("Process PDF"):
    st.info("Downloading and processing the PDF...")
    pdf_file = download_pdf()
    pdf_text = extract_text_from_pdf(pdf_file)
    st.success("PDF processed successfully!")

    # Create FAISS vector database.
    st.info("Creating vector database...")
    st.session_state.vector_db = create_vector_db(pdf_text)
    st.success("Vector database created!")

# Query the document once the index exists.
if st.session_state.vector_db:
    user_query = st.text_input("Ask a question about the document:")
    if st.button("Submit Query"):
        with st.spinner("Processing your query..."):
            # Retrieve the 3 most similar chunks as grounding context.
            similar_docs = st.session_state.vector_db.similarity_search(user_query, k=3)
            context = " ".join([doc.page_content for doc in similar_docs])

            # Send query with context to Groq API.
            response = query_groq_api(user_query, context)
            st.write("**Answer:**", response)