"""Streamlit demo: OCR a PDF, chunk the text, index it in FAISS, query Groq.

Pipeline: upload PDF -> OCR each page (pytesseract) -> word-based chunking ->
store placeholder embeddings in a FAISS L2 index (GPU when available) ->
free-form Q&A against the Groq chat API.

NOTE(review): the FAISS index is never queried when answering — the Groq call
sends only the raw user question, so this is not yet true retrieval-augmented
generation. Confirm whether chunk retrieval should feed the prompt.
"""

import os

import faiss
import numpy as np
import pytesseract
import requests  # imported in the original; kept even though currently unused
import streamlit as st
from groq import Groq
from pdf2image import convert_from_path

# Groq client — requires GROQ_API_KEY in the environment.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))


def extract_text_from_pdf(pdf_path):
    """OCR every page of the PDF at *pdf_path* and return the concatenated text.

    Renders pages to images via pdf2image, then runs Tesseract on each page.
    """
    images = convert_from_path(pdf_path)
    # str.join instead of repeated "+=": avoids quadratic concatenation
    # on long documents while producing identical output.
    return "".join(pytesseract.image_to_string(page) for page in images)


def create_chunks(text, chunk_size=200):
    """Split *text* into chunks of at most *chunk_size* whitespace-separated words.

    Returns a list of strings; the final chunk may be shorter. Empty input
    yields an empty list.
    """
    words = text.split()
    return [
        " ".join(words[i:i + chunk_size])
        for i in range(0, len(words), chunk_size)
    ]


def is_gpu_available():
    """Return True when the installed FAISS build reports at least one GPU."""
    # faiss-cpu builds lack the GPU symbols entirely, so probe with hasattr
    # before calling get_num_gpus().
    return hasattr(faiss, "get_num_gpus") and faiss.get_num_gpus() > 0


def store_chunks_in_faiss(chunks):
    """Store one embedding per chunk in a FAISS L2 index and return the index.

    NOTE: embeddings are random placeholders for demonstration only —
    similarity search over this index is meaningless until a real embedding
    model replaces np.random.rand below.
    """
    vector_dim = 768  # assumed embedding dimensionality (e.g. BERT-base)
    index = faiss.IndexFlatL2(vector_dim)
    # Bug fix: the original called faiss.StandardGpuResources() unconditionally,
    # which raises AttributeError on CPU-only FAISS builds and crashes the app.
    # Only move the index to GPU 0 when a GPU is actually available.
    if is_gpu_available():
        res = faiss.StandardGpuResources()
        index = faiss.index_cpu_to_gpu(res, 0, index)
    embeddings = np.random.rand(len(chunks), vector_dim).astype("float32")
    index.add(embeddings)
    return index


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
st.title("PDF Content Chunking and Retrieval with FAISS-GPU")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
if uploaded_file:
    st.write("Processing the uploaded file...")
    # Persist the upload to disk because pdf2image works on file paths.
    with open("uploaded_file.pdf", "wb") as f:
        f.write(uploaded_file.getbuffer())

    extracted_text = extract_text_from_pdf("uploaded_file.pdf")
    st.text_area("Extracted Text", extracted_text, height=200)

    st.write("Creating chunks...")
    chunks = create_chunks(extracted_text)
    st.write(f"Total chunks created: {len(chunks)}")

    st.write("Storing chunks in FAISS...")
    index = store_chunks_in_faiss(chunks)
    if is_gpu_available():
        st.success("FAISS is using GPU resources!")
    else:
        st.warning("FAISS is running on CPU.")
    st.write("Chunks successfully stored in the FAISS index!")

# Q&A against the Groq chat API (currently independent of the FAISS index).
user_input = st.text_input("Ask a question about the content:")
if user_input:
    st.write("Sending query to Groq API...")
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": user_input}],
        model="llama-3.3-70b-versatile",
    )
    st.text_area(
        "Groq API Response",
        response.choices[0].message.content,
        height=100,
    )