import os
import streamlit as st
import fitz  # PyMuPDF
from google.cloud import language_v1
import requests
import json
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec

# Load the environment variables from the .env file
load_dotenv()
google_api_key = os.getenv('GOOGLE_API_KEY')
pinecone_api_key = os.getenv('PINECONE_API_KEY')

# Fail fast with a readable message: without a key every later request to
# the Google API would be sent unauthenticated and rejected.
if not google_api_key:
    st.error("GOOGLE_API_KEY is not set. Add it to your .env file.")
    st.stop()

# Initialize Pinecone
try:
    pc = Pinecone(api_key=pinecone_api_key)
except Exception as e:
    st.error(f"Error initializing Pinecone: {e}")
    st.stop()

# Name of the index that the rest of the script reads from and writes to.
index_name = 'pdf-analysis'

# Listing the indexes already talks to the Pinecone API, so it is guarded
# together with the creation call; either failure is reported in the UI
# instead of surfacing as an unhandled traceback.
try:
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=768,
            metric='euclidean',
            spec=ServerlessSpec(
                cloud='aws',
                region='us-west-2',
            ),
        )
except Exception as e:
    st.error(f"Error checking or creating Pinecone index: {e}")
    st.stop()

# Function to analyze entities and get embeddings using the API key
def get_embeddings(text, api_key, timeout=30):
    """Send *text* to the Cloud Natural Language ``analyzeEntities`` endpoint.

    Args:
        text: Plain-text content to analyze.
        api_key: Google API key used to authorize the request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response body, or ``None`` when the request fails or
        the body cannot be decoded as JSON. In the failure case the error is
        also reported to the user through ``st.error``.

    NOTE(review): despite the function name, the endpoint called here is
    entity analysis; its response is presumably a set of entity annotations
    rather than numeric embedding vectors. Confirm the intended embedding
    source against the Natural Language API reference.
    """
    url = "https://language.googleapis.com/v1/documents:analyzeEntities"
    headers = {
        "Content-Type": "application/json",
        # The key is sent as a header instead of a query parameter so that it
        # is not part of the URL that requests embeds in its error messages
        # (those messages are shown to the user below).
        "X-Goog-Api-Key": api_key,
    }
    data = {
        "document": {
            "type": "PLAIN_TEXT",
            "content": text,
        },
        "encodingType": "UTF8",
    }
    try:
        # A timeout keeps a stalled connection from hanging the app forever.
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException subclass.
        st.error(f"Error getting embeddings: {e}")
        return None

# Streamlit app
st.title("Chat with Your Document")
st.write("Upload a PDF file to chat with its content using Google's Language API and Pinecone.")

# File upload
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    # Top-level boundary: any unexpected failure while processing the upload
    # is reported in the UI rather than crashing the script.
    try:
        # Load the PDF file; the context manager closes the document as soon
        # as its text has been extracted.
        with fitz.open(stream=uploaded_file.read(), filetype="pdf") as pdf_document:
            pdf_text = "".join(page.get_text() for page in pdf_document)

        # A PDF without a text layer (e.g. scanned pages) yields no text;
        # there is nothing to analyze in that case.
        if not pdf_text.strip():
            st.warning("No extractable text was found in this PDF.")
            st.stop()

        # Get embeddings for the PDF text
        embeddings = get_embeddings(pdf_text, google_api_key)
        if embeddings is None:
            st.stop()

        # NOTE(review): get_embeddings() calls the analyzeEntities endpoint,
        # so each item below is presumably an entity record, not the
        # 768-dimensional float vector the index was created for. Confirm the
        # intended embedding source before relying on this upsert.
        entities = embeddings.get('entities') or []
        # The entity name is stored as metadata so query results have a text
        # to display; Pinecone matches carry no text of their own.
        vectors = [
            (str(i), entity, {"text": entity.get("name", "")})
            for i, entity in enumerate(entities)
        ]

        # Create or connect to Pinecone index
        index = pc.Index(index_name)
        if vectors:
            index.upsert(vectors=vectors)
        else:
            st.warning("Nothing was extracted from the document to index.")

        # Chat with the document
        user_input = st.text_input("Ask a question about the document:")
        if st.button("Ask"):
            if user_input:
                # Get embeddings for the user query
                user_query_embeddings = get_embeddings(user_input, google_api_key)
                if user_query_embeddings is None:
                    st.stop()

                query_entities = user_query_embeddings.get('entities') or []
                if not query_entities:
                    # Avoids an IndexError when the question yields no entities.
                    st.write("Nothing could be extracted from the question. Please rephrase it.")
                else:
                    # NOTE(review): 'name' is used as the query vector exactly
                    # as before; it looks like a string rather than a list of
                    # floats — confirm.
                    query_vector = query_entities[0]['name']

                    # Perform similarity search. The client expects keyword
                    # arguments; metadata is requested so the stored text
                    # comes back with each match.
                    results = index.query(
                        vector=query_vector,
                        top_k=5,
                        include_metadata=True,
                    )
                    lines = ["Relevant information from the document:"]
                    lines.extend(
                        f"Text: {match['metadata']['text']}, Score: {match['score']}"
                        for match in results['matches']
                    )
                    st.write("\n".join(lines).strip())
            else:
                st.write("Please enter a question to ask.")

        # Display the PDF text
        st.write("Extracted Text from PDF:")
        st.write(pdf_text)
    except Exception as e:
        st.error(f"Error processing PDF file: {e}")
##