import hashlib
import os

import fitz  # PyMuPDF
import gradio as gr
from dotenv import load_dotenv
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from openai import OpenAI

# Load environment variables (must run before the key is read below).
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")  # None when the variable is not set

# Initialize the OpenAI client used by generate_response().
client = OpenAI(api_key=api_key)

# Path of the PDF that questions are answered from. Replace the file name with
# your own PDF; a relative path is resolved against the working directory the
# app is started from.
PDF_FILE = "Resume_Pratiksha.pdf"  # e.g. "Company_HR_Policy.pdf"

# Utility Functions
def load_pdf(file_path):
    """Return the text of every page of the PDF at *file_path*, concatenated.

    Nothing is raised to the caller: if opening or reading the document
    fails, the exception is converted into a string that starts with
    "Error reading PDF: " and that string is returned instead.
    """
    try:
        with fitz.open(file_path) as pdf:
            page_texts = []
            for page in pdf:
                page_texts.append(page.get_text())
            # Pages are glued together with no separator between them.
            return "".join(page_texts)
    except Exception as exc:
        return f"Error reading PDF: {exc}"

def split_text(text, chunk_size=1000, chunk_overlap=20):
    """Cut *text* into overlapping chunks and wrap them as documents.

    Args:
        text: The full text to split.
        chunk_size: Upper bound on chunk length, measured with ``len``.
        chunk_overlap: Number of characters shared by neighbouring chunks.

    Returns:
        The documents produced by
        ``RecursiveCharacterTextSplitter.create_documents`` for *text*.
    """
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
        "is_separator_regex": False,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    documents = splitter.create_documents([text])
    return documents

def create_and_load_db(chunks, persist_directory="pdf_embeddings"):
    """Embed *chunks* into a persistent Chroma store and return a handle to it.

    Args:
        chunks: Documents (objects exposing ``page_content``) to index.
        persist_directory: Directory in which Chroma keeps its data.

    Returns:
        A ``Chroma`` instance opened on *persist_directory* with the same
        embedding function that was used for indexing.
    """
    embeddings = HuggingFaceEmbeddings()

    # Give every chunk a deterministic id (position + content hash). Without
    # explicit ids each call stores the chunks under fresh random ids, so
    # indexing the same PDF again keeps appending duplicate entries to the
    # persistent directory and the duplicates crowd the search results.
    # The position prefix keeps ids unique when two chunks have equal text.
    chunk_ids = [
        "{}-{}".format(
            index,
            hashlib.sha256(
                chunk.page_content.encode("utf-8", errors="replace")
            ).hexdigest(),
        )
        for index, chunk in enumerate(chunks)
    ]

    vectordb = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        ids=chunk_ids,
        persist_directory=persist_directory,
    )
    # NOTE(review): persist() appears to be deprecated in recent
    # langchain_community releases (Chroma >= 0.4 persists automatically);
    # kept so older Chroma versions still flush to disk — confirm.
    vectordb.persist()
    return Chroma(persist_directory=persist_directory, embedding_function=embeddings)

def generate_response(context, question):
    """Ask the OpenAI chat model to answer *question* using *context*.

    Args:
        context: Text retrieved from the PDF that should ground the answer.
        question: The user's question.

    Returns:
        The model's reply with surrounding whitespace stripped. If anything
        goes wrong, a string starting with "Error generating response: " is
        returned instead of raising.
    """
    try:
        system_message = {
            "role": "system",
            "content": "You are an assistant that answers questions based on PDF content.",
        }
        user_message = {
            "role": "user",
            "content": f"Context: {context}\n\nQuestion: {question}\n\nAnswer:",
        }
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",  # Replace with preferred model
            messages=[system_message, user_message],
            max_tokens=150,
        )
        answer = completion.choices[0].message.content
        return answer.strip()
    except Exception as exc:
        return f"Error generating response: {exc}"

# Vector stores that were already built, keyed by PDF path. Each value is a
# (modification_time, vectordb) pair so a changed file is re-indexed.
_VECTORDB_CACHE = {}

# Prefix load_pdf() puts in front of the message it returns on failure.
_PDF_ERROR_PREFIX = "Error reading PDF:"


def _build_vectordb(pdf_path):
    """Index the PDF; return ``(vectordb, None)`` or ``(None, error_message)``."""
    pdf_text = load_pdf(pdf_path)
    # Match the full prefix so a document whose own text merely begins with
    # the word "Error" is not mistaken for a read failure.
    if pdf_text.startswith(_PDF_ERROR_PREFIX):
        return None, pdf_text
    if not pdf_text.strip():
        return None, "No extractable text found in the PDF."

    chunks = split_text(pdf_text)
    if not chunks:
        return None, "No extractable text found in the PDF."

    try:
        return create_and_load_db(chunks), None
    except Exception as e:
        return None, f"Error building the vector index: {e}"


def _get_vectordb(pdf_path):
    """Return ``(vectordb, error_message)``, reusing a cached store if current."""
    try:
        modified = os.path.getmtime(pdf_path)
    except OSError:
        # Missing or unreadable file: skip the cache and let load_pdf()
        # produce the user-facing error message.
        modified = None

    cached = _VECTORDB_CACHE.get(pdf_path)
    if modified is not None and cached is not None and cached[0] == modified:
        return cached[1], None

    vectordb, error = _build_vectordb(pdf_path)
    if error is None and modified is not None:
        _VECTORDB_CACHE[pdf_path] = (modified, vectordb)
    return vectordb, error


def process_question(question):
    """Answer *question* from the contents of ``PDF_FILE``.

    The PDF is parsed, chunked and embedded only the first time it is needed
    (and again if the file's modification time changes); later questions
    reuse the same vector store instead of re-indexing the document.

    Args:
        question: The user's question as entered in the UI.

    Returns:
        The generated answer, or a human-readable message when the question
        is blank, the PDF cannot be read or indexed, nothing relevant is
        found, or the search / generation step fails. Never raises.
    """
    # Reject missing and whitespace-only questions alike.
    if not question or not question.strip():
        return "Please provide a question."

    # Steps 1-3: load the PDF, split it and build (or reuse) the vector store.
    vectordb, error = _get_vectordb(PDF_FILE)
    if error is not None:
        return error

    # Step 4: Perform similarity search
    try:
        docs = vectordb.similarity_search(question)
        if not docs:
            return "No relevant information found."

        # Step 5: Generate a response using the best-matching chunk as context
        context = docs[0].page_content
        return generate_response(context, question)
    except Exception as e:
        return f"Error during similarity search or response generation: {e}"

# Gradio UI: a question box and a read-only answer box side by side, with a
# submit button underneath.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Chatbot")

    with gr.Row():
        question_input = gr.Textbox(
            label="Ask a Question",
            placeholder="Enter your question here...",
        )
        output = gr.Textbox(
            label="Answer",
            lines=5,
            interactive=False,
        )

    submit_button = gr.Button("Submit")
    # Clicking the button passes the question text to process_question and
    # shows the string it returns in the answer box.
    submit_button.click(
        fn=process_question,
        inputs=[question_input],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()