"""Medical lab-report interpreter.

Extracts structured lab results from an uploaded PDF, retrieves the closest
knowledge-base guideline per result via FAISS, and asks a LLaMA chat model
for additional advice. Served through a Gradio UI.
"""

import json
import os
import re

import faiss
import gradio as gr
import numpy as np
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer

# ---------------------------------------------------------------------------
# Knowledge base + FAISS retrieval index
# ---------------------------------------------------------------------------
with open("knowledge_base.json", "r", encoding="utf-8") as file:
    kb = json.load(file)

embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
kb_texts = [f"{item['Component']} {item['Range']} {item['Advice']}" for item in kb]
# FAISS requires contiguous float32 vectors.
kb_embeddings = np.asarray(embedding_model.encode(kb_texts), dtype="float32")
index = faiss.IndexFlatL2(kb_embeddings.shape[1])
index.add(kb_embeddings)

# ---------------------------------------------------------------------------
# LLM. The gated model is authenticated with the HUGGINGFACEHUB_API_TOKEN
# environment variable; the previous interactive `huggingface-cli login`
# call was removed because it blocks on any non-TTY deployment.
# ---------------------------------------------------------------------------
llama_model_name = "meta-llama/Llama-3.2-3B-Instruct"
API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
tokenizer = AutoTokenizer.from_pretrained(llama_model_name, token=API_TOKEN)
llm = AutoModelForCausalLM.from_pretrained(llama_model_name, token=API_TOKEN)


def generate_advice(extracted_data):
    """Generate per-component advice with FAISS retrieval + the LLM.

    Parameters
    ----------
    extracted_data : list[dict]
        Rows produced by ``pdf_to_text`` (keys: Component, Value, Units, Status).

    Returns
    -------
    list[dict]
        ``{"Component": ..., "Advice": ...}`` per input row, or a one-element
        ``[{"error": ...}]`` list if anything fails.
    """
    try:
        recommendations = []
        for item in extracted_data:
            query = f"{item['Component']} {item['Status']}"
            print(f"Processing Query: {query}")  # Debugging step

            # Query embedding must be float32 and shaped (1, dim) for FAISS.
            query_embedding = np.asarray(
                embedding_model.encode([query]), dtype="float32"
            ).reshape(1, -1)

            # Nearest knowledge-base entry (k=1).
            _, idx = index.search(query_embedding, 1)
            best_match = kb[idx[0][0]]

            role = "Medical expert providing advice based on lab results."
            prompt = f"""
            Lab Test: {item['Component']}
            Value: {item['Value']} {item['Units']}
            Status: {item['Status']}
            Medical Guidelines: {best_match['Advice']}
            Provide additional insights or recommendations.
            """

            messages = [
                {"role": "system", "content": role},
                {"role": "user", "content": prompt},
            ]
            # FIX: with tokenize=True and return_tensors="pt",
            # apply_chat_template returns a tensor of token ids, NOT a dict —
            # indexing it with ["input_ids"] raised TypeError.
            input_ids = tokenizer.apply_chat_template(
                messages,
                tokenize=True,
                add_generation_prompt=True,
                return_tensors="pt",
            )
            # FIX: max_length counts prompt tokens too, so a long prompt left
            # no room to generate; max_new_tokens bounds only the completion.
            output = llm.generate(
                input_ids=input_ids,
                max_new_tokens=150,
                num_return_sequences=1,
            )
            # FIX: decode only the generated suffix; decoding output[0] in
            # full echoed the whole prompt back as "advice".
            advice = tokenizer.decode(
                output[0][input_ids.shape[-1]:], skip_special_tokens=True
            ).strip()
            recommendations.append({"Component": item["Component"], "Advice": advice})
        return recommendations
    except Exception as e:
        print(f"Error: {e}")  # Debugging any unexpected issues
        return [{"error": f"Exception occurred: {str(e)}"}]


def pdf_to_text(pdf_file):
    """Extract structured lab rows from an uploaded PDF.

    Parameters
    ----------
    pdf_file : str | file-like
        Path to the PDF (``gr.File(type="filepath")`` passes a str) or an
        object exposing ``.name``.

    Returns
    -------
    list[dict] | str
        One dict per matched row (Component, Value, Min, Max, Units, Status),
        or a human-readable message string on failure.
    """
    try:
        # FIX: gr.File(type="filepath") hands us a plain path string; the old
        # unconditional `pdf_file.name` raised AttributeError on every upload.
        path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
        reader = PdfReader(path)
        # FIX: extract_text() may return None (e.g. image-only pages);
        # coalesce to "" so concatenation cannot raise TypeError.
        text = "".join(page.extract_text() or "" for page in reader.pages)

        # Rows look like: NAME VALUE MIN MAX UNITS STATUS
        pattern = r"(\w+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\w/%]+)\s+(\w+)"
        matches = re.findall(pattern, text)
        if matches:
            return [
                {
                    "Component": m[0],
                    "Value": float(m[1]),
                    "Min": float(m[2]),
                    "Max": float(m[3]),
                    "Units": m[4],
                    "Status": m[5],
                }
                for m in matches
            ]
        return "No structured data found in the PDF."
    except Exception as e:
        return f"Error: {e}"


def main():
    """Build and launch the Gradio interface."""
    with gr.Blocks() as app:
        gr.Markdown("## Medical Test Interpreter with RAG and LLM")
        with gr.Row():
            pdf_input = gr.File(label="Upload PDF", type="filepath")
            structured_data = gr.JSON(label="Extracted Structured Data")
            advice_output = gr.JSON(label="Generated Advice")
        extract_button = gr.Button("Extract Data")
        interpret_button = gr.Button("Get Advice")
        extract_button.click(pdf_to_text, inputs=pdf_input, outputs=structured_data)
        interpret_button.click(
            generate_advice, inputs=structured_data, outputs=advice_output
        )
    app.launch()


# Run the app
if __name__ == "__main__":
    main()