"""Blood-test PDF analyzer.

Extracts parameter/value pairs from an uploaded blood-test PDF, flags values
outside configured thresholds, retrieves related reference text via a FAISS
index, and asks a Llama model for advice. Served through a Gradio interface.
"""
import os
import time
import re

import requests
import pandas as pd
import faiss
import gradio as gr
import PyPDF2
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer

# Authentication: the token is read from the environment and passed directly to
# `from_pretrained`. The previous interactive `huggingface-cli login` call (and
# the root-requiring, no-op `apt-get update`) blocked headless runs and was
# redundant, so both were removed.
API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Load the Llama-3.2-3B-Instruct model and tokenizer.
MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"
print("Loading Llama model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=API_TOKEN)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, token=API_TOKEN)

# Sentence encoder used to embed both the documents and the queries for FAISS.
encoder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Paths for required files.
INDEX_PATH = "blood_test_index.faiss"
CSV_PATH = "rag_documents.csv"


def generate_faiss_index():
    """Build a FAISS L2 index over the 'Content' column of CSV_PATH.

    Raises:
        ValueError: if the CSV lacks a 'Content' column.
    """
    df = pd.read_csv(CSV_PATH)
    if "Content" not in df.columns:
        raise ValueError("The CSV file must contain a 'Content' column.")
    print("Encoding sentences...")
    embeddings = encoder.encode(df["Content"].tolist()).astype("float32")
    print("Creating FAISS index...")
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    faiss.write_index(index, INDEX_PATH)
    print("FAISS index generated successfully!")


# Build the index on first run; afterwards it is loaded from disk.
if not os.path.exists(INDEX_PATH):
    print("Generating FAISS index...")
    if not os.path.exists(CSV_PATH):
        raise FileNotFoundError(f"The required file '{CSV_PATH}' is missing!")
    generate_faiss_index()

print("Loading FAISS index...")
index = faiss.read_index(INDEX_PATH)
rag_df = pd.read_csv(CSV_PATH)


def load_thresholds(file_path="blood_test_thresholds.csv"):
    """Load per-parameter reference ranges from a CSV.

    The CSV must have columns: Parameter, Low, High, Unit.

    Returns:
        dict mapping parameter name -> {"low": float, "high": float, "unit": str}.
    """
    df = pd.read_csv(file_path)
    thresholds = {}
    for _, row in df.iterrows():
        thresholds[row["Parameter"]] = {
            "low": row["Low"],
            "high": row["High"],
            "unit": row["Unit"],
        }
    return thresholds


def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at `pdf_path`.

    Pages with no extractable text contribute an empty string
    (`extract_text()` may return None).
    """
    text = ""
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        for page in reader.pages:
            text += page.extract_text() or ""
    return text


def retrieve_context(query, index, document_df):
    """Return the 3 nearest document contents for `query`, joined by spaces.

    FAISS pads results with -1 when the index holds fewer than k vectors;
    those sentinels are skipped so we never mis-index the DataFrame
    (iloc[-1] would silently return the last row).
    """
    query_vector = encoder.encode([query]).astype("float32")
    distances, indices = index.search(query_vector, k=3)
    results = [document_df.iloc[i]["Content"] for i in indices[0] if i >= 0]
    return " ".join(results)


def generate_response_with_llama(flagged_abnormality, context):
    """Generate medical advice text for one flagged abnormality.

    Only the newly generated tokens are returned, not the prompt.
    """
    prompt = (
        f"Flagged Abnormality: {flagged_abnormality}\n"
        f"Context: {context[:300]}.\n"
        f"Provide specific and actionable medical advice for the abnormality:"
    )
    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        # max_new_tokens (not max_length) so the prompt length does not eat
        # into the generation budget — with max_length=150 and a ~512-token
        # prompt the model produced nothing.
        max_new_tokens=150,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        # top_p/temperature are ignored by generate() unless sampling is on.
        do_sample=True,
        top_p=0.9,
        temperature=0.7,
    )
    # Strip the echoed prompt: decode only tokens past the input length.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)


def analyze_blood_report(extracted_text, thresholds):
    """Parse `extracted_text`, flag out-of-range parameters, get advice.

    Returns:
        (flagged, recommendations) — both dicts keyed by parameter name.
        `flagged` values look like "Low (3.1 g/dL)" / "High (...)";
        `recommendations` values are model-generated advice strings.
    """
    lines = [line.strip() for line in extracted_text.split("\n") if line.strip()]
    blood_data = {}
    # Matches "<name> [:|-] <number> [unit]", e.g. "Hemoglobin: 13.5 g/dL".
    regex = r"([A-Za-z\s\#]+)\s*[:\-]?\s*([\d.]+)\s*([a-zA-Z/\s]*)"
    for line in lines:
        match = re.search(regex, line)
        if match:
            param = match.group(1).strip()
            value_str = match.group(2).strip()
            unit = match.group(3).strip()
            try:
                value = float(value_str)
                blood_data[param] = {"value": value, "unit": unit}
            except ValueError:
                # Non-numeric capture (e.g. stray dots) — skip the line.
                continue

    flagged = {}
    recommendations = {}
    for parameter, data in blood_data.items():
        if parameter in thresholds:
            value, unit = data["value"], data["unit"]
            if value < thresholds[parameter]["low"]:
                flagged[parameter] = f"Low ({value} {unit})"
            elif value > thresholds[parameter]["high"]:
                flagged[parameter] = f"High ({value} {unit})"

    for param, status in flagged.items():
        query = f"The blood test result for {param} is {status}."
        context = retrieve_context(query, index, rag_df)
        recommendations[param] = generate_response_with_llama(flagged[param], context)

    return flagged, recommendations


def process_pdf(pdf_path):
    """Gradio entry point: analyze the PDF at `pdf_path` and return a JSON-able dict."""
    thresholds = load_thresholds()
    extracted_text = extract_text_from_pdf(pdf_path)
    flagged, recommendations = analyze_blood_report(extracted_text, thresholds)
    return {"Flagged Abnormalities": flagged, "Recommendations": recommendations}


interface = gr.Interface(
    fn=process_pdf,
    inputs=gr.File(type="filepath", label="Upload Blood Test PDF"),
    outputs="json",
    title="Blood Test Analyzer (PDF)",
    description=(
        "Upload a PDF blood test report. This tool extracts data, flags "
        "abnormalities, and provides medical recommendations."
    ),
)

if __name__ == "__main__":
    interface.launch(share=True)