import glob
import json
import os
import random
from datetime import datetime

import pandas as pd
from dotenv import load_dotenv

# Core AI Libraries
from sentence_transformers import SentenceTransformer, util
from groq import Groq
from datasets import load_dataset, Dataset

# Load environment variables
load_dotenv()

# Initialize Groq client
groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Load similarity model (small paraphrase model; used only for cosine matching)
similarity_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Config
HF_DATASET_REPO = "midrees2806/unmatched_queries"
HF_TOKEN = os.getenv("HF_TOKEN")

# Greeting list — matched by exact equality against the lowercased input
GREETINGS = [
    "hi", "hello", "hey", "good morning", "good afternoon", "good evening",
    "assalam o alaikum", "salam", "aoa", "hi there", "hey there", "greetings",
]

# Load multiple JSON datasets.
# Each file under datasets/ is expected to hold a list of
# {"Question": ..., "Answer": ...} dicts; anything else is skipped.
# Errors are isolated per file so one malformed file does not abort the rest.
dataset = []
for file_path in glob.glob('datasets/*.json'):
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        if isinstance(data, list):
            dataset.extend(
                item for item in data
                if isinstance(item, dict) and 'Question' in item and 'Answer' in item
            )
    except Exception as e:
        print(f"Error loading datasets: {e}")

# Precompute question embeddings once at startup so each user query only
# needs a single encode + cosine-similarity pass.
dataset_questions = [item.get("Question", "").lower().strip() for item in dataset]
dataset_answers = [item.get("Answer", "") for item in dataset]
dataset_embeddings = similarity_model.encode(dataset_questions, convert_to_tensor=True)


def manage_unmatched_queries(query: str) -> None:
    """Record an unanswered user query in the Hugging Face dataset repo.

    Loads the existing ``train`` split (or starts an empty frame if the repo
    is missing/unreadable), appends the query with a timestamp if it is not
    already present, and pushes the updated dataset back to the Hub.

    Best-effort: any failure is logged and swallowed so the chat flow is
    never interrupted by logging problems.

    Args:
        query: The raw user query that had no verified dataset match.
    """
    try:
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        try:
            ds = load_dataset(HF_DATASET_REPO, token=HF_TOKEN)
            df = ds["train"].to_pandas()
        except Exception:
            # Repo may not exist yet (first run) — start a fresh log.
            df = pd.DataFrame(columns=["Query", "Timestamp", "Processed"])

        # Only push when the query is genuinely new, to avoid duplicate rows
        # and needless Hub uploads.
        if query not in df["Query"].values:
            new_entry = {"Query": query, "Timestamp": timestamp, "Processed": False}
            df = pd.concat([df, pd.DataFrame([new_entry])], ignore_index=True)
            updated_ds = Dataset.from_pandas(df)
            updated_ds.push_to_hub(HF_DATASET_REPO, token=HF_TOKEN)
    except Exception as e:
        print(f"Failed to save query: {e}")


def query_groq_llm(prompt):
    """Send *prompt* to the Groq chat API and return the full response text.

    Streams the completion and concatenates the chunks; returns the stripped
    response string, or ``None`` if the API call fails.

    Args:
        prompt: The user-role prompt to send.

    Returns:
        The assistant's reply as a string, or ``None`` on error.
    """
    try:
        # Llama-3.3-70b-versatile with streaming enabled
        completion = groq_client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {
                    "role": "system",
                    "content": "You are the official UOE AI Assistant. Your goal is to provide highly structured, professional, and easy-to-read information about the University of Education Lahore."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            temperature=1,
            max_completion_tokens=1024,
            top_p=1,
            stream=True,
            stop=None
        )
        full_response = ""
        for chunk in completion:
            # delta.content is None on some chunks (e.g. the final one).
            content = chunk.choices[0].delta.content or ""
            full_response += content
        return full_response.strip()
    except Exception as e:
        print(f"Groq API Error: {e}")
        return None


def get_best_answer(user_input):
    """Answer a user query about the University of Education Lahore.

    Pipeline:
      1. Reject empty input; short-circuit on exact greetings.
      2. Require at least 3 words for a substantive query.
      3. Cosine-match the query against the precomputed dataset embeddings.
      4. Score >= 0.65 → ask the LLM to reformat the verified answer;
         otherwise log the query as unmatched and ask the LLM to answer
         from general knowledge with a forwarding note.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        A response string, or ``None`` if the LLM call fails.
    """
    if not user_input.strip():
        return "Please enter a valid question."

    user_input_lower = user_input.lower().strip()

    # Greeting Handling (exact match only)
    if any(greet == user_input_lower for greet in GREETINGS):
        return "Hello! I am the UOE AI Assistant. How can I help you regarding admissions, fees, or programs today?"

    # Length check
    if len(user_input_lower.split()) < 3 and not any(greet in user_input_lower for greet in GREETINGS):
        return "Please provide more details. I need at least 3 words to understand your query properly."

    # # Direct Fee Link (intentionally disabled — kept for reference)
    # if any(kw in user_input_lower for kw in ["fee structure", "fees structure", "semester fees", "semester fee"]):
    #     return (
    #         "💰 **Official Fee Structure**\n\n"
    #         "For the most accurate and up-to-date fee details, please visit the official University of Education link:\n"
    #         "🔗 https://ue.edu.pk/allfeestructure.php"
    #     )

    # Similarity Calculation.
    # Guard against an empty dataset: argmax on an empty tensor raises, so
    # with no verified Q/A pairs we fall straight through to the
    # general-knowledge path.
    best_score = 0.0
    best_match_idx = -1
    if dataset_questions:
        user_embedding = similarity_model.encode(user_input_lower, convert_to_tensor=True)
        similarities = util.pytorch_cos_sim(user_embedding, dataset_embeddings)[0]
        best_match_idx = similarities.argmax().item()
        best_score = similarities[best_match_idx].item()

    if best_score >= 0.65:
        # --- PATH 1: DATASET MATCH (Enhanced Formatting) ---
        original_answer = dataset_answers[best_match_idx]

        prompt = f"""You are the official University of Education (UOE) Assistant. I have found a verified answer in our database. Please rephrase it to make it extremely professional and user-friendly.

STRICT GUIDELINES:
1. Use clear **Headings**.
2. Use **Bullet Points** for lists.
3. If there is data like timing, criteria, or contact info, present it in a **Markdown Table**.
4. Bold important keywords.
5. Maintain a polite and welcoming tone.

User Question: {user_input}
Verified Data: {original_answer}

Enhanced Response:"""
    else:
        # --- PATH 2: GENERAL KNOWLEDGE (with Forwarding Note) ---
        manage_unmatched_queries(user_input)

        prompt = f"""You are the UOE AI Assistant for University of Education Lahore.
The user asked: "{user_input}".
This query is not in our verified database.

Instructions:
1. Answer based on your general knowledge about UOE Lahore.
2. Format the response with headings and bold text.
3. At the end, you MUST add this exact text:
"📢 *Note: Your query has been forwarded to our support team. We are currently updating our verified database to provide a verified answer next time.*"
4. List official contacts: Website: https://ue.edu.pk | Phone: +92-42-99262231-33"""

    return query_groq_llm(prompt)