import ast
import json
import os
from pathlib import Path

import gradio as gr
import hdbscan
import markdown
import openai
import umap
from openai import OpenAI
from sentence_transformers import SentenceTransformer

# ---------------------------------------------------------
# Load API key
# ---------------------------------------------------------
# NOTE(review): OpenAI() raises lazily — a missing OPENAI_API_KEY only
# surfaces on the first chat-completion call, not at import time.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


# ---------------------------------------------------------
# Input loader: handles raw text, file path, or uploaded file
# ---------------------------------------------------------
def load_input(source):
    """Return the text content of *source*.

    Accepts, in priority order:
      1. a string that is an existing file path -> file contents,
      2. any other string -> the string itself (raw notes),
      3. a file-like object with ``.read()`` (e.g. a Gradio/Colab upload)
         -> its bytes decoded as UTF-8.

    Raises:
        ValueError: if *source* is none of the above.
    """
    # Path string → load file.  Checked before the raw-text branch so a
    # string that happens to be a valid path is treated as a file.
    if isinstance(source, str) and os.path.isfile(source):
        with open(source, "r", encoding="utf-8") as f:
            return f.read()

    # Raw text
    if isinstance(source, str):
        return source

    # Uploaded file (Colab or Claude) — assumes UTF-8 encoded bytes.
    if hasattr(source, "read"):
        return source.read().decode("utf-8")

    raise ValueError("Unsupported input type. Pass text, file path, or uploaded file.")


# ---------------------------------------------------------
# Expand notes for better embedding semantic separation
# ---------------------------------------------------------
def expand_note(note: str) -> str:
    """Wrap a short note in an interpretive prompt so embeddings separate better."""
    return (
        f"This note says: '{note}'. "
        "Interpret it as a possible work task, personal task, reminder, idea, or question. "
        "Expand the hidden meaning so semantic embeddings become more distinguishable."
    )


# ---------------------------------------------------------
# Clustering with UMAP + tuned HDBSCAN
# ---------------------------------------------------------
# Cache the embedding model at module level: loading it from disk on every
# request is the dominant cost of cluster_embeddings().
_EMBEDDING_MODEL = None


def _get_embedding_model():
    """Lazily load and memoize the SentenceTransformer model."""
    global _EMBEDDING_MODEL
    if _EMBEDDING_MODEL is None:
        _EMBEDDING_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    return _EMBEDDING_MODEL


def cluster_embeddings(expanded_notes):
    """Cluster expanded notes and return ``{label: [note_indices]}``.

    Embeds the notes, reduces dimensionality with UMAP, then runs HDBSCAN.
    Label ``-1`` is HDBSCAN's noise bucket.

    Args:
        expanded_notes (list[str]): non-empty list of expanded note strings.

    Returns:
        dict[int, list[int]]: cluster label -> indices into *expanded_notes*.
    """
    n = len(expanded_notes)

    # If only 1 note → trivial cluster
    if n == 1:
        return {-1: [0]}

    embeddings = _get_embedding_model().encode(expanded_notes)

    if n > 5:
        # Dimensionality reduction for cleaner clusters.
        # n_neighbors must stay below the sample count or UMAP misbehaves
        # on tiny inputs, hence the clamp.
        reducer = umap.UMAP(
            n_neighbors=min(5, n - 1),
            min_dist=0.1,
            metric="cosine",
        )
        reduced = reducer.fit_transform(embeddings)
    else:
        # Too few points for a meaningful UMAP embedding (spectral init can
        # fail outright) — cluster the raw embeddings instead.
        reduced = embeddings

    # Stronger clustering behavior
    clusterer = hdbscan.HDBSCAN(
        min_cluster_size=2,
        min_samples=1,
        cluster_selection_epsilon=0.2,
        metric='euclidean',
    )
    labels = clusterer.fit_predict(reduced)

    clusters = {}
    for idx, label in enumerate(labels):
        clusters.setdefault(int(label), []).append(idx)
    return clusters


# ---------------------------------------------------------
# LLM cluster summarizer → always returns valid JSON
# ---------------------------------------------------------
def summarize_cluster_with_llm(raw_items):
    """Ask the LLM for a ``{"title", "summary"}`` JSON object for one cluster.

    Args:
        raw_items (list[str]): the original note strings in the cluster.

    Returns:
        dict: parsed JSON with ``title`` and ``summary`` keys.

    Raises:
        json.JSONDecodeError: if the model output cannot be parsed even
            after stripping accidental markdown fences.
    """
    prompt = f"""
You must return ONLY valid JSON. No markdown. No backticks. No explanations.

Notes:
{raw_items}

Return JSON exactly like:
{{
"title": "...",
"summary": "..."
}}
"""

    response = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2,
    )

    json_text = response.choices[0].message.content.strip()

    # Try direct JSON parse
    try:
        return json.loads(json_text)
    except json.JSONDecodeError:
        # Fallback: the model sometimes wraps output in ```json ... ``` fences.
        # Strip only the fence decoration — a blanket replace("json", "")
        # would corrupt any summary text containing the word "json".
        cleaned = json_text.strip().strip("`").strip()
        if cleaned.lower().startswith("json"):
            cleaned = cleaned[4:].strip()
        return json.loads(cleaned)


# ---------------------------------------------------------
# MAIN TOOL — Works with text OR file path OR upload
# ---------------------------------------------------------
def cluster_notes_dynamic(input_data) -> dict:
    """
    Dynamically clusters unstructured notes into semantic groups using
    embeddings and HDBSCAN, then summarizes each cluster with an LLM.

    Args:
        input_data (str or file-like):
            - Raw multiline text containing notes, OR
            - A file path to a text file, OR
            - A file-like object uploaded in environments such as Colab or Gradio.

    Returns:
        dict: A JSON-like dictionary structure:
            {
                "clusters": [
                    {
                        "id": <int cluster label, -1 = noise>,
                        "items": [list of notes],
                        "analysis": {
                            "title": "...",
                            "summary": "..."
                        }
                    },
                    ...
                ]
            }

    Behavior:
        - Automatically detects if input is text or file path.
        - Expands each note for better semantic embedding separation.
        - Uses SentenceTransformer embeddings + HDBSCAN for density-based clustering.
        - Uses an LLM to generate a clean title and summary for each cluster.
        - Returns strictly structured output for downstream formatting tools.
    """
    text = load_input(input_data)

    # Parse text into notes: one note per non-blank line.
    raw_notes = [l.strip() for l in text.split("\n") if l.strip()]
    if not raw_notes:
        return {"clusters": []}

    expanded_notes = [expand_note(n) for n in raw_notes]
    cluster_map = cluster_embeddings(expanded_notes)

    results = []
    for cid, idx_list in cluster_map.items():
        items = [raw_notes[i] for i in idx_list]
        analysis = summarize_cluster_with_llm(items)
        results.append({
            "id": cid,
            "items": items,
            "analysis": analysis,
        })

    return {"clusters": results}


def convert_structure_to_markdown(structured_json: dict | str) -> str:
    """
    Converts a structured notes JSON object into a clean, readable Markdown document.

    Args:
        structured_json (dict | str):
            Either a Python dictionary or a JSON string containing clustered
            notes in the format produced by `cluster_notes_dynamic`.

            Example structure:
            {
                "clusters": [
                    {
                        "id": 0,
                        "items": [...],
                        "analysis": {"title": "...", "summary": "..."}
                    }
                ]
            }

    Returns:
        str: A Markdown-formatted representation of all clusters, including
        titles, summaries, and individual note items.
    """
    # Convert string input into dict.  Accept strict JSON first; fall back
    # to ast.literal_eval for Python-repr dicts (single quotes etc.).
    if isinstance(structured_json, str):
        try:
            structured_json = json.loads(structured_json)
        except json.JSONDecodeError:
            structured_json = ast.literal_eval(structured_json)

    parts = ["# 🗂 Structured Notes\n\n"]
    for cluster in structured_json["clusters"]:
        title = cluster["analysis"]["title"]
        summary = cluster["analysis"]["summary"]
        items = cluster["items"]

        parts.append(f"## {title}\n")
        parts.append(f"{summary}\n\n")
        parts.append("### Notes:\n")
        for item in items:
            parts.append(f"- {item}\n")
        parts.append("\n")

    return "".join(parts)


def generate_minimal_google_font_html(md_text: str, font: str = "Inter") -> str:
    """
    Converts Markdown text into a simple, styled HTML document using a Google Font.

    Args:
        md_text (str):
            The Markdown-formatted text to convert into HTML.

        font (str, optional):
            The Google Font to apply to the exported HTML.
            Defaults to "Inter". If empty or None, "Inter" is used automatically.

    Returns:
        str: The file path of the generated HTML file, which can be returned
        directly to Gradio for download.
    """
    # If no font was provided
    if not font or font.strip() == "":
        font = "Inter"

    # Convert markdown to HTML
    html_body = markdown.markdown(md_text)

    # Google Font URL (spaces replaced with +)
    font_url = font.replace(" ", "+")

    # Build final HTML.
    # NOTE(review): the original template did not reference font_url at all
    # and contained no HTML tags (apparently lost in transit); reconstructed
    # here to match the documented behavior — confirm against the intended
    # design.
    final_html = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Notes Export</title>
<link href="https://fonts.googleapis.com/css2?family={font_url}&display=swap" rel="stylesheet">
<style>
  body {{
    font-family: '{font}', sans-serif;
    max-width: 800px;
    margin: 40px auto;
    padding: 0 20px;
    line-height: 1.6;
  }}
</style>
</head>
<body>
{html_body}
</body>
</html>""".strip()

    # Save file
    output_dir = "outputs"
    Path(output_dir).mkdir(exist_ok=True)

    # File path for HF Spaces
    output_path = f"{output_dir}/notes_export.html"
    Path(output_path).write_text(final_html, encoding="utf-8")

    return output_path


notes_interface = gr.Interface(
    fn=cluster_notes_dynamic,
    inputs=gr.Textbox(
        label="Enter notes (one per line)",
        placeholder="Need to call my brother\nSend email\nResearch project",
        lines=5,
    ),
    outputs=gr.Textbox(label="Clustered Output", lines=20),
    api_name="cluster_notes_dynamic",
)

markdown_interface = gr.Interface(
    fn=convert_structure_to_markdown,
    inputs=gr.Textbox(label="Clustered input"),
    outputs=gr.Textbox(label="Markdown output", lines=20),
    api_name="convert_structure_to_markdown",
)

html_interface = gr.Interface(
    fn=generate_minimal_google_font_html,
    inputs=[
        gr.Textbox(label="Markdown Input", lines=12, placeholder="# Your Markdown here..."),
        gr.Textbox(label="Font (optional)", placeholder="Inter (default)"),
    ],
    outputs=gr.File(label="Download HTML"),
    title="Markdown → Styled HTML Converter",
    description="Converts markdown into a clean HTML file styled with Google Fonts.",
)

with gr.Blocks(title="NeatNote: A smart note-clustering MCP server") as demo:
    gr.Markdown("""
## 🗂 NeatNote transforms unstructured text into clear, organized insights using semantic embeddings and LLM summaries.
""")
    gr.TabbedInterface(
        [
            notes_interface,
            markdown_interface,
            html_interface
            # Add more tools here
        ],
        [
            "notes_interface",
            "markdown_interface",
            "html_interface"
            # Add more tool tabs here
        ],
    )
    gr.Markdown("""
# 🚀 Workflow: From Raw Notes → Clusters → Markdown → Beautiful HTML
1. **Cluster Notes:** Paste your raw notes and let AI organize them.
2. **Convert to Markdown:** Turn the structured clusters into clean Markdown.
3. **Export as HTML:** Convert Markdown into a polished, shareable HTML file.
""")
    gr.Markdown("""
# 🎥 [Watch the Demo on YouTube](https://youtu.be/6d8Drbmn0-s?si=fk716CYMBnIpLKME)
""")
    gr.Markdown("""
# **Example prompt to use on claude**:
Convert the attached file messy notes into beautiful structured notes in Inconsolata font html file
""")

if __name__ == "__main__":
    demo.launch(mcp_server=True)