import json
import re
import csv
import io
from collections import Counter
from typing import Any

# Reference taxonomy of research themes. Order is significant: other code in
# this module indexes and slices this list positionally.
PAJAIS_THEMES = [
    "Machine Learning",
    "Natural Language Processing",
    "Computer Vision",
    "Deep Learning",
    "Reinforcement Learning",
    "Ethical AI and Fairness",
    "Explainability and Interpretability",
    "Human-Computer Interaction",
    "AI in Healthcare",
    "AI in Education",
    "AI in Finance",
    "Autonomous Systems",
]

# Function-calling schemas advertised to the model (Mistral tool format).
# NOTE(review): only one schema is declared here, while execute_tool also
# dispatches four further tool names; any additional schemas should follow
# the same structure as the entry below.
TOOL_DEFINITIONS = [
    {
        "type": "function",
        "function": {
            "name": "extract_topics_from_text",
            "description": "Extract meaningful research themes from a list of titles or abstracts.",
            "parameters": {
                "type": "object",
                "properties": {
                    # The strings to mine for themes.
                    "texts": {
                        "type": "array",
                        "items": {"type": "string"},
                    },
                    # Declares which kind of text is being supplied.
                    "text_type": {
                        "type": "string",
                        "enum": ["title", "abstract"],
                    },
                    # Optional: not listed under "required".
                    "num_topics": {"type": "integer"},
                },
                "required": ["texts", "text_type"],
            },
        },
    },
]

def execute_tool(tool_name: str, tool_input: dict, rows: list[dict]) -> Any:
    """Run the handler registered for ``tool_name`` and return its result.

    Args:
        tool_name: Name of the tool requested by the caller.
        tool_input: Arguments for the tool, passed through to the handler.
        rows: Source rows; only the summary-table handler receives them.

    Returns:
        Whatever the selected handler returns, or
        ``{"error": "Tool not found"}`` when the name is not recognised.
    """
    # Each entry is a thunk so a handler is only looked up and invoked when
    # its tool is actually the one selected.
    dispatch = {
        "extract_topics_from_text": lambda: _extract_topics(tool_input),
        "cluster_papers_by_topic": lambda: _cluster_papers(tool_input),
        "compare_title_vs_abstract_themes": lambda: _compare_themes(tool_input),
        "map_to_pajais_taxonomy": lambda: _map_pajais(tool_input),
        "generate_topic_summary_table": lambda: _summary_table(tool_input, rows),
    }
    run = dispatch.get(tool_name)
    if run is None:
        return {"error": "Tool not found"}
    return run()

def _extract_topics(inp: dict) -> dict:
    texts = inp.get("texts", [])
    num = inp.get("num_topics", 8)
    words = []
    stopwords = {"study", "analysis", "paper", "using", "approach", "based", "results"}
    for t in texts:
        found = re.findall(r"\b[a-zA-Z]{5,}\b", t.lower())
        words.extend([w for w in found if w not in stopwords])
    top = [w.title() for w, _ in Counter(words).most_common(num)]
    return {"topics": top}

def _cluster_papers(inp: dict) -> dict:
    papers = inp.get("papers", [])
    topics = inp.get("topics", [])
    clusters = {t: [] for t in topics}
    for p in papers:
        txt = p.get("text", "").lower()
        for t in topics:
            if t.lower() in txt:
                clusters[t].append(p.get("sr_no"))
                break
    return {"clusters": clusters}

def _compare_themes(inp: dict) -> dict:
    t_set = set(inp.get("title_topics", []))
    a_set = set(inp.get("abstract_topics", []))
    matched = list(t_set & a_set)
    return {
        "matched_themes": matched,
        "title_only_themes": list(t_set - a_set),
        "abstract_only_themes": list(a_set - t_set),
        "overlap_percentage": round(len(matched)/max(len(t_set|a_set),1)*100, 1)
    }

def _map_pajais(inp: dict) -> dict:
    """Map discovered topics onto the PAJAIS theme list by shared keywords.

    A discovered topic is mapped to the theme with which it shares the most
    whole words (case-insensitive); on a tie the theme listed first in
    ``PAJAIS_THEMES`` wins. Topics sharing no word with any theme are
    reported as novel. Matching is on exact words only, so inflected variants
    (e.g. "Explainable" vs. "Explainability") do not match.

    Args:
        inp: Tool arguments. Reads ``discovered_topics`` (a list of topic
            labels). A missing or ``None`` value is treated as an empty list.

    Returns:
        A dict with:
            ``MAPPED``: list of ``{"discovered", "pajais_match", "score"}``
                dicts, where ``score`` is the number of shared words.
            ``NOVEL``: discovered topics that matched no theme.
            ``pajais_gaps``: themes no discovered topic was mapped to, in
                taxonomy order.
            ``coverage_pct``: share of themes with at least one mapped topic,
                as a percentage rounded to one decimal place.
    """
    # Connector words, plus "ai" which appears in several theme names, carry
    # no discriminating signal and would only produce spurious matches.
    ignored = {"ai", "and", "in", "of", "the", "for"}

    def keywords(label) -> set:
        """Return the significant lower-cased words of ``label``."""
        if not isinstance(label, str):
            return set()
        return set(re.findall(r"[a-z]+", label.lower())) - ignored

    theme_keywords = [(theme, keywords(theme)) for theme in PAJAIS_THEMES]

    mapped = []
    novel = []
    covered = set()
    for topic in inp.get("discovered_topics") or []:
        topic_words = keywords(topic)
        best_theme = None
        best_score = 0
        for theme, words in theme_keywords:
            score = len(topic_words & words)
            # Strict ">" keeps the earliest theme when scores tie.
            if score > best_score:
                best_theme, best_score = theme, score
        if best_theme is None:
            novel.append(topic)
            continue
        mapped.append(
            {"discovered": topic, "pajais_match": best_theme, "score": best_score}
        )
        covered.add(best_theme)

    gaps = [theme for theme in PAJAIS_THEMES if theme not in covered]
    coverage = round(len(covered) / max(len(PAJAIS_THEMES), 1) * 100, 1)
    return {
        "MAPPED": mapped,
        "NOVEL": novel,
        "pajais_gaps": gaps,
        "coverage_pct": coverage,
    }

def _summary_table(inp: dict, rows: list[dict]) -> dict:
    clusters = inp.get("clusters", {})
    meta = {str(r.get("Sr No", "")): r for r in rows}
    table = []
    for topic, sns in clusters.items():
        titles = [meta[str(sn)].get("Title", "")[:80] for sn in sns[:3] if str(sn) in meta]
        table.append({"topic_label": topic, "paper_count": len(sns), "representative_titles": titles})
    return {"summary_table": table}

def build_comparison_csv(res: dict) -> str:
    """Render a title-vs-abstract theme comparison as CSV text.

    The output has a ``Theme,Source`` header followed by one row per theme:
    matched themes are labelled ``Both``, then title-only themes
    (``Title only``), then abstract-only themes (``Abstract only``), so the
    ``Source`` column distinguishes all three groups.

    Args:
        res: Comparison result. Reads the ``matched_themes``,
            ``title_only_themes`` and ``abstract_only_themes`` lists; a
            missing or ``None`` list contributes no rows.

    Returns:
        The CSV document as a string, using the ``csv`` module's default
        dialect (rows end with ``\\r\\n``).
    """
    buffer = io.StringIO()
    writer = csv.writer(buffer)
    writer.writerow(["Theme", "Source"])

    sections = (
        ("matched_themes", "Both"),
        ("title_only_themes", "Title only"),
        ("abstract_only_themes", "Abstract only"),
    )
    for key, source in sections:
        writer.writerows([theme, source] for theme in res.get(key) or [])
    return buffer.getvalue()

def build_taxonomy_json(res: dict) -> str:
    """Serialize ``res`` to pretty-printed JSON text.

    Args:
        res: A JSON-serializable mapping.

    Returns:
        The JSON document, indented by two spaces per nesting level.
    """
    indent_width = 2
    return json.dumps(res, indent=indent_width)