import os
import re
from datetime import datetime

import orbit_core

# Largest gap, in seconds, allowed between two consecutive belief timestamps
# for recent_cycle_beliefs to keep treating them as part of the same cycle.
RECENT_CYCLE_GAP_SECONDS = 600
# Number of follow-up questions build_follow_up_questions stops at / pads to.
FOLLOW_UP_COUNT = 3
# Number of open questions build_open_questions pads its result to.
OPEN_COUNT = 3
# subject_is_usable rejects subjects that have more tokens than this.
MAX_SUBJECT_TOKENS = 4

# Lowercase words that mark the end of a statement's subject: belief_subject
# collects tokens until it reaches the first token found in this set.
PREDICATE_WORDS = {
    "is",
    "are",
    "was",
    "were",
    "be",
    "being",
    "been",
    "can",
    "cannot",
    "comes",
    "come",
    "has",
    "have",
    "had",
    "uses",
    "use",
    "predicts",
    "explains",
    "describes",
    "models",
    "treats",
    "links",
    "includes",
    "supports",
    "works",
    "helps",
    "states",
    "places",
    "implies",
    "unifies",
    "requires",
    "reduces",
    "postulates",
    "defines",
    "applies",
    "combines",
    "underlies",
    "represents",
    "attributes",
    "organizes",
    "groups",
    "fits",
    "connects",
    "informs",
    "operates",
    "gained",
    "distinguishes",
    "accounts",
    "interprets",
    "relies",
    "provides",
    "proposes",
    "remains",
    "follows",
    "transformed",
}

# Articles dropped from the front of a statement by belief_subject; also used
# by isolated_candidates to rank statements that start with an article last.
LEADING_ARTICLES = {"a", "an", "the"}
# subject_is_usable rejects any subject whose first token is one of these.
VAGUE_SUBJECTS = {
    "it",
    "this",
    "that",
    "they",
    "these",
    "those",
    "someone",
    "something",
}


def load_beliefs():
    """Load beliefs through ``orbit_core`` and return them as a list.

    Prints a short message and returns an empty list when loading raises
    ``ValueError`` or ``OSError``, or when the loaded value is not a list.
    """
    format_message = "Beliefs file is not in the expected format."

    try:
        loaded = orbit_core.load_beliefs()
    except ValueError:
        print(format_message)
        return []
    except OSError:
        print("Could not open beliefs file.")
        return []

    if isinstance(loaded, list):
        return loaded

    print(format_message)
    return []


def normalize_text(text):
    """Return *text* lowercased, stripped of list markers and punctuation.

    A leading list number ("1." / "2)") and a leading run of "-" or "*"
    bullets are removed, every character that is neither a word character
    nor whitespace becomes a space, and whitespace runs collapse to single
    spaces.
    """
    lowered = text.lower()
    without_number = re.sub(r"^\s*\d+\s*[\.\)]\s*", "", lowered)
    without_bullet = re.sub(r"^\s*[-*]+\s*", "", without_number)
    # split() with no argument already discards leading/trailing whitespace.
    words = re.sub(r"[^\w\s]", " ", without_bullet).split()
    return " ".join(words)


def statement_tokens(statement):
    """Return the whitespace-separated tokens of the normalized statement."""
    normalized = normalize_text(statement)
    return normalized.split()


def looks_like_fragment(statement):
    """Return True when the statement normalizes to two tokens or fewer."""
    token_count = len(statement_tokens(statement))
    return token_count < 3


def belief_subject(statement):
    """Extract the subject phrase that opens a statement.

    Leading articles are skipped, then tokens are taken up to (not
    including) the first predicate word. When no token precedes a predicate
    word, the whole normalized statement is returned instead.
    """
    words = statement_tokens(statement)
    total = len(words)

    # Skip past any leading articles.
    start = 0
    while start < total and words[start] in LEADING_ARTICLES:
        start += 1

    # Advance to the first predicate word (or the end of the statement).
    end = start
    while end < total and words[end] not in PREDICATE_WORDS:
        end += 1

    if end == start:
        return normalize_text(statement)

    return " ".join(words[start:end])


def subject_is_usable(subject):
    """Return True when *subject* is specific and short enough to ask about.

    A subject is rejected when it is empty, when its first token is a vague
    pronoun, or when it has more than ``MAX_SUBJECT_TOKENS`` tokens.
    """
    words = subject.split()
    return (
        bool(words)
        and words[0] not in VAGUE_SUBJECTS
        and len(words) <= MAX_SUBJECT_TOKENS
    )


def parse_created_at(value):
    """Parse an ISO-8601 timestamp, returning ``None`` when it is unusable.

    Args:
        value: The raw ``created_at`` value of a belief. Expected to be an
            ISO-format string, but anything may be passed.

    Returns:
        A ``datetime`` for a parseable string, otherwise ``None`` (missing
        or empty value, malformed string, or a non-string value).
    """
    if not value:
        return None
    try:
        return datetime.fromisoformat(value)
    except (TypeError, ValueError):
        # ValueError: malformed string. TypeError: fromisoformat only accepts
        # str, so a numeric or otherwise non-string timestamp lands here
        # instead of aborting the whole session.
        return None


def recent_cycle_beliefs(beliefs):
    """Return the beliefs belonging to the most recent creation cycle.

    Beliefs with a parseable ``created_at`` are walked from newest to
    oldest; the walk stops at the first gap between consecutive timestamps
    that exceeds ``RECENT_CYCLE_GAP_SECONDS``. When no belief has a usable
    timestamp, the last 20 beliefs are returned.
    """
    stamped = []
    for entry in beliefs:
        moment = parse_created_at(entry.get("created_at"))
        if moment is not None:
            stamped.append((moment, entry))

    if not stamped:
        return beliefs[-20:]

    newest_first = sorted(stamped, key=lambda pair: pair[0], reverse=True)
    cursor, newest_belief = newest_first[0]
    cycle = [newest_belief]

    for moment, entry in newest_first[1:]:
        # Gap is measured against the previously accepted (newer) belief.
        if (cursor - moment).total_seconds() > RECENT_CYCLE_GAP_SECONDS:
            break
        cycle.append(entry)
        cursor = moment

    return cycle


def belief_map_by_id(beliefs):
    """Index beliefs by their ``id``, skipping beliefs with a falsy id.

    When several beliefs share an id, the last one wins.
    """
    return {
        entry.get("id"): entry
        for entry in beliefs
        if entry.get("id")
    }


def relation_ids(belief):
    """Return the truthy ``belief_id`` values of a belief's dict relations.

    Relation entries that are not dicts are ignored; order is preserved.
    """
    targets = (
        relation.get("belief_id")
        for relation in belief.get("relations", [])
        if isinstance(relation, dict)
    )
    return [target for target in targets if target]


def cluster_recent_subjects(recent_beliefs):
    """Group recent beliefs by usable subject and rank the groups.

    Returns a list of ``(subject, beliefs)`` tuples ordered by group size
    (largest first), then total ``support_count`` (highest first), then
    subject text.
    """
    by_subject = {}
    for entry in recent_beliefs:
        topic = belief_subject(entry.get("statement", ""))
        if subject_is_usable(topic):
            by_subject.setdefault(topic, []).append(entry)

    def rank(cluster):
        topic, members = cluster
        total_support = sum(member.get("support_count", 0) for member in members)
        return (-len(members), -total_support, topic)

    return sorted(by_subject.items(), key=rank)


def definition_phrase(subject, statement):
    """Pull a short defining phrase out of a "<subject> is a ..." statement.

    Returns the phrase that follows the subject and a copula such as
    "is a" / "are", cut at the first qualifying clause (" with ", " that ",
    ...). Returns ``None`` when the statement does not start with the
    subject, has no recognised copula, or the phrase contains a digit or
    more than five words.
    """
    text = normalize_text(statement)
    lead = subject + " "
    if not text.startswith(lead):
        return None

    rest = text[len(lead):]
    copulas = ("is a ", "is an ", "is the ", "are ", "was a ", "was an ")
    matched = next((copula for copula in copulas if rest.startswith(copula)), None)
    if matched is None:
        return None

    candidate = rest[len(matched):]
    # Splitting on an absent marker leaves the candidate unchanged.
    for marker in (" with ", " that ", " which ", " using ", " through "):
        candidate = candidate.split(marker, 1)[0]

    contains_digit = any(char.isdigit() for char in candidate)
    if contains_digit or len(candidate.split()) > 5:
        return None

    return candidate.strip()


def follow_up_question(subject, beliefs):
    """Choose one follow-up question for a cluster of beliefs about *subject*.

    Args:
        subject: The shared subject phrase of the cluster.
        beliefs: Belief dicts; only their ``statement`` values are read.

    Returns:
        A question string. Preference order: a question built from a
        definition phrase found in the statements, then one keyed on the
        word "predicts", then one keyed on "explains"/"describes"/"models",
        and finally a generic question about the subject.
    """
    # Definition phrases that start with one of these say little on their
    # own, so they are ranked last and lead to an "explain / limits"
    # question rather than "What defines ...?".
    generic_nouns = ("theory", "model", "hypothesis")

    statements = [belief.get("statement", "") for belief in beliefs]
    phrases = [
        phrase
        for phrase in (definition_phrase(subject, statement) for statement in statements)
        if phrase
    ]

    if phrases:
        # Use the same generic-noun test for ranking and for the branch
        # below, so a concrete phrase always wins over a generic one
        # (previously "hypothesis" was missing from the ranking).
        phrases.sort(key=lambda item: (item.startswith(generic_nouns), len(item), item))
        phrase = phrases[0]
        if phrase.startswith(generic_nouns):
            return f"What does {subject} explain, and where are its limits?"
        return f"What defines {phrase}?"

    # Normalize each statement once instead of once per keyword check.
    normalized = [normalize_text(statement) for statement in statements]

    if any("predicts" in text for text in normalized):
        return f"What evidence or observations matter most for understanding {subject}?"

    if any(
        word in text
        for text in normalized
        for word in ("explains", "describes", "models")
    ):
        return f"What does {subject} explain, and where are its limits?"

    return f"What should Orbit understand next about {subject}?"


def contradiction_pairs(beliefs, excluded_ids):
    """Collect unique pairs of beliefs linked by a "contradicts" relation.

    Args:
        beliefs: Belief dicts to scan.
        excluded_ids: Collection of belief ids; a pair is dropped when BOTH
            of its ids are in this collection. May be empty.

    Returns:
        A list of two-element lists of belief dicts (ordered by sorted id).
        Pairs whose question starts with "Under what conditions" come
        first, then higher ``contradiction_count``, then statement text.
    """
    belief_map = belief_map_by_id(beliefs)
    pairs = []
    seen = set()

    for belief in beliefs:
        belief_id = belief.get("id")
        # belief_map only holds beliefs with a truthy id, so a belief without
        # one cannot be looked up again below (and a None id cannot be
        # sorted against a string id). Skip it instead of crashing.
        if not belief_id:
            continue

        # "or []" also covers an explicit ``"relations": None``.
        for relation in belief.get("relations") or []:
            # Same guard as relation_ids: ignore malformed relation entries.
            if not isinstance(relation, dict):
                continue
            if relation.get("type") != "contradicts":
                continue

            other_id = relation.get("belief_id")
            if not other_id or other_id not in belief_map:
                continue

            # Sorting makes (a, b) and (b, a) collapse to the same key.
            pair_key = tuple(sorted((belief_id, other_id)))
            if pair_key in seen:
                continue

            seen.add(pair_key)
            pair_beliefs = [belief_map[pair_key[0]], belief_map[pair_key[1]]]

            if excluded_ids and pair_key[0] in excluded_ids and pair_key[1] in excluded_ids:
                continue

            pairs.append(pair_beliefs)

    pairs.sort(
        key=lambda pair: (
            not contradiction_question(pair).startswith("Under what conditions"),
            -max(pair[0].get("contradiction_count", 0), pair[1].get("contradiction_count", 0)),
            pair[0].get("statement", ""),
            pair[1].get("statement", ""),
        )
    )
    return pairs


def trailing_phrase(statement, subject):
    """Return what a statement says after its subject and leading verb.

    The statement's tokens must begin with the subject's tokens, otherwise
    an empty string is returned. Any run of leading "is"/"are"/"was"/
    "were"/"can" tokens and one following "not" are dropped from the rest.
    """
    words = statement_tokens(statement)
    subject_words = subject.split()
    width = len(subject_words)

    if words[:width] != subject_words:
        return ""

    total = len(words)
    start = width
    while start < total and words[start] in {"is", "are", "was", "were", "can"}:
        start += 1

    if start < total and words[start] == "not":
        start += 1

    return " ".join(words[start:]).strip()


def contradiction_question(pair):
    """Phrase a question about two contradicting beliefs.

    When both statements share a subject and at least one contains the word
    "cannot", the question asks under what conditions the positive claim
    holds. In every other case it asks how to reconcile the two statements.
    """
    first_statement = pair[0].get("statement", "")
    second_statement = pair[1].get("statement", "")
    reconcile = f'How should Orbit reconcile "{first_statement}" and "{second_statement}"?'

    shared_subject = belief_subject(first_statement)
    if shared_subject != belief_subject(second_statement):
        return reconcile

    # Pad with spaces so "cannot" is matched as a whole word.
    first_negated = " cannot " in f" {normalize_text(first_statement)} "
    second_negated = " cannot " in f" {normalize_text(second_statement)} "
    if not (first_negated or second_negated):
        return reconcile

    # If the first statement is the negated one, the second is the positive claim.
    positive = second_statement if first_negated else first_statement
    phrase = trailing_phrase(positive, shared_subject)
    if phrase:
        return f"Under what conditions does {shared_subject} {phrase}?"

    return reconcile


def isolated_candidates(beliefs, excluded_subjects):
    """Return ranked beliefs that have no support and no contradictions.

    A belief qualifies when its subject is not in *excluded_subjects*, its
    statement is not a fragment, and both ``support_count`` and
    ``contradiction_count`` are zero. Results are ordered with statements
    that do not start with an article first, then by confidence (highest
    first), token count (longest first) and statement text.
    """

    def is_isolated(entry):
        text = entry.get("statement", "")
        if belief_subject(text) in excluded_subjects:
            return False
        if looks_like_fragment(text):
            return False
        return (
            entry.get("support_count", 0) == 0
            and entry.get("contradiction_count", 0) == 0
        )

    def rank(entry):
        text = entry.get("statement", "")
        words = statement_tokens(text)
        # Fragments were filtered out above, so words[0] always exists.
        return (
            words[0] in LEADING_ARTICLES,
            -entry.get("confidence", 0),
            -len(words),
            text,
        )

    return sorted((entry for entry in beliefs if is_isolated(entry)), key=rank)


def build_follow_up_questions(beliefs):
    """Build the follow-up questions for the most recent belief cycle.

    Questions come first from subject clusters of the recent beliefs, then
    from a generic per-subject question for each recent belief, and finally
    from a fixed filler until ``FOLLOW_UP_COUNT`` questions exist.

    Returns:
        A ``(questions, recent_beliefs)`` tuple.
    """
    recent_beliefs = recent_cycle_beliefs(beliefs)
    questions = []
    asked = set()

    def offer(candidate):
        """Record *candidate* if unseen; report whether the quota is now met."""
        if candidate not in asked:
            asked.add(candidate)
            questions.append(candidate)
        return len(questions) == FOLLOW_UP_COUNT

    for subject, members in cluster_recent_subjects(recent_beliefs):
        if offer(follow_up_question(subject, members)):
            break

    if len(questions) < FOLLOW_UP_COUNT:
        for entry in recent_beliefs:
            subject = belief_subject(entry.get("statement", ""))
            if not subject_is_usable(subject):
                continue
            if offer(f"What should Orbit understand next about {subject}?"):
                break

    # A non-positive shortfall multiplies to an empty list, adding nothing.
    shortfall = FOLLOW_UP_COUNT - len(questions)
    questions.extend(
        ["What should Orbit understand next about its most recent beliefs?"] * shortfall
    )

    return questions, recent_beliefs


def build_open_questions(beliefs, recent_beliefs):
    """Build the list of open questions about the wider belief set.

    Questions are gathered in stages, skipping any text already used:
    one contradiction question, one question about the top-ranked fragment
    statement, one isolated belief whose subject equals that fragment's
    first token, further isolated beliefs, and finally a fixed filler until
    ``OPEN_COUNT`` questions exist.

    Args:
        beliefs: All belief dicts.
        recent_beliefs: Belief dicts of the recent cycle; their ids and
            subjects are used to exclude material from the stages below.

    Returns:
        A list of question strings.
    """
    questions = []
    used = set()

    # Ids and subjects of the recent cycle, used as exclusion filters.
    recent_ids = {belief.get("id") for belief in recent_beliefs if belief.get("id")}
    recent_subjects = {
        belief_subject(belief.get("statement", ""))
        for belief in recent_beliefs
        if belief.get("statement")
    }

    fragment_subject = None

    # Stage 1: contradictions. The unconditional break below means at most
    # one contradiction question is taken; the early return can only trigger
    # when OPEN_COUNT is 1.
    for pair in contradiction_pairs(beliefs, recent_ids):
        question = contradiction_question(pair)
        if question in used:
            continue
        used.add(question)
        questions.append(question)
        if len(questions) == OPEN_COUNT:
            return questions
        break

    # Stage 2: fragment statements whose subject is not a recent subject.
    fragments = []
    for belief in beliefs:
        statement = belief.get("statement", "")
        subject = belief_subject(statement)

        if not looks_like_fragment(statement):
            continue
        if subject in recent_subjects:
            continue

        fragments.append(belief)

    # Highest support first, then highest confidence, then statement text.
    fragments.sort(
        key=lambda belief: (
            -belief.get("support_count", 0),
            -belief.get("confidence", 0),
            belief.get("statement", ""),
        )
    )

    if fragments:
        # Only the top-ranked fragment is asked about.
        fragment = fragments[0]
        fragment_tokens = statement_tokens(fragment.get("statement", ""))
        if fragment_tokens:
            # The fragment's first token is what stage 3 matches against.
            fragment_subject = fragment_tokens[0]
        else:
            fragment_subject = belief_subject(fragment.get("statement", ""))
        question = f'What relationship was intended by "{fragment.get("statement", "")}"?'
        if question not in used:
            used.add(question)
            questions.append(question)

    # Stage 3: at most one isolated belief whose subject equals the
    # fragment's subject token.
    if len(questions) < OPEN_COUNT and fragment_subject:
        for belief in isolated_candidates(beliefs, recent_subjects):
            if belief_subject(belief.get("statement", "")) != fragment_subject:
                continue

            question = f'What broader definition or context would help explain "{belief.get("statement", "")}"?'
            if question in used:
                continue
            used.add(question)
            questions.append(question)
            break

    # Stage 4: fill the remaining slots with any other isolated beliefs.
    if len(questions) < OPEN_COUNT:
        for belief in isolated_candidates(beliefs, recent_subjects):
            question = f'What broader definition or context would help explain "{belief.get("statement", "")}"?'
            if question in used:
                continue
            used.add(question)
            questions.append(question)
            if len(questions) == OPEN_COUNT:
                break

    # Stage 5: pad with a fixed filler question.
    while len(questions) < OPEN_COUNT:
        questions.append("What part of Orbit's current graph still needs clearer context?")

    return questions


def show_questions(follow_up_questions, open_questions):
    """Print the session banner and both numbered question lists.

    Follow-up questions are numbered from 1; open questions continue the
    numbering directly after the last follow-up question that was printed.

    Args:
        follow_up_questions: Iterable of follow-up question strings.
        open_questions: Iterable of open question strings.
    """
    # Materialize so the count below reflects exactly what gets printed.
    follow_up_questions = list(follow_up_questions)

    print("Orbit engineering mode started.")
    print()
    print("Follow-up questions:")
    for index, question in enumerate(follow_up_questions, 1):
        print(f"{index}. {question}")
    print()
    print("Open questions:")
    # Continue from the number of follow-ups actually shown rather than a
    # fixed constant, so the numbering can never skip or repeat a number.
    first_open_number = len(follow_up_questions) + 1
    for number, question in enumerate(open_questions, first_open_number):
        print(f"{number}. {question}")
    print()
    print("Answer freely.")
    print('Type "end session" when finished.')


def collect_answers():
    """Read answers from standard input until the session is ended.

    Each non-blank line is stored with surrounding whitespace removed. The
    session ends when the user types "end session" (surrounding whitespace
    and letter case are ignored) or when input is exhausted.

    Returns:
        The list of collected answer strings, in entry order.
    """
    answers = []

    while True:
        try:
            answer = input("> ")
        except EOFError:
            # Closed or exhausted stdin: keep what was collected so far
            # instead of losing the session to a traceback.
            return answers

        cleaned = answer.strip()
        # Compare the normalized text so "End session " is not stored as an
        # answer; choose_learning normalizes its input the same way.
        if cleaned.lower() == "end session":
            return answers

        if not cleaned:
            continue

        answers.append(cleaned)


def write_session_intake(answers):
    """Render the answers as intake text, one answer per line.

    Returns an empty string when there are no answers; otherwise every
    answer is followed by a newline. Nothing is written to disk here.
    """
    return "".join(answer + "\n" for answer in answers) if answers else ""


def run_learning(answers):
    """Feed the session answers to ``orbit_core.intake`` via the intake file.

    The current content of ``orbit_core.INTAKE_FILE`` (empty when the file
    does not exist) is saved, the file is overwritten with the session
    answers, ``orbit_core.intake()`` is called, and the saved content is
    written back afterwards even if an exception was raised.
    """

    def overwrite_intake(content):
        with open(orbit_core.INTAKE_FILE, "w", encoding="utf-8") as handle:
            handle.write(content)

    previous_content = ""
    if os.path.exists(orbit_core.INTAKE_FILE):
        # utf-8-sig drops a leading BOM, if any, while reading.
        with open(orbit_core.INTAKE_FILE, "r", encoding="utf-8-sig") as handle:
            previous_content = handle.read()

    session_text = write_session_intake(answers)

    try:
        overwrite_intake(session_text)
        orbit_core.intake()
    finally:
        overwrite_intake(previous_content)


def choose_learning():
    """Prompt until the user enters "y" or "n" and return that letter.

    Input is stripped and lowercased before it is checked; any other entry
    prints a reminder and prompts again.
    """
    reply = input("> ").strip().lower()
    while reply not in {"y", "n"}:
        print("Enter y or n.")
        reply = input("> ").strip().lower()
    return reply


def main():
    """Run one interactive session: ask questions, collect answers, learn.

    Does nothing when no beliefs could be loaded. After the answers are
    collected the user chooses whether to apply them through run_learning.
    """
    loaded = load_beliefs()
    if not loaded:
        return

    follow_ups, recent = build_follow_up_questions(loaded)
    open_items = build_open_questions(loaded, recent)
    show_questions(follow_ups, open_items)

    responses = collect_answers()

    summary_lines = (
        "",
        "Session complete.",
        f"{len(responses)} answers collected.",
        "Run learning now? [y/n]",
    )
    for line in summary_lines:
        print(line)

    if choose_learning() != "y":
        print("Session discarded.")
        print("No learning applied.")
        return

    run_learning(responses)

# Start the interactive session only when this file is executed as a script.
if __name__ == "__main__":
    main()