"""Tests for python manage.py seed_initial_resources.

See research/08-next-modules-build-plan.md §Module A seed test list.
"""
from io import StringIO
from pathlib import Path

import pytest
import yaml
from django.core.management import call_command
from django.core.management.base import CommandError

from apps.resources.models import Resource, ResourceCheckpoint, SkillResource
from apps.skills.models import Skill

pytestmark = pytest.mark.django_db


def _write_yaml(tmp_path: Path, resources: list[dict]) -> Path:
    """Serialise ``resources`` into ``tmp_path / "resources.yaml"``.

    The entries are nested under a top-level ``resources`` key. The file is
    written as UTF-8 and any file already at that path is overwritten.

    Returns:
        The path of the written YAML file.
    """
    target = tmp_path / "resources.yaml"
    document = yaml.safe_dump({"resources": resources})
    target.write_text(document, encoding="utf-8")
    return target


# Sentinel that tells "skills not supplied" apart from an explicit None or [].
_MISSING = object()


def _entry(title='Python Intro', url='https://example.com/py', skills=_MISSING,
           type_='COURSE', difficulty='BEGINNER', rating=4.0, duration=60):
    """Build one resource-entry dict, with overridable defaults.

    ``skills`` falls back to a single Python link (relevance 0.9) only when
    the argument is omitted; an explicit ``[]`` or ``None`` is passed through
    unchanged. A fresh default list is created on every call, so callers may
    mutate the result freely. ``provider`` is always ``'TestProvider'``.
    """
    skill_links = (
        [{'skill_name': 'Python', 'relevance_score': 0.9}]
        if skills is _MISSING
        else skills
    )
    return dict(
        title=title,
        provider='TestProvider',
        url=url,
        difficulty_level=difficulty,
        duration=duration,
        type=type_,
        rating=rating,
        skills=skill_links,
    )


@pytest.fixture
def python_skill(db):
    """Create and return a ``Skill`` row named ``Python`` (Programming, BEGINNER)."""
    attrs = {
        'skill_name': 'Python',
        'category': 'Programming',
        'difficulty_level': 'BEGINNER',
    }
    return Skill.objects.create(**attrs)


@pytest.fixture
def sql_skill(db):
    """Create and return a ``Skill`` row named ``SQL`` (Database, BEGINNER)."""
    attrs = {
        'skill_name': 'SQL',
        'category': 'Database',
        'difficulty_level': 'BEGINNER',
    }
    return Skill.objects.create(**attrs)


def _run(path, prune=False):
    """Invoke ``seed_initial_resources`` on ``path`` and return its captured stdout.

    When ``prune`` is truthy, ``--prune`` is appended to the command line.
    Exceptions raised by the command propagate to the caller.
    """
    captured = StringIO()
    argv = ['seed_initial_resources', '--path', str(path)]
    argv += ['--prune'] if prune else []
    call_command(*argv, stdout=captured)
    return captured.getvalue()


def test_seed_is_idempotent(tmp_path, python_skill):
    """Seeding the same file twice leaves Resource and SkillResource counts unchanged."""
    yaml_path = _write_yaml(tmp_path, [_entry()])

    def _counts():
        # (resources, skill links) currently stored.
        return Resource.objects.count(), SkillResource.objects.count()

    _run(yaml_path)
    after_first_run = _counts()

    _run(yaml_path)
    assert _counts() == after_first_run


def test_seed_creates_skill_links(tmp_path, python_skill, sql_skill):
    """A resource listing two skills gets one SkillResource per skill, carrying its score."""
    multi_url = 'https://example.com/multi'
    expected = {'Python': 0.9, 'SQL': 0.5}
    skills = [
        {'skill_name': name, 'relevance_score': score}
        for name, score in expected.items()
    ]
    entry = _entry(title='Multi', url=multi_url, skills=skills)
    _run(_write_yaml(tmp_path, [entry]))

    resource = Resource.objects.get(url=multi_url)
    linked = {}
    for link in SkillResource.objects.filter(resource=resource):
        linked[link.skill.skill_name] = link.relevance_score
    assert linked == expected


def test_seed_updates_existing_row(tmp_path, python_skill):
    """Re-seeding a known URL with a new rating updates that row instead of adding one."""
    url = 'https://example.com/py'

    _run(_write_yaml(tmp_path, [_entry(rating=3.0)]))
    original = Resource.objects.get(url=url)
    assert original.rating == 3.0

    # Identical URL with a different rating: the primary key must survive
    # and the stored rating must change.
    _run(_write_yaml(tmp_path, [_entry(rating=4.8)]))
    reloaded = Resource.objects.get(url=url)
    assert reloaded.id == original.id
    assert reloaded.rating == 4.8


def test_prune_removes_stale_resources_and_links(tmp_path, python_skill):
    """With ``--prune``, a resource dropped from the YAML is deleted together with its links."""
    keep_url = 'https://example.com/keep'
    drop_url = 'https://example.com/drop'
    keep = _entry(title='Keep', url=keep_url)
    drop = _entry(title='Drop', url=drop_url)

    _run(_write_yaml(tmp_path, [keep, drop]))
    assert Resource.objects.count() == 2

    _run(_write_yaml(tmp_path, [keep]), prune=True)

    surviving_urls = set(Resource.objects.values_list('url', flat=True))
    assert surviving_urls == {keep_url}
    # No SkillResource row may still point at the pruned resource.
    stale_links = SkillResource.objects.filter(resource__url=drop_url)
    assert not stale_links.exists()


def test_prune_removes_stale_skill_links_within_same_resource(tmp_path,
                                                              python_skill,
                                                              sql_skill):
    """Re-seeding an existing resource with a shorter skill list must delete
    the SkillResource rows of the skills that were removed.

    Both runs are made without ``--prune``.
    """
    python_link = {'skill_name': 'Python', 'relevance_score': 0.9}
    sql_link = {'skill_name': 'SQL', 'relevance_score': 0.5}

    def _seed(skills):
        # Seed the single 'Multi' resource with the given skill links.
        entry = _entry(title='Multi', url='https://example.com/multi', skills=skills)
        _run(_write_yaml(tmp_path, [entry]))

    _seed([python_link, sql_link])
    assert SkillResource.objects.count() == 2

    _seed([python_link])
    remaining = SkillResource.objects.values_list('skill__skill_name', flat=True)
    assert list(remaining) == ['Python']


def test_seed_is_transactional_on_bad_row(tmp_path, python_skill):
    """One invalid row aborts the run: CommandError is raised and no Resource
    is stored, not even the valid row listed before the bad one."""
    valid = _entry(title='Good', url='https://example.com/good')
    # Only title/provider/url are given; every other field is absent.
    incomplete = dict(title='Bad', provider='X', url='https://example.com/bad')
    path = _write_yaml(tmp_path, [valid, incomplete])

    with pytest.raises(CommandError):
        _run(path)
    assert not Resource.objects.exists()


def test_seed_rejects_unknown_skill(tmp_path, python_skill):
    """A skill name with no matching Skill row raises a CommandError that
    names the skill, and no Resource is written."""
    unknown = 'NotAnOnetSkill'
    entry = _entry(
        title='Unknown skill',
        url='https://example.com/unknown',
        skills=[{'skill_name': unknown, 'relevance_score': 0.9}],
    )
    path = _write_yaml(tmp_path, [entry])

    with pytest.raises(CommandError) as excinfo:
        _run(path)

    message = str(excinfo.value)
    assert unknown in message
    assert Resource.objects.count() == 0


def test_seed_rejects_invalid_type(tmp_path, python_skill):
    """An entry whose ``type`` is ``PODCAST`` makes the command raise CommandError."""
    bad_entry = _entry(type_='PODCAST')
    path = _write_yaml(tmp_path, [bad_entry])
    with pytest.raises(CommandError):
        _run(path)


def test_seed_rejects_duplicate_url_in_yaml(tmp_path, python_skill):
    """Two entries sharing one URL raise a CommandError mentioning ``Duplicate url``."""
    dup_url = 'https://example.com/dup'
    entries = [_entry(title=title, url=dup_url) for title in ('A', 'B')]
    path = _write_yaml(tmp_path, entries)

    with pytest.raises(CommandError) as excinfo:
        _run(path)

    message = str(excinfo.value)
    assert 'Duplicate url' in message


def test_seed_rejects_empty_skills_list(tmp_path, python_skill):
    """An explicit empty ``skills`` list raises a CommandError whose message
    mentions ``skills`` (case-insensitively)."""
    entry_without_skills = _entry(skills=[])
    path = _write_yaml(tmp_path, [entry_without_skills])

    with pytest.raises(CommandError) as excinfo:
        _run(path)

    lowered = str(excinfo.value).lower()
    assert 'skills' in lowered


def test_seed_creates_checkpoints_from_yaml(tmp_path, python_skill):
    """Inline ``checkpoints`` become ordered checkpoint rows: titles in YAML
    order, ``order_index`` counting from 1, ``source`` from ``checkpoint_source``."""
    cp_url = 'https://example.com/cp'
    titles = ['Week 1: Intro', 'Week 2: Loops', 'Week 3: Done']
    entry = {
        **_entry(url=cp_url),
        'checkpoints': titles,
        'checkpoint_source': 'jsonld',
    }
    _run(_write_yaml(tmp_path, [entry]))

    resource = Resource.objects.get(url=cp_url)
    rows = list(resource.checkpoints.order_by('order_index'))
    assert [row.title for row in rows] == titles
    assert [row.order_index for row in rows] == [1, 2, 3]
    for row in rows:
        assert row.source == 'jsonld'


def test_seed_checkpoints_create_if_absent_no_clobber(tmp_path, python_skill):
    """Re-seeding a resource that already has checkpoints must neither
    duplicate nor replace them.

    Rationale carried over from the original note (not verifiable from this
    file): UserCheckpointProgress rows reference checkpoints by FK, so a
    re-seed must not drop a learner's rows.
    """
    cp_url = 'https://example.com/cp'

    def _seed_with(checkpoints):
        # Seed the one resource with the given inline checkpoint titles.
        entry = _entry(url=cp_url)
        entry['checkpoints'] = checkpoints
        _run(_write_yaml(tmp_path, [entry]))

    def _checkpoint_ids(resource):
        return set(resource.checkpoints.values_list('id', flat=True))

    _seed_with(['A', 'B'])
    resource = Resource.objects.get(url=cp_url)
    ids_before = _checkpoint_ids(resource)
    assert len(ids_before) == 2

    # A longer, different list on re-run: the stored rows must be exactly the
    # same ones as before (nothing added, nothing removed).
    _seed_with(['A', 'B', 'C', 'D'])
    assert _checkpoint_ids(resource) == ids_before


def test_seed_applies_checkpoints_sidecar_by_url(tmp_path, python_skill):
    """Checkpoints from a ``--checkpoints-path`` sidecar are attached to the
    resource with the matching URL; a sidecar entry for a URL that is not in
    the catalog does not make the command fail."""
    side_url = 'https://example.com/side'
    titles = ['Mod 1', 'Mod 2', 'Mod 3']
    catalog_path = _write_yaml(tmp_path, [_entry(url=side_url)])

    sidecar_entries = [
        {"url": side_url, "source": "jsonld", "titles": titles},
        # Stale entry: this URL is absent from the catalog above.
        {"url": "https://example.com/not-in-catalog", "titles": ["X"]},
    ]
    sidecar_path = tmp_path / "checkpoints.yaml"
    sidecar_path.write_text(
        yaml.safe_dump({"checkpoints": sidecar_entries}), encoding="utf-8",
    )

    call_command(
        'seed_initial_resources',
        '--path', str(catalog_path),
        '--checkpoints-path', str(sidecar_path),
        stdout=StringIO(), stderr=StringIO(),
    )

    resource = Resource.objects.get(url=side_url)
    rows = list(resource.checkpoints.order_by('order_index'))
    assert [row.title for row in rows] == titles
    assert all(row.source == 'jsonld' for row in rows)


def test_seed_sidecar_invalid_source_raises(tmp_path, python_skill):
    """A sidecar entry whose ``source`` is ``nope`` makes the command raise CommandError."""
    side_url = 'https://example.com/side'
    catalog_path = _write_yaml(tmp_path, [_entry(url=side_url)])

    payload = {"checkpoints": [
        {"url": side_url, "source": "nope", "titles": ["A"]},
    ]}
    sidecar_path = tmp_path / "checkpoints.yaml"
    sidecar_path.write_text(yaml.safe_dump(payload), encoding="utf-8")

    argv = [
        'seed_initial_resources',
        '--path', str(catalog_path),
        '--checkpoints-path', str(sidecar_path),
    ]
    with pytest.raises(CommandError):
        call_command(*argv, stdout=StringIO(), stderr=StringIO())


def test_seed_rejects_invalid_checkpoint_source(tmp_path, python_skill):
    """An inline ``checkpoint_source`` of ``bogus`` raises a CommandError that
    names the field, and no ResourceCheckpoint row is created."""
    entry = _entry(url='https://example.com/cp')
    entry.update(checkpoints=['A', 'B'], checkpoint_source='bogus')
    path = _write_yaml(tmp_path, [entry])

    with pytest.raises(CommandError) as excinfo:
        _run(path)

    message = str(excinfo.value)
    assert 'checkpoint_source' in message
    assert ResourceCheckpoint.objects.count() == 0


def test_full_curated_catalog_loads_cleanly():
    """Sanity: the real curated catalog loads after ``seed_initial_skills``.

    Both seed commands run with their default arguments (per the original
    note, that means the project's seed_data/resources.yaml). The test then
    checks that:

    * at least 60 resources exist,
    * every resource has at least one skill link, and
    * every COURSE/VIDEO resource has at least two checkpoints.
    """
    from django.db.models import Count

    # Seed skills first so the catalog's skill references resolve.
    call_command('seed_initial_skills', stdout=StringIO())
    call_command('seed_initial_resources', stdout=StringIO())

    count = Resource.objects.count()
    assert count >= 60, f'expected ≥60 resources, got {count}'

    # Every resource has ≥1 skill link. Fetch the linked resource ids once and
    # test membership in Python, rather than issuing one EXISTS query per
    # resource.
    linked_ids = set(SkillResource.objects.values_list('resource_id', flat=True))
    orphans = [
        title
        for pk, title in Resource.objects.values_list('pk', 'title')
        if pk not in linked_ids
    ]
    assert orphans == [], f'resources without any skill link: {orphans}'

    # The checkpoints sidecar is expected to have populated the catalog: every
    # COURSE/VIDEO resource ends with >=2 checkpoints (per the original note,
    # the guarantee that check_checkpoints gates).
    thin = list(
        Resource.objects.filter(type__in=['COURSE', 'VIDEO'])
        .annotate(n=Count('checkpoints'))
        .filter(n__lt=2)
        .values_list('title', flat=True)
    )
    assert thin == [], f'COURSE/VIDEO resources with <2 checkpoints: {thin}'