"""Tests for python manage.py seed_initial_resources.

See research/08-next-modules-build-plan.md §Module A seed test list.
"""
from io import StringIO
from pathlib import Path

import pytest
import yaml
from django.core.management import call_command
from django.core.management.base import CommandError
from django.db.models import Count

from apps.resources.models import Resource, ResourceCheckpoint, SkillResource
from apps.skills.models import Skill

pytestmark = pytest.mark.django_db


def _write_yaml(tmp_path: Path, resources: list[dict]) -> Path:
    """Dump *resources* under a top-level ``resources`` key and return the file.

    The file is always ``tmp_path / "resources.yaml"``, so a second call within
    the same test overwrites the first catalog.
    """
    p = tmp_path / "resources.yaml"
    p.write_text(yaml.safe_dump({"resources": resources}), encoding="utf-8")
    return p


# Sentinel so that callers can pass an explicit empty ``skills`` list to
# ``_entry`` without it being replaced by the default link.
_MISSING = object()


def _entry(title: str = 'Python Intro', url: str = 'https://example.com/py',
           skills=_MISSING, type_: str = 'COURSE',
           difficulty: str = 'BEGINNER', rating: float = 4.0,
           duration: int = 60) -> dict:
    """Build one catalog entry dict in the shape written by ``_write_yaml``.

    When *skills* is omitted the entry links to the ``Python`` skill with a
    relevance score of 0.9; any value passed explicitly (including ``[]``) is
    used as-is.
    """
    if skills is _MISSING:
        skills = [{'skill_name': 'Python', 'relevance_score': 0.9}]
    return {
        'title': title,
        'provider': 'TestProvider',
        'url': url,
        'difficulty_level': difficulty,
        'duration': duration,
        'type': type_,
        'rating': rating,
        'skills': skills,
    }


@pytest.fixture
def python_skill(db):
    """Create the ``Python`` skill that ``_entry`` links to by default."""
    return Skill.objects.create(
        skill_name='Python',
        category='Programming',
        difficulty_level='BEGINNER',
    )


@pytest.fixture
def sql_skill(db):
    """Create a second skill (``SQL``) for the multi-link tests."""
    return Skill.objects.create(
        skill_name='SQL',
        category='Database',
        difficulty_level='BEGINNER',
    )


def _run(path: Path, prune: bool = False) -> str:
    """Run ``seed_initial_resources --path <path>`` and return captured stdout.

    ``--prune`` is appended only when *prune* is true. Exceptions raised by the
    command (e.g. ``CommandError``) propagate to the caller.
    """
    out = StringIO()
    args = ['seed_initial_resources', '--path', str(path)]
    if prune:
        args.append('--prune')
    call_command(*args, stdout=out)
    return out.getvalue()


def test_seed_is_idempotent(tmp_path, python_skill):
    """Seeding the same file twice leaves resource and link counts unchanged."""
    path = _write_yaml(tmp_path, [_entry()])
    _run(path)
    first_count = Resource.objects.count()
    first_links = SkillResource.objects.count()

    _run(path)
    assert Resource.objects.count() == first_count
    assert SkillResource.objects.count() == first_links


def test_seed_creates_skill_links(tmp_path, python_skill, sql_skill):
    """Each listed skill becomes a SkillResource row with its relevance score."""
    path = _write_yaml(tmp_path, [
        _entry(
            title='Multi',
            url='https://example.com/multi',
            skills=[
                {'skill_name': 'Python', 'relevance_score': 0.9},
                {'skill_name': 'SQL', 'relevance_score': 0.5},
            ],
        ),
    ])
    _run(path)

    resource = Resource.objects.get(url='https://example.com/multi')
    linked = {
        link.skill.skill_name: link.relevance_score
        for link in SkillResource.objects.filter(resource=resource)
    }
    assert linked == {'Python': 0.9, 'SQL': 0.5}


def test_seed_updates_existing_row(tmp_path, python_skill):
    """Re-seeding a known URL updates the existing row in place."""
    path = _write_yaml(tmp_path, [_entry(rating=3.0)])
    _run(path)
    first = Resource.objects.get(url='https://example.com/py')
    assert first.rating == 3.0

    # Same URL, new rating → update, not duplicate.
    path2 = _write_yaml(tmp_path, [_entry(rating=4.8)])
    _run(path2)
    again = Resource.objects.get(url='https://example.com/py')
    assert again.id == first.id
    assert again.rating == 4.8


def test_prune_removes_stale_resources_and_links(tmp_path, python_skill):
    """With --prune, resources absent from the YAML are deleted with their links."""
    path = _write_yaml(tmp_path, [
        _entry(title='Keep', url='https://example.com/keep'),
        _entry(title='Drop', url='https://example.com/drop'),
    ])
    _run(path)
    assert Resource.objects.count() == 2

    path2 = _write_yaml(tmp_path, [
        _entry(title='Keep', url='https://example.com/keep'),
    ])
    _run(path2, prune=True)

    assert set(Resource.objects.values_list('url', flat=True)) == {
        'https://example.com/keep',
    }
    # SkillResource rows for the deleted resource are cascaded out.
    assert not SkillResource.objects.filter(
        resource__url='https://example.com/drop'
    ).exists()


def test_prune_removes_stale_skill_links_within_same_resource(tmp_path, python_skill, sql_skill):
    """Running seed against a reduced skill list on an existing resource
    should drop the orphan SkillResource rows — not leave them stranded."""
    path = _write_yaml(tmp_path, [
        _entry(
            title='Multi',
            url='https://example.com/multi',
            skills=[
                {'skill_name': 'Python', 'relevance_score': 0.9},
                {'skill_name': 'SQL', 'relevance_score': 0.5},
            ],
        ),
    ])
    _run(path)
    assert SkillResource.objects.count() == 2

    path2 = _write_yaml(tmp_path, [
        _entry(
            title='Multi',
            url='https://example.com/multi',
            skills=[{'skill_name': 'Python', 'relevance_score': 0.9}],
        ),
    ])
    # Note: this second run is a plain re-seed; --prune is not passed.
    _run(path2)

    remaining = list(SkillResource.objects.values_list('skill__skill_name', flat=True))
    assert remaining == ['Python']


def test_seed_is_transactional_on_bad_row(tmp_path, python_skill):
    """Validation abort happens before any writes, so partial writes can't leak."""
    path = _write_yaml(tmp_path, [
        _entry(title='Good', url='https://example.com/good'),
        {
            # missing most fields
            'title': 'Bad',
            'provider': 'X',
            'url': 'https://example.com/bad',
        },
    ])
    with pytest.raises(CommandError):
        _run(path)
    assert Resource.objects.count() == 0


def test_seed_rejects_unknown_skill(tmp_path, python_skill):
    """An unknown skill name aborts the seed and is named in the error."""
    path = _write_yaml(tmp_path, [
        _entry(
            title='Unknown skill',
            url='https://example.com/unknown',
            skills=[{'skill_name': 'NotAnOnetSkill', 'relevance_score': 0.9}],
        ),
    ])
    with pytest.raises(CommandError) as exc:
        _run(path)
    assert 'NotAnOnetSkill' in str(exc.value)
    assert Resource.objects.count() == 0


def test_seed_rejects_invalid_type(tmp_path, python_skill):
    """A ``type`` value of 'PODCAST' is rejected with CommandError."""
    path = _write_yaml(tmp_path, [_entry(type_='PODCAST')])
    with pytest.raises(CommandError):
        _run(path)


def test_seed_rejects_duplicate_url_in_yaml(tmp_path, python_skill):
    """Two entries sharing one URL raise a 'Duplicate url' CommandError."""
    path = _write_yaml(tmp_path, [
        _entry(title='A', url='https://example.com/dup'),
        _entry(title='B', url='https://example.com/dup'),
    ])
    with pytest.raises(CommandError) as exc:
        _run(path)
    assert 'Duplicate url' in str(exc.value)


def test_seed_rejects_empty_skills_list(tmp_path, python_skill):
    """An entry with ``skills: []`` is rejected and the error mentions skills."""
    path = _write_yaml(tmp_path, [_entry(skills=[])])
    with pytest.raises(CommandError) as exc:
        _run(path)
    assert 'skills' in str(exc.value).lower()


def test_seed_creates_checkpoints_from_yaml(tmp_path, python_skill):
    """Inline ``checkpoints`` become ordered rows (1-based) tagged with the source."""
    entry = _entry(url='https://example.com/cp')
    entry['checkpoints'] = ['Week 1: Intro', 'Week 2: Loops', 'Week 3: Done']
    entry['checkpoint_source'] = 'jsonld'
    path = _write_yaml(tmp_path, [entry])
    _run(path)

    resource = Resource.objects.get(url='https://example.com/cp')
    rows = list(resource.checkpoints.order_by('order_index'))
    assert [r.title for r in rows] == ['Week 1: Intro', 'Week 2: Loops', 'Week 3: Done']
    assert [r.order_index for r in rows] == [1, 2, 3]
    assert all(r.source == 'jsonld' for r in rows)


def test_seed_checkpoints_create_if_absent_no_clobber(tmp_path, python_skill):
    """Re-running seed must NOT duplicate or replace existing checkpoints —
    UserCheckpointProgress FKs them and a re-seed can't drop a learner's rows."""
    entry = _entry(url='https://example.com/cp')
    entry['checkpoints'] = ['A', 'B']
    path = _write_yaml(tmp_path, [entry])
    _run(path)
    resource = Resource.objects.get(url='https://example.com/cp')
    first_ids = set(resource.checkpoints.values_list('id', flat=True))
    assert len(first_ids) == 2

    # Re-run with a DIFFERENT list: existing rows are untouched (create-if-absent).
    entry2 = _entry(url='https://example.com/cp')
    entry2['checkpoints'] = ['A', 'B', 'C', 'D']
    _run(_write_yaml(tmp_path, [entry2]))
    again = set(resource.checkpoints.values_list('id', flat=True))
    assert again == first_ids  # unchanged — same rows, no dup, no delete


def test_seed_applies_checkpoints_sidecar_by_url(tmp_path, python_skill):
    """Sidecar entries are matched to resources by URL; unmatched ones don't abort."""
    rpath = _write_yaml(tmp_path, [_entry(url='https://example.com/side')])
    side = tmp_path / "checkpoints.yaml"
    side.write_text(yaml.safe_dump({"checkpoints": [
        {"url": "https://example.com/side", "source": "jsonld",
         "titles": ["Mod 1", "Mod 2", "Mod 3"]},
        {"url": "https://example.com/not-in-catalog",  # stale entry: skipped
         "titles": ["X"]},
    ]}), encoding="utf-8")

    call_command('seed_initial_resources', '--path', str(rpath),
                 '--checkpoints-path', str(side),
                 stdout=StringIO(), stderr=StringIO())

    resource = Resource.objects.get(url='https://example.com/side')
    rows = list(resource.checkpoints.order_by('order_index'))
    assert [r.title for r in rows] == ['Mod 1', 'Mod 2', 'Mod 3']
    assert all(r.source == 'jsonld' for r in rows)


def test_seed_sidecar_invalid_source_raises(tmp_path, python_skill):
    """A sidecar entry with ``source: nope`` raises CommandError."""
    rpath = _write_yaml(tmp_path, [_entry(url='https://example.com/side')])
    side = tmp_path / "checkpoints.yaml"
    side.write_text(yaml.safe_dump({"checkpoints": [
        {"url": "https://example.com/side", "source": "nope", "titles": ["A"]},
    ]}), encoding="utf-8")

    with pytest.raises(CommandError):
        call_command('seed_initial_resources', '--path', str(rpath),
                     '--checkpoints-path', str(side),
                     stdout=StringIO(), stderr=StringIO())


def test_seed_rejects_invalid_checkpoint_source(tmp_path, python_skill):
    """An inline ``checkpoint_source`` of 'bogus' aborts before any checkpoint is written."""
    entry = _entry(url='https://example.com/cp')
    entry['checkpoints'] = ['A', 'B']
    entry['checkpoint_source'] = 'bogus'
    path = _write_yaml(tmp_path, [entry])
    with pytest.raises(CommandError) as exc:
        _run(path)
    assert 'checkpoint_source' in str(exc.value)
    assert not ResourceCheckpoint.objects.exists()


def test_full_curated_catalog_loads_cleanly():
    """Sanity: the real seed_data/resources.yaml loads after seed_initial_skills,
    and produces ≥60 resources with ≥1 skill link each."""
    # Seed skills from the curated role file so references resolve.
    call_command('seed_initial_skills', stdout=StringIO())

    out = StringIO()
    call_command('seed_initial_resources', stdout=out)

    count = Resource.objects.count()
    assert count >= 60, f'expected ≥60 resources, got {count}'

    # Every resource has ≥1 skill link. One query: resources whose pk never
    # appears as SkillResource.resource_id (avoids an exists() per resource).
    linked_ids = SkillResource.objects.values('resource_id')
    orphans = list(
        Resource.objects.exclude(pk__in=linked_ids)
        .values_list('title', flat=True)
    )
    assert orphans == []

    # The checkpoints.yaml sidecar populated the catalog: every COURSE/VIDEO
    # resource ends with >=2 checkpoints (the guarantee check_checkpoints gates).
    thin = list(
        Resource.objects.filter(type__in=['COURSE', 'VIDEO'])
        .annotate(n=Count('checkpoints'))
        .filter(n__lt=2)
        .values_list('title', flat=True)
    )
    assert thin == [], f'COURSE/VIDEO resources with <2 checkpoints: {thin}'