Spaces:
Sleeping
Sleeping
| """Tests for python manage.py seed_initial_resources. | |
| See research/08-next-modules-build-plan.md §Module A seed test list. | |
| """ | |
| from io import StringIO | |
| from pathlib import Path | |
| import pytest | |
| import yaml | |
| from django.core.management import call_command | |
| from django.core.management.base import CommandError | |
| from apps.resources.models import Resource, ResourceCheckpoint, SkillResource | |
| from apps.skills.models import Skill | |
| pytestmark = pytest.mark.django_db | |
def _write_yaml(tmp_path: Path, resources: list[dict]) -> Path:
    """Write ``resources`` to ``tmp_path / "resources.yaml"`` and return the path.

    The list is nested under a top-level ``resources`` key. The file name is
    fixed, so calling this twice with the same ``tmp_path`` overwrites the
    earlier file.
    """
    target = tmp_path / "resources.yaml"
    document = yaml.safe_dump({"resources": resources})
    target.write_text(document, encoding="utf-8")
    return target
| _MISSING = object() | |
| def _entry(title='Python Intro', url='https://example.com/py', skills=_MISSING, | |
| type_='COURSE', difficulty='BEGINNER', rating=4.0, duration=60): | |
| if skills is _MISSING: | |
| skills = [{'skill_name': 'Python', 'relevance_score': 0.9}] | |
| return { | |
| 'title': title, | |
| 'provider': 'TestProvider', | |
| 'url': url, | |
| 'difficulty_level': difficulty, | |
| 'duration': duration, | |
| 'type': type_, | |
| 'rating': rating, | |
| 'skills': skills, | |
| } | |
@pytest.fixture
def python_skill(db):
    """Create and return the ``Python`` skill row.

    Registered as a pytest fixture because the tests in this module request
    ``python_skill`` by parameter name; without the decorator pytest cannot
    resolve it. ``db`` is requested so the row is created with database
    access enabled.
    """
    return Skill.objects.create(
        skill_name='Python', category='Programming', difficulty_level='BEGINNER',
    )
@pytest.fixture
def sql_skill(db):
    """Create and return the ``SQL`` skill row.

    Registered as a pytest fixture because the tests in this module request
    ``sql_skill`` by parameter name; without the decorator pytest cannot
    resolve it. ``db`` is requested so the row is created with database
    access enabled.
    """
    return Skill.objects.create(
        skill_name='SQL', category='Database', difficulty_level='BEGINNER',
    )
def _run(path, prune=False):
    """Run ``seed_initial_resources`` against ``path`` and return its stdout.

    ``--prune`` is appended to the command line only when ``prune`` is truthy.
    Exceptions raised by the command propagate to the caller.
    """
    argv = ['seed_initial_resources', '--path', str(path)]
    argv += ['--prune'] if prune else []
    captured = StringIO()
    call_command(*argv, stdout=captured)
    return captured.getvalue()
def test_seed_is_idempotent(tmp_path, python_skill):
    """Seeding the same file twice leaves the Resource and SkillResource counts unchanged."""
    seed_file = _write_yaml(tmp_path, [_entry()])

    _run(seed_file)
    resources_before = Resource.objects.count()
    links_before = SkillResource.objects.count()

    _run(seed_file)
    assert Resource.objects.count() == resources_before
    assert SkillResource.objects.count() == links_before
def test_seed_creates_skill_links(tmp_path, python_skill, sql_skill):
    """A resource listing two skills gets one SkillResource per skill, carrying its relevance score."""
    multi = _entry(
        title='Multi',
        url='https://example.com/multi',
        skills=[
            {'skill_name': 'Python', 'relevance_score': 0.9},
            {'skill_name': 'SQL', 'relevance_score': 0.5},
        ],
    )
    _run(_write_yaml(tmp_path, [multi]))

    resource = Resource.objects.get(url='https://example.com/multi')
    scores_by_skill = {}
    for link in SkillResource.objects.filter(resource=resource):
        scores_by_skill[link.skill.skill_name] = link.relevance_score
    assert scores_by_skill == {'Python': 0.9, 'SQL': 0.5}
def test_seed_updates_existing_row(tmp_path, python_skill):
    """Re-seeding an already-present URL with a new rating updates that row in place."""
    url = 'https://example.com/py'

    _run(_write_yaml(tmp_path, [_entry(rating=3.0)]))
    original = Resource.objects.get(url=url)
    assert original.rating == 3.0

    # Same URL, new rating → update, not duplicate.
    _run(_write_yaml(tmp_path, [_entry(rating=4.8)]))
    updated = Resource.objects.get(url=url)
    assert updated.id == original.id
    assert updated.rating == 4.8
def test_prune_removes_stale_resources_and_links(tmp_path, python_skill):
    """With ``--prune``, a resource missing from the YAML is deleted along with its skill links."""
    keep = _entry(title='Keep', url='https://example.com/keep')
    drop = _entry(title='Drop', url='https://example.com/drop')

    _run(_write_yaml(tmp_path, [keep, drop]))
    assert Resource.objects.count() == 2

    _run(_write_yaml(tmp_path, [keep]), prune=True)
    surviving_urls = set(Resource.objects.values_list('url', flat=True))
    assert surviving_urls == {'https://example.com/keep'}

    # No SkillResource row may still point at the dropped resource.
    dangling = SkillResource.objects.filter(resource__url='https://example.com/drop')
    assert not dangling.exists()
def test_prune_removes_stale_skill_links_within_same_resource(tmp_path,
                                                              python_skill,
                                                              sql_skill):
    """Re-seeding an existing resource with a shorter skill list should remove
    the SkillResource rows for the skills that are no longer listed."""
    python_link = {'skill_name': 'Python', 'relevance_score': 0.9}
    sql_link = {'skill_name': 'SQL', 'relevance_score': 0.5}

    def multi(skills):
        # Same title/url each time so both runs target one resource.
        return _entry(title='Multi', url='https://example.com/multi', skills=skills)

    _run(_write_yaml(tmp_path, [multi([python_link, sql_link])]))
    assert SkillResource.objects.count() == 2

    # The second run is a plain seed; --prune is not passed.
    _run(_write_yaml(tmp_path, [multi([python_link])]))
    remaining = SkillResource.objects.values_list('skill__skill_name', flat=True)
    assert list(remaining) == ['Python']
def test_seed_is_transactional_on_bad_row(tmp_path, python_skill):
    """A file with one valid and one incomplete row raises CommandError and
    leaves no Resource rows behind, not even the valid one."""
    valid = _entry(title='Good', url='https://example.com/good')
    # Only title/provider/url are present; every other field is missing.
    incomplete = {
        'title': 'Bad',
        'provider': 'X',
        'url': 'https://example.com/bad',
    }
    seed_file = _write_yaml(tmp_path, [valid, incomplete])

    with pytest.raises(CommandError):
        _run(seed_file)
    assert Resource.objects.count() == 0
def test_seed_rejects_unknown_skill(tmp_path, python_skill):
    """Referencing a skill name with no Skill row raises CommandError naming
    that skill, and no Resource is written."""
    unknown_link = {'skill_name': 'NotAnOnetSkill', 'relevance_score': 0.9}
    entry = _entry(
        title='Unknown skill',
        url='https://example.com/unknown',
        skills=[unknown_link],
    )
    seed_file = _write_yaml(tmp_path, [entry])

    with pytest.raises(CommandError) as excinfo:
        _run(seed_file)
    message = str(excinfo.value)
    assert 'NotAnOnetSkill' in message
    assert Resource.objects.count() == 0
def test_seed_rejects_invalid_type(tmp_path, python_skill):
    """An entry whose ``type`` is ``'PODCAST'`` makes the command raise CommandError."""
    bad_entry = _entry(type_='PODCAST')
    seed_file = _write_yaml(tmp_path, [bad_entry])
    with pytest.raises(CommandError):
        _run(seed_file)
def test_seed_rejects_duplicate_url_in_yaml(tmp_path, python_skill):
    """Two entries sharing one URL raise CommandError mentioning 'Duplicate url'."""
    first = _entry(title='A', url='https://example.com/dup')
    second = _entry(title='B', url='https://example.com/dup')
    seed_file = _write_yaml(tmp_path, [first, second])

    with pytest.raises(CommandError) as excinfo:
        _run(seed_file)
    message = str(excinfo.value)
    assert 'Duplicate url' in message
def test_seed_rejects_empty_skills_list(tmp_path, python_skill):
    """An entry with ``skills: []`` raises CommandError whose message mentions
    'skills' (checked case-insensitively)."""
    seed_file = _write_yaml(tmp_path, [_entry(skills=[])])
    with pytest.raises(CommandError) as excinfo:
        _run(seed_file)
    message = str(excinfo.value).lower()
    assert 'skills' in message
def test_seed_creates_checkpoints_from_yaml(tmp_path, python_skill):
    """Inline ``checkpoints`` become checkpoint rows in list order, numbered
    from 1, each tagged with the entry's ``checkpoint_source``."""
    titles = ['Week 1: Intro', 'Week 2: Loops', 'Week 3: Done']
    entry = _entry(url='https://example.com/cp')
    entry.update({'checkpoints': list(titles), 'checkpoint_source': 'jsonld'})
    _run(_write_yaml(tmp_path, [entry]))

    resource = Resource.objects.get(url='https://example.com/cp')
    checkpoints = list(resource.checkpoints.order_by('order_index'))
    assert [cp.title for cp in checkpoints] == titles
    assert [cp.order_index for cp in checkpoints] == [1, 2, 3]
    assert all(cp.source == 'jsonld' for cp in checkpoints)
def test_seed_checkpoints_create_if_absent_no_clobber(tmp_path, python_skill):
    """Re-seeding must neither duplicate nor replace checkpoints that already
    exist. Per the original author's note, UserCheckpointProgress rows
    reference them, so a re-seed must not drop a learner's rows."""
    url = 'https://example.com/cp'

    def seed_with(titles):
        entry = _entry(url=url)
        entry['checkpoints'] = titles
        _run(_write_yaml(tmp_path, [entry]))

    seed_with(['A', 'B'])
    resource = Resource.objects.get(url=url)
    ids_before = set(resource.checkpoints.values_list('id', flat=True))
    assert len(ids_before) == 2

    # Re-run with a DIFFERENT list: existing rows are untouched (create-if-absent).
    seed_with(['A', 'B', 'C', 'D'])
    ids_after = set(resource.checkpoints.values_list('id', flat=True))
    assert ids_after == ids_before  # unchanged — same rows, no dup, no delete
def test_seed_applies_checkpoints_sidecar_by_url(tmp_path, python_skill):
    """A ``--checkpoints-path`` sidecar attaches its titles and source to the
    resource with the matching URL; an entry for a URL that is not in the
    catalog does not make the run fail."""
    catalog = _write_yaml(tmp_path, [_entry(url='https://example.com/side')])

    sidecar_entries = [
        {"url": "https://example.com/side", "source": "jsonld",
         "titles": ["Mod 1", "Mod 2", "Mod 3"]},
        # Stale entry: its URL is not in the catalog written above.
        {"url": "https://example.com/not-in-catalog", "titles": ["X"]},
    ]
    sidecar = tmp_path / "checkpoints.yaml"
    sidecar.write_text(
        yaml.safe_dump({"checkpoints": sidecar_entries}),
        encoding="utf-8",
    )

    call_command(
        'seed_initial_resources',
        '--path', str(catalog),
        '--checkpoints-path', str(sidecar),
        stdout=StringIO(),
        stderr=StringIO(),
    )

    resource = Resource.objects.get(url='https://example.com/side')
    checkpoints = list(resource.checkpoints.order_by('order_index'))
    assert [cp.title for cp in checkpoints] == ['Mod 1', 'Mod 2', 'Mod 3']
    assert all(cp.source == 'jsonld' for cp in checkpoints)
def test_seed_sidecar_invalid_source_raises(tmp_path, python_skill):
    """A sidecar entry whose ``source`` is ``"nope"`` makes the command raise CommandError."""
    catalog = _write_yaml(tmp_path, [_entry(url='https://example.com/side')])

    bad_sidecar_entry = {"url": "https://example.com/side", "source": "nope", "titles": ["A"]}
    sidecar = tmp_path / "checkpoints.yaml"
    sidecar.write_text(
        yaml.safe_dump({"checkpoints": [bad_sidecar_entry]}),
        encoding="utf-8",
    )

    with pytest.raises(CommandError):
        call_command(
            'seed_initial_resources',
            '--path', str(catalog),
            '--checkpoints-path', str(sidecar),
            stdout=StringIO(),
            stderr=StringIO(),
        )
def test_seed_rejects_invalid_checkpoint_source(tmp_path, python_skill):
    """An inline ``checkpoint_source`` of ``'bogus'`` raises CommandError that
    names the field, and no ResourceCheckpoint rows are written."""
    entry = _entry(url='https://example.com/cp')
    entry.update({'checkpoints': ['A', 'B'], 'checkpoint_source': 'bogus'})
    seed_file = _write_yaml(tmp_path, [entry])

    with pytest.raises(CommandError) as excinfo:
        _run(seed_file)
    message = str(excinfo.value)
    assert 'checkpoint_source' in message
    assert not ResourceCheckpoint.objects.exists()
def test_full_curated_catalog_loads_cleanly():
    """Sanity: the real seed_data/resources.yaml loads after seed_initial_skills,
    and produces ≥60 resources with ≥1 skill link each.

    Both commands run with their default arguments (no ``--path``). The test
    also checks that every COURSE/VIDEO resource has at least two checkpoints.
    """
    from django.db.models import Count

    # Seed skills from the curated role file so references resolve.
    call_command('seed_initial_skills', stdout=StringIO())
    call_command('seed_initial_resources', stdout=StringIO())

    count = Resource.objects.count()
    assert count >= 60, f'expected ≥60 resources, got {count}'

    # Every resource has ≥1 skill link. Collect the linked resource ids in one
    # query rather than issuing one EXISTS query per resource.
    linked_ids = set(SkillResource.objects.values_list('resource_id', flat=True))
    orphans = list(
        Resource.objects.exclude(pk__in=linked_ids).values_list('title', flat=True)
    )
    assert orphans == []

    # The checkpoints.yaml sidecar populated the catalog: every COURSE/VIDEO
    # resource ends with >=2 checkpoints (the guarantee check_checkpoints gates).
    thin = list(
        Resource.objects.filter(type__in=['COURSE', 'VIDEO'])
        .annotate(n=Count('checkpoints'))
        .filter(n__lt=2)
        .values_list('title', flat=True)
    )
    assert thin == [], f'COURSE/VIDEO resources with <2 checkpoints: {thin}'