# gapguide-api/apps/resources/tests/test_seed_resources.py
# arifRB's picture
# Deploy GapGuide backend (Docker)
# ffd36e0 verified
# Raw
# History Blame Contribute Delete
# 11.2 kB
"""Tests for python manage.py seed_initial_resources.
See research/08-next-modules-build-plan.md §Module A seed test list.
"""
from io import StringIO
from pathlib import Path
import pytest
import yaml
from django.core.management import call_command
from django.core.management.base import CommandError
from apps.resources.models import Resource, ResourceCheckpoint, SkillResource
from apps.skills.models import Skill
pytestmark = pytest.mark.django_db
def _write_yaml(tmp_path: Path, resources: list[dict]) -> Path:
    """Write ``resources`` to ``tmp_path / "resources.yaml"`` and return the path.

    The list is nested under a top-level ``resources`` key and serialised with
    ``yaml.safe_dump`` as UTF-8 text.
    """
    target = tmp_path / "resources.yaml"
    document = yaml.safe_dump({"resources": resources})
    target.write_text(document, encoding="utf-8")
    return target
_MISSING = object()
def _entry(title='Python Intro', url='https://example.com/py', skills=_MISSING,
type_='COURSE', difficulty='BEGINNER', rating=4.0, duration=60):
if skills is _MISSING:
skills = [{'skill_name': 'Python', 'relevance_score': 0.9}]
return {
'title': title,
'provider': 'TestProvider',
'url': url,
'difficulty_level': difficulty,
'duration': duration,
'type': type_,
'rating': rating,
'skills': skills,
}
@pytest.fixture
def python_skill(db):
    """Create and return the ``Python`` skill (Programming, BEGINNER)."""
    attrs = {
        'skill_name': 'Python',
        'category': 'Programming',
        'difficulty_level': 'BEGINNER',
    }
    return Skill.objects.create(**attrs)
@pytest.fixture
def sql_skill(db):
    """Create and return the ``SQL`` skill (Database, BEGINNER)."""
    attrs = {
        'skill_name': 'SQL',
        'category': 'Database',
        'difficulty_level': 'BEGINNER',
    }
    return Skill.objects.create(**attrs)
def _run(path, prune=False):
    """Run ``seed_initial_resources --path <path>`` and return its stdout text.

    ``--prune`` is appended to the command arguments only when ``prune`` is
    truthy. Exceptions raised by the command propagate to the caller.
    """
    captured = StringIO()
    prune_flag = ['--prune'] if prune else []
    call_command(
        'seed_initial_resources', '--path', str(path), *prune_flag,
        stdout=captured,
    )
    return captured.getvalue()
def test_seed_is_idempotent(tmp_path, python_skill):
    """Seeding the same file twice leaves resource and link counts unchanged."""
    seed_file = _write_yaml(tmp_path, [_entry()])

    _run(seed_file)
    resources_before = Resource.objects.count()
    links_before = SkillResource.objects.count()

    # Second pass over identical input must not add rows.
    _run(seed_file)
    assert Resource.objects.count() == resources_before
    assert SkillResource.objects.count() == links_before
def test_seed_creates_skill_links(tmp_path, python_skill, sql_skill):
    """An entry listing two skills yields one link per skill with its score."""
    multi_url = 'https://example.com/multi'
    skills = [
        {'skill_name': 'Python', 'relevance_score': 0.9},
        {'skill_name': 'SQL', 'relevance_score': 0.5},
    ]
    entry = _entry(title='Multi', url=multi_url, skills=skills)
    _run(_write_yaml(tmp_path, [entry]))

    resource = Resource.objects.get(url=multi_url)
    # Map skill name -> stored relevance for every link on this resource.
    linked = {}
    for link in SkillResource.objects.filter(resource=resource):
        linked[link.skill.skill_name] = link.relevance_score
    assert linked == {'Python': 0.9, 'SQL': 0.5}
def test_seed_updates_existing_row(tmp_path, python_skill):
    """Re-seeding an existing URL with a new rating updates the row in place."""
    url = 'https://example.com/py'

    _run(_write_yaml(tmp_path, [_entry(rating=3.0)]))
    original = Resource.objects.get(url=url)
    assert original.rating == 3.0

    # Same URL, new rating → update, not duplicate.
    _run(_write_yaml(tmp_path, [_entry(rating=4.8)]))
    updated = Resource.objects.get(url=url)
    assert updated.id == original.id
    assert updated.rating == 4.8
def test_prune_removes_stale_resources_and_links(tmp_path, python_skill):
    """With ``--prune``, a resource dropped from the file is deleted with its links."""
    keep_url = 'https://example.com/keep'
    drop_url = 'https://example.com/drop'

    both = [
        _entry(title='Keep', url=keep_url),
        _entry(title='Drop', url=drop_url),
    ]
    _run(_write_yaml(tmp_path, both))
    assert Resource.objects.count() == 2

    only_keep = [_entry(title='Keep', url=keep_url)]
    _run(_write_yaml(tmp_path, only_keep), prune=True)

    surviving_urls = set(Resource.objects.values_list('url', flat=True))
    assert surviving_urls == {keep_url}
    # SkillResource rows for the deleted resource are cascaded out.
    stale_links = SkillResource.objects.filter(resource__url=drop_url)
    assert not stale_links.exists()
def test_prune_removes_stale_skill_links_within_same_resource(tmp_path,
                                                              python_skill,
                                                              sql_skill):
    """Running seed against a reduced skill list on an existing resource should
    drop the orphan SkillResource rows — not leave them stranded."""
    python_link = {'skill_name': 'Python', 'relevance_score': 0.9}
    sql_link = {'skill_name': 'SQL', 'relevance_score': 0.5}

    def _multi(skills):
        return _entry(title='Multi', url='https://example.com/multi', skills=skills)

    _run(_write_yaml(tmp_path, [_multi([python_link, sql_link])]))
    assert SkillResource.objects.count() == 2

    # NOTE: despite the test name, the second run does not pass --prune.
    _run(_write_yaml(tmp_path, [_multi([python_link])]))
    remaining = SkillResource.objects.values_list('skill__skill_name', flat=True)
    assert list(remaining) == ['Python']
def test_seed_is_transactional_on_bad_row(tmp_path, python_skill):
    """Validation abort happens before any writes, so partial writes can't leak."""
    good = _entry(title='Good', url='https://example.com/good')
    # missing most fields
    bad = {
        'title': 'Bad',
        'provider': 'X',
        'url': 'https://example.com/bad',
    }
    seed_file = _write_yaml(tmp_path, [good, bad])

    with pytest.raises(CommandError):
        _run(seed_file)
    # Not even the valid first entry may have been written.
    assert Resource.objects.count() == 0
def test_seed_rejects_unknown_skill(tmp_path, python_skill):
    """An entry naming a skill the fixtures did not create aborts the seed.

    The offending skill name must appear in the error and no resource is written.
    """
    entry = _entry(
        title='Unknown skill',
        url='https://example.com/unknown',
        skills=[{'skill_name': 'NotAnOnetSkill', 'relevance_score': 0.9}],
    )
    seed_file = _write_yaml(tmp_path, [entry])

    with pytest.raises(CommandError) as excinfo:
        _run(seed_file)

    message = str(excinfo.value)
    assert 'NotAnOnetSkill' in message
    assert Resource.objects.count() == 0
def test_seed_rejects_invalid_type(tmp_path, python_skill):
    """An entry with ``type`` set to ``PODCAST`` makes the command raise."""
    podcast = _entry(type_='PODCAST')
    seed_file = _write_yaml(tmp_path, [podcast])
    with pytest.raises(CommandError):
        _run(seed_file)
def test_seed_rejects_duplicate_url_in_yaml(tmp_path, python_skill):
    """Two entries sharing one URL abort the seed with a 'Duplicate url' error."""
    dup_url = 'https://example.com/dup'
    entries = [_entry(title=title, url=dup_url) for title in ('A', 'B')]
    seed_file = _write_yaml(tmp_path, entries)

    with pytest.raises(CommandError) as excinfo:
        _run(seed_file)
    message = str(excinfo.value)
    assert 'Duplicate url' in message
def test_seed_rejects_empty_skills_list(tmp_path, python_skill):
    """An explicit empty ``skills`` list is rejected with an error mentioning skills."""
    no_skills = _entry(skills=[])
    seed_file = _write_yaml(tmp_path, [no_skills])

    with pytest.raises(CommandError) as excinfo:
        _run(seed_file)
    # Case-insensitive match on the error text.
    message = str(excinfo.value).lower()
    assert 'skills' in message
def test_seed_creates_checkpoints_from_yaml(tmp_path, python_skill):
    """Inline ``checkpoints`` become rows ordered 1..n carrying the given source."""
    url = 'https://example.com/cp'
    titles = ['Week 1: Intro', 'Week 2: Loops', 'Week 3: Done']
    entry = {
        **_entry(url=url),
        'checkpoints': titles,
        'checkpoint_source': 'jsonld',
    }
    _run(_write_yaml(tmp_path, [entry]))

    resource = Resource.objects.get(url=url)
    rows = list(resource.checkpoints.order_by('order_index'))
    assert [row.title for row in rows] == ['Week 1: Intro', 'Week 2: Loops', 'Week 3: Done']
    assert [row.order_index for row in rows] == [1, 2, 3]
    assert [row.source for row in rows] == ['jsonld'] * len(rows)
def test_seed_checkpoints_create_if_absent_no_clobber(tmp_path, python_skill):
    """Re-running seed must NOT duplicate or replace existing checkpoints —
    UserCheckpointProgress FKs them and a re-seed can't drop a learner's rows."""
    url = 'https://example.com/cp'

    def _seed_with(checkpoints):
        entry = _entry(url=url)
        entry['checkpoints'] = checkpoints
        _run(_write_yaml(tmp_path, [entry]))

    def _checkpoint_ids(resource):
        return set(resource.checkpoints.values_list('id', flat=True))

    _seed_with(['A', 'B'])
    resource = Resource.objects.get(url=url)
    ids_before = _checkpoint_ids(resource)
    assert len(ids_before) == 2

    # Re-run with a DIFFERENT list: existing rows are untouched (create-if-absent).
    _seed_with(['A', 'B', 'C', 'D'])
    ids_after = _checkpoint_ids(resource)
    assert ids_after == ids_before  # unchanged — same rows, no dup, no delete
def test_seed_applies_checkpoints_sidecar_by_url(tmp_path, python_skill):
    """A ``--checkpoints-path`` sidecar attaches checkpoints to the matching URL.

    The sidecar also carries an entry for a URL that is not in the catalog;
    the run must still complete without raising.
    """
    url = 'https://example.com/side'
    rpath = _write_yaml(tmp_path, [_entry(url=url)])

    sidecar = {"checkpoints": [
        {"url": url, "source": "jsonld", "titles": ["Mod 1", "Mod 2", "Mod 3"]},
        # stale entry: skipped
        {"url": "https://example.com/not-in-catalog", "titles": ["X"]},
    ]}
    side = tmp_path / "checkpoints.yaml"
    side.write_text(yaml.safe_dump(sidecar), encoding="utf-8")

    call_command(
        'seed_initial_resources',
        '--path', str(rpath),
        '--checkpoints-path', str(side),
        stdout=StringIO(),
        stderr=StringIO(),
    )

    resource = Resource.objects.get(url=url)
    rows = list(resource.checkpoints.order_by('order_index'))
    assert [row.title for row in rows] == ['Mod 1', 'Mod 2', 'Mod 3']
    assert [row.source for row in rows] == ['jsonld'] * len(rows)
def test_seed_sidecar_invalid_source_raises(tmp_path, python_skill):
    """A sidecar entry whose ``source`` is ``nope`` makes the command raise."""
    url = 'https://example.com/side'
    rpath = _write_yaml(tmp_path, [_entry(url=url)])

    bad_sidecar = {"checkpoints": [
        {"url": url, "source": "nope", "titles": ["A"]},
    ]}
    side = tmp_path / "checkpoints.yaml"
    side.write_text(yaml.safe_dump(bad_sidecar), encoding="utf-8")

    command = (
        'seed_initial_resources',
        '--path', str(rpath),
        '--checkpoints-path', str(side),
    )
    with pytest.raises(CommandError):
        call_command(*command, stdout=StringIO(), stderr=StringIO())
def test_seed_rejects_invalid_checkpoint_source(tmp_path, python_skill):
    """An inline ``checkpoint_source`` of ``bogus`` aborts the seed.

    The error must mention ``checkpoint_source`` and no checkpoint rows may exist
    afterwards.
    """
    entry = {
        **_entry(url='https://example.com/cp'),
        'checkpoints': ['A', 'B'],
        'checkpoint_source': 'bogus',
    }
    seed_file = _write_yaml(tmp_path, [entry])

    with pytest.raises(CommandError) as excinfo:
        _run(seed_file)

    message = str(excinfo.value)
    assert 'checkpoint_source' in message
    assert not ResourceCheckpoint.objects.exists()
def test_full_curated_catalog_loads_cleanly():
    """Sanity: the real seed_data/resources.yaml loads after seed_initial_skills,
    and produces ≥60 resources with ≥1 skill link each."""
    # Local import kept inside the test, but hoisted out of the middle of the body.
    from django.db.models import Count

    # Seed skills from the curated role file so references resolve.
    call_command('seed_initial_skills', stdout=StringIO())
    out = StringIO()
    call_command('seed_initial_resources', stdout=out)

    count = Resource.objects.count()
    assert count >= 60, f'expected ≥60 resources, got {count}'

    # Every resource has ≥1 skill link. Fetch the linked resource ids in one
    # query instead of issuing an EXISTS query per resource.
    linked_ids = set(SkillResource.objects.values_list('resource_id', flat=True))
    orphans = [
        r.title for r in Resource.objects.all()
        if r.pk not in linked_ids
    ]
    assert orphans == []

    # The checkpoints.yaml sidecar populated the catalog: every COURSE/VIDEO
    # resource ends with >=2 checkpoints (the guarantee check_checkpoints gates).
    thin = list(
        Resource.objects.filter(type__in=['COURSE', 'VIDEO'])
        .annotate(n=Count('checkpoints'))
        .filter(n__lt=2)
        .values_list('title', flat=True)
    )
    assert thin == [], f'COURSE/VIDEO resources with <2 checkpoints: {thin}'