Spaces:

arifRB
/

gapguide-api

Sleeping

App Files Files Community

gapguide-api / apps /resources /tests /test_seed_resources.py

arifRB

Deploy GapGuide backend (Docker)

ffd36e0 verified 15 days ago

Raw

History Blame Contribute Delete

11.2 kB

	"""Tests for python manage.py seed_initial_resources.

	See research/08-next-modules-build-plan.md §Module A seed test list.
	"""
	from io import StringIO
	from pathlib import Path

	import pytest
	import yaml
	from django.core.management import call_command
	from django.core.management.base import CommandError

	from apps.resources.models import Resource, ResourceCheckpoint, SkillResource
	from apps.skills.models import Skill

	pytestmark = pytest.mark.django_db


	def _write_yaml(tmp_path: Path, resources: list[dict]) -> Path:
	    """Dump ``resources`` under a top-level ``resources`` key and return the file.

	    The file is always ``resources.yaml`` inside ``tmp_path``, so calling this
	    twice with the same directory overwrites the earlier catalog.
	    """
	    target = tmp_path / "resources.yaml"
	    document = yaml.safe_dump({"resources": resources})
	    target.write_text(document, encoding="utf-8")
	    return target


	# Sentinel meaning "caller did not pass ``skills``"; keeps an explicit ``[]``
	# or ``None`` distinguishable from the default.
	_MISSING = object()


	def _entry(title='Python Intro', url='https://example.com/py', skills=_MISSING,
	           type_='COURSE', difficulty='BEGINNER', rating=4.0, duration=60,
	           *, provider='TestProvider', **extra):
	    """Build one catalog entry dict for a test YAML file.

	    Args:
	        title, url, type_, difficulty, rating, duration: stored under the keys
	            ``title``, ``url``, ``type``, ``difficulty_level``, ``rating`` and
	            ``duration`` respectively.
	        skills: list of skill-link dicts. When omitted, a fresh single-element
	            list linking the 'Python' skill at relevance 0.9 is created on
	            every call, so callers never share a mutable default.
	        provider: value for the ``provider`` key (keyword-only).
	        **extra: additional keys (e.g. ``checkpoints``) merged in last, so
	            they are appended after — and may override — the standard keys.

	    Returns:
	        A new dict describing the entry.
	    """
	    if skills is _MISSING:
	        skills = [{'skill_name': 'Python', 'relevance_score': 0.9}]
	    entry = {
	        'title': title,
	        'provider': provider,
	        'url': url,
	        'difficulty_level': difficulty,
	        'duration': duration,
	        'type': type_,
	        'rating': rating,
	        'skills': skills,
	    }
	    entry.update(extra)
	    return entry


	@pytest.fixture
	def python_skill(db):
	    """Create and return the 'Python' Skill row (requests the ``db`` fixture)."""
	    attrs = {
	        'skill_name': 'Python',
	        'category': 'Programming',
	        'difficulty_level': 'BEGINNER',
	    }
	    return Skill.objects.create(**attrs)


	@pytest.fixture
	def sql_skill(db):
	    """Create and return the 'SQL' Skill row (requests the ``db`` fixture)."""
	    attrs = {
	        'skill_name': 'SQL',
	        'category': 'Database',
	        'difficulty_level': 'BEGINNER',
	    }
	    return Skill.objects.create(**attrs)


	def _run(path, prune=False):
	    """Invoke ``seed_initial_resources`` on ``path`` and return its captured stdout.

	    ``--prune`` is appended to the argument list only when ``prune`` is truthy.
	    """
	    captured = StringIO()
	    prune_flag = ['--prune'] if prune else []
	    argv = ['seed_initial_resources', '--path', str(path), *prune_flag]
	    call_command(*argv, stdout=captured)
	    return captured.getvalue()


	def test_seed_is_idempotent(tmp_path, python_skill):
	    """Seeding the same file twice leaves Resource and SkillResource counts unchanged."""
	    catalog = _write_yaml(tmp_path, [_entry()])
	    tracked = (Resource, SkillResource)

	    _run(catalog)
	    baseline = [model.objects.count() for model in tracked]

	    _run(catalog)
	    for model, expected in zip(tracked, baseline):
	        assert model.objects.count() == expected


	def test_seed_creates_skill_links(tmp_path, python_skill, sql_skill):
	    """Each skill listed on an entry becomes a SkillResource row with its score."""
	    url = 'https://example.com/multi'
	    expected = {'Python': 0.9, 'SQL': 0.5}
	    skills = [
	        {'skill_name': name, 'relevance_score': score}
	        for name, score in expected.items()
	    ]
	    _run(_write_yaml(tmp_path, [_entry(title='Multi', url=url, skills=skills)]))

	    resource = Resource.objects.get(url=url)
	    linked = {}
	    for link in SkillResource.objects.filter(resource=resource):
	        linked[link.skill.skill_name] = link.relevance_score
	    assert linked == expected


	def test_seed_updates_existing_row(tmp_path, python_skill):
	    """Re-seeding an entry with the same URL updates the row in place."""
	    url = 'https://example.com/py'

	    _run(_write_yaml(tmp_path, [_entry(rating=3.0)]))
	    original = Resource.objects.get(url=url)
	    assert original.rating == 3.0

	    # Identical URL with a different rating must update, not insert a second row.
	    _run(_write_yaml(tmp_path, [_entry(rating=4.8)]))
	    updated = Resource.objects.get(url=url)
	    assert updated.id == original.id
	    assert updated.rating == 4.8


	def test_prune_removes_stale_resources_and_links(tmp_path, python_skill):
	    """With ``--prune``, a resource dropped from the YAML is deleted along with its links."""
	    keep_url = 'https://example.com/keep'
	    drop_url = 'https://example.com/drop'

	    both = [
	        _entry(title='Keep', url=keep_url),
	        _entry(title='Drop', url=drop_url),
	    ]
	    _run(_write_yaml(tmp_path, both))
	    assert Resource.objects.count() == 2

	    only_keep = [_entry(title='Keep', url=keep_url)]
	    _run(_write_yaml(tmp_path, only_keep), prune=True)

	    surviving = set(Resource.objects.values_list('url', flat=True))
	    assert surviving == {keep_url}
	    # No SkillResource row may still point at the deleted resource.
	    stale_links = SkillResource.objects.filter(resource__url=drop_url)
	    assert not stale_links.exists()


	def test_prune_removes_stale_skill_links_within_same_resource(tmp_path,
	                                                              python_skill,
	                                                              sql_skill):
	    """Re-seeding an existing resource with fewer skills must delete the
	    SkillResource rows for the skills no longer listed."""
	    url = 'https://example.com/multi'

	    def _seed(skills):
	        # Write a one-entry catalog for the shared URL and seed it (no --prune).
	        _run(_write_yaml(tmp_path, [_entry(title='Multi', url=url, skills=skills)]))

	    _seed([
	        {'skill_name': 'Python', 'relevance_score': 0.9},
	        {'skill_name': 'SQL', 'relevance_score': 0.5},
	    ])
	    assert SkillResource.objects.count() == 2

	    _seed([{'skill_name': 'Python', 'relevance_score': 0.9}])
	    remaining = list(SkillResource.objects.values_list('skill__skill_name', flat=True))
	    assert remaining == ['Python']


	def test_seed_is_transactional_on_bad_row(tmp_path, python_skill):
	    """A catalog containing one invalid entry aborts without persisting any Resource."""
	    valid = _entry(title='Good', url='https://example.com/good')
	    # Only three keys present; every other field is missing.
	    incomplete = {
	        'title': 'Bad',
	        'provider': 'X',
	        'url': 'https://example.com/bad',
	    }
	    path = _write_yaml(tmp_path, [valid, incomplete])

	    with pytest.raises(CommandError):
	        _run(path)
	    assert Resource.objects.count() == 0


	def test_seed_rejects_unknown_skill(tmp_path, python_skill):
	    """An entry naming a skill with no Skill row fails, names it, and writes nothing."""
	    unknown = 'NotAnOnetSkill'
	    entry = _entry(
	        title='Unknown skill',
	        url='https://example.com/unknown',
	        skills=[{'skill_name': unknown, 'relevance_score': 0.9}],
	    )
	    path = _write_yaml(tmp_path, [entry])

	    with pytest.raises(CommandError) as exc:
	        _run(path)
	    message = str(exc.value)
	    assert unknown in message
	    assert Resource.objects.count() == 0


	def test_seed_rejects_invalid_type(tmp_path, python_skill):
	    """An entry whose ``type`` is 'PODCAST' makes the command raise CommandError."""
	    bad_entry = _entry(type_='PODCAST')
	    path = _write_yaml(tmp_path, [bad_entry])
	    with pytest.raises(CommandError):
	        _run(path)


	def test_seed_rejects_duplicate_url_in_yaml(tmp_path, python_skill):
	    """Two entries sharing one URL fail with a message containing 'Duplicate url'."""
	    shared_url = 'https://example.com/dup'
	    entries = [_entry(title=title, url=shared_url) for title in ('A', 'B')]
	    path = _write_yaml(tmp_path, entries)

	    with pytest.raises(CommandError) as exc:
	        _run(path)
	    message = str(exc.value)
	    assert 'Duplicate url' in message


	def test_seed_rejects_empty_skills_list(tmp_path, python_skill):
	    """An entry with ``skills: []`` fails with an error mentioning 'skills' (any case)."""
	    no_skills = _entry(skills=[])
	    path = _write_yaml(tmp_path, [no_skills])

	    with pytest.raises(CommandError) as exc:
	        _run(path)
	    message = str(exc.value).lower()
	    assert 'skills' in message


	def test_seed_creates_checkpoints_from_yaml(tmp_path, python_skill):
	    """Inline ``checkpoints`` become ordered rows (1-based) tagged with the given source."""
	    url = 'https://example.com/cp'
	    titles = ['Week 1: Intro', 'Week 2: Loops', 'Week 3: Done']
	    entry = {
	        **_entry(url=url),
	        'checkpoints': list(titles),
	        'checkpoint_source': 'jsonld',
	    }
	    _run(_write_yaml(tmp_path, [entry]))

	    resource = Resource.objects.get(url=url)
	    rows = list(resource.checkpoints.order_by('order_index'))
	    assert [row.title for row in rows] == titles
	    assert [row.order_index for row in rows] == [1, 2, 3]
	    assert all(row.source == 'jsonld' for row in rows)


	def test_seed_checkpoints_create_if_absent_no_clobber(tmp_path, python_skill):
	    """A re-seed must neither duplicate nor replace checkpoints that already exist.

	    UserCheckpointProgress FKs them, so a re-seed can't drop a learner's rows.
	    """
	    url = 'https://example.com/cp'

	    def _seed(titles):
	        # Seed a single-entry catalog for the shared URL with these checkpoint titles.
	        entry = _entry(url=url)
	        entry['checkpoints'] = titles
	        _run(_write_yaml(tmp_path, [entry]))

	    _seed(['A', 'B'])
	    resource = Resource.objects.get(url=url)
	    ids_before = set(resource.checkpoints.values_list('id', flat=True))
	    assert len(ids_before) == 2

	    # A different list on the second run must leave the stored rows as they are.
	    _seed(['A', 'B', 'C', 'D'])
	    ids_after = set(resource.checkpoints.values_list('id', flat=True))
	    assert ids_after == ids_before  # same rows: nothing added, nothing removed


	def test_seed_applies_checkpoints_sidecar_by_url(tmp_path, python_skill):
	    """Checkpoints from a ``--checkpoints-path`` sidecar attach to the resource with the same URL."""
	    url = 'https://example.com/side'
	    rpath = _write_yaml(tmp_path, [_entry(url=url)])

	    sidecar = {"checkpoints": [
	        {"url": url, "source": "jsonld", "titles": ["Mod 1", "Mod 2", "Mod 3"]},
	        # Stale entry: its URL is not in the catalog, so it is expected to be skipped.
	        {"url": "https://example.com/not-in-catalog", "titles": ["X"]},
	    ]}
	    side = tmp_path / "checkpoints.yaml"
	    side.write_text(yaml.safe_dump(sidecar), encoding="utf-8")

	    call_command(
	        'seed_initial_resources',
	        '--path', str(rpath),
	        '--checkpoints-path', str(side),
	        stdout=StringIO(),
	        stderr=StringIO(),
	    )

	    resource = Resource.objects.get(url=url)
	    rows = list(resource.checkpoints.order_by('order_index'))
	    assert [row.title for row in rows] == ['Mod 1', 'Mod 2', 'Mod 3']
	    assert all(row.source == 'jsonld' for row in rows)


	def test_seed_sidecar_invalid_source_raises(tmp_path, python_skill):
	    """A sidecar entry whose ``source`` is 'nope' makes the command raise CommandError."""
	    url = 'https://example.com/side'
	    rpath = _write_yaml(tmp_path, [_entry(url=url)])

	    sidecar = {"checkpoints": [
	        {"url": url, "source": "nope", "titles": ["A"]},
	    ]}
	    side = tmp_path / "checkpoints.yaml"
	    side.write_text(yaml.safe_dump(sidecar), encoding="utf-8")

	    with pytest.raises(CommandError):
	        call_command(
	            'seed_initial_resources',
	            '--path', str(rpath),
	            '--checkpoints-path', str(side),
	            stdout=StringIO(),
	            stderr=StringIO(),
	        )


	def test_seed_rejects_invalid_checkpoint_source(tmp_path, python_skill):
	    """An inline ``checkpoint_source`` of 'bogus' is rejected and no checkpoint is stored."""
	    entry = {
	        **_entry(url='https://example.com/cp'),
	        'checkpoints': ['A', 'B'],
	        'checkpoint_source': 'bogus',
	    }
	    path = _write_yaml(tmp_path, [entry])

	    with pytest.raises(CommandError) as exc:
	        _run(path)
	    message = str(exc.value)
	    assert 'checkpoint_source' in message
	    assert not ResourceCheckpoint.objects.exists()


	def test_full_curated_catalog_loads_cleanly():
	    """Sanity: the real seed_data/resources.yaml loads after seed_initial_skills,
	    and produces ≥60 resources with ≥1 skill link each."""
	    # Imported inside the test so the file's top-level import block stays untouched.
	    from django.db.models import Count

	    # Seed skills from the curated role file so references resolve.
	    call_command('seed_initial_skills', stdout=StringIO())
	    call_command('seed_initial_resources', stdout=StringIO())

	    count = Resource.objects.count()
	    assert count >= 60, f'expected ≥60 resources, got {count}'

	    # Every resource has ≥1 skill link. Collect the linked resource ids once
	    # and test membership in Python, rather than issuing one EXISTS query per
	    # resource.
	    linked_ids = set(SkillResource.objects.values_list('resource_id', flat=True))
	    orphans = [
	        title
	        for pk, title in Resource.objects.values_list('pk', 'title')
	        if pk not in linked_ids
	    ]
	    assert orphans == []

	    # The checkpoints.yaml sidecar populated the catalog: every COURSE/VIDEO
	    # resource ends with >=2 checkpoints (the guarantee check_checkpoints gates).
	    thin = list(
	        Resource.objects.filter(type__in=['COURSE', 'VIDEO'])
	        .annotate(n=Count('checkpoints'))
	        .filter(n__lt=2)
	        .values_list('title', flat=True)
	    )
	    assert thin == [], f'COURSE/VIDEO resources with <2 checkpoints: {thin}'