Spaces:

arifRB
/

gapguide-api

Sleeping

App Files Files Community

gapguide-api / tests /test_e2e_student_flow.py

arifRB

Deploy GapGuide backend (Docker)

ffd36e0 verified 15 days ago

Raw

History Blame Contribute Delete

23 kB

	"""End-to-end cross-module smoke tests.

	Exercises the full Student happy path through the HTTP layer only:
	register -> skills catalog -> set user skills -> roles catalog -> set target
	role -> gap analysis -> resources browse -> checkpoint toggle -> progress rollup.

	If any seam between apps breaks (model FK drift, URL routing, serializer
	shape), these fail. Unit tests in each app won't catch those regressions.
	"""
	from pathlib import Path

	import pytest
	from django.core.files.uploadedfile import SimpleUploadedFile
	from rest_framework.test import APIClient

	from apps.resources.models import Resource, ResourceCheckpoint, SkillResource
	from apps.roles.models import Role, RoleSkill
	from apps.skills.models import Skill

	# Applies the django_db marker to every test collected from this module.
	pytestmark = pytest.mark.django_db


	# Endpoint paths the tests hit through the HTTP layer.
	REGISTER = '/api/auth/register/'
	SKILLS = '/api/skills/'
	USER_SKILLS = '/api/user-skills/'
	ROLES = '/api/roles/'
	TARGET_ROLE = '/api/target-role/'
	GAP_ANALYSIS = '/api/analysis/'
	RECOMMENDATIONS = '/api/recommendations/'
	RESOURCES = '/api/resources/'
	PROGRESS_LIST = '/api/progress/'
	PARSE_RESUME = '/api/auth/profile/parse-resume/'
	UPGRADE_SUGGESTIONS = '/api/progress/upgrade-suggestions/'

	# Directory (next to this file) that holds the resume fixtures used by the
	# resume-parsing test.
	FIXTURES_DIR = Path(__file__).resolve().parent / 'fixtures' / 'resumes'


	def _register(client: APIClient, email: str, name: str = 'Test') -> str:
	    """Register a new account and return its access token.

	    Posts ``name``/``email`` plus a fixed password to the register endpoint,
	    asserts the response is 201 (showing the response body on failure), and
	    returns the ``access`` value from the response data.
	    """
	    password = 'StrongPass123!'
	    payload = {
	        'name': name,
	        'email': email,
	        'password': password,
	        'password_confirm': password,
	    }
	    response = client.post(REGISTER, data=payload, format='json')
	    assert response.status_code == 201, response.data
	    return response.data['access']


	def auth_client(access_token):
	    """Return a new APIClient that sends ``access_token`` as a Bearer header."""
	    authed = APIClient()
	    authed.credentials(HTTP_AUTHORIZATION='Bearer {}'.format(access_token))
	    return authed


	@pytest.fixture
	def catalog():
	    """Create a small skills/role/resources catalog and return it as a dict.

	    Builds three skills, one role that requires all three (Python and SQL
	    mandatory, Machine Learning optional), an ML course with four
	    checkpoints, and a SQL docs resource with no checkpoints. The result has
	    the keys ``'skills'``, ``'role'`` and ``'resources'``.
	    """
	    # Rows are created in the order listed so primary keys stay stable.
	    skill_specs = [
	        ('python', 'Python', 'Programming', 'BEGINNER'),
	        ('sql', 'SQL', 'Database', 'BEGINNER'),
	        ('ml', 'Machine Learning', 'AI', 'INTERMEDIATE'),
	    ]
	    skills = {}
	    for key, skill_name, category, difficulty in skill_specs:
	        skills[key] = Skill.objects.create(
	            skill_name=skill_name,
	            category=category,
	            difficulty_level=difficulty,
	        )

	    data_scientist = Role.objects.create(
	        role_name='Data Scientist',
	        description='DS role',
	        industry='Tech',
	        is_active=True,
	    )

	    # (skill key, required level, weight, mandatory?)
	    requirements = [
	        ('python', 'ADVANCED', 1.0, True),
	        ('sql', 'INTERMEDIATE', 0.8, True),
	        ('ml', 'INTERMEDIATE', 0.6, False),
	    ]
	    for key, level, weight, mandatory in requirements:
	        RoleSkill.objects.create(
	            role=data_scientist,
	            skill=skills[key],
	            required_level=level,
	            weight=weight,
	            is_mandatory=mandatory,
	        )

	    # Course resource for the ML skill, with four ordered checkpoints.
	    ml_course = Resource.objects.create(
	        title='Intro to ML',
	        provider='Coursera',
	        url='https://coursera.org/ml',
	        difficulty_level='INTERMEDIATE',
	        duration=3600,
	        type='COURSE',
	        rating=4.8,
	    )
	    SkillResource.objects.create(
	        skill=skills['ml'], resource=ml_course, relevance_score=0.95,
	    )
	    week_titles = ('Week 1', 'Week 2', 'Week 3', 'Week 4')
	    for position, title in enumerate(week_titles, start=1):
	        ResourceCheckpoint.objects.create(
	            resource=ml_course,
	            order_index=position,
	            title=title,
	            source='manual',
	        )

	    # Docs resource for the SQL skill; deliberately has no checkpoints.
	    sql_docs = Resource.objects.create(
	        title='SQL Docs',
	        provider='PostgreSQL.org',
	        url='https://postgresql.org/docs/',
	        difficulty_level='INTERMEDIATE',
	        duration=0,
	        type='DOCS',
	        rating=4.5,
	    )
	    SkillResource.objects.create(
	        skill=skills['sql'], resource=sql_docs, relevance_score=0.9,
	    )

	    return {
	        'skills': skills,
	        'role': data_scientist,
	        'resources': {'ml_course': ml_course, 'sql_docs': sql_docs},
	    }


	def test_student_flow_end_to_end(catalog):
	    """Full happy-path flow — if ANY cross-module seam is broken, this fails.

	    Walks one student through registration, the skills and roles catalogs,
	    setting user skills and a target role, gap analysis, resource browsing,
	    checkpoint toggles, the manual progress slider, and the progress list,
	    asserting on every response along the way.
	    """
	    # 1. Register ---------------------------------------------------------
	    # The shared helper asserts the 201 and returns the access token.
	    access = _register(APIClient(), 's@x.com', name='Student A')
	    c = auth_client(access)

	    # 2. Browse skills (paginated) ----------------------------------------
	    r = c.get(SKILLS)
	    assert r.status_code == 200
	    names = {s['skill_name'] for s in r.data['results']}
	    assert {'Python', 'SQL', 'Machine Learning'} <= names

	    # 3. Set user skills (partial coverage — ML is not set) ---------------
	    # Numbers chosen so raw readiness > 60 AND mandatory (Python) is short:
	    #   weighted_sat = 0.7*1.0 + 1.0*0.8 + 0*0.6 = 1.5; / 2.4 * 100 = 62.5
	    #   => mandatory cap pins readiness to 60.0 (PARTIAL band).
	    for skill_id, prof in [
	        (catalog['skills']['python'].id, 70),  # below ADVANCED (T=100), mandatory
	        (catalog['skills']['sql'].id, 90),     # above INTERMEDIATE (T=60) => MET
	    ]:
	        r = c.post(USER_SKILLS, data={'skill_id': skill_id, 'proficiency': prof},
	                   format='json')
	        assert r.status_code == 201, r.data

	    r = c.get(USER_SKILLS)
	    assert r.status_code == 200
	    assert len(r.data) == 2

	    # 4. Browse roles -----------------------------------------------------
	    r = c.get(ROLES)
	    assert r.status_code == 200
	    assert any(role['role_name'] == 'Data Scientist' for role in r.data)

	    # 5. Set target role --------------------------------------------------
	    r = c.post(TARGET_ROLE, data={'role_id': catalog['role'].id}, format='json')
	    assert r.status_code == 201, r.data

	    # 6. Gap analysis — verifies skills + roles + analysis seam ----------
	    r = c.get(GAP_ANALYSIS)
	    assert r.status_code == 200, r.data
	    report = r.data
	    assert report['role_name'] == 'Data Scientist'
	    assert report['no_requirements'] is False
	    # Mandatory Python is unmet (70 < 100) -> raw 62.5 capped to 60.
	    assert report['mandatory_cap_applied'] is True
	    assert report['readiness'] == 60.0
	    assert report['band'] == 'PARTIAL'

	    gaps_by_name = {g['skill_name']: g for g in report['gaps']}
	    assert gaps_by_name['Python']['gap_type'] == 'INSUFFICIENT'
	    assert gaps_by_name['Python']['severity'] == 'HIGH'  # mandatory shortfall
	    assert gaps_by_name['SQL']['gap_type'] == 'MET'
	    assert gaps_by_name['Machine Learning']['gap_type'] == 'MISSING'
	    assert gaps_by_name['Machine Learning']['severity'] == 'MEDIUM'  # not mandatory

	    # 7. Browse resources filtered by the ML gap (paginated) -------------
	    ml_id = catalog['skills']['ml'].id
	    r = c.get(f'{RESOURCES}?skill={ml_id}')
	    assert r.status_code == 200
	    assert r.data['count'] == 1
	    results = r.data['results']
	    assert results[0]['title'] == 'Intro to ML'
	    assert results[0]['has_checkpoints'] is True

	    ml_course_id = results[0]['id']
	    r = c.get(f'{RESOURCES}{ml_course_id}/')
	    assert r.status_code == 200
	    checkpoints = r.data['checkpoints']
	    assert len(checkpoints) == 4
	    assert [cp['order_index'] for cp in checkpoints] == [1, 2, 3, 4]

	    # 8. Toggle checkpoints — verifies resources + progress seam ---------
	    cp1, cp2 = checkpoints[0]['id'], checkpoints[1]['id']
	    r = c.post(f'/api/progress/checkpoint/{cp1}/toggle/')
	    assert r.status_code == 200
	    assert r.data['progress'] == 25
	    assert r.data['status'] == 'IN_PROGRESS'

	    r = c.post(f'/api/progress/checkpoint/{cp2}/toggle/')
	    # Check the status first so an error response is reported as such
	    # instead of as a KeyError on 'progress'.
	    assert r.status_code == 200, r.data
	    assert r.data['progress'] == 50

	    # 9. Manual slider rejected because checkpoints exist ----------------
	    r = c.post(f'/api/progress/resource/{ml_course_id}/',
	               data={'progress': 90}, format='json')
	    assert r.status_code == 400

	    # 10. Manual slider allowed for docs resource (no checkpoints) -------
	    docs_id = catalog['resources']['sql_docs'].id
	    r = c.post(f'/api/progress/resource/{docs_id}/',
	               data={'progress': 100}, format='json')
	    assert r.status_code == 200
	    assert r.data['status'] == 'COMPLETED'
	    assert r.data['completed_at'] is not None

	    # 11. Progress list shows BOTH resources, scoped to this user --------
	    r = c.get(PROGRESS_LIST)
	    assert r.status_code == 200
	    assert len(r.data) == 2
	    by_resource = {row['resource']: row for row in r.data}
	    assert by_resource[ml_course_id]['progress'] == 50
	    assert by_resource[ml_course_id]['has_checkpoints'] is True
	    assert by_resource[docs_id]['progress'] == 100
	    assert by_resource[docs_id]['has_checkpoints'] is False


	def test_gap_analysis_requires_target_role(catalog):
	    """Gap analysis without a target role must 400 and mention 'target role'."""
	    # _register asserts the 201, so a broken registration fails here with the
	    # response body rather than as a KeyError on 'access'.
	    c = auth_client(_register(APIClient(), 'b@x.com', name='Student B'))

	    r = c.get(GAP_ANALYSIS)
	    assert r.status_code == 400
	    assert 'target role' in r.data['detail'].lower()


	def test_gap_analysis_role_id_override_bypasses_target(catalog):
	    """``?role_id=`` must produce a report even when no target role is set."""
	    # _register asserts the 201, so a broken registration fails here with the
	    # response body rather than as a KeyError on 'access'.
	    c = auth_client(_register(APIClient(), 'c@x.com', name='Student C'))

	    # No target role set — but ?role_id=X should still work for "what-if".
	    r = c.get(f"{GAP_ANALYSIS}?role_id={catalog['role'].id}")
	    assert r.status_code == 200
	    assert r.data['role_name'] == 'Data Scientist'


	def test_recommendations_flow(catalog):
	    """Full recommendations happy path — every gap skill gets ≥1 rec, the top
	    rec resolves via /api/resources/<id>/, and the top ML pick matches the
	    single ML row filtered via ?skill=."""
	    c = auth_client(_register(APIClient(), 'rec@x.com', name='Rec Student'))

	    # Python=70 (mandatory, below ADVANCED=100) → INSUFFICIENT
	    # SQL=90 (above INTERMEDIATE=60) → MET (excluded from recs)
	    # ML=0 → MISSING
	    for skill_id, prof in [
	        (catalog['skills']['python'].id, 70),
	        (catalog['skills']['sql'].id, 90),
	    ]:
	        r = c.post(USER_SKILLS, data={'skill_id': skill_id, 'proficiency': prof},
	                   format='json')
	        # Fail at the setup step, not later on a confusing gap mismatch.
	        assert r.status_code == 201, r.data

	    r = c.post(TARGET_ROLE, data={'role_id': catalog['role'].id}, format='json')
	    assert r.status_code == 201, r.data

	    r = c.get(GAP_ANALYSIS)
	    assert r.status_code == 200, r.data
	    gap = r.data
	    rec = c.get(RECOMMENDATIONS).data
	    assert rec['role_id'] == catalog['role'].id

	    gap_skill_ids = {g['skill_id'] for g in gap['gaps'] if g['gap_type'] != 'MET'}
	    rec_skill_ids = {int(k) for k in rec['recommendations']}
	    assert gap_skill_ids == rec_skill_ids
	    # SQL is MET → must not appear in recommendations.
	    assert catalog['skills']['sql'].id not in rec_skill_ids

	    ml_id = catalog['skills']['ml'].id
	    # Keys may be str or int depending on how the response is rendered.
	    ml_recs = (rec['recommendations'].get(str(ml_id))
	               or rec['recommendations'].get(ml_id))
	    assert ml_recs, 'ML gap should surface ≥1 recommendation'
	    top_ml = ml_recs[0]
	    assert top_ml['resource_id'] == catalog['resources']['ml_course'].id

	    # Top rec must be fetchable via /api/resources/<id>/
	    r = c.get(f'{RESOURCES}{top_ml["resource_id"]}/')
	    assert r.status_code == 200
	    assert r.data['url'] == top_ml['url']

	    # And cross-check: filtering resources by the ML skill returns the same row.
	    r = c.get(f'{RESOURCES}?skill={ml_id}')
	    assert r.status_code == 200
	    assert r.data['results'][0]['id'] == top_ml['resource_id']


	def test_full_closed_loop(catalog):
	    """Full gap → recommend → complete → upgrade → re-analyze cycle.

	    Start: Python=70 (mandatory short), SQL=90 (MET), ML=0.
	    → gap shows readiness capped at 60 (raw 62.5).
	    → recommendations suggest the ML course.
	    → complete ML course via checkpoints → COMPLETED.
	    → upgrade-suggestions lists ML@60.
	    → apply → ML gap becomes MET.
	    → re-analyze → ML MET, readiness still capped by Python shortfall.
	    → complete the SQL DOCS resource (slider path) → apply its upgrade
	      (SQL is at 90 and the bump map targets 100, so the upgrade applies).
	    """
	    c = auth_client(_register(APIClient(), 'closed@x.com',
	                              name='Closed Loop Student'))

	    for skill_id, prof in [
	        (catalog['skills']['python'].id, 70),
	        (catalog['skills']['sql'].id, 90),
	    ]:
	        r = c.post(USER_SKILLS, data={'skill_id': skill_id, 'proficiency': prof},
	                   format='json')
	        assert r.status_code == 201, r.data
	    r = c.post(TARGET_ROLE, data={'role_id': catalog['role'].id}, format='json')
	    assert r.status_code == 201, r.data

	    r = c.get(GAP_ANALYSIS)
	    assert r.status_code == 200, r.data
	    pre = r.data
	    assert pre['readiness'] == 60.0
	    assert pre['mandatory_cap_applied'] is True

	    rec = c.get(RECOMMENDATIONS).data
	    ml_id = catalog['skills']['ml'].id
	    # Keys may be str or int depending on how the response is rendered.
	    ml_recs = (rec['recommendations'].get(str(ml_id))
	               or rec['recommendations'].get(ml_id))
	    # Guard the index below so a missing rec fails with a readable message.
	    assert ml_recs, 'ML gap should surface ≥1 recommendation'
	    top = ml_recs[0]
	    assert top['resource_id'] == catalog['resources']['ml_course'].id

	    # Complete ML via checkpoints.
	    detail = c.get(f'{RESOURCES}{top["resource_id"]}/').data
	    for cp in detail['checkpoints']:
	        r = c.post(f'/api/progress/checkpoint/{cp["id"]}/toggle/')
	        assert r.status_code == 200, r.data

	    suggestions = c.get(UPGRADE_SUGGESTIONS).data['suggestions']
	    # Default to None so a missing suggestion is an assertion failure that
	    # shows the payload, not a bare StopIteration.
	    ml_sug = next((s for s in suggestions if s['skill_id'] == ml_id), None)
	    assert ml_sug is not None, suggestions
	    assert ml_sug['suggested_proficiency'] == 60

	    r = c.post(f'{UPGRADE_SUGGESTIONS}{ml_id}/apply/')
	    assert r.status_code == 200
	    assert r.data['applied'] is True

	    r = c.get(GAP_ANALYSIS)
	    assert r.status_code == 200, r.data
	    mid = r.data
	    ml_gap = next((g for g in mid['gaps'] if g['skill_id'] == ml_id), None)
	    assert ml_gap is not None, mid['gaps']
	    assert ml_gap['gap_type'] == 'MET'
	    # Python still mandatory short → cap still applied.
	    assert mid['mandatory_cap_applied'] is True

	    # Now drive SQL upgrade too via the DOCS slider path (complete → bump).
	    sql_doc = catalog['resources']['sql_docs']
	    r = c.post(f'/api/progress/resource/{sql_doc.id}/',
	               data={'progress': 100}, format='json')
	    assert r.status_code == 200, r.data
	    assert r.data['status'] == 'COMPLETED'

	    sql_id = catalog['skills']['sql'].id
	    r = c.post(f'{UPGRADE_SUGGESTIONS}{sql_id}/apply/')
	    # Bump map: user at 90 → target 100. So this applies.
	    assert r.status_code == 200
	    assert r.data['applied'] is True


	def test_progress_isolated_per_user(catalog):
	    """User A's checkpoint toggles must not leak into User B's progress."""
	    # _register asserts each 201, so a failed sign-up is reported directly.
	    a = auth_client(_register(APIClient(), 'isoa@x.com', name='A'))
	    b = auth_client(_register(APIClient(), 'isob@x.com', name='B'))

	    # Both users must select the target role so progress toggles are in-plan.
	    for user in (a, b):
	        r = user.post(TARGET_ROLE, data={'role_id': catalog['role'].id},
	                      format='json')
	        assert r.status_code == 201, r.data

	    ml_course = catalog['resources']['ml_course']
	    cp = ml_course.checkpoints.first()
	    r = a.post(f'/api/progress/checkpoint/{cp.id}/toggle/')
	    assert r.status_code == 200, r.data

	    ra_list = a.get(PROGRESS_LIST).data
	    rb_list = b.get(PROGRESS_LIST).data
	    # Separate assertions so a failure shows which condition broke.
	    assert len(ra_list) == 1
	    assert ra_list[0]['progress'] == 25
	    assert len(rb_list) == 0


	# -------------------- 7b: added e2e scenarios --------------------

	def test_register_duplicate_email_rejected():
	    """Registering the same email twice, differing only in case, must 400."""
	    client = APIClient()
	    first_payload = {
	        'name': 'First',
	        'email': 'dup@x.com',
	        'password': 'StrongPass123!',
	        'password_confirm': 'StrongPass123!',
	    }
	    first = client.post(REGISTER, data=first_payload, format='json')
	    assert first.status_code == 201

	    # Re-use the payload with a new name and an upper-cased local part; the
	    # address must still be treated as taken.
	    second_payload = {**first_payload, 'name': 'Second', 'email': 'DUP@x.com'}
	    second = client.post(REGISTER, data=second_payload, format='json')
	    assert second.status_code == 400

	    # The error body should name the email field and say it already exists.
	    rendered = str(second.data).lower()
	    assert 'email' in rendered
	    assert 'already' in rendered


	def test_dismiss_then_next_suggestion_excludes(catalog):
	    """After dismiss, the same (user, skill) suggestion must not reappear until
	    a newer resource completion arrives.

	    NOTE: only the "must not reappear" half is exercised here; no newer
	    completion is created after the dismissal.
	    """
	    client = APIClient()
	    access = _register(client, 'dismiss@x.com')
	    c = auth_client(access)

	    for skill_id, prof in [
	        (catalog['skills']['python'].id, 70),
	        (catalog['skills']['sql'].id, 90),
	    ]:
	        r = c.post(USER_SKILLS, data={'skill_id': skill_id, 'proficiency': prof},
	                   format='json')
	        # Fail at the setup step rather than on a missing suggestion later.
	        assert r.status_code == 201, r.data
	    r = c.post(TARGET_ROLE, data={'role_id': catalog['role'].id}, format='json')
	    assert r.status_code == 201, r.data

	    # Complete the ML course → upgrade suggestion for ML appears.
	    ml_course = catalog['resources']['ml_course']
	    for cp in ml_course.checkpoints.all():
	        r = c.post(f'/api/progress/checkpoint/{cp.id}/toggle/')
	        assert r.status_code == 200, r.data

	    ml_id = catalog['skills']['ml'].id
	    initial = c.get(UPGRADE_SUGGESTIONS).data['suggestions']
	    assert any(s['skill_id'] == ml_id for s in initial)

	    # Dismiss.
	    r = c.post(f'{UPGRADE_SUGGESTIONS}{ml_id}/dismiss/')
	    assert r.status_code == 200
	    assert r.data['dismissed'] is True

	    # Fetch again — ML suggestion must be gone.
	    after = c.get(UPGRADE_SUGGESTIONS).data['suggestions']
	    assert not any(s['skill_id'] == ml_id for s in after)


	def test_resume_parse_accept_changes_readiness(catalog):
	    """Upload a fixture resume → accept predictions → readiness must move.

	    Bridges Module 8: the parse-resume endpoint returns skill predictions
	    that, when accepted and upserted into UserSkill, move the gap report.
	    """
	    client = APIClient()
	    access = _register(client, 'resume@x.com')
	    c = auth_client(access)

	    r = c.post(TARGET_ROLE, data={'role_id': catalog['role'].id}, format='json')
	    assert r.status_code == 201, r.data

	    # Before: no UserSkill rows → readiness is low.
	    r = c.get(GAP_ANALYSIS)
	    assert r.status_code == 200, r.data
	    before = r.data
	    assert before['readiness'] < 40

	    # Upload the DS-strong fixture. The lexical-layer MVP extracts Python + SQL
	    # (both in the catalog fixture) — assert they appear in predictions.
	    pdf_path = FIXTURES_DIR / 'resume_ds_strong.pdf'
	    assert pdf_path.exists(), 'fixture missing — run scripts/generate_resume_fixtures.py'
	    upload = SimpleUploadedFile(
	        pdf_path.name, pdf_path.read_bytes(), content_type='application/pdf',
	    )
	    r = c.post(PARSE_RESUME, data={'resume': upload}, format='multipart')
	    assert r.status_code == 200, r.data

	    # Index the predictions once; the keys double as the predicted-name set.
	    predictions_by_name = {p['skill_name']: p for p in r.data['skills']}
	    assert {'Python', 'SQL'} <= predictions_by_name.keys(), sorted(predictions_by_name)

	    # Accept the Python + SQL predictions.
	    for name in ('Python', 'SQL'):
	        p = predictions_by_name[name]
	        r = c.post(USER_SKILLS, data={
	            'skill_id': p['skill_id'],
	            'proficiency': p['proficiency'],
	        }, format='json')
	        assert r.status_code == 201, r.data

	    r = c.get(GAP_ANALYSIS)
	    assert r.status_code == 200, r.data
	    after = r.data
	    assert after['readiness'] > before['readiness']


	def test_target_role_switch_recomputes_gap(catalog):
	    """Switching target role must invalidate stale gap output."""
	    # Add a second role inline.
	    other_role = Role.objects.create(
	        role_name='Frontend Developer', description='FE',
	        industry='Tech', is_active=True,
	    )
	    # No RoleSkills on the second role — gap becomes a "no requirements" report.

	    client = APIClient()
	    access = _register(client, 'switch@x.com')
	    c = auth_client(access)

	    r = c.post(TARGET_ROLE, data={'role_id': catalog['role'].id}, format='json')
	    assert r.status_code == 201, r.data
	    first = c.get(GAP_ANALYSIS).data
	    assert first['role_name'] == 'Data Scientist'

	    r = c.post(TARGET_ROLE, data={'role_id': other_role.id}, format='json')
	    # The exact success code for replacing an existing target role is not
	    # pinned elsewhere in this file, so accept any 2xx here.
	    assert 200 <= r.status_code < 300, r.data
	    second = c.get(GAP_ANALYSIS).data
	    assert second['role_name'] == 'Frontend Developer'
	    assert second['no_requirements'] is True
	    assert second['readiness'] == 100.0


	def test_checkpoint_untoggle_decreases_progress(catalog):
	    """Toggle endpoint is bidirectional — untoggle must reduce progress."""
	    client = APIClient()
	    access = _register(client, 'untoggle@x.com')
	    c = auth_client(access)
	    r = c.post(TARGET_ROLE, data={'role_id': catalog['role'].id}, format='json')
	    assert r.status_code == 201, r.data

	    ml_course = catalog['resources']['ml_course']
	    cps = list(ml_course.checkpoints.order_by('order_index'))

	    # Check the status before reading the body so an error response is
	    # reported as such instead of as a KeyError on 'progress'.
	    r = c.post(f'/api/progress/checkpoint/{cps[0].id}/toggle/')
	    assert r.status_code == 200, r.data
	    assert r.data['progress'] == 25

	    r = c.post(f'/api/progress/checkpoint/{cps[1].id}/toggle/')
	    assert r.status_code == 200, r.data
	    assert r.data['progress'] == 50

	    # Untoggle the first checkpoint — progress must drop back to 25.
	    r = c.post(f'/api/progress/checkpoint/{cps[0].id}/toggle/')
	    assert r.data['progress'] == 25
	    assert r.data['status'] == 'IN_PROGRESS'


	def test_manual_slider_rejection_includes_checkpoint_word(catalog):
	    """The FE at Learning.jsx:55 substring-matches 'checkpoint' in the error
	    detail to decide whether to hide the slider. Contract lives here."""
	    client = APIClient()
	    access = _register(client, 'slider@x.com')
	    c = auth_client(access)
	    r = c.post(TARGET_ROLE, data={'role_id': catalog['role'].id}, format='json')
	    # Make sure the 400 below comes from the slider rule, not from a target
	    # role that silently failed to save.
	    assert r.status_code == 201, r.data

	    ml_course = catalog['resources']['ml_course']  # has checkpoints
	    r = c.post(f'/api/progress/resource/{ml_course.id}/',
	               data={'progress': 50}, format='json')
	    assert r.status_code == 400
	    detail = str(r.data).lower()
	    assert 'checkpoint' in detail, r.data


	def test_resources_pagination_contract(catalog):
	    """/api/resources/?skill=X must answer with ``count`` and ``results``.

	    Twelve extra ML resources are added on top of the one from the catalog
	    fixture, so the filtered listing is expected to report 13 rows.
	    """
	    ml_skill = catalog['skills']['ml']
	    extra_count = 12
	    for index in range(extra_count):
	        extra = Resource.objects.create(
	            title=f'ML extra {index}',
	            provider='Test',
	            url=f'https://example.com/ml/{index}',
	            difficulty_level='INTERMEDIATE',
	            duration=60,
	            type='ARTICLE',
	            rating=3.5,
	        )
	        SkillResource.objects.create(
	            skill=ml_skill, resource=extra, relevance_score=0.5,
	        )

	    token = _register(APIClient(), 'page@x.com')
	    c = auth_client(token)

	    listing = c.get(f'{RESOURCES}?skill={ml_skill.id}')
	    assert listing.status_code == 200
	    payload = listing.data
	    # One ML resource from the catalog fixture plus the 12 created above.
	    assert payload['count'] == 13
	    assert 'results' in payload
	    # All rows are expected on one page; presumably the default page size
	    # is 50 — verify against the API's pagination settings.
	    assert len(payload['results']) == 13