Spaces:

dima806
/

developer_salary_prediction

Running

App Files Files Community

developer_salary_prediction / test_feature_impact.py

dima806

Upload 32 files

a32e584 verified 2 months ago

raw

history blame contribute delete

22 kB

	"""Test that changing input features actually changes predictions."""

	from src.schema import SalaryInput
	from src.infer import predict_salary, valid_categories


	def test_years_experience_impact():
	    """Check that the prediction responds to total years of coding.

	    Every other input is held fixed while ``years_code`` takes each value
	    in a small list; each resulting prediction is printed.

	    Returns:
	        True when all predictions are pairwise different, otherwise False.
	    """
	    rule = "=" * 70
	    print("\n" + rule)
	    print("TEST 1: Total Years of Coding Impact")
	    print(rule)

	    # Everything except years_code stays constant across the runs.
	    fixed_fields = {
	        "country": "United States of America",
	        "work_exp": 3.0,
	        "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
	        "dev_type": "Developer, full-stack",
	        "industry": "Software Development",
	        "age": "25-34 years old",
	        "ic_or_pm": "Individual contributor",
	    }

	    salaries = []
	    for n_years in (0, 2, 5, 10, 20):
	        estimate = predict_salary(SalaryInput(**fixed_fields, years_code=n_years))
	        salaries.append(estimate)
	        print(f" Years: {n_years:2d} -> Salary: ${estimate:,.2f}")

	    # A set collapses equal predictions, so a size mismatch means duplicates.
	    total = len(salaries)
	    distinct = len(set(salaries))
	    if distinct != total:
	        print(f"\n❌ FAIL: Only {distinct}/{total} unique predictions")
	        return False

	    print(f"\n✅ PASS: All {total} predictions are different")
	    return True


	def test_country_impact():
	    """Test that changing country changes prediction.

	    All other inputs are held fixed. Candidate countries are first filtered
	    to those present in ``valid_categories["Country"]``; a salary is then
	    predicted and printed for each remaining country.

	    Returns:
	        True when at least two countries were compared and every prediction
	        is different. False when predictions repeat, or when fewer than two
	        candidates are valid (there is nothing to compare in that case).
	    """
	    print("\n" + "=" * 70)
	    print("TEST 2: Country Impact")
	    print("=" * 70)

	    base_input = {
	        "years_code": 5.0,
	        "work_exp": 3.0,
	        "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
	        "dev_type": "Developer, full-stack",
	        "industry": "Software Development",
	        "age": "25-34 years old",
	        "ic_or_pm": "Individual contributor",
	    }

	    # Test with different countries (select diverse ones)
	    test_countries = [
	        "United States of America",
	        "Germany",
	        "India",
	        "Brazil",
	        "Poland",
	    ]

	    # Filter to only countries that exist in valid categories
	    test_countries = [c for c in test_countries if c in valid_categories["Country"]]

	    # With 0 or 1 candidates the uniqueness check below is trivially true,
	    # which would report a PASS without comparing anything.
	    if len(test_countries) < 2:
	        print(
	            f"\n❌ FAIL: Need at least 2 valid countries to compare, "
	            f"found {len(test_countries)}"
	        )
	        return False

	    predictions = []
	    for country in test_countries:
	        input_data = SalaryInput(**base_input, country=country)
	        salary = predict_salary(input_data)
	        predictions.append(salary)
	        print(f" Country: {country:40s} -> Salary: ${salary:,.2f}")

	    # Check if predictions are different
	    unique_predictions = len(set(predictions))
	    if unique_predictions == len(predictions):
	        print(f"\n✅ PASS: All {len(predictions)} predictions are different")
	        return True

	    if unique_predictions == 1:
	        print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})")
	        print(" This indicates the model is NOT using country as a feature!")
	        return False

	    print(
	        f"\n⚠️ PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions"
	    )
	    print(" Duplicate salaries found - possible feature issue")
	    return False


	def test_education_impact():
	    """Test that changing education level changes prediction.

	    All other inputs are held fixed. Candidate education levels are first
	    filtered to those present in ``valid_categories["EdLevel"]``; a salary
	    is then predicted and printed for each remaining level.

	    Returns:
	        True when at least two levels were compared and every prediction is
	        different. False when predictions repeat, or when fewer than two
	        candidates are valid (there is nothing to compare in that case).
	    """
	    print("\n" + "=" * 70)
	    print("TEST 3: Education Level Impact")
	    print("=" * 70)

	    base_input = {
	        "country": "United States of America",
	        "years_code": 5.0,
	        "work_exp": 3.0,
	        "dev_type": "Developer, full-stack",
	        "industry": "Software Development",
	        "age": "25-34 years old",
	        "ic_or_pm": "Individual contributor",
	    }

	    # Test with different education levels
	    test_education = [
	        "Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)",
	        "Some college/university study without earning a degree",
	        "Associate degree (A.A., A.S., etc.)",
	        "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
	        "Master's degree (M.A., M.S., M.Eng., MBA, etc.)",
	        "Professional degree (JD, MD, Ph.D, Ed.D, etc.)",
	    ]

	    # Filter to only education levels that exist in valid categories
	    test_education = [e for e in test_education if e in valid_categories["EdLevel"]]

	    # With 0 or 1 candidates the uniqueness check below is trivially true,
	    # which would report a PASS without comparing anything.
	    if len(test_education) < 2:
	        print(
	            f"\n❌ FAIL: Need at least 2 valid education levels to compare, "
	            f"found {len(test_education)}"
	        )
	        return False

	    predictions = []
	    for education in test_education:
	        input_data = SalaryInput(**base_input, education_level=education)
	        salary = predict_salary(input_data)
	        predictions.append(salary)
	        print(f" Education: {education[:50]:50s} -> Salary: ${salary:,.2f}")

	    # Check if predictions are different
	    unique_predictions = len(set(predictions))
	    if unique_predictions == len(predictions):
	        print(f"\n✅ PASS: All {len(predictions)} predictions are different")
	        return True

	    if unique_predictions == 1:
	        print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})")
	        print(" This indicates the model is NOT using education level as a feature!")
	        return False

	    print(
	        f"\n⚠️ PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions"
	    )
	    print(" Duplicate salaries found - possible feature issue")
	    return False


	def test_devtype_impact():
	    """Test that changing developer type changes prediction.

	    All other inputs are held fixed. Candidate developer types are first
	    filtered to those present in ``valid_categories["DevType"]``; a salary
	    is then predicted and printed for each remaining type.

	    Returns:
	        True when at least two types were compared and every prediction is
	        different. False when predictions repeat, or when fewer than two
	        candidates are valid (there is nothing to compare in that case).
	    """
	    print("\n" + "=" * 70)
	    print("TEST 4: Developer Type Impact")
	    print("=" * 70)

	    base_input = {
	        "country": "United States of America",
	        "years_code": 5.0,
	        "work_exp": 3.0,
	        "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
	        "industry": "Software Development",
	        "age": "25-34 years old",
	        "ic_or_pm": "Individual contributor",
	    }

	    # Test with different developer types (using actual values from trained model)
	    test_devtypes = [
	        "Developer, front-end",
	        "Developer, back-end",
	        "Developer, full-stack",
	        "Data scientist",
	        "Engineering manager",
	        "DevOps engineer or professional",
	    ]

	    # Filter to only developer types that exist in valid categories
	    test_devtypes = [d for d in test_devtypes if d in valid_categories["DevType"]]

	    # With 0 or 1 candidates the uniqueness check below is trivially true,
	    # which would report a PASS without comparing anything.
	    if len(test_devtypes) < 2:
	        print(
	            f"\n❌ FAIL: Need at least 2 valid developer types to compare, "
	            f"found {len(test_devtypes)}"
	        )
	        return False

	    predictions = []
	    for devtype in test_devtypes:
	        input_data = SalaryInput(**base_input, dev_type=devtype)
	        salary = predict_salary(input_data)
	        predictions.append(salary)
	        print(f" Dev Type: {devtype[:50]:50s} -> Salary: ${salary:,.2f}")

	    # Check if predictions are different
	    unique_predictions = len(set(predictions))
	    if unique_predictions == len(predictions):
	        print(f"\n✅ PASS: All {len(predictions)} predictions are different")
	        return True

	    if unique_predictions == 1:
	        print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})")
	        print(" This indicates the model is NOT using developer type as a feature!")
	        return False

	    print(
	        f"\n⚠️ PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions"
	    )
	    print(" Duplicate salaries found - possible feature issue")
	    return False


	def test_industry_impact():
	    """Test that changing industry changes prediction.

	    All other inputs are held fixed. Candidate industries are first
	    filtered to those present in ``valid_categories["Industry"]``; a salary
	    is then predicted and printed for each remaining industry.

	    Returns:
	        True when at least two industries were compared and every
	        prediction is different. False when predictions repeat, or when
	        fewer than two candidates are valid (there is nothing to compare
	        in that case).
	    """
	    print("\n" + "=" * 70)
	    print("TEST 5: Industry Impact")
	    print("=" * 70)

	    base_input = {
	        "country": "United States of America",
	        "years_code": 5.0,
	        "work_exp": 3.0,
	        "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
	        "dev_type": "Developer, full-stack",
	        "age": "25-34 years old",
	        "ic_or_pm": "Individual contributor",
	    }

	    # Test with different industries (using actual values from trained model)
	    test_industries = [
	        "Software Development",
	        "Fintech",
	        "Banking/Financial Services",
	        "Healthcare",
	        "Manufacturing",
	        "Government",
	    ]

	    # Filter to only industries that exist in valid categories
	    test_industries = [i for i in test_industries if i in valid_categories["Industry"]]

	    # With 0 or 1 candidates the uniqueness check below is trivially true,
	    # which would report a PASS without comparing anything.
	    if len(test_industries) < 2:
	        print(
	            f"\n❌ FAIL: Need at least 2 valid industries to compare, "
	            f"found {len(test_industries)}"
	        )
	        return False

	    predictions = []
	    for industry in test_industries:
	        input_data = SalaryInput(**base_input, industry=industry)
	        salary = predict_salary(input_data)
	        predictions.append(salary)
	        print(f" Industry: {industry[:50]:50s} -> Salary: ${salary:,.2f}")

	    # Check if predictions are different
	    unique_predictions = len(set(predictions))
	    if unique_predictions == len(predictions):
	        print(f"\n✅ PASS: All {len(predictions)} predictions are different")
	        return True

	    if unique_predictions == 1:
	        print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})")
	        print(" This indicates the model is NOT using industry as a feature!")
	        return False

	    print(
	        f"\n⚠️ PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions"
	    )
	    print(" Duplicate salaries found - possible feature issue")
	    return False


	def test_age_impact():
	    """Test that changing age changes prediction.

	    All other inputs are held fixed. Candidate age ranges are first
	    filtered to those present in ``valid_categories["Age"]``; a salary is
	    then predicted and printed for each remaining range.

	    Returns:
	        True when at least two age ranges were compared and every
	        prediction is different. False when predictions repeat, or when
	        fewer than two candidates are valid (there is nothing to compare
	        in that case).
	    """
	    print("\n" + "=" * 70)
	    print("TEST 6: Age Impact")
	    print("=" * 70)

	    base_input = {
	        "country": "United States of America",
	        "years_code": 5.0,
	        "work_exp": 3.0,
	        "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
	        "dev_type": "Developer, full-stack",
	        "industry": "Software Development",
	        "ic_or_pm": "Individual contributor",
	    }

	    # Test with different age ranges (using actual values from trained model)
	    test_ages = [
	        "18-24 years old",
	        "25-34 years old",
	        "35-44 years old",
	        "45-54 years old",
	        "55-64 years old",
	    ]

	    # Filter to only ages that exist in valid categories
	    test_ages = [a for a in test_ages if a in valid_categories["Age"]]

	    # With 0 or 1 candidates the uniqueness check below is trivially true,
	    # which would report a PASS without comparing anything.
	    if len(test_ages) < 2:
	        print(
	            f"\n❌ FAIL: Need at least 2 valid age ranges to compare, "
	            f"found {len(test_ages)}"
	        )
	        return False

	    predictions = []
	    for age in test_ages:
	        input_data = SalaryInput(**base_input, age=age)
	        salary = predict_salary(input_data)
	        predictions.append(salary)
	        print(f" Age: {age[:50]:50s} -> Salary: ${salary:,.2f}")

	    # Check if predictions are different
	    unique_predictions = len(set(predictions))
	    if unique_predictions == len(predictions):
	        print(f"\n✅ PASS: All {len(predictions)} predictions are different")
	        return True

	    if unique_predictions == 1:
	        print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})")
	        print(" This indicates the model is NOT using age as a feature!")
	        return False

	    print(
	        f"\n⚠️ PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions"
	    )
	    print(" Duplicate salaries found - possible feature issue")
	    return False


	def test_work_exp_impact():
	    """Check that the prediction responds to years of work experience.

	    Every other input is held fixed while ``work_exp`` takes each value in
	    a small list; each resulting prediction is printed.

	    Returns:
	        True when all predictions are pairwise different, otherwise False
	        (with a message that distinguishes "all identical" from "some
	        duplicates").
	    """
	    rule = "=" * 70
	    print("\n" + rule)
	    print("TEST 7: Work Experience Impact")
	    print(rule)

	    # Everything except work_exp stays constant across the runs.
	    fixed_fields = {
	        "country": "United States of America",
	        "years_code": 10.0,
	        "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
	        "dev_type": "Developer, full-stack",
	        "industry": "Software Development",
	        "age": "25-34 years old",
	        "ic_or_pm": "Individual contributor",
	    }

	    salaries = []
	    for n_years in (0, 1, 3, 5, 10, 20):
	        estimate = predict_salary(SalaryInput(**fixed_fields, work_exp=n_years))
	        salaries.append(estimate)
	        print(f" Work Exp: {n_years:2d} -> Salary: ${estimate:,.2f}")

	    # A set collapses equal predictions, so its size counts distinct values.
	    total = len(salaries)
	    distinct = len(set(salaries))

	    if distinct == total:
	        print(f"\n✅ PASS: All {total} predictions are different")
	        return True

	    if distinct == 1:
	        print(f"\n❌ FAIL: All predictions are IDENTICAL (${salaries[0]:,.2f})")
	        print(" This indicates the model is NOT using work experience as a feature!")
	        return False

	    print(f"\n⚠️ PARTIAL: Only {distinct}/{total} unique predictions")
	    print(" Duplicate salaries found - possible feature issue")
	    return False


	def test_icorpm_impact():
	    """Test that changing IC or PM changes prediction.

	    All other inputs are held fixed. The candidate IC/PM values are first
	    filtered to those present in ``valid_categories["ICorPM"]``; a salary
	    is then predicted and printed for each remaining value.

	    Returns:
	        True when at least two values were compared and every prediction is
	        different. False when predictions repeat, or when fewer than two
	        candidates are valid (there is nothing to compare in that case).
	    """
	    print("\n" + "=" * 70)
	    print("TEST 8: IC or PM Impact")
	    print("=" * 70)

	    base_input = {
	        "country": "United States of America",
	        "years_code": 5.0,
	        "work_exp": 3.0,
	        "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
	        "dev_type": "Developer, full-stack",
	        "industry": "Software Development",
	        "age": "25-34 years old",
	    }

	    # Test with different IC/PM values (using actual values from trained model)
	    test_icorpm = [
	        "Individual contributor",
	        "People manager",
	    ]

	    # Filter to only values that exist in valid categories
	    test_icorpm = [v for v in test_icorpm if v in valid_categories["ICorPM"]]

	    # Only two candidates exist, so losing either one to the filter would
	    # leave a single prediction that trivially counts as "all different".
	    if len(test_icorpm) < 2:
	        print(
	            f"\n❌ FAIL: Need at least 2 valid IC/PM values to compare, "
	            f"found {len(test_icorpm)}"
	        )
	        return False

	    predictions = []
	    for icorpm in test_icorpm:
	        input_data = SalaryInput(**base_input, ic_or_pm=icorpm)
	        salary = predict_salary(input_data)
	        predictions.append(salary)
	        print(f" IC/PM: {icorpm[:50]:50s} -> Salary: ${salary:,.2f}")

	    # Check if predictions are different
	    unique_predictions = len(set(predictions))
	    if unique_predictions == len(predictions):
	        print(f"\n✅ PASS: All {len(predictions)} predictions are different")
	        return True

	    if unique_predictions == 1:
	        print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})")
	        print(" This indicates the model is NOT using IC/PM as a feature!")
	        return False

	    print(
	        f"\n⚠️ PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions"
	    )
	    print(" Duplicate salaries found - possible feature issue")
	    return False


	def test_combined_features():
	    """Test that combining different features produces expected variations.

	    Each test case varies all eight inputs at once. A case is skipped when
	    any of its categorical values is missing from ``valid_categories``;
	    every remaining case is predicted and printed as one table-like row.

	    Returns:
	        True when at least two cases were predicted and every prediction is
	        different. False when predictions repeat, or when fewer than two
	        cases survive the validity check (there is nothing to compare).
	    """
	    print("\n" + "=" * 70)
	    print("TEST 9: Combined Feature Variations")
	    print("=" * 70)

	    # Create diverse combinations (using actual values from trained model)
	    # Tuple layout: country, years_code, work_exp, education, dev type,
	    # industry, age, IC-or-PM.
	    test_cases = [
	        (
	            "India",
	            2,
	            1,
	            "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
	            "Developer, back-end",
	            "Software Development",
	            "18-24 years old",
	            "Individual contributor",
	        ),
	        (
	            "Germany",
	            5,
	            3,
	            "Master's degree (M.A., M.S., M.Eng., MBA, etc.)",
	            "Developer, full-stack",
	            "Manufacturing",
	            "25-34 years old",
	            "Individual contributor",
	        ),
	        (
	            "United States of America",
	            10,
	            8,
	            "Master's degree (M.A., M.S., M.Eng., MBA, etc.)",
	            "Engineering manager",
	            "Fintech",
	            "35-44 years old",
	            "People manager",
	        ),
	        (
	            "Poland",
	            15,
	            12,
	            "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
	            "Developer, front-end",
	            "Healthcare",
	            "45-54 years old",
	            "Individual contributor",
	        ),
	        (
	            "Brazil",
	            5,
	            3,
	            "Some college/university study without earning a degree",
	            "DevOps engineer or professional",
	            "Government",
	            "25-34 years old",
	            "Individual contributor",
	        ),
	    ]

	    predictions = []
	    for (
	        country,
	        years,
	        work_exp,
	        education,
	        devtype,
	        industry,
	        age,
	        icorpm,
	    ) in test_cases:
	        # Skip if not in valid categories
	        if (
	            country not in valid_categories["Country"]
	            or education not in valid_categories["EdLevel"]
	            or devtype not in valid_categories["DevType"]
	            or industry not in valid_categories["Industry"]
	            or age not in valid_categories["Age"]
	            or icorpm not in valid_categories["ICorPM"]
	        ):
	            continue

	        input_data = SalaryInput(
	            country=country,
	            years_code=years,
	            work_exp=work_exp,
	            education_level=education,
	            dev_type=devtype,
	            industry=industry,
	            age=age,
	            ic_or_pm=icorpm,
	        )
	        salary = predict_salary(input_data)
	        predictions.append(salary)
	        # Plain pipe separators: a backslash before the pipe is an invalid
	        # escape sequence and would also show up in the printed row.
	        print(
	            f" {country[:15]:15s} | {years:2d}y | {work_exp:2d}w | "
	            f"{education[:25]:25s} | {devtype[:25]:25s} | "
	            f"{industry[:20]:20s} | {age[:15]:15s} | {icorpm[:5]:5s} "
	            f"-> ${salary:,.2f}"
	        )

	    # With 0 or 1 surviving cases the uniqueness check below is trivially
	    # true, which would report a PASS without comparing anything.
	    if len(predictions) < 2:
	        print(
	            f"\n❌ FAIL: Need at least 2 valid combinations to compare, "
	            f"found {len(predictions)}"
	        )
	        return False

	    # Check if predictions are different
	    unique_predictions = len(set(predictions))
	    if unique_predictions == len(predictions):
	        print(f"\n✅ PASS: All {len(predictions)} combined predictions are different")
	        return True

	    print(f"\n⚠️ Only {unique_predictions}/{len(predictions)} unique predictions")
	    print(" Some combinations produce identical salaries")
	    return False


	def print_feature_analysis():
	    """Print how the model's feature columns split up by name prefix.

	    Columns from ``src.infer.feature_columns`` are grouped by the prefixes
	    ``Country_``, ``EdLevel_``, ``DevType_``, ``Industry_``, ``Age_`` and
	    ``ICorPM_``; anything matching none of them is reported as numeric.
	    Prints the per-group counts, up to five sample names for each non-empty
	    group, and a warning for each empty group. Returns nothing.
	    """
	    rule = "=" * 70
	    print("\n" + rule)
	    print("FEATURE ANALYSIS")
	    print(rule)

	    from src.infer import feature_columns

	    print(f"\nTotal features in model: {len(feature_columns)}")

	    # (column-name prefix, label for the count line, label for the
	    # sample and warning lines)
	    groups = (
	        ("Country_", "Country", "country"),
	        ("EdLevel_", "Education", "education"),
	        ("DevType_", "DevType", "developer type"),
	        ("Industry_", "Industry", "industry"),
	        ("Age_", "Age", "age"),
	        ("ICorPM_", "ICorPM", "IC/PM"),
	    )

	    # Count by type
	    by_prefix = {
	        prefix: [col for col in feature_columns if col.startswith(prefix)]
	        for prefix, _, _ in groups
	    }
	    # str.startswith accepts a tuple, so one call tests every known prefix.
	    known_prefixes = tuple(by_prefix)
	    leftovers = [col for col in feature_columns if not col.startswith(known_prefixes)]

	    print(f" - Numeric features: {len(leftovers)} -> {leftovers}")
	    for prefix, count_label, _ in groups:
	        print(f" - {count_label} features: {len(by_prefix[prefix])}")

	    # Show a handful of column names for each group that has any.
	    for prefix, _, detail_label in groups:
	        matched = by_prefix[prefix]
	        if matched:
	            print(f"\nSample {detail_label} features:")
	            for col in matched[:5]:
	                print(f" - {col}")

	    # Check if there are any features at all
	    for prefix, _, detail_label in groups:
	        if not by_prefix[prefix]:
	            print(f"\n⚠️ WARNING: No {detail_label} features found!")


	def main():
	    """Print the feature analysis, run every impact test, and summarize.

	    The tests run in a fixed order and each one's boolean result is
	    recorded under a display name. The summary lists a pass/fail line per
	    test, the passed/total count, and a closing verdict. Returns nothing.
	    """
	    rule = "=" * 70
	    print("\n" + rule)
	    print("FEATURE IMPACT TESTS")
	    print("Testing if changing inputs actually changes predictions")
	    print(rule)

	    # First, analyze what features exist
	    print_feature_analysis()

	    # Display name -> test function, in execution order.
	    suite = (
	        ("Years of Coding", test_years_experience_impact),
	        ("Country", test_country_impact),
	        ("Education Level", test_education_impact),
	        ("Developer Type", test_devtype_impact),
	        ("Industry", test_industry_impact),
	        ("Age", test_age_impact),
	        ("Work Experience", test_work_exp_impact),
	        ("IC or PM", test_icorpm_impact),
	        ("Combined Features", test_combined_features),
	    )
	    outcomes = {label: check() for label, check in suite}

	    # Summary
	    print("\n" + rule)
	    print("TEST SUMMARY")
	    print(rule)

	    for label, ok in outcomes.items():
	        marker = "✅ PASS" if ok else "❌ FAIL"
	        print(f" {marker} - {label}")

	    # Booleans sum as 0/1, so this counts the tests that returned True.
	    n_passed = sum(outcomes.values())
	    n_total = len(outcomes)

	    print(f"\n{n_passed}/{n_total} tests passed")

	    if n_passed != n_total:
	        print(
	            "\n⚠️ Some tests failed. The model may not be using all features properly."
	        )
	        print(
	            " This indicates potential training-testing skew or feature engineering issues."
	        )
	        return

	    print("\n🎉 All tests passed! The model is using all features correctly.")


	# Run the whole suite only when this file is executed directly as a script;
	# importing the module does not call main().
	if __name__ == "__main__":
	    main()