Spaces:
Running
Running
File size: 8,865 Bytes
"""Unit tests for the analysis functions."""
import sys
from pathlib import Path
from datetime import date, timedelta
sys.path.insert(0, str(Path(__file__).parent.parent))
import pandas as pd
import pytest
from analysis.trends import (
normalize_location,
weekly_job_counts,
top_skills_by_category,
skill_trend_weekly,
salary_by_category,
experience_distribution,
skill_growth_rate,
new_jobs_count,
_trend_date,
)
# ── normalize_location ────────────────────────────────────────────
class TestNormalizeLocation:
    """Tests for ``normalize_location``.

    The Korean literals were restored from mis-decoded text (UTF-8 bytes
    that had been read as ISO-8859-7).
    NOTE(review): confirm they match the region names used by
    ``analysis.trends``.
    """

    def test_seoul(self):
        """A Seoul district address is expected to reduce to "서울" (Seoul)."""
        assert normalize_location("서울 강남구") == "서울"

    def test_gyeonggi(self):
        """A Gyeonggi city/district address is expected to reduce to "경기"."""
        assert normalize_location("경기 성남시 분당구") == "경기"

    def test_exact_match(self):
        """A bare region name ("부산", Busan) is expected to come back unchanged."""
        assert normalize_location("부산") == "부산"

    def test_overseas(self):
        """A non-Korean place name is expected to map to "해외" (overseas)."""
        assert normalize_location("San Francisco") == "해외"

    def test_none(self):
        """``None`` is expected to yield an empty string."""
        assert normalize_location(None) == ""

    def test_nan(self):
        """A float NaN is expected to yield an empty string."""
        assert normalize_location(float("nan")) == ""
# ── _trend_date ───────────────────────────────────────────────────
class TestTrendDate:
    """Tests for ``_trend_date``."""

    def _make_df(self, posted, collected):
        """Build a frame with both date columns parsed by ``pd.to_datetime``."""
        columns = {
            "posted_date": pd.to_datetime(posted),
            "collected_at": pd.to_datetime(collected),
        }
        return pd.DataFrame(columns)

    def test_prefers_posted_date(self):
        """When both dates are present, the posted date is expected to win."""
        frame = self._make_df(["2025-01-10"], ["2025-01-15"])
        first = _trend_date(frame).iloc[0]
        assert str(first.date()) == "2025-01-10"

    def test_fallback_to_collected_at(self):
        """When the posted date is missing, the collection date is expected."""
        frame = self._make_df([None], ["2025-01-15"])
        first = _trend_date(frame).iloc[0]
        assert str(first.date()) == "2025-01-15"
# ── weekly_job_counts ─────────────────────────────────────────────
class TestWeeklyJobCounts:
    """Tests for ``weekly_job_counts``.

    The Korean category labels in the fixture were restored from mis-decoded
    text (UTF-8 bytes that had been read as ISO-8859-7).
    NOTE(review): confirm the labels against the real category names.
    """

    def _make_jobs(self):
        """Build three job rows; the last row has no ``posted_date``."""
        today = pd.Timestamp.now()
        return pd.DataFrame({
            # "데이터 엔지니어" = data engineer, "데이터 분석가" = data analyst
            "job_category": ["데이터 엔지니어", "데이터 분석가", "데이터 엔지니어"],
            "posted_date": [today - timedelta(days=3), today - timedelta(days=10), None],
            "collected_at": [today, today, today],
        })

    def test_returns_dataframe(self):
        """The result is expected to be a DataFrame with ``week`` and ``count``."""
        df = self._make_jobs()
        result = weekly_job_counts(df)
        assert isinstance(result, pd.DataFrame)
        assert "week" in result.columns
        assert "count" in result.columns

    def test_count_correct(self):
        """All three fixture rows are expected to be counted."""
        df = self._make_jobs()
        result = weekly_job_counts(df)
        assert result["count"].sum() == 3

    def test_empty_input(self):
        """A frame with the right columns but no rows should give an empty result."""
        empty = pd.DataFrame(columns=["job_category", "posted_date", "collected_at"])
        result = weekly_job_counts(empty)
        assert result.empty
# ── top_skills_by_category ────────────────────────────────────────
class TestTopSkillsByCategory:
    """Tests for ``top_skills_by_category``."""

    def _make_skills(self):
        """Build five skill rows spread over three jobs and two categories."""
        n_rows = 5
        data = {
            "job_id": [1, 1, 2, 2, 3],
            "skill_name": ["Python", "SQL", "Python", "AWS", "Python"],
            "job_category": ["DE", "DE", "DE", "DE", "DA"],
            "source_site": ["wanted"] * n_rows,
            "posted_date": [None] * n_rows,
            "collected_at": [pd.Timestamp.now()] * n_rows,
        }
        return pd.DataFrame(data)

    def _make_jobs(self):
        """Build the three jobs that the skill rows refer to."""
        data = {
            "id": [1, 2, 3],
            "job_category": ["DE", "DE", "DA"],
        }
        return pd.DataFrame(data)

    def test_basic(self):
        """The result is expected to expose the skill, count and pct columns."""
        skills_df = self._make_skills()
        result = top_skills_by_category(skills_df, top_n=10)
        for column in ("skill_name", "count", "pct"):
            assert column in result.columns

    def test_pct_with_jobs_df(self):
        """With ``jobs_df`` supplied, pct is checked against the fixture's DE jobs."""
        skills_df = self._make_skills()
        jobs_df = self._make_jobs()
        result = top_skills_by_category(skills_df, jobs_df=jobs_df, top_n=10)
        de_rows = result[result["job_category"] == "DE"]
        python_row = de_rows[de_rows["skill_name"] == "Python"].iloc[0]
        # Python appears in 2 of the 2 DE postings -> 100%
        assert python_row["pct"] == 100.0

    def test_top_n_limit(self):
        """``top_n=1`` is expected to cap every category at a single skill."""
        skills_df = self._make_skills()
        result = top_skills_by_category(skills_df, top_n=1)
        # Only one row per job category
        rows_per_category = result.groupby("job_category").size()
        assert rows_per_category.max() == 1

    def test_empty_input(self):
        """A frame with the right columns but no rows should give an empty result."""
        column_names = [
            "job_id", "skill_name", "job_category",
            "source_site", "posted_date", "collected_at",
        ]
        empty = pd.DataFrame(columns=column_names)
        result = top_skills_by_category(empty)
        assert result.empty
# ── salary_by_category ────────────────────────────────────────────
class TestSalaryByCategory:
    """Tests for ``salary_by_category``."""

    def _make_jobs(self):
        """Build three jobs; the third has no salary information."""
        data = {
            "job_category": ["DE", "DA", "DE"],
            "salary_min": [4000, 3000, None],
            "salary_max": [6000, 5000, None],
            "company_name": ["A", "B", "C"],
        }
        return pd.DataFrame(data)

    def test_drops_null_salary(self):
        """The row without salary values is expected to be dropped."""
        jobs = self._make_jobs()
        result = salary_by_category(jobs)
        assert len(result) == 2  # the None row is removed

    def test_salary_mid_calculation(self):
        """``salary_mid`` is expected to be the midpoint of min and max."""
        jobs = self._make_jobs()
        result = salary_by_category(jobs)
        de_rows = result[result["job_category"] == "DE"]
        first_de = de_rows.iloc[0]
        assert first_de["salary_mid"] == 5000.0  # (4000 + 6000) / 2
# ── experience_distribution ───────────────────────────────────────
class TestExperienceDistribution:
    """Tests for ``experience_distribution``.

    The two expected group labels were split across physical lines and
    mis-decoded (UTF-8 bytes read as ISO-8859-7), which left the string
    literals unterminated. They are restored here as single-line literals.
    NOTE(review): confirm the labels against the bins in ``analysis.trends``.
    """

    def _make_jobs(self):
        """Build five DE jobs; the last one has no ``experience_min``."""
        return pd.DataFrame({
            "job_category": ["DE"] * 5,
            "experience_min": [0, 1, 3, 7, None],
            "collected_at": [pd.Timestamp.now()] * 5,
        })

    def test_basic_groups(self):
        """The entry-level and 1-2 year groups are expected among the labels."""
        df = self._make_jobs()
        result = experience_distribution(df)
        # Cast to str so the check works whether or not the column is categorical.
        labels = result["exp_group"].astype(str).tolist()
        assert "신입" in labels  # "entry level"
        assert "1-2년" in labels  # "1-2 years"

    def test_null_excluded(self):
        """The row with a missing ``experience_min`` is expected to be left out."""
        df = self._make_jobs()
        result = experience_distribution(df)
        assert result["count"].sum() == 4  # None excluded
# ── skill_growth_rate ─────────────────────────────────────────────
class TestSkillGrowthRate:
    """Tests for ``skill_growth_rate``."""

    def _make_skills(self, recent_count=5, prev_count=2):
        """Build Python skill rows: some collected 3 days ago, some 20 days ago.

        ``recent_count`` rows use job 1 and the 3-day-old timestamp;
        ``prev_count`` rows use job 2 and the 20-day-old timestamp.
        """
        now = pd.Timestamp.now()

        def _row(job_id, days_ago):
            # One skill record; only the job id and collection time vary.
            return {
                "skill_name": "Python",
                "job_id": job_id,
                "job_category": "DE",
                "source_site": "wanted",
                "posted_date": None,
                "collected_at": now - timedelta(days=days_ago),
            }

        recent_rows = [_row(1, 3) for _ in range(recent_count)]
        previous_rows = [_row(2, 20) for _ in range(prev_count)]
        return pd.DataFrame(recent_rows + previous_rows)

    def test_returns_dataframe(self):
        """The result is expected to be a DataFrame."""
        skills = self._make_skills()
        assert isinstance(skill_growth_rate(skills), pd.DataFrame)

    def test_growth_positive(self):
        """More recent than previous rows should give a positive growth figure."""
        skills = self._make_skills(recent_count=5, prev_count=2)
        result = skill_growth_rate(skills)
        assert not result.empty
        top_row = result.iloc[0]
        assert top_row["growth_pct"] > 0

    def test_prev_filter_removes_noise(self):
        """Skills with prev < 2 must not be included."""
        skills = self._make_skills(recent_count=5, prev_count=1)
        result = skill_growth_rate(skills)
        assert result.empty  # filtered out because prev=1

    def test_empty_input(self):
        """A frame with the right columns but no rows should give an empty result."""
        column_names = [
            "skill_name", "job_id", "job_category",
            "source_site", "posted_date", "collected_at",
        ]
        empty = pd.DataFrame(columns=column_names)
        result = skill_growth_rate(empty)
        assert result.empty
# ── new_jobs_count ────────────────────────────────────────────────
class TestNewJobsCount:
    """Tests for ``new_jobs_count``."""

    def test_recent_jobs(self):
        """Ten rows aged 0-9 days are checked against a 7-day window."""
        now = pd.Timestamp.now()
        timestamps = [now - timedelta(days=age) for age in range(10)]
        jobs = pd.DataFrame({"collected_at": timestamps})
        assert new_jobs_count(jobs, days=7) == 8  # days 0 through 7 inclusive

    def test_empty(self):
        """A frame with no rows is expected to count as zero new jobs."""
        no_jobs = pd.DataFrame(columns=["collected_at"])
        assert new_jobs_count(no_jobs, days=7) == 0
|