Spaces:
Running
Running
MiniMing
refactor: 전면 코드 개선 — 버그 수정, 분석 정확도, 구조화, 테스트 추가
"""Unit tests for the analysis functions."""
import sys
from pathlib import Path
from datetime import date, timedelta

# Prepend the parent of this file's directory to sys.path before the
# project import below -- presumably so that ``analysis`` resolves when the
# tests are run from this folder (confirm against the repository layout).
sys.path.insert(0, str(Path(__file__).parent.parent))

import pandas as pd
import pytest

from analysis.trends import (
    normalize_location,
    weekly_job_counts,
    top_skills_by_category,
    skill_trend_weekly,
    salary_by_category,
    experience_distribution,
    skill_growth_rate,
    new_jobs_count,
    _trend_date,
)
# ── normalize_location ────────────────────────────────────────────
class TestNormalizeLocation:
    """Checks ``normalize_location`` on Korean, foreign, and missing inputs."""

    def test_seoul(self):
        # A district-level Seoul address is expected to reduce to the region.
        assert normalize_location("서울 강남구") == "서울"

    def test_gyeonggi(self):
        # City and district suffixes are expected to be dropped.
        assert normalize_location("경기 성남시 분당구") == "경기"

    def test_exact_match(self):
        # A bare region name is expected to come back unchanged.
        assert normalize_location("부산") == "부산"

    def test_overseas(self):
        # A non-Korean place name is expected to map to the "overseas" label.
        assert normalize_location("San Francisco") == "해외"

    def test_none(self):
        assert normalize_location(None) == ""

    def test_nan(self):
        # NaN is how pandas represents a missing value in an object column.
        assert normalize_location(float("nan")) == ""
# ── _trend_date ───────────────────────────────────────────────────
class TestTrendDate:
    """Checks which date column ``_trend_date`` picks for each row."""

    def _make_df(self, posted, collected):
        """Build a frame with datetime ``posted_date`` / ``collected_at`` columns.

        Args:
            posted: Sequence of date strings (or ``None``) for ``posted_date``.
            collected: Sequence of date strings for ``collected_at``.
        """
        return pd.DataFrame({
            "posted_date": pd.to_datetime(posted),
            "collected_at": pd.to_datetime(collected),
        })

    def test_prefers_posted_date(self):
        df = self._make_df(["2025-01-10"], ["2025-01-15"])
        result = _trend_date(df)
        # Both dates are present, so the posted date is the expected result.
        assert str(result.iloc[0].date()) == "2025-01-10"

    def test_fallback_to_collected_at(self):
        df = self._make_df([None], ["2025-01-15"])
        result = _trend_date(df)
        # With no posted date, the collection date is the expected result.
        assert str(result.iloc[0].date()) == "2025-01-15"
# ── weekly_job_counts ─────────────────────────────────────────────
class TestWeeklyJobCounts:
    """Checks the shape and totals returned by ``weekly_job_counts``."""

    def _make_jobs(self):
        """Return three postings; the last one has no ``posted_date``."""
        today = pd.Timestamp.now()
        return pd.DataFrame({
            "job_category": ["데이터 엔지니어", "데이터 분석가", "데이터 엔지니어"],
            "posted_date": [today - timedelta(days=3), today - timedelta(days=10), None],
            "collected_at": [today, today, today],
        })

    def test_returns_dataframe(self):
        df = self._make_jobs()
        result = weekly_job_counts(df)
        assert isinstance(result, pd.DataFrame)
        assert "week" in result.columns
        assert "count" in result.columns

    def test_count_correct(self):
        df = self._make_jobs()
        result = weekly_job_counts(df)
        # Every input row, including the one without a posted date, is counted.
        assert result["count"].sum() == 3

    def test_empty_input(self):
        empty = pd.DataFrame(columns=["job_category", "posted_date", "collected_at"])
        result = weekly_job_counts(empty)
        assert result.empty
# ── top_skills_by_category ────────────────────────────────────────
class TestTopSkillsByCategory:
    """Checks columns, percentages, and the top-N cut of ``top_skills_by_category``."""

    def _make_skills(self):
        """Return five skill rows: four for category "DE" and one for "DA"."""
        return pd.DataFrame({
            "job_id": [1, 1, 2, 2, 3],
            "skill_name": ["Python", "SQL", "Python", "AWS", "Python"],
            "job_category": ["DE", "DE", "DE", "DE", "DA"],
            "source_site": ["wanted"] * 5,
            "posted_date": [None] * 5,
            "collected_at": [pd.Timestamp.now()] * 5,
        })

    def _make_jobs(self):
        """Return the three postings that the skill rows' ``job_id`` values refer to."""
        return pd.DataFrame({
            "id": [1, 2, 3],
            "job_category": ["DE", "DE", "DA"],
        })

    def test_basic(self):
        sf = self._make_skills()
        result = top_skills_by_category(sf, top_n=10)
        assert "skill_name" in result.columns
        assert "count" in result.columns
        assert "pct" in result.columns

    def test_pct_with_jobs_df(self):
        sf = self._make_skills()
        jf = self._make_jobs()
        result = top_skills_by_category(sf, jobs_df=jf, top_n=10)
        de = result[result["job_category"] == "DE"]
        python_row = de[de["skill_name"] == "Python"].iloc[0]
        # Python appears in 2 of the 2 DE postings -> 100%.
        assert python_row["pct"] == 100.0

    def test_top_n_limit(self):
        sf = self._make_skills()
        result = top_skills_by_category(sf, top_n=1)
        # At most one skill per job category.
        assert result.groupby("job_category").size().max() == 1

    def test_empty_input(self):
        empty = pd.DataFrame(columns=["job_id", "skill_name", "job_category",
                                      "source_site", "posted_date", "collected_at"])
        result = top_skills_by_category(empty)
        assert result.empty
# ── salary_by_category ────────────────────────────────────────────
class TestSalaryByCategory:
    """Checks null filtering and the midpoint column of ``salary_by_category``."""

    def _make_jobs(self):
        """Return three postings; the third has no salary bounds."""
        return pd.DataFrame({
            "job_category": ["DE", "DA", "DE"],
            "salary_min": [4000, 3000, None],
            "salary_max": [6000, 5000, None],
            "company_name": ["A", "B", "C"],
        })

    def test_drops_null_salary(self):
        df = self._make_jobs()
        result = salary_by_category(df)
        assert len(result) == 2  # the row without salary data is removed

    def test_salary_mid_calculation(self):
        df = self._make_jobs()
        result = salary_by_category(df)
        de_row = result[result["job_category"] == "DE"].iloc[0]
        assert de_row["salary_mid"] == 5000.0  # (4000 + 6000) / 2
# ── experience_distribution ───────────────────────────────────────
class TestExperienceDistribution:
    """Checks the bucket labels and null handling of ``experience_distribution``."""

    def _make_jobs(self):
        """Return five "DE" postings; the last has no ``experience_min``."""
        return pd.DataFrame({
            "job_category": ["DE"] * 5,
            "experience_min": [0, 1, 3, 7, None],
            "collected_at": [pd.Timestamp.now()] * 5,
        })

    def test_basic_groups(self):
        df = self._make_jobs()
        result = experience_distribution(df)
        labels = result["exp_group"].astype(str).tolist()
        assert "신입" in labels  # entry-level bucket
        assert "1-2년" in labels  # 1-2 years bucket

    def test_null_excluded(self):
        df = self._make_jobs()
        result = experience_distribution(df)
        assert result["count"].sum() == 4  # the None row is excluded
# ── skill_growth_rate ─────────────────────────────────────────────
class TestSkillGrowthRate:
    """Checks growth sign, low-count filtering, and empty input for ``skill_growth_rate``."""

    def _make_skills(self, recent_count=5, prev_count=2):
        """Build "Python" skill rows split across two collection dates.

        Args:
            recent_count: Number of rows collected 3 days ago (``job_id`` 1).
            prev_count: Number of rows collected 20 days ago (``job_id`` 2).
        """
        now = pd.Timestamp.now()
        recent_rows = [
            {"skill_name": "Python", "job_id": 1,
             "job_category": "DE", "source_site": "wanted",
             "posted_date": None, "collected_at": now - timedelta(days=3)}
            for _ in range(recent_count)
        ]
        prev_rows = [
            {"skill_name": "Python", "job_id": 2,
             "job_category": "DE", "source_site": "wanted",
             "posted_date": None, "collected_at": now - timedelta(days=20)}
            for _ in range(prev_count)
        ]
        # Recent rows come first, matching the order the frame was built in before.
        return pd.DataFrame(recent_rows + prev_rows)

    def test_returns_dataframe(self):
        df = self._make_skills()
        result = skill_growth_rate(df)
        assert isinstance(result, pd.DataFrame)

    def test_growth_positive(self):
        df = self._make_skills(recent_count=5, prev_count=2)
        result = skill_growth_rate(df)
        assert not result.empty
        assert result.iloc[0]["growth_pct"] > 0

    def test_prev_filter_removes_noise(self):
        """Skills with prev < 2 must not be included."""
        df = self._make_skills(recent_count=5, prev_count=1)
        result = skill_growth_rate(df)
        assert result.empty  # prev == 1, so the skill is filtered out

    def test_empty_input(self):
        empty = pd.DataFrame(columns=["skill_name", "job_id", "job_category",
                                      "source_site", "posted_date", "collected_at"])
        result = skill_growth_rate(empty)
        assert result.empty
# ── new_jobs_count ────────────────────────────────────────────────
class TestNewJobsCount:
    """Checks the recent-posting count returned by ``new_jobs_count``."""

    def test_recent_jobs(self):
        now = pd.Timestamp.now()
        # Ten rows collected 0, 1, ..., 9 days before ``now``.
        df = pd.DataFrame({
            "collected_at": [now - timedelta(days=i) for i in range(10)],
        })
        # NOTE(review): the row at exactly ``now - 7 days`` sits on the window
        # boundary. The expected 8 presumably relies on new_jobs_count using an
        # inclusive, date-normalized cutoff -- confirm against the implementation.
        assert new_jobs_count(df, days=7) == 8  # days 0 through 7 inclusive

    def test_empty(self):
        assert new_jobs_count(pd.DataFrame(columns=["collected_at"]), days=7) == 0