refactor: remove dead code, extract shared job helpers
Browse files
agent/utils.py:
- Add job_institution(job) — eliminates repeated .get("institution", .get("company",...))
pattern; the "company" fallback was dead (no scraper ever sets it)
- Add job_description(job, max_chars) — eliminates repeated [:3000] truncation pattern
agent/job_searcher.py:
- Remove _TYPE_QUERY dict (defined but never referenced anywhere)
- Move _MLSCI_NON_COUNTRY from inside the loop body to module-level frozenset
- Fix module docstring (remove stale FindAPhD/jobs.ac.uk-only references)
agent/job_matcher.py, cover_letter.py, cv_tailor.py:
- Use job_institution() and job_description() from utils
app.py:
- Use job_institution() in all 7 display/logic locations
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- agent/cover_letter.py +4 -3
- agent/cv_tailor.py +3 -3
- agent/job_matcher.py +3 -3
- agent/job_searcher.py +9 -17
- agent/utils.py +10 -0
- app.py +9 -7
agent/cover_letter.py
CHANGED
|
@@ -6,6 +6,7 @@ import re
|
|
| 6 |
from typing import Any
|
| 7 |
|
| 8 |
from agent.llm_client import LLMClient
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
_ITALIAN_KEYWORDS = {
|
|
@@ -78,7 +79,7 @@ class CoverLetterWriter:
|
|
| 78 |
"""
|
| 79 |
language = self._detect_language(job)
|
| 80 |
title = job.get("title", "Unknown Position")
|
| 81 |
-
institution =
|
| 82 |
|
| 83 |
prompt = _PROMPT.format(
|
| 84 |
profile=profile_text,
|
|
@@ -86,7 +87,7 @@ class CoverLetterWriter:
|
|
| 86 |
institution=institution,
|
| 87 |
location=job.get("location", "Unknown"),
|
| 88 |
pos_type=job.get("type", "research"),
|
| 89 |
-
description=(job
|
| 90 |
language=language,
|
| 91 |
regen_note=_REGEN_NOTE if regenerate else "",
|
| 92 |
)
|
|
@@ -108,7 +109,7 @@ class CoverLetterWriter:
|
|
| 108 |
job.get("title", ""),
|
| 109 |
job.get("description", ""),
|
| 110 |
job.get("location", ""),
|
| 111 |
-
|
| 112 |
]).lower()
|
| 113 |
hits = sum(1 for kw in _ITALIAN_KEYWORDS if kw.lower() in text)
|
| 114 |
return "Italian" if hits >= 2 else "English"
|
|
|
|
| 6 |
from typing import Any
|
| 7 |
|
| 8 |
from agent.llm_client import LLMClient
|
| 9 |
+
from agent.utils import job_institution, job_description
|
| 10 |
|
| 11 |
|
| 12 |
_ITALIAN_KEYWORDS = {
|
|
|
|
| 79 |
"""
|
| 80 |
language = self._detect_language(job)
|
| 81 |
title = job.get("title", "Unknown Position")
|
| 82 |
+
institution = job_institution(job) or "Unknown Institution"
|
| 83 |
|
| 84 |
prompt = _PROMPT.format(
|
| 85 |
profile=profile_text,
|
|
|
|
| 87 |
institution=institution,
|
| 88 |
location=job.get("location", "Unknown"),
|
| 89 |
pos_type=job.get("type", "research"),
|
| 90 |
+
description=job_description(job),
|
| 91 |
language=language,
|
| 92 |
regen_note=_REGEN_NOTE if regenerate else "",
|
| 93 |
)
|
|
|
|
| 109 |
job.get("title", ""),
|
| 110 |
job.get("description", ""),
|
| 111 |
job.get("location", ""),
|
| 112 |
+
job_institution(job),
|
| 113 |
]).lower()
|
| 114 |
hits = sum(1 for kw in _ITALIAN_KEYWORDS if kw.lower() in text)
|
| 115 |
return "Italian" if hits >= 2 else "English"
|
agent/cv_tailor.py
CHANGED
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
| 5 |
from typing import Any, TypedDict
|
| 6 |
|
| 7 |
from agent.llm_client import LLMClient
|
| 8 |
-
from agent.utils import parse_json
|
| 9 |
|
| 10 |
|
| 11 |
class TailoringHints(TypedDict, total=False):
|
|
@@ -95,9 +95,9 @@ class CVTailor:
|
|
| 95 |
"""Generate actionable tailoring hints for a specific position."""
|
| 96 |
prompt = _PROMPT.format(
|
| 97 |
title=job.get("title", "Unknown"),
|
| 98 |
-
institution=
|
| 99 |
pos_type=job.get("type", "unknown"),
|
| 100 |
-
description=(job
|
| 101 |
profile=profile_text,
|
| 102 |
)
|
| 103 |
|
|
|
|
| 5 |
from typing import Any, TypedDict
|
| 6 |
|
| 7 |
from agent.llm_client import LLMClient
|
| 8 |
+
from agent.utils import parse_json, job_institution, job_description
|
| 9 |
|
| 10 |
|
| 11 |
class TailoringHints(TypedDict, total=False):
|
|
|
|
| 95 |
"""Generate actionable tailoring hints for a specific position."""
|
| 96 |
prompt = _PROMPT.format(
|
| 97 |
title=job.get("title", "Unknown"),
|
| 98 |
+
institution=job_institution(job) or "Unknown",
|
| 99 |
pos_type=job.get("type", "unknown"),
|
| 100 |
+
description=job_description(job),
|
| 101 |
profile=profile_text,
|
| 102 |
)
|
| 103 |
|
agent/job_matcher.py
CHANGED
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
| 5 |
from typing import Any, TypedDict
|
| 6 |
|
| 7 |
from agent.llm_client import LLMClient
|
| 8 |
-
from agent.utils import parse_json
|
| 9 |
|
| 10 |
|
| 11 |
class MatchResult(TypedDict, total=False):
|
|
@@ -83,10 +83,10 @@ class JobMatcher:
|
|
| 83 |
prompt = _PROMPT.format(
|
| 84 |
profile=profile_text,
|
| 85 |
title=job.get("title", "Unknown"),
|
| 86 |
-
institution=
|
| 87 |
location=job.get("location", "Unknown"),
|
| 88 |
pos_type=job.get("type", "unknown"),
|
| 89 |
-
description=(job
|
| 90 |
)
|
| 91 |
|
| 92 |
try:
|
|
|
|
| 5 |
from typing import Any, TypedDict
|
| 6 |
|
| 7 |
from agent.llm_client import LLMClient
|
| 8 |
+
from agent.utils import parse_json, job_institution, job_description
|
| 9 |
|
| 10 |
|
| 11 |
class MatchResult(TypedDict, total=False):
|
|
|
|
| 83 |
prompt = _PROMPT.format(
|
| 84 |
profile=profile_text,
|
| 85 |
title=job.get("title", "Unknown"),
|
| 86 |
+
institution=job_institution(job) or "Unknown",
|
| 87 |
location=job.get("location", "Unknown"),
|
| 88 |
pos_type=job.get("type", "unknown"),
|
| 89 |
+
description=job_description(job),
|
| 90 |
)
|
| 91 |
|
| 92 |
try:
|
agent/job_searcher.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
| 1 |
"""Job searcher: finds PhD / postdoc / research positions from free public sources.
|
| 2 |
|
| 3 |
Sources:
|
| 4 |
-
-
|
| 5 |
-
-
|
|
|
|
|
|
|
| 6 |
|
| 7 |
All scrapers are wrapped in try/except — if one source is down the rest continue.
|
| 8 |
"""
|
|
@@ -57,15 +59,6 @@ _TYPE_KEYWORDS: dict[str, list[str]] = {
|
|
| 57 |
],
|
| 58 |
}
|
| 59 |
|
| 60 |
-
# Position type → keywords appended to search query for sites without native facets
|
| 61 |
-
_TYPE_QUERY: dict[str, str] = {
|
| 62 |
-
"predoctoral": "predoctoral OR \"early-stage researcher\" OR \"research trainee\"",
|
| 63 |
-
"phd": "PhD",
|
| 64 |
-
"postdoc": "postdoc OR \"research associate\" OR \"research fellow\"",
|
| 65 |
-
"fellowship": "fellowship OR scholarship",
|
| 66 |
-
"research_staff": "researcher OR lecturer OR professor",
|
| 67 |
-
}
|
| 68 |
-
|
| 69 |
_HEADERS = {
|
| 70 |
"User-Agent": (
|
| 71 |
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
|
|
@@ -119,6 +112,11 @@ _MLSCI_TYPE_SLUG: dict[str, str] = {
|
|
| 119 |
"any": "jobs",
|
| 120 |
}
|
| 121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
# mlscientist.com country slug mapping (lowercase location → slug)
|
| 123 |
_MLSCI_COUNTRY_SLUG: dict[str, str] = {
|
| 124 |
"uk": "united-kingdom",
|
|
@@ -166,12 +164,6 @@ def _search_mlscientist(field: str, location: str, position_type: str) -> list[d
|
|
| 166 |
listings: list[dict] = []
|
| 167 |
seen_urls: set[str] = set()
|
| 168 |
|
| 169 |
-
# Non-type category slugs to ignore when extracting country from CSS classes
|
| 170 |
-
_MLSCI_NON_COUNTRY = {
|
| 171 |
-
"jobs", "phd-positions", "postdoc-positions", "featured",
|
| 172 |
-
"conference-calls", "mlnews",
|
| 173 |
-
}
|
| 174 |
-
|
| 175 |
for url in urls_to_try:
|
| 176 |
try:
|
| 177 |
resp = requests.get(url, headers=_HEADERS, timeout=15)
|
|
|
|
| 1 |
"""Job searcher: finds PhD / postdoc / research positions from free public sources.
|
| 2 |
|
| 3 |
Sources:
|
| 4 |
+
- Euraxess (euraxess.ec.europa.eu) — EU/worldwide research portal, country-filtered
|
| 5 |
+
- mlscientist.com — ML/AI academic positions, WordPress category + search
|
| 6 |
+
- jobs.ac.uk — UK academic jobs (queried only for UK/worldwide locations)
|
| 7 |
+
- DuckDuckGo web search — targeted queries for open calls
|
| 8 |
|
| 9 |
All scrapers are wrapped in try/except — if one source is down the rest continue.
|
| 10 |
"""
|
|
|
|
| 59 |
],
|
| 60 |
}
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
_HEADERS = {
|
| 63 |
"User-Agent": (
|
| 64 |
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
|
|
|
|
| 112 |
"any": "jobs",
|
| 113 |
}
|
| 114 |
|
| 115 |
+
_MLSCI_NON_COUNTRY: frozenset[str] = frozenset({
|
| 116 |
+
"jobs", "phd-positions", "postdoc-positions", "featured",
|
| 117 |
+
"conference-calls", "mlnews",
|
| 118 |
+
})
|
| 119 |
+
|
| 120 |
# mlscientist.com country slug mapping (lowercase location → slug)
|
| 121 |
_MLSCI_COUNTRY_SLUG: dict[str, str] = {
|
| 122 |
"uk": "united-kingdom",
|
|
|
|
| 164 |
listings: list[dict] = []
|
| 165 |
seen_urls: set[str] = set()
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
for url in urls_to_try:
|
| 168 |
try:
|
| 169 |
resp = requests.get(url, headers=_HEADERS, timeout=15)
|
agent/utils.py
CHANGED
|
@@ -30,3 +30,13 @@ def parse_json(raw: str) -> dict[str, Any] | None:
|
|
| 30 |
except json.JSONDecodeError:
|
| 31 |
pass
|
| 32 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
except json.JSONDecodeError:
|
| 31 |
pass
|
| 32 |
return None
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def job_institution(job: dict) -> str:
|
| 36 |
+
"""Return the job's institution name, empty string if absent."""
|
| 37 |
+
return job.get("institution") or ""
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def job_description(job: dict, max_chars: int = 3000) -> str:
|
| 41 |
+
"""Return the job description truncated to max_chars."""
|
| 42 |
+
return (job.get("description") or "No description provided.")[:max_chars]
|
app.py
CHANGED
|
@@ -22,6 +22,8 @@ from typing import Any
|
|
| 22 |
|
| 23 |
import gradio as gr
|
| 24 |
|
|
|
|
|
|
|
| 25 |
|
| 26 |
# ---------------------------------------------------------------------------
|
| 27 |
# Formatting helpers (pure functions — no LLM dependency)
|
|
@@ -59,7 +61,7 @@ def _fmt_profile(profile: dict) -> str:
|
|
| 59 |
|
| 60 |
def _fmt_jobs_table(jobs: list) -> list[list]:
|
| 61 |
return [
|
| 62 |
-
[i, j.get("title", ""),
|
| 63 |
j.get("location", ""), j.get("type", ""), j.get("source", ""),
|
| 64 |
j.get("deadline") or "—"]
|
| 65 |
for i, j in enumerate(jobs, 1)
|
|
@@ -74,7 +76,7 @@ def _fmt_scored_table(jobs: list) -> list[list]:
|
|
| 74 |
why = m.get("why_good_fit") or ""
|
| 75 |
rows.append([
|
| 76 |
i, m.get("match_score", 0), job.get("title", ""),
|
| 77 |
-
|
| 78 |
icons.get(m.get("recommendation", ""), ""),
|
| 79 |
why[:60] + "..." if len(why) > 60 else why,
|
| 80 |
])
|
|
@@ -89,7 +91,7 @@ def _fmt_job_details(job: dict, match: dict) -> str:
|
|
| 89 |
url = job.get("url", "")
|
| 90 |
lines = [
|
| 91 |
f"## {job.get('title', 'Unknown')}",
|
| 92 |
-
f"**{
|
| 93 |
"",
|
| 94 |
f"**Type:** {job.get('type', '')} | **Deadline:** {job.get('deadline') or 'N/A'}",
|
| 95 |
]
|
|
@@ -248,7 +250,7 @@ def load_position(
|
|
| 248 |
hints, cover_letter = agent.prepare_application(job, profile_text)
|
| 249 |
|
| 250 |
progress(1.0, desc="Done!")
|
| 251 |
-
status = f"✅ Loaded: **{job.get('title', '')}** @ {
|
| 252 |
return _fmt_job_details(job, match), _fmt_hints(hints), cover_letter, status, idx
|
| 253 |
|
| 254 |
except Exception as exc:
|
|
@@ -285,7 +287,7 @@ def approve_position(
|
|
| 285 |
if current_idx < 0 or not scored_jobs or current_idx >= len(scored_jobs):
|
| 286 |
return approved, "β No position loaded."
|
| 287 |
job = scored_jobs[current_idx]
|
| 288 |
-
title, institution = job.get("title", "Unknown"),
|
| 289 |
if any(a["job"].get("title") == title and a["job"].get("institution") == institution for a in approved):
|
| 290 |
return approved, f"⚠️ **{title}** @ {institution} already approved."
|
| 291 |
new_approved = list(approved) + [{
|
|
@@ -299,7 +301,7 @@ def skip_position(current_idx: int, scored_jobs: list) -> str:
|
|
| 299 |
if current_idx < 0 or not scored_jobs or current_idx >= len(scored_jobs):
|
| 300 |
return "β Skipped."
|
| 301 |
job = scored_jobs[current_idx]
|
| 302 |
-
return f"β Skipped: **{job.get('title', '')}** @ {
|
| 303 |
|
| 304 |
|
| 305 |
def approved_display(approved: list) -> str:
|
|
@@ -330,7 +332,7 @@ def export_zip(approved: list) -> tuple:
|
|
| 330 |
for entry in approved:
|
| 331 |
job = entry.get("job") or {}
|
| 332 |
title = job.get("title", "Unknown")
|
| 333 |
-
institution =
|
| 334 |
safe = (
|
| 335 |
f"{institution}_{title}"
|
| 336 |
.replace(" ", "_").replace("/", "-").replace("\\", "-")
|
|
|
|
| 22 |
|
| 23 |
import gradio as gr
|
| 24 |
|
| 25 |
+
from agent.utils import job_institution
|
| 26 |
+
|
| 27 |
|
| 28 |
# ---------------------------------------------------------------------------
|
| 29 |
# Formatting helpers (pure functions — no LLM dependency)
|
|
|
|
| 61 |
|
| 62 |
def _fmt_jobs_table(jobs: list) -> list[list]:
|
| 63 |
return [
|
| 64 |
+
[i, j.get("title", ""), job_institution(j),
|
| 65 |
j.get("location", ""), j.get("type", ""), j.get("source", ""),
|
| 66 |
j.get("deadline") or "—"]
|
| 67 |
for i, j in enumerate(jobs, 1)
|
|
|
|
| 76 |
why = m.get("why_good_fit") or ""
|
| 77 |
rows.append([
|
| 78 |
i, m.get("match_score", 0), job.get("title", ""),
|
| 79 |
+
job_institution(job), job.get("type", ""),
|
| 80 |
icons.get(m.get("recommendation", ""), ""),
|
| 81 |
why[:60] + "..." if len(why) > 60 else why,
|
| 82 |
])
|
|
|
|
| 91 |
url = job.get("url", "")
|
| 92 |
lines = [
|
| 93 |
f"## {job.get('title', 'Unknown')}",
|
| 94 |
+
f"**{job_institution(job) or 'Unknown'}** β {job.get('location', '')}",
|
| 95 |
"",
|
| 96 |
f"**Type:** {job.get('type', '')} | **Deadline:** {job.get('deadline') or 'N/A'}",
|
| 97 |
]
|
|
|
|
| 250 |
hints, cover_letter = agent.prepare_application(job, profile_text)
|
| 251 |
|
| 252 |
progress(1.0, desc="Done!")
|
| 253 |
+
status = f"✅ Loaded: **{job.get('title', '')}** @ {job_institution(job)}"
|
| 254 |
return _fmt_job_details(job, match), _fmt_hints(hints), cover_letter, status, idx
|
| 255 |
|
| 256 |
except Exception as exc:
|
|
|
|
| 287 |
if current_idx < 0 or not scored_jobs or current_idx >= len(scored_jobs):
|
| 288 |
return approved, "β No position loaded."
|
| 289 |
job = scored_jobs[current_idx]
|
| 290 |
+
title, institution = job.get("title", "Unknown"), job_institution(job) or "Unknown"
|
| 291 |
if any(a["job"].get("title") == title and a["job"].get("institution") == institution for a in approved):
|
| 292 |
return approved, f"⚠️ **{title}** @ {institution} already approved."
|
| 293 |
new_approved = list(approved) + [{
|
|
|
|
| 301 |
if current_idx < 0 or not scored_jobs or current_idx >= len(scored_jobs):
|
| 302 |
return "β Skipped."
|
| 303 |
job = scored_jobs[current_idx]
|
| 304 |
+
return f"β Skipped: **{job.get('title', '')}** @ {job_institution(job)}"
|
| 305 |
|
| 306 |
|
| 307 |
def approved_display(approved: list) -> str:
|
|
|
|
| 332 |
for entry in approved:
|
| 333 |
job = entry.get("job") or {}
|
| 334 |
title = job.get("title", "Unknown")
|
| 335 |
+
institution = job_institution(job) or "Unknown"
|
| 336 |
safe = (
|
| 337 |
f"{institution}_{title}"
|
| 338 |
.replace(" ", "_").replace("/", "-").replace("\\", "-")
|