HipFil98 Claude Sonnet 4.6 committed on
Commit
cd035f4
·
1 Parent(s): c58f98c

refactor: remove dead code, extract shared job helpers

Browse files

agent/utils.py:
- Add job_institution(job) — eliminates repeated .get("institution", .get("company",...))
pattern; the "company" fallback was dead (no scraper ever sets it)
- Add job_description(job, max_chars) — eliminates repeated [:3000] truncation pattern

agent/job_searcher.py:
- Remove _TYPE_QUERY dict (defined but never referenced anywhere)
- Move _MLSCI_NON_COUNTRY from inside the loop body to module-level frozenset
- Fix module docstring (remove stale FindAPhD/jobs.ac.uk-only references)

agent/job_matcher.py, cover_letter.py, cv_tailor.py:
- Use job_institution() and job_description() from utils

app.py:
- Use job_institution() in all 7 display/logic locations

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

agent/cover_letter.py CHANGED
@@ -6,6 +6,7 @@ import re
6
  from typing import Any
7
 
8
  from agent.llm_client import LLMClient
 
9
 
10
 
11
  _ITALIAN_KEYWORDS = {
@@ -78,7 +79,7 @@ class CoverLetterWriter:
78
  """
79
  language = self._detect_language(job)
80
  title = job.get("title", "Unknown Position")
81
- institution = job.get("institution", job.get("company", "Unknown Institution"))
82
 
83
  prompt = _PROMPT.format(
84
  profile=profile_text,
@@ -86,7 +87,7 @@ class CoverLetterWriter:
86
  institution=institution,
87
  location=job.get("location", "Unknown"),
88
  pos_type=job.get("type", "research"),
89
- description=(job.get("description") or "No description provided.")[:3000],
90
  language=language,
91
  regen_note=_REGEN_NOTE if regenerate else "",
92
  )
@@ -108,7 +109,7 @@ class CoverLetterWriter:
108
  job.get("title", ""),
109
  job.get("description", ""),
110
  job.get("location", ""),
111
- job.get("institution", job.get("company", "")),
112
  ]).lower()
113
  hits = sum(1 for kw in _ITALIAN_KEYWORDS if kw.lower() in text)
114
  return "Italian" if hits >= 2 else "English"
 
6
  from typing import Any
7
 
8
  from agent.llm_client import LLMClient
9
+ from agent.utils import job_institution, job_description
10
 
11
 
12
  _ITALIAN_KEYWORDS = {
 
79
  """
80
  language = self._detect_language(job)
81
  title = job.get("title", "Unknown Position")
82
+ institution = job_institution(job) or "Unknown Institution"
83
 
84
  prompt = _PROMPT.format(
85
  profile=profile_text,
 
87
  institution=institution,
88
  location=job.get("location", "Unknown"),
89
  pos_type=job.get("type", "research"),
90
+ description=job_description(job),
91
  language=language,
92
  regen_note=_REGEN_NOTE if regenerate else "",
93
  )
 
109
  job.get("title", ""),
110
  job.get("description", ""),
111
  job.get("location", ""),
112
+ job_institution(job),
113
  ]).lower()
114
  hits = sum(1 for kw in _ITALIAN_KEYWORDS if kw.lower() in text)
115
  return "Italian" if hits >= 2 else "English"
agent/cv_tailor.py CHANGED
@@ -5,7 +5,7 @@ from __future__ import annotations
5
  from typing import Any, TypedDict
6
 
7
  from agent.llm_client import LLMClient
8
- from agent.utils import parse_json
9
 
10
 
11
  class TailoringHints(TypedDict, total=False):
@@ -95,9 +95,9 @@ class CVTailor:
95
  """Generate actionable tailoring hints for a specific position."""
96
  prompt = _PROMPT.format(
97
  title=job.get("title", "Unknown"),
98
- institution=job.get("institution", job.get("company", "Unknown")),
99
  pos_type=job.get("type", "unknown"),
100
- description=(job.get("description") or "No description provided.")[:3000],
101
  profile=profile_text,
102
  )
103
 
 
5
  from typing import Any, TypedDict
6
 
7
  from agent.llm_client import LLMClient
8
+ from agent.utils import parse_json, job_institution, job_description
9
 
10
 
11
  class TailoringHints(TypedDict, total=False):
 
95
  """Generate actionable tailoring hints for a specific position."""
96
  prompt = _PROMPT.format(
97
  title=job.get("title", "Unknown"),
98
+ institution=job_institution(job) or "Unknown",
99
  pos_type=job.get("type", "unknown"),
100
+ description=job_description(job),
101
  profile=profile_text,
102
  )
103
 
agent/job_matcher.py CHANGED
@@ -5,7 +5,7 @@ from __future__ import annotations
5
  from typing import Any, TypedDict
6
 
7
  from agent.llm_client import LLMClient
8
- from agent.utils import parse_json
9
 
10
 
11
  class MatchResult(TypedDict, total=False):
@@ -83,10 +83,10 @@ class JobMatcher:
83
  prompt = _PROMPT.format(
84
  profile=profile_text,
85
  title=job.get("title", "Unknown"),
86
- institution=job.get("institution", job.get("company", "Unknown")),
87
  location=job.get("location", "Unknown"),
88
  pos_type=job.get("type", "unknown"),
89
- description=(job.get("description") or "No description provided.")[:3000],
90
  )
91
 
92
  try:
 
5
  from typing import Any, TypedDict
6
 
7
  from agent.llm_client import LLMClient
8
+ from agent.utils import parse_json, job_institution, job_description
9
 
10
 
11
  class MatchResult(TypedDict, total=False):
 
83
  prompt = _PROMPT.format(
84
  profile=profile_text,
85
  title=job.get("title", "Unknown"),
86
+ institution=job_institution(job) or "Unknown",
87
  location=job.get("location", "Unknown"),
88
  pos_type=job.get("type", "unknown"),
89
+ description=job_description(job),
90
  )
91
 
92
  try:
agent/job_searcher.py CHANGED
@@ -1,8 +1,10 @@
1
  """Job searcher: finds PhD / postdoc / research positions from free public sources.
2
 
3
  Sources:
4
- - jobs.ac.uk (UK academic jobs) — HTML scraping with facet filters; only queried for UK/worldwide
5
- - FindAPhD (worldwide PhD board) — HTML scraping; handles location filtering globally
 
 
6
 
7
  All scrapers are wrapped in try/except — if one source is down the rest continue.
8
  """
@@ -57,15 +59,6 @@ _TYPE_KEYWORDS: dict[str, list[str]] = {
57
  ],
58
  }
59
 
60
- # Position type → keywords appended to search query for sites without native facets
61
- _TYPE_QUERY: dict[str, str] = {
62
- "predoctoral": "predoctoral OR \"early-stage researcher\" OR \"research trainee\"",
63
- "phd": "PhD",
64
- "postdoc": "postdoc OR \"research associate\" OR \"research fellow\"",
65
- "fellowship": "fellowship OR scholarship",
66
- "research_staff": "researcher OR lecturer OR professor",
67
- }
68
-
69
  _HEADERS = {
70
  "User-Agent": (
71
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
@@ -119,6 +112,11 @@ _MLSCI_TYPE_SLUG: dict[str, str] = {
119
  "any": "jobs",
120
  }
121
 
 
 
 
 
 
122
  # mlscientist.com country slug mapping (lowercase location → slug)
123
  _MLSCI_COUNTRY_SLUG: dict[str, str] = {
124
  "uk": "united-kingdom",
@@ -166,12 +164,6 @@ def _search_mlscientist(field: str, location: str, position_type: str) -> list[d
166
  listings: list[dict] = []
167
  seen_urls: set[str] = set()
168
 
169
- # Non-type category slugs to ignore when extracting country from CSS classes
170
- _MLSCI_NON_COUNTRY = {
171
- "jobs", "phd-positions", "postdoc-positions", "featured",
172
- "conference-calls", "mlnews",
173
- }
174
-
175
  for url in urls_to_try:
176
  try:
177
  resp = requests.get(url, headers=_HEADERS, timeout=15)
 
1
  """Job searcher: finds PhD / postdoc / research positions from free public sources.
2
 
3
  Sources:
4
+ - Euraxess (euraxess.ec.europa.eu) — EU/worldwide research portal, country-filtered
5
+ - mlscientist.com — ML/AI academic positions, WordPress category + search
6
+ - jobs.ac.uk — UK academic jobs (queried only for UK/worldwide locations)
7
+ - DuckDuckGo web search — targeted queries for open calls
8
 
9
  All scrapers are wrapped in try/except — if one source is down the rest continue.
10
  """
 
59
  ],
60
  }
61
 
 
 
 
 
 
 
 
 
 
62
  _HEADERS = {
63
  "User-Agent": (
64
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
 
112
  "any": "jobs",
113
  }
114
 
115
+ _MLSCI_NON_COUNTRY: frozenset[str] = frozenset({
116
+ "jobs", "phd-positions", "postdoc-positions", "featured",
117
+ "conference-calls", "mlnews",
118
+ })
119
+
120
  # mlscientist.com country slug mapping (lowercase location → slug)
121
  _MLSCI_COUNTRY_SLUG: dict[str, str] = {
122
  "uk": "united-kingdom",
 
164
  listings: list[dict] = []
165
  seen_urls: set[str] = set()
166
 
 
 
 
 
 
 
167
  for url in urls_to_try:
168
  try:
169
  resp = requests.get(url, headers=_HEADERS, timeout=15)
agent/utils.py CHANGED
@@ -30,3 +30,13 @@ def parse_json(raw: str) -> dict[str, Any] | None:
30
  except json.JSONDecodeError:
31
  pass
32
  return None
 
 
 
 
 
 
 
 
 
 
 
30
  except json.JSONDecodeError:
31
  pass
32
  return None
33
+
34
+
35
+ def job_institution(job: dict) -> str:
36
+ """Return the job's institution name, empty string if absent."""
37
+ return job.get("institution") or ""
38
+
39
+
40
+ def job_description(job: dict, max_chars: int = 3000) -> str:
41
+ """Return the job description truncated to max_chars."""
42
+ return (job.get("description") or "No description provided.")[:max_chars]
app.py CHANGED
@@ -22,6 +22,8 @@ from typing import Any
22
 
23
  import gradio as gr
24
 
 
 
25
 
26
  # ---------------------------------------------------------------------------
27
  # Formatting helpers (pure functions — no LLM dependency)
@@ -59,7 +61,7 @@ def _fmt_profile(profile: dict) -> str:
59
 
60
  def _fmt_jobs_table(jobs: list) -> list[list]:
61
  return [
62
- [i, j.get("title", ""), j.get("institution", j.get("company", "")),
63
  j.get("location", ""), j.get("type", ""), j.get("source", ""),
64
  j.get("deadline") or "—"]
65
  for i, j in enumerate(jobs, 1)
@@ -74,7 +76,7 @@ def _fmt_scored_table(jobs: list) -> list[list]:
74
  why = m.get("why_good_fit") or ""
75
  rows.append([
76
  i, m.get("match_score", 0), job.get("title", ""),
77
- job.get("institution", job.get("company", "")), job.get("type", ""),
78
  icons.get(m.get("recommendation", ""), ""),
79
  why[:60] + "..." if len(why) > 60 else why,
80
  ])
@@ -89,7 +91,7 @@ def _fmt_job_details(job: dict, match: dict) -> str:
89
  url = job.get("url", "")
90
  lines = [
91
  f"## {job.get('title', 'Unknown')}",
92
- f"**{job.get('institution', job.get('company', 'Unknown'))}** — {job.get('location', '')}",
93
  "",
94
  f"**Type:** {job.get('type', '')} | **Deadline:** {job.get('deadline') or 'N/A'}",
95
  ]
@@ -248,7 +250,7 @@ def load_position(
248
  hints, cover_letter = agent.prepare_application(job, profile_text)
249
 
250
  progress(1.0, desc="Done!")
251
- status = f"✅ Loaded: **{job.get('title', '')}** @ {job.get('institution', job.get('company', ''))}"
252
  return _fmt_job_details(job, match), _fmt_hints(hints), cover_letter, status, idx
253
 
254
  except Exception as exc:
@@ -285,7 +287,7 @@ def approve_position(
285
  if current_idx < 0 or not scored_jobs or current_idx >= len(scored_jobs):
286
  return approved, "❌ No position loaded."
287
  job = scored_jobs[current_idx]
288
- title, institution = job.get("title", "Unknown"), job.get("institution", job.get("company", "Unknown"))
289
  if any(a["job"].get("title") == title and a["job"].get("institution") == institution for a in approved):
290
  return approved, f"⚠️ **{title}** @ {institution} already approved."
291
  new_approved = list(approved) + [{
@@ -299,7 +301,7 @@ def skip_position(current_idx: int, scored_jobs: list) -> str:
299
  if current_idx < 0 or not scored_jobs or current_idx >= len(scored_jobs):
300
  return "⏭ Skipped."
301
  job = scored_jobs[current_idx]
302
- return f"⏭ Skipped: **{job.get('title', '')}** @ {job.get('institution', job.get('company', ''))}"
303
 
304
 
305
  def approved_display(approved: list) -> str:
@@ -330,7 +332,7 @@ def export_zip(approved: list) -> tuple:
330
  for entry in approved:
331
  job = entry.get("job") or {}
332
  title = job.get("title", "Unknown")
333
- institution = job.get("institution", job.get("company", "Unknown"))
334
  safe = (
335
  f"{institution}_{title}"
336
  .replace(" ", "_").replace("/", "-").replace("\\", "-")
 
22
 
23
  import gradio as gr
24
 
25
+ from agent.utils import job_institution
26
+
27
 
28
  # ---------------------------------------------------------------------------
29
  # Formatting helpers (pure functions — no LLM dependency)
 
61
 
62
  def _fmt_jobs_table(jobs: list) -> list[list]:
63
  return [
64
+ [i, j.get("title", ""), job_institution(j),
65
  j.get("location", ""), j.get("type", ""), j.get("source", ""),
66
  j.get("deadline") or "—"]
67
  for i, j in enumerate(jobs, 1)
 
76
  why = m.get("why_good_fit") or ""
77
  rows.append([
78
  i, m.get("match_score", 0), job.get("title", ""),
79
+ job_institution(job), job.get("type", ""),
80
  icons.get(m.get("recommendation", ""), ""),
81
  why[:60] + "..." if len(why) > 60 else why,
82
  ])
 
91
  url = job.get("url", "")
92
  lines = [
93
  f"## {job.get('title', 'Unknown')}",
94
+ f"**{job_institution(job) or 'Unknown'}** — {job.get('location', '')}",
95
  "",
96
  f"**Type:** {job.get('type', '')} | **Deadline:** {job.get('deadline') or 'N/A'}",
97
  ]
 
250
  hints, cover_letter = agent.prepare_application(job, profile_text)
251
 
252
  progress(1.0, desc="Done!")
253
+ status = f"✅ Loaded: **{job.get('title', '')}** @ {job_institution(job)}"
254
  return _fmt_job_details(job, match), _fmt_hints(hints), cover_letter, status, idx
255
 
256
  except Exception as exc:
 
287
  if current_idx < 0 or not scored_jobs or current_idx >= len(scored_jobs):
288
  return approved, "❌ No position loaded."
289
  job = scored_jobs[current_idx]
290
+ title, institution = job.get("title", "Unknown"), job_institution(job) or "Unknown"
291
  if any(a["job"].get("title") == title and a["job"].get("institution") == institution for a in approved):
292
  return approved, f"⚠️ **{title}** @ {institution} already approved."
293
  new_approved = list(approved) + [{
 
301
  if current_idx < 0 or not scored_jobs or current_idx >= len(scored_jobs):
302
  return "⏭ Skipped."
303
  job = scored_jobs[current_idx]
304
+ return f"⏭ Skipped: **{job.get('title', '')}** @ {job_institution(job)}"
305
 
306
 
307
  def approved_display(approved: list) -> str:
 
332
  for entry in approved:
333
  job = entry.get("job") or {}
334
  title = job.get("title", "Unknown")
335
+ institution = job_institution(job) or "Unknown"
336
  safe = (
337
  f"{institution}_{title}"
338
  .replace(" ", "_").replace("/", "-").replace("\\", "-")