PhDScout / agent /utils.py
HipFil98's picture
refactor: remove dead code, extract shared job helpers
cd035f4
"""Shared utilities for parsing LLM responses."""
from __future__ import annotations
import json
import re
from typing import Any
def strip_fences(text: str) -> str:
    """Remove the wrapping markdown code fence that models sometimes emit.

    Strips one leading fence (three backticks, optionally tagged ``json`` in
    any letter case) and one trailing fence, together with the whitespace
    around them. Text that carries no fence is returned with only its
    leading and trailing whitespace removed.

    Args:
        text: Raw model output.

    Returns:
        The text without the outer fence markers.
    """
    # Case-insensitive tag match: "```JSON" must not leave a stray "JSON"
    # at the start of the payload.
    unfenced = re.sub(
        r"^```(?:json)?\s*", "", text.strip(), flags=re.IGNORECASE
    )
    # The closing fence is only removed when it is the very last thing left.
    unfenced = re.sub(r"\s*```$", "", unfenced)
    return unfenced.strip()
def parse_json(raw: str) -> dict[str, Any] | None:
    """Parse a JSON object from a model response, tolerating fences and partial wrapping.

    The response is cleaned with ``strip_fences`` and parsed as a whole. If
    that fails, the span from the first ``{`` to the last ``}`` is tried
    instead, which covers responses that surround the object with prose.

    Args:
        raw: Raw model output. Non-string input (e.g. ``None``) is treated
            as unparseable rather than raising.

    Returns:
        The decoded JSON object, or ``None`` if parsing fails entirely.
        Valid JSON whose top level is not an object (list, string, number,
        ...) also yields ``None`` so the result always matches the declared
        return type.
    """
    if not isinstance(raw, str):
        return None

    clean = strip_fences(raw)
    try:
        parsed = json.loads(clean)
    except json.JSONDecodeError:
        # Fall back to the widest brace-delimited span in the text.
        match = re.search(r"\{.*\}", clean, re.DOTALL)
        if match is None:
            return None
        try:
            parsed = json.loads(match.group())
        except json.JSONDecodeError:
            return None

    # json.loads happily returns lists/scalars; callers are promised a dict.
    return parsed if isinstance(parsed, dict) else None
def job_institution(job: dict) -> str:
    """Look up the ``"institution"`` entry of *job*.

    A missing key or any falsy stored value (``None``, ``""``) is reported
    as an empty string.
    """
    institution = job.get("institution")
    if not institution:
        return ""
    return institution
def job_description(job: dict, max_chars: int = 3000) -> str:
    """Fetch the ``"description"`` entry of *job*, cut to *max_chars* characters.

    When the key is missing or its value is falsy, the placeholder text
    "No description provided." is used instead; the placeholder is subject
    to the same length cut.
    """
    text = job.get("description")
    if not text:
        text = "No description provided."
    return text[:max_chars]