| """Shared utilities for parsing LLM responses.""" |
|
|
| from __future__ import annotations |
|
|
| import json |
| import re |
| from typing import Any |
|
|
|
|
def strip_fences(text: str) -> str:
    """Remove a wrapping markdown code fence that models sometimes emit.

    Surrounding whitespace is trimmed, then a leading fence marker and a
    trailing fence marker are removed if present. The opening fence may
    carry any language tag on its own line (``json``, ``JSON``,
    ``python``, ...). A ``json`` tag glued directly to the payload with no
    newline in between is also tolerated.

    Args:
        text: Raw model output, possibly wrapped in a fenced code block.

    Returns:
        The text without the outer fence markers, stripped of leading and
        trailing whitespace. Text that has no fence is returned stripped.
    """
    # First alternative: an arbitrary info string, accepted only when it is
    # immediately followed by a line break, so single-line payloads such as
    # a fenced bare `true` are never mistaken for a language tag.
    # Second alternative: the literal `json` tag directly followed by content.
    text = re.sub(
        r"^```(?:[\w.+-]+[ \t]*(?=\r?\n)|json)?\s*",
        "",
        text.strip(),
    )
    # Drop the closing fence together with any whitespace preceding it.
    text = re.sub(r"\s*```$", "", text)
    return text.strip()
|
|
|
|
def parse_json(raw: str) -> dict[str, Any] | None:
    """Parse a JSON object from a model response.

    Code fences are removed with ``strip_fences`` first. If the remaining
    text is not itself a JSON document, the text is scanned for the first
    embedded JSON object, so prose before or after the object is tolerated.

    Args:
        raw: The raw model response.

    Returns:
        The decoded JSON object as a dict, or None when ``raw`` is not a
        string, when no JSON object can be decoded, or when the response is
        valid JSON whose top-level value is not an object (list, string,
        number, ...).
    """
    if not isinstance(raw, str):
        # e.g. a missing/None completion; there is nothing to parse.
        return None

    clean = strip_fences(raw)
    try:
        parsed = json.loads(clean)
    except json.JSONDecodeError:
        pass
    else:
        # Honour the declared return type: only JSON objects are returned.
        return parsed if isinstance(parsed, dict) else None

    # Fallback: try to decode an object starting at each "{" in turn.
    # raw_decode stops at the end of the first complete value, so trailing
    # prose (even prose containing braces) does not break parsing.
    decoder = json.JSONDecoder()
    start = clean.find("{")
    while start != -1:
        try:
            candidate, _end = decoder.raw_decode(clean, start)
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(candidate, dict):
                return candidate
        start = clean.find("{", start + 1)
    return None
|
|
|
|
def job_institution(job: dict) -> str:
    """Look up the ``institution`` field of *job*.

    A missing key or a falsy value (``None``, ``""``) yields an empty
    string; any other stored value is returned as-is.
    """
    name = job.get("institution")
    if not name:
        return ""
    return name
|
|
|
|
def job_description(job: dict, max_chars: int = 3000) -> str:
    """Fetch the ``description`` field of *job*, cut to *max_chars*.

    When the field is missing or falsy, a fixed placeholder sentence is
    used instead; the placeholder is subject to the same slice.
    """
    text = job.get("description")
    if not text:
        text = "No description provided."
    return text[:max_chars]
|
|