File size: 1,207 Bytes
7419404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd035f4
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""Shared utilities for parsing LLM responses."""

from __future__ import annotations

import json
import re
from typing import Any


def strip_fences(text: str) -> str:
    """Remove a markdown code fence wrapped around a model response.

    An opening fence at the very start of the text is dropped together with
    its language tag: any tag (``json``, ``JSON``, ``python``, ...) when the
    tag is followed by a line break, or just ``json`` (in any letter case)
    when the payload continues on the same line. A closing fence at the very
    end of the text is dropped as well. Fences elsewhere are left alone.

    Args:
        text: Raw model output, possibly wrapped in a fenced code block.

    Returns:
        The text without the surrounding fence and without leading or
        trailing whitespace.
    """
    # First alternative: fence plus an optional language tag that ends the
    # line. Second alternative keeps the same-line form ("```json {...}```")
    # working without mistaking the payload itself for a language tag.
    opening = r"^```(?:[\w+-]*[ \t]*\r?\n|(?:json)?)\s*"
    text = re.sub(opening, "", text.strip(), flags=re.IGNORECASE)
    text = re.sub(r"\s*```$", "", text)
    return text.strip()


def parse_json(raw: str | None) -> dict[str, Any] | None:
    """Parse a JSON object from a model response.

    Code fences are stripped first. If the remaining text is not valid JSON
    as a whole, the object starting at the first ``{`` is decoded and any
    text after its closing brace is ignored, so trailing commentary (even
    commentary that itself contains braces) does not break parsing.

    Args:
        raw: The model's response text. ``None`` (or any non-string) is
            treated as an unparseable response.

    Returns:
        The decoded JSON object, or None when no object can be decoded or
        when the response is valid JSON of another type (list, number,
        string, ...).
    """
    if not isinstance(raw, str):
        return None

    clean = strip_fences(raw)
    try:
        parsed = json.loads(clean)
    except json.JSONDecodeError:
        start = clean.find("{")
        if start == -1:
            return None
        try:
            # raw_decode stops at the end of the first complete value, so
            # whatever follows the object is simply not looked at.
            parsed, _ = json.JSONDecoder().raw_decode(clean, start)
        except json.JSONDecodeError:
            return None

    # Honour the declared contract: only JSON objects count as a result.
    return parsed if isinstance(parsed, dict) else None


def job_institution(job: dict) -> str:
    """Look up the job's ``"institution"`` entry.

    Returns the stored value, or an empty string when the key is missing or
    its value is falsy (``None``, ``""``, ...).
    """
    institution = job.get("institution")
    if not institution:
        return ""
    return institution


def job_description(job: dict, max_chars: int = 3000) -> str:
    """Fetch the job's ``"description"`` entry, cut to ``max_chars``.

    When the key is missing or its value is falsy, the placeholder text
    "No description provided." is used instead; the placeholder is subject
    to the same slice as a real description.
    """
    text = job.get("description")
    if not text:
        text = "No description provided."
    return text[:max_chars]