Piyush1225's picture
push
5df8a73
"""Utility helpers for the visualize pipeline."""
from __future__ import annotations
import json
import re
from typing import Any
def extract_json_object(text: str) -> dict[str, Any]:
    """Pull a JSON object out of raw model output.

    Empty or whitespace-only input yields ``{}``.  Otherwise the contents of
    every triple-backtick fence are tried in order, followed by the whole
    stripped text, and finally the span running from the first ``{`` to the
    last ``}``.

    Raises:
        json.JSONDecodeError: if none of those attempts produces an object.
    """
    cleaned = (text or "").strip()
    if not cleaned:
        return {}

    fence_bodies = re.findall(r"```(?:json)?\s*([\s\S]*?)\s*```", cleaned)
    for chunk in [*fence_bodies, cleaned]:
        try:
            value = json.loads(chunk)
        except json.JSONDecodeError:
            # Not clean JSON: fall back to decoding a leading/embedded object.
            value = _decode_first_json_object(chunk)
            if value is not None:
                return value
        else:
            # Valid JSON that is not an object (list, number, ...) is skipped.
            if isinstance(value, dict):
                return value

    open_at = cleaned.find("{")
    close_at = cleaned.rfind("}")
    if 0 <= open_at < close_at:
        braced = cleaned[open_at : close_at + 1]
        try:
            return json.loads(braced)
        except json.JSONDecodeError:
            recovered = _decode_first_json_object(braced)
            if recovered is not None:
                return recovered

    raise json.JSONDecodeError("No JSON object found", cleaned, 0)
def _decode_first_json_object(text: str) -> dict[str, Any] | None:
decoder = json.JSONDecoder()
stripped = (text or "").lstrip()
if not stripped:
return None
starts = [0]
brace_index = stripped.find("{")
if brace_index > 0:
starts.append(brace_index)
for start in starts:
try:
parsed, _end = decoder.raw_decode(stripped[start:])
except json.JSONDecodeError:
continue
if isinstance(parsed, dict):
return parsed
return None
def extract_code_block(text: str, language: str = "") -> str:
    """Extract a fenced code block from LLM output.

    If *language* is given the block must start with that tag (compared
    case-insensitively); otherwise any triple-backtick fence is accepted,
    whatever its tag (``python3``, ``c++``, ``objective-c``, or none at all).

    Args:
        text: Raw model output; ``None`` or empty is treated as ``""``.
        language: Required fence tag, or ``""`` to accept any fence.

    Returns:
        The stripped contents of the first matching fence, or the whole
        stripped *text* when no matching fence is found.
    """
    if language:
        pattern = rf"```{re.escape(language)}\s*\n([\s\S]*?)\n```"
    else:
        # Fence tags are not purely alphabetic (python3, c++, c#), so accept
        # any run of characters that are neither whitespace nor backticks.
        pattern = r"```[^\s`]*\s*\n([\s\S]*?)\n```"
    match = re.search(pattern, text or "", re.IGNORECASE)
    if match is None:
        return (text or "").strip()
    return match.group(1).strip()
# Names exported by `from <module> import *`; `_decode_first_json_object`
# is intentionally left out and stays a private helper.
__all__ = [
"extract_code_block",
"extract_json_object",
]