Spaces:

halsabbah
/

depscreen

Sleeping

App Files Files Community

depscreen / app /utils /json_extract.py

halsabbah

deploy: sync code from GitHub main

ebadfda verified about 1 month ago

raw

history blame contribute delete

2.53 kB

	"""Robust JSON extraction from LLM responses.

	Handles common LLM output artifacts: <think> tags, markdown fences,
	preamble text, and truncated responses.
	"""

	import json
	import logging
	import re

	logger = logging.getLogger(__name__)


	def extract_json(text: str) -> dict:
	    """Extract JSON from an LLM response that may contain artifacts or truncation.

	    Strategies (in order):
	    1. Strip <think> tags and markdown fences, try direct parse.
	    2. Depth-tracking brace scan for the first complete top-level ``{...}``
	       span that parses.
	    3. Truncation repair: close the unterminated string (if any) and every
	       still-open object/array, innermost first, then retry.

	    Args:
	        text: Raw LLM response text.

	    Returns:
	        The parsed JSON value. Strategies 2 and 3 always yield a dict;
	        strategy 1 returns whatever ``json.loads`` produces for the cleaned
	        text, which can be a list or scalar if the response is not an object.

	    Raises:
	        ValueError: If ``text`` is empty or no strategy yields valid JSON.
	    """
	    if not text:
	        raise ValueError("Empty response from LLM")

	    cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
	    # Strip markdown code fences (```json ... ``` or ``` ... ```)
	    cleaned = re.sub(r"```(?:json)?\s*\n?", "", cleaned).strip()
	    cleaned = cleaned.rstrip("`").strip()

	    # Strategy 1: direct parse
	    try:
	        return json.loads(cleaned)
	    except json.JSONDecodeError:
	        pass

	    # Strategy 2: depth-tracking scan for complete objects.
	    # `closers` is a stack of the closing delimiters still owed by the object
	    # currently being scanned. Its bottom entry is always "}", and it is empty
	    # whenever the scan is outside any object.
	    closers = []
	    start = None
	    in_string = False
	    escape_next = False

	    for i, ch in enumerate(cleaned):
	        if escape_next:
	            escape_next = False
	            continue
	        if ch == "\\":
	            if in_string:
	                escape_next = True
	            continue
	        if ch == '"':
	            in_string = not in_string
	            continue
	        if in_string:
	            continue
	        if ch == "{":
	            if not closers:
	                start = i
	            closers.append("}")
	        elif ch == "[":
	            # Arrays only matter inside an object (needed for repair below).
	            if closers:
	                closers.append("]")
	        elif ch == "]":
	            if closers and closers[-1] == "]":
	                closers.pop()
	        elif ch == "}":
	            if not closers:
	                # Stray closing brace in surrounding prose: ignore it so the
	                # depth never goes negative and hides later objects.
	                continue
	            # Discard any unmatched "[" so brace depth stays accurate.
	            while closers[-1] == "]":
	                closers.pop()
	            closers.pop()
	            if not closers:
	                try:
	                    return json.loads(cleaned[start : i + 1])
	                except json.JSONDecodeError:
	                    start = None

	    # Strategy 3: truncation repair — close whatever the scan left open
	    if closers and start is not None:
	        fragment = cleaned[start:]
	        if escape_next:
	            # Cut off right after a backslash: drop it, otherwise it would
	            # escape the closing quote appended below.
	            fragment = fragment[:-1]
	        # Use the scanner's string state rather than counting quote characters,
	        # which miscounts escaped quotes (\") inside string values.
	        closing_quote = '"' if in_string else ""
	        repaired = fragment + closing_quote + "".join(reversed(closers))
	        try:
	            result = json.loads(repaired)
	        except json.JSONDecodeError:
	            pass
	        else:
	            logger.warning(
	                "Repaired truncated JSON (closed %d brace(s)): %s...",
	                closers.count("}"),
	                fragment[:80],
	            )
	            return result

	    raise ValueError(f"No valid JSON found in LLM response: {text[:200]}")