Spaces:

build-small-hackathon
/

openher

Running

App Files Files Community

openher / agent /parser.py

kellyxiaowei

Deploy OpenHer Gradio Space — gemma-4-E4B served on Modal

dff25f7 verified 11 days ago

Raw

History Blame Contribute Delete

3.14 kB

	"""
	LLM output parsing utilities for the persona engine.

	Parses structured output from the single-pass Actor:
	【内心独白】 → monologue
	【最终回复】 → reply
	【表达方式】 → modality (语音/文字/照片/…)

	Supports both Chinese and English section headers.
	"""

	from __future__ import annotations

	import re


	# -- Modality Parsing --
	# No hardcoded map — registered SKILLs are the source of truth.
	# Parser only extracts the raw modality keyword from LLM output.

	def _parse_modality(raw: str) -> str:
	"""Extract primary modality keyword from Actor output.

	Returns the first token before any punctuation/space.
	Skill engine decides if it maps to a registered skill.
	"""
	import re
	cleaned = raw.strip().lstrip("\uff1a: \n")
	# Take first token before punctuation (。.，,、/ or whitespace)
	match = re.match(r'[\w\u4e00-\u9fff]+', cleaned)
	return match.group(0) if match else "文字"


	# -- Section header regex: Chinese 【】 and English [] formats --
	_SECTION_RE = re.compile(
	r'(?:【(?P<zh>内心独白\|最终回复\|表达方式)】'
	r'\|\[(?P<en>Inner Monologue\|Final Reply\|Expression Mode)\])'
	)
	_TAG_MAP = {
	'内心独白': 'monologue', 'Inner Monologue': 'monologue',
	'最终回复': 'reply', 'Final Reply': 'reply',
	'表达方式': 'modality', 'Expression Mode': 'modality',
	}


	def extract_reply(raw: str) -> tuple[str, str, str]:
	"""Extract monologue, reply, and modality from Actor output.

	Supports both Chinese (【最终回复】) and English ([Final Reply]) section headers.
	Returns canonical Chinese modality key for internal consistency.
	"""
	# Strip <think>...</think> blocks from reasoning models (e.g. MiniMax M2.7)
	raw = re.sub(r'<think>.*?</think>', '', raw, flags=re.DOTALL).strip()

	sections: dict[str, str] = {}
	matches = list(_SECTION_RE.finditer(raw))
	for i, m in enumerate(matches):
	tag = m.group('zh') or m.group('en')
	key = _TAG_MAP[tag]
	start = m.end()
	end = matches[i + 1].start() if i + 1 < len(matches) else len(raw)
	sections[key] = raw[start:end].strip()

	monologue = sections.get('monologue', '')
	reply = sections.get('reply', '')
	modality_raw = sections.get('modality', '')

	# Parse modality with bilingual map
	modality = _parse_modality(modality_raw) if modality_raw else "文字"

	# Silence short-circuit: Actor chose not to speak
	if modality in ("静默", "silence", "Silence"):
	return monologue, "", "静默"

	if not reply:
	# Fallback: strip action descriptions
	reply = re.sub(r'[(（(][^)）)]*[)）)]', '', raw).strip()
	reply = re.sub(r'\[^]+\*', '', reply).strip()
	if not reply:
	reply = "..."
	else:
	# Normal path: strip action tags like 顿了顿 / ＊顿了顿＊ and （沉默） from reply
	reply = re.sub(r'[＊][^＊]+[*＊]', '', reply).strip()
	reply = re.sub(r'[（(][^（(）)]{1,40}[）)]', '', reply).strip()
	if not reply:
	reply = "..."

	return monologue, reply, modality