Spaces:

build-small-hackathon
/

innerspace

Running on Zero

Codex

refactor: normalize prompt generation, implement robust section parsing, and update inference parameters for stability

48fccfa 4 days ago

7.22 kB

	from __future__ import annotations

	from pathlib import Path
	import re
	from env.config import SUPPORTED_SUFFIXES


	# Match explicit report markers even when the model emits them inline.
	_EXPLICIT_MARKER_PATTERN = re.compile(
	r"(?i)(={2,}[ \t]*"
	r"(?:dominant emotions?\|emotions?\|"
	r"affected life areas?\|life areas?\|areas\|"
	r"cognitive distortions?\|distortions?\|"
	r"balanced reframe\|cognitive reframe\|reframe\|"
	r"tiny next steps?\|small next steps?\|next steps?\|"
	r"reflection)"
	r"[ \t]*={2,})"
	)

	# Match the section heading variants the local model commonly emits.
	_SECTION_MARKER_PATTERN = re.compile(
	r"(?im)^[ \t](?:[-][ \t])?(?:#{1,6}[ \t])?"
	r"(?:\\)?(?:={2,}[ \t]*)?"
	r"(?P<label>"
	r"dominant emotions?\|emotions?\|"
	r"affected life areas?\|life areas?\|areas\|"
	r"cognitive distortions?\|distortions?\|"
	r"balanced reframe\|cognitive reframe\|reframe\|"
	r"tiny next steps?\|small next steps?\|next steps?\|"
	r"reflection"
	r")"
	r"\b"
	r"(?:[ \t]={2,})?(?:\\)?[ \t](?::\|-)?[ \t]*"
	r"(?P<trailing>[^\n]*)$"
	)

	# Fixed output order expected by the analysis cards.
	_SECTION_ORDER = (
	"emotions",
	"areas",
	"distortions",
	"reframe",
	"next_step",
	"reflection",
	)

	# Defaults keep the UI stable when a section is genuinely absent.
	_SECTION_DEFAULTS = {
	"emotions": "- Emotions not resolved.",
	"areas": "- Life areas not resolved.",
	"distortions": "- Distortions not resolved.",
	"reframe": "- Balanced reframe not resolved.",
	"next_step": "- Tiny next step not resolved.",
	"reflection": "How are you feeling about these thoughts today?",
	}


	def _resolve_file_path(file_input: object) -> Path \| None:
	"""Normalizes Gradio file payload variants into a local path."""
	# Empty or cleared file components should let the textbox drive analysis.
	if not file_input:
	return None

	# Gradio may return a single-item list when file mode changes.
	if isinstance(file_input, (list, tuple)):
	for item in file_input:
	path = _resolve_file_path(item)
	if path:
	return path
	return None

	# Newer Gradio payloads can be dictionaries with path-like fields.
	if isinstance(file_input, dict):
	for key in ("path", "name", "orig_name"):
	value = file_input.get(key)
	if value:
	return Path(str(value))
	return None

	# Local runs usually pass a string path from the upload component.
	return Path(str(file_input))


	def extract_journal_text(file_path: object | None) -> str:
	    """Read a journal entry from an uploaded text or markdown file.

	    Args:
	        file_path: The upload component payload; it is normalized with
	            ``_resolve_file_path`` before use.

	    Returns:
	        The stripped file content. An empty string is returned when no file
	        was supplied, so the caller can fall back to the text box. For an
	        unsupported suffix or a failed read, a human-readable message is
	        returned instead of raising.
	    """
	    # Empty file inputs fall back to the text box.
	    path = _resolve_file_path(file_path)
	    if path is None:
	        return ""

	    # Accept only the supported private text formats.
	    suffix = path.suffix.lower()
	    if suffix not in SUPPORTED_SUFFIXES:
	        return f"Unsupported file: {suffix}. Try a text or markdown file."

	    try:
	        # Undecodable bytes are dropped rather than failing the whole read.
	        return path.read_text(encoding="utf-8", errors="ignore").strip()
	    except (OSError, ValueError) as e:
	        # OSError covers missing/unreadable files; ValueError covers malformed
	        # paths (e.g. embedded NUL bytes).
	        return f"Error reading diary file: {e}"


	def _canonical_section(label: str) -> str:
	"""Maps a model heading variant onto the app's fixed output slots."""
	normalized = re.sub(r"[^a-z]+", " ", label.lower()).strip()
	if "emotion" in normalized:
	return "emotions"
	if "life area" in normalized or normalized == "areas":
	return "areas"
	if "distortion" in normalized:
	return "distortions"
	if "reframe" in normalized:
	return "reframe"
	if "next step" in normalized:
	return "next_step"
	return "reflection"


	def _normalize_report_markers(response: str) -> str:
	    """Place explicit section markers on their own lines before parsing.

	    Every match of ``_EXPLICIT_MARKER_PATTERN`` is wrapped in newlines, so a
	    marker emitted in the middle of a line starts a line of its own.

	    Args:
	        response: Raw model output.

	    Returns:
	        The response with each explicit marker surrounded by newlines.
	    """
	    return _EXPLICIT_MARKER_PATTERN.sub(r"\n\1\n", response)


	def _clean_section_value(value: str) -> str:
	"""Removes empty lines and leftover bracket-only prompt placeholders."""
	cleaned = value.strip()
	lines = [line.strip() for line in cleaned.splitlines() if line.strip()]
	if lines and all(
	re.fullmatch(r"(?:[-][ \t])?\[[^\]]+\]\.?", line) for line in lines
	):
	return ""
	return cleaned


	def parse_sections(response: str) -> tuple[str, str, str, str, str, str]:
	    """Extract the six CBT report sections from the model's text response.

	    Inline ``== Label ==`` markers are first moved onto their own lines, then
	    headings are located with ``_SECTION_MARKER_PATTERN``. Every heading is
	    tried as the start of a run of headings in strictly ascending
	    ``_SECTION_ORDER``; the run that yields the most non-empty sections wins,
	    and ties go to the later run.

	    Args:
	        response: Raw model output. An empty or ``None`` response yields the
	            default texts.

	    Returns:
	        The emotions, areas, distortions, reframe, next-step and reflection
	        texts, in that order. Sections that are missing, empty, or made up
	        only of placeholders keep their ``_SECTION_DEFAULTS`` text.
	    """
	    sections = dict(_SECTION_DEFAULTS)

	    # Guard against empty output so the regex helpers only ever see text.
	    matches = []
	    if response:
	        # Normalize explicit markers so merged sections still split cleanly.
	        response = _normalize_report_markers(response)
	        matches = list(_SECTION_MARKER_PATTERN.finditer(response))

	    best_values: dict[str, str] = {}
	    best_count = -1

	    # Try every heading as a possible start of an ordered block. With no
	    # headings the loop is skipped and the defaults are returned unchanged.
	    for start_index in range(len(matches)):
	        values: dict[str, str] = {}
	        last_order_index = -1

	        for current_index in range(start_index, len(matches)):
	            current = matches[current_index]
	            section = _canonical_section(current.group("label"))
	            section_order_index = _SECTION_ORDER.index(section)

	            # A repeated or earlier section ends the ordered block.
	            if section_order_index <= last_order_index:
	                break

	            # The section body runs up to the next heading (or end of text).
	            if current_index + 1 < len(matches):
	                next_start = matches[current_index + 1].start()
	            else:
	                next_start = len(response)

	            # Combine same-line text after the heading with the lines below.
	            value = _clean_section_value(
	                "\n".join(
	                    [current.group("trailing"), response[current.end():next_start]]
	                )
	            )
	            if value:
	                values[section] = value
	            # NOTE(review): the order cursor advances even for an empty
	            # section so the heading still holds its position; confirm this
	            # matches the intended nesting of the original statement.
	            last_order_index = section_order_index

	            # Stop once every slot is filled.
	            if len(values) == len(_SECTION_ORDER):
	                break

	        # ">=" prefers the last block among equally complete candidates.
	        if len(values) >= best_count:
	            best_values = values
	            best_count = len(values)

	    sections.update(best_values)

	    # Return extracted sections in the expected order.
	    return (
	        sections["emotions"],
	        sections["areas"],
	        sections["distortions"],
	        sections["reframe"],
	        sections["next_step"],
	        sections["reflection"],
	    )