# innerspace/core/parser.py
# Codex
# refactor: normalize prompt generation, implement robust section parsing, and update inference parameters for stability
# 48fccfa
from __future__ import annotations
from pathlib import Path
import re
from env.config import SUPPORTED_SUFFIXES
# Match explicit ``== Label ==`` report markers even when the model emits them
# inline. The pattern is not anchored to a line start, and the whole marker
# (both ``==`` runs plus the label) is capture group 1 so a substitution can
# re-emit it unchanged. ``(?i)`` makes the label match case-insensitive.
_EXPLICIT_MARKER_PATTERN = re.compile(
    # Opening run of two or more ``=`` followed by optional blanks.
    r"(?i)(={2,}[ \t]*"
    # Accepted label spellings; longer variants come before their shorter forms.
    r"(?:dominant emotions?|emotions?|"
    r"affected life areas?|life areas?|areas|"
    r"cognitive distortions?|distortions?|"
    r"balanced reframe|cognitive reframe|reframe|"
    r"tiny next steps?|small next steps?|next steps?|"
    r"reflection)"
    # Optional blanks and the closing run of two or more ``=``.
    r"[ \t]*={2,})"
)
# Match the section heading variants the local model commonly emits.
# A heading must start a line (``(?m)^``) and may be decorated with a list
# bullet, Markdown ``#`` hashes, bold ``**`` and/or ``==`` runs. The named
# groups are:
#   label    - the recognised section name (case-insensitive),
#   trailing - whatever follows the heading on the same line (may be empty).
_SECTION_MARKER_PATTERN = re.compile(
    # Optional list bullet and Markdown heading hashes at the start of a line.
    r"(?im)^[ \t]*(?:[-*][ \t]*)?(?:#{1,6}[ \t]*)?"
    # Optional opening bold and ``==`` decoration.
    r"(?:\*\*)?(?:={2,}[ \t]*)?"
    r"(?P<label>"
    r"dominant emotions?|emotions?|"
    r"affected life areas?|life areas?|areas|"
    r"cognitive distortions?|distortions?|"
    r"balanced reframe|cognitive reframe|reframe|"
    r"tiny next steps?|small next steps?|next steps?|"
    r"reflection"
    r")"
    # The label must end on a word boundary ("emotional" is not a heading).
    r"\b"
    # Optional closing ``==`` and bold decoration.
    r"(?:[ \t]*={2,})?(?:\*\*)?[ \t]*"
    # Optional ``:`` or ``-`` separator. A bold closer placed directly after
    # the separator (``**Label:** text``) is consumed as part of the heading,
    # but only when whitespace or end of line follows it, so bold *content*
    # such as ``Label:**text**`` is left intact in ``trailing``.
    r"(?:(?::|-)(?:\*\*(?=[ \t]|$))?)?[ \t]*"
    r"(?P<trailing>[^\n]*)$"
)
# Fixed output order expected by the analysis cards.
# parse_sections() also uses each key's position in this tuple to check that
# headings appear in ascending order within one report block.
_SECTION_ORDER = (
    "emotions",
    "areas",
    "distortions",
    "reframe",
    "next_step",
    "reflection",
)
# Defaults keep the UI stable when a section is genuinely absent.
# parse_sections() starts from a copy of this mapping and overwrites only the
# sections it manages to extract, so every key here always has a value.
_SECTION_DEFAULTS = {
    "emotions": "- Emotions not resolved.",
    "areas": "- Life areas not resolved.",
    "distortions": "- Distortions not resolved.",
    "reframe": "- Balanced reframe not resolved.",
    "next_step": "- Tiny next step not resolved.",
    # The reflection fallback is a generic prompt rather than a "not resolved" bullet.
    "reflection": "How are you feeling about these thoughts today?",
}
def _resolve_file_path(file_input: object) -> Path | None:
    """Normalize an upload payload into a local ``Path``.

    Accepted shapes, checked in this order:

    * any falsy value (``None``, ``""``, ``[]``, ``{}``) -> ``None``;
    * list/tuple -> the first item that resolves to a path, else ``None``;
    * dict -> the first non-empty value among the ``"path"``, ``"name"`` and
      ``"orig_name"`` keys, else ``None``;
    * a non-string, non-path-like object with a non-empty string ``name``
      attribute (e.g. a temporary-file wrapper) -> that ``name``;
    * anything else -> ``Path(str(file_input))``.

    Args:
        file_input: The raw value delivered by the upload component.

    Returns:
        The resolved path, or ``None`` when no file was supplied.
    """
    # Empty or cleared file components should let the textbox drive analysis.
    if not file_input:
        return None

    # A list/tuple payload is searched for its first usable entry.
    if isinstance(file_input, (list, tuple)):
        for item in file_input:
            path = _resolve_file_path(item)
            if path is not None:
                return path
        return None

    # Dictionary payloads carry the location in one of several path-like fields.
    if isinstance(file_input, dict):
        for key in ("path", "name", "orig_name"):
            value = file_input.get(key)
            if value:
                return Path(str(value))
        return None

    # File-like wrappers expose their location through ``.name``; stringifying
    # them would produce the object's repr instead of a usable path. Strings
    # and path-like objects are excluded because their own ``name`` attribute
    # (when present, as on ``Path``) is only the final path component.
    if not isinstance(file_input, str) and not hasattr(file_input, "__fspath__"):
        name = getattr(file_input, "name", None)
        if isinstance(name, str) and name:
            return Path(name)

    # Plain string paths and path-like objects.
    return Path(str(file_input))
def extract_journal_text(file_path: object | None) -> str:
    """Read a journal entry from an uploaded text or markdown file.

    Args:
        file_path: Upload payload in any shape ``_resolve_file_path`` accepts.

    Returns:
        The stripped file contents; ``""`` when no file was supplied (so the
        text box can drive the analysis); or a human-readable message when
        the suffix is not in ``SUPPORTED_SUFFIXES`` or the file cannot be
        read. Read failures are reported in the returned string, not raised.
    """
    # Empty file inputs fall back to the text box.
    path = _resolve_file_path(file_path)
    if path is None:
        return ""

    # Accept only the supported private text formats.
    suffix = path.suffix.lower()
    if suffix not in SUPPORTED_SUFFIXES:
        return f"Unsupported file: {suffix}. Try a text or markdown file."

    try:
        # "utf-8-sig" decodes plain UTF-8 and also drops a leading byte-order
        # mark, which str.strip() would otherwise leave at the start of the
        # text. Undecodable bytes are still skipped rather than raising.
        return path.read_text(encoding="utf-8-sig", errors="ignore").strip()
    except Exception as exc:
        # Deliberately broad: this is the UI boundary, and any read failure
        # is surfaced to the user as text instead of crashing the request.
        return f"Error reading diary file: {exc}"
def _canonical_section(label: str) -> str:
    """Translate a heading label emitted by the model into its fixed slot key.

    The label is lower-cased and every run of non-letters is collapsed to a
    single space before keyword matching. Labels that match no keyword fall
    back to ``"reflection"``.
    """
    words = re.sub(r"[^a-z]+", " ", label.lower()).strip()

    # A bare "areas" heading contains none of the keywords below.
    if words == "areas":
        return "areas"

    # Checked in order; the first keyword found in the label decides the slot.
    keyword_slots = (
        ("emotion", "emotions"),
        ("life area", "areas"),
        ("distortion", "distortions"),
        ("reframe", "reframe"),
        ("next step", "next_step"),
    )
    return next(
        (slot for keyword, slot in keyword_slots if keyword in words),
        "reflection",
    )
def _normalize_report_markers(response: str) -> str:
    """Return *response* with each explicit report marker isolated on its own line.

    Every match of ``_EXPLICIT_MARKER_PATTERN`` is re-emitted unchanged but
    wrapped in newlines, so markers the model wrote inline become
    line-leading headings for the section parser.
    """
    # Group 1 is the complete marker text as it appeared in the response.
    isolated_marker = r"\n\1\n"
    return re.sub(_EXPLICIT_MARKER_PATTERN, isolated_marker, response)
def _clean_section_value(value: str) -> str:
    """Trim a section body and blank it when it holds only prompt placeholders.

    Returns ``""`` when every non-empty line is a bracket-only placeholder
    such as ``- [emotion]`` or ``[text].``. Otherwise returns *value* with
    leading and trailing whitespace removed; inner lines, including blank
    ones, are kept as they are.
    """
    trimmed = value.strip()
    placeholder = re.compile(r"(?:[-*][ \t]*)?\[[^\]]+\]\.?")

    content_lines = [piece.strip() for piece in trimmed.splitlines() if piece.strip()]
    if not content_lines:
        return trimmed

    # A single line of real content is enough to keep the whole value.
    for line in content_lines:
        if placeholder.fullmatch(line) is None:
            return trimmed
    return ""
def parse_sections(response: str) -> tuple[str, str, str, str, str, str]:
    """Split the model's report into the six fixed analysis-card texts.

    Explicit markers are first moved onto their own lines, then every
    heading match is mapped to a section slot. Starting from each heading in
    turn, headings are followed for as long as their slots strictly ascend
    in ``_SECTION_ORDER``; the run that yields the most non-empty sections
    wins, and a later run wins a tie. Slots the winning run does not fill
    keep their ``_SECTION_DEFAULTS`` text.

    Returns:
        ``(emotions, areas, distortions, reframe, next_step, reflection)``.
    """
    text = _normalize_report_markers(response)
    headings = list(_SECTION_MARKER_PATTERN.finditer(text))

    # Each heading owns the remainder of its own line plus everything up to
    # the start of the next heading (or the end of the text for the last one).
    stops = [later.start() for later in headings[1:]] + [len(text)]
    entries: list[tuple[int, str, str]] = []
    for heading, stop in zip(headings, stops):
        slot = _canonical_section(heading.group("label"))
        body = _clean_section_value(
            "\n".join([heading.group("trailing"), text[heading.end():stop]])
        )
        entries.append((_SECTION_ORDER.index(slot), slot, body))

    # Try every heading as the start of an ordered run and keep the best one.
    winner: dict[str, str] = {}
    winner_size = -1
    for first in range(len(entries)):
        run: dict[str, str] = {}
        highest_rank = -1
        for rank, slot, body in entries[first:]:
            # A repeated or earlier slot means this run has ended.
            if rank <= highest_rank:
                break
            # Empty bodies still advance the order but leave the default in place.
            if body:
                run[slot] = body
            highest_rank = rank
            # All six slots filled: nothing more to gain from this run.
            if len(run) == len(_SECTION_ORDER):
                break
        # ">=" so that a later run of equal size replaces an earlier one.
        if len(run) >= winner_size:
            winner, winner_size = run, len(run)

    # With no headings at all the winner is empty and only defaults remain.
    resolved = {**_SECTION_DEFAULTS, **winner}
    return (
        resolved["emotions"],
        resolved["areas"],
        resolved["distortions"],
        resolved["reframe"],
        resolved["next_step"],
        resolved["reflection"],
    )