Spaces:
Running on Zero
Running on Zero
File size: 7,216 Bytes
6c05976 48fccfa b9db94b 6c05976 48fccfa 6c05976 1db4430 48fccfa 6c05976 1db4430 6c05976 48fccfa 1db4430 6c05976 48fccfa 6c05976 48fccfa | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 | from __future__ import annotations
from pathlib import Path
import re
from env.config import SUPPORTED_SUFFIXES
# Heading labels shared by both marker patterns below; keeping a single copy
# prevents the two regexes from drifting apart when a variant is added.
_SECTION_LABEL_ALTERNATION = (
    r"dominant emotions?|emotions?|"
    r"affected life areas?|life areas?|areas|"
    r"cognitive distortions?|distortions?|"
    r"balanced reframe|cognitive reframe|reframe|"
    r"tiny next steps?|small next steps?|next steps?|"
    r"reflection"
)

# Match explicit "== Label ==" report markers even when the model emits them
# inline. Group 1 spans the whole marker so callers can re-insert it verbatim.
_EXPLICIT_MARKER_PATTERN = re.compile(
    r"(?i)(={2,}[ \t]*"
    r"(?:" + _SECTION_LABEL_ALTERNATION + r")"
    r"[ \t]*={2,})"
)

# Match the section heading variants the local model commonly emits: an
# optional bullet, markdown "#" prefix, bold "**" and/or "==" fence around a
# known label, then an optional ":" or "-" separator.
#   label    -> the heading wording as written by the model
#   trailing -> whatever follows the heading on the same line
#   bold     -> set when the heading opened with "**"
_SECTION_MARKER_PATTERN = re.compile(
    r"(?im)^[ \t]*(?:[-*][ \t]*)?(?:#{1,6}[ \t]*)?"
    r"(?P<bold>\*\*)?(?:={2,}[ \t]*)?"
    r"(?P<label>" + _SECTION_LABEL_ALTERNATION + r")"
    r"\b"
    r"(?:[ \t]*={2,})?"
    r"(?:"
    # Bold closed right after the label: "**Label**: text".
    r"\*\*[ \t]*(?::|-)?[ \t]*"
    r"|"
    # Separator first. If the heading opened bold, also swallow the closing
    # "**" that follows the separator ("**Label:** text") so it does not leak
    # into `trailing`. Without an opening "**" nothing extra is consumed, so
    # bold content such as "Label: **sad**" stays intact.
    r"[ \t]*(?::|-)?[ \t]*(?(bold)(?:\*\*[ \t]*)?)"
    r")"
    r"(?P<trailing>[^\n]*)$"
)
# Canonical section slots, listed in the order the analysis cards expect them.
_SECTION_ORDER: tuple[str, ...] = (
    "emotions", "areas", "distortions",
    "reframe", "next_step", "reflection",
)
# Placeholder text per section slot, so the UI still has something to render
# when a section is genuinely missing from the model output.
_SECTION_DEFAULTS: dict[str, str] = dict(
    emotions="- Emotions not resolved.",
    areas="- Life areas not resolved.",
    distortions="- Distortions not resolved.",
    reframe="- Balanced reframe not resolved.",
    next_step="- Tiny next step not resolved.",
    reflection="How are you feeling about these thoughts today?",
)
def _resolve_file_path(file_input: object) -> Path | None:
    """Normalize the file payload variants Gradio may hand over into a local path.

    Args:
        file_input: A path string or path-like object, a list/tuple of
            payloads, a dict with path-like fields, or an object exposing its
            location through a ``path``/``name`` attribute (for example a
            tempfile wrapper). Falsy values mean "no file".

    Returns:
        The first usable location as a ``Path``, or ``None`` when the payload
        is empty or carries no location.
    """
    # Empty or cleared file components should let the textbox drive analysis.
    if not file_input:
        return None

    # Plain strings and os.PathLike objects already are the location. Handle
    # them before the attribute probing below, because Path objects also have
    # a ``name`` attribute that holds only the final path component.
    if isinstance(file_input, str) or hasattr(file_input, "__fspath__"):
        return Path(str(file_input))

    # Gradio may return a single-item list when file mode changes; take the
    # first entry that resolves to something.
    if isinstance(file_input, (list, tuple)):
        for item in file_input:
            resolved = _resolve_file_path(item)
            if resolved:
                return resolved
        return None

    # Newer Gradio payloads can be dictionaries with path-like fields.
    if isinstance(file_input, dict):
        for key in ("path", "name", "orig_name"):
            value = file_input.get(key)
            if value:
                return Path(str(value))
        return None

    # File-like wrappers keep the location in an attribute; str() on them
    # would yield an object repr rather than a usable path.
    for attribute in ("path", "name"):
        value = getattr(file_input, attribute, None)
        if isinstance(value, str) and value:
            return Path(value)

    # Last resort: trust the object's string form, as before.
    return Path(str(file_input))
def extract_journal_text(file_path: object | None) -> str:
    """Read a journal entry from an uploaded text or markdown file.

    Args:
        file_path: Upload payload in any shape ``_resolve_file_path`` accepts,
            or a falsy value when nothing was uploaded.

    Returns:
        The stripped file contents; an empty string when no file was given
        (so the text box can drive the analysis); otherwise a human-readable
        message describing the unsupported suffix or the read failure.
        Errors raised while reading are reported in the return value instead
        of being propagated.
    """
    # Empty file inputs fall back to the text box.
    path = _resolve_file_path(file_path)
    if path is None:
        return ""

    try:
        # Accept only the supported private text formats.
        suffix = path.suffix.lower()
        if suffix not in SUPPORTED_SUFFIXES:
            return f"Unsupported file: {suffix}. Try a text or markdown file."
        # "utf-8-sig" reads plain UTF-8 and additionally drops a leading byte
        # order mark. str.strip() does not treat U+FEFF as whitespace, so with
        # plain "utf-8" the BOM would survive at the start of the entry.
        return path.read_text(encoding="utf-8-sig", errors="ignore").strip()
    except Exception as e:
        # Deliberate catch-all at the UI boundary: any failure becomes a
        # message for the user rather than a crash.
        return f"Error reading diary file: {e}"
def _canonical_section(label: str) -> str:
    """Map a model heading variant onto one of the app's fixed output slots.

    Args:
        label: Heading wording as emitted by the model, in any casing and
            with any punctuation.

    Returns:
        One of "emotions", "areas", "distortions", "reframe", "next_step" or
        "reflection". Labels that match no keyword fall back to "reflection".
    """
    # Lower-case and collapse every non-letter run into a single space so the
    # keyword checks are insensitive to punctuation, digits and underscores.
    normalized = re.sub(r"[^a-z]+", " ", label.lower()).strip()

    # The bare word "areas" carries no "life area" keyword, so it needs an
    # exact check of its own.
    if normalized == "areas":
        return "areas"

    # First keyword found wins; the order mirrors the section order.
    keyword_to_section = (
        ("emotion", "emotions"),
        ("life area", "areas"),
        ("distortion", "distortions"),
        ("reframe", "reframe"),
        ("next step", "next_step"),
    )
    for keyword, section in keyword_to_section:
        if keyword in normalized:
            return section

    return "reflection"
def _normalize_report_markers(response: str) -> str:
    """Return ``response`` with each explicit section marker moved onto its own line."""
    # Re-emit the captured marker (group 1) wrapped in newlines so that
    # markers written inline with other text become standalone lines.
    isolated_marker = r"\n\1\n"
    return _EXPLICIT_MARKER_PATTERN.sub(isolated_marker, response)
# A line that is nothing but a bracketed prompt placeholder, optionally
# bulleted and optionally followed by a period, e.g. "- [list emotions].".
_PLACEHOLDER_LINE_PATTERN = re.compile(r"(?:[-*][ \t]*)?\[[^\]]+\]\.?")


def _clean_section_value(value: str) -> str:
    """Trim a section body and drop it when it is only prompt placeholders.

    Args:
        value: Raw text captured for one section.

    Returns:
        ``value`` with leading and trailing whitespace removed (interior blank
        lines are kept), or an empty string when every non-blank line is a
        bracket-only placeholder.
    """
    cleaned = value.strip()
    # Blank lines are ignored only for the placeholder check; they are not
    # removed from the returned text.
    non_blank_lines = [line.strip() for line in cleaned.splitlines() if line.strip()]
    if non_blank_lines and all(
        _PLACEHOLDER_LINE_PATTERN.fullmatch(line) for line in non_blank_lines
    ):
        return ""
    return cleaned
def parse_sections(response: str) -> tuple[str, str, str, str, str, str]:
    """Extract the CBT report sections from the model's structured response.

    The response is scanned for section headings. Among all runs of
    consecutive headings whose sections appear in strictly ascending
    ``_SECTION_ORDER``, the run that yields the most non-empty sections is
    used; on a tie the later run wins. Sections that are missing or empty
    keep their ``_SECTION_DEFAULTS`` text.

    Args:
        response: Raw model output. A falsy value is treated as empty text.

    Returns:
        A 6-tuple ``(emotions, areas, distortions, reframe, next_step,
        reflection)``.
    """
    sections = dict(_SECTION_DEFAULTS)

    # Normalize explicit markers so merged sections still split cleanly.
    response = _normalize_report_markers(response or "")
    matches = list(_SECTION_MARKER_PATTERN.finditer(response))

    # Resolve every heading exactly once. A heading's body always runs up to
    # the next heading of any kind, so its cleaned value does not depend on
    # which starting point is being evaluated below.
    headings: list[tuple[str, int, str]] = []
    for position, heading in enumerate(matches):
        section = _canonical_section(heading.group("label"))
        body_end = (
            matches[position + 1].start()
            if position + 1 < len(matches)
            else len(response)
        )
        # The value is the rest of the heading line plus the following body.
        value = _clean_section_value(
            "\n".join([heading.group("trailing"), response[heading.end():body_end]])
        )
        headings.append((section, _SECTION_ORDER.index(section), value))

    # Try every heading as the start of an ordered run of sections.
    best_values: dict[str, str] = {}
    for start_index in range(len(headings)):
        values: dict[str, str] = {}
        last_order_index = -1
        for section, order_index, value in headings[start_index:]:
            # A repeated or earlier section ends the run.
            if order_index <= last_order_index:
                break
            # Empty sections still advance the order but keep their default.
            if value:
                values[section] = value
            last_order_index = order_index
            # Stop once the run looks complete.
            if len(values) == len(_SECTION_ORDER):
                break
        # ">=" keeps the last of equally good runs.
        if len(values) >= len(best_values):
            best_values = values

    sections.update(best_values)
    return (
        sections["emotions"],
        sections["areas"],
        sections["distortions"],
        sections["reframe"],
        sections["next_step"],
        sections["reflection"],
    )
|