Spaces:
Running
Running
| import re, json | |
| from typing import List | |
| # Heuristic normalization & role extraction. If roles unknown, we keep all text. | |
| SPEAKER_RE = re.compile(r"^(user|client|customer|advisor|agent|assistant|model)\s*[:\-]", re.I) | |
| def normalize_conversation(text: str) -> List[str]: | |
| lines = [l.strip() for l in text.splitlines() if l.strip()] | |
| return lines | |
| def extract_model_utterances(lines: List[str], prefer_llm_provider: str = None) -> str: | |
| model_lines = [] | |
| user_aliases = ("user", "client", "customer") | |
| model_aliases = ("advisor", "agent", "assistant", "model") | |
| for l in lines: | |
| m = SPEAKER_RE.match(l) | |
| if m: | |
| who = m.group(1).lower() | |
| if who in model_aliases: | |
| model_lines.append(SPEAKER_RE.sub("", l).strip()) | |
| # if user line, skip | |
| else: | |
| # Untagged; keep it (we prefer to include content to avoid missing eval) | |
| model_lines.append(l) | |
| return "\n".join(model_lines).strip() | |