File size: 12,561 Bytes
a1bf219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
"""Markdown validation utility for agent response quality assurance.

This module validates that agent responses follow the structured markdown format
with proper sections, tables, bullet points, and numbered summaries.

Feature 004 - User Story 3: Enhanced Agent Dialog Content Quality
"""

import logging
import re
from dataclasses import dataclass
from typing import List, Optional

logger = logging.getLogger(__name__)


@dataclass
class ValidationResult:
    """Outcome of a single markdown validation run.

    Attributes:
        is_valid: Overall pass/fail verdict for the validated content.
        score: Quality score expressed as a percentage in the 0-100 range.
        issues: Descriptions of problems found in the content.
        warnings: Descriptions of softer, advisory findings.
        sections_found: Titles of the section headings that were detected.
        has_tables: Whether the table check passed.
        has_bullets: Whether the bullet-point check passed.
        has_numbered_list: Whether the numbered-summary check passed.
        has_conclusion: Whether the conclusion check passed.
    """

    # Verdict and aggregate score (0-100 percentage).
    is_valid: bool
    score: float

    # Human-readable findings.
    issues: list[str]
    warnings: list[str]

    # Per-check details.
    sections_found: list[str]
    has_tables: bool
    has_bullets: bool
    has_numbered_list: bool
    has_conclusion: bool


class MarkdownValidator:
    """
    Validates agent response markdown structure and content quality.

    Checks for:
    - Structured sections with ## headings
    - Data tables with markdown table syntax (|)
    - Bullet-pointed insights (- or *)
    - Numbered summary (1., 2., 3.)
    - Conclusion section with recommendation

    Each passed check adds a fixed number of points to a 0-100 score:
    sections 25, tables 25, bullets 20, numbered summary 15, conclusion 15.
    """

    # All line-oriented patterns use [ \t] instead of \s: under re.MULTILINE,
    # \s also matches "\n", which lets a single match run across line breaks.
    _SECTION_RE = re.compile(r"^##[ \t]+(.+)$", re.MULTILINE)
    _ANY_HEADING_RE = re.compile(r"^#{1,6}[ \t]+(.+)$", re.MULTILINE)
    _BULLET_RE = re.compile(r"^[-*][ \t]+(.+)$", re.MULTILINE)
    _NUMBERED_RE = re.compile(r"^(\d+)\.[ \t]+\S", re.MULTILINE)

    # A table delimiter row: every cell is dashes with optional alignment
    # colons, e.g. |---|:---:| or | --- | --- (trailing pipe optional).
    _TABLE_DIVIDER_RE = re.compile(
        r"\|(?:[ \t]*:?-+:?[ \t]*\|)+(?:[ \t]*:?-+:?)?"
    )

    # Keywords that mark a heading as a conclusion-style section.
    _CONCLUSION_KEYWORDS = (
        "conclusion",
        "recommendation",
        "final decision",
        "summary",
        "investment decision",
        "trading implication",
    )

    # Decision vocabulary, anchored on word boundaries so that "hold" does not
    # fire on "shareholder"/"threshold" or "sell" on "upsell". The verbs
    # recommend/advise/suggest also accept suffixes (recommendation, advised).
    _DECISION_RE = re.compile(
        r"\b(?:(?:buy|sell|hold)(?:s|ing)?|bullish|bearish|neutral)\b"
        r"|\b(?:recommend|advise|suggest)\w*",
        re.IGNORECASE,
    )

    def __init__(self, strict_mode: bool = False):
        """
        Initialize the markdown validator.

        Args:
            strict_mode: If True, all checks must pass for validation to succeed.
                        If False, validation passes with warnings for minor issues.
        """
        self.strict_mode = strict_mode

    def validate(
        self, content: str, agent_type: str | None = None
    ) -> ValidationResult:
        """
        Validate markdown content structure and format.

        Args:
            content: Markdown content to validate
            agent_type: Optional agent type for specialized validation
                       (fundamental, technical, manager, research). For
                       fundamental/technical/manager a missing table is an
                       issue; for anything else it is only a warning.

        Returns:
            ValidationResult with validation status and details. In strict
            mode the result is valid only when there are no issues; otherwise
            it is valid when the score is at least 60 and there are at most
            two issues.
        """
        if not content or not content.strip():
            return ValidationResult(
                is_valid=False,
                score=0.0,
                issues=["Empty or whitespace-only content"],
                warnings=[],
                sections_found=[],
                has_tables=False,
                has_bullets=False,
                has_numbered_list=False,
                has_conclusion=False,
            )

        issues: list[str] = []
        warnings: list[str] = []
        score = 0.0  # kept as a float so the result field type is consistent

        # Structured sections (25 points)
        sections_result = self._check_structured_sections(content)
        sections_found = sections_result["sections"]
        if sections_result["has_sections"]:
            score += 25.0
        else:
            issues.append("Missing structured sections with ## headings")

        if len(sections_found) < 3:
            warnings.append(
                f"Only {len(sections_found)} sections found. Expected at least 3-4 major sections."
            )

        # Data tables (25 points)
        tables_result = self._check_data_tables(content)
        has_tables = tables_result["has_tables"]
        if has_tables:
            score += 25.0
            if tables_result["table_count"] < 2:
                warnings.append(
                    f"Only {tables_result['table_count']} table(s) found. Multiple tables recommended for comprehensive analysis."
                )
        elif agent_type in ("fundamental", "technical", "manager"):
            issues.append(
                "No markdown tables found. Tables required for data presentation."
            )
        else:
            warnings.append(
                "No markdown tables found. Consider using tables for structured data."
            )

        # Bullet insights (20 points)
        bullets_result = self._check_bullet_insights(content)
        has_bullets = bullets_result["has_bullets"]
        if has_bullets:
            score += 20.0
            if bullets_result["bullet_count"] < 3:
                warnings.append(
                    f"Only {bullets_result['bullet_count']} bullet point(s) found. More insights recommended."
                )
        else:
            warnings.append(
                "No bullet-pointed insights found. Bullet points improve readability."
            )

        # Numbered summary (15 points)
        numbered_result = self._check_numbered_summary(content)
        has_numbered_list = numbered_result["has_numbered_list"]
        if has_numbered_list:
            score += 15.0
        elif 0 < numbered_result["item_count"] < 3:
            # A passing list always has >= 3 items, so the "too short" hint
            # can only ever apply on the failing path.
            warnings.append(
                f"Only {numbered_result['item_count']} numbered item(s) in summary. 3-5 items recommended."
            )
        else:
            warnings.append(
                "No numbered summary list found. Numbered summaries aid comprehension."
            )

        # Conclusion (15 points)
        conclusion_result = self._check_conclusion(content)
        has_conclusion = conclusion_result["has_conclusion"]
        if has_conclusion:
            score += 15.0
        else:
            issues.append("Missing conclusion section with clear recommendation.")

        if self.strict_mode:
            is_valid = not issues
        else:
            # Non-strict mode: a passing score plus a bounded number of issues
            is_valid = score >= 60.0 and len(issues) <= 2

        logger.info(
            "Markdown validation complete: score=%.1f%%, "
            "sections=%d, tables=%s, "
            "bullets=%s, numbered=%s, "
            "conclusion=%s, issues=%d, warnings=%d",
            score,
            len(sections_found),
            has_tables,
            has_bullets,
            has_numbered_list,
            has_conclusion,
            len(issues),
            len(warnings),
        )

        return ValidationResult(
            is_valid=is_valid,
            score=score,
            issues=issues,
            warnings=warnings,
            sections_found=sections_found,
            has_tables=has_tables,
            has_bullets=has_bullets,
            has_numbered_list=has_numbered_list,
            has_conclusion=has_conclusion,
        )

    def _check_structured_sections(self, content: str) -> dict:
        """
        Check for structured sections with ## markdown headings.

        Only level-2 headings count; "#" and "###" lines are ignored.

        Args:
            content: Markdown content

        Returns:
            Dict with ``has_sections`` (True when at least two headings were
            found), ``sections`` (stripped heading titles in document order)
            and ``section_count``.
        """
        sections = [title.strip() for title in self._SECTION_RE.findall(content)]

        return {
            "has_sections": len(sections) >= 2,
            "sections": sections,
            "section_count": len(sections),
        }

    def _check_data_tables(self, content: str) -> dict:
        """
        Check for markdown tables with pipes (|).

        A line counts as a table divider only if every cell consists of
        dashes (with optional alignment colons), so rows of blank cells are
        not mistaken for dividers. A line counts as a data row when it starts
        with a pipe, contains at least two pipes, has at least one non-empty
        cell, and is not itself a divider. Surrounding whitespace on a line is
        ignored.

        Args:
            content: Markdown content

        Returns:
            Dict with ``has_tables`` (at least one divider and two data rows,
            i.e. a header plus one row), ``table_count`` (number of divider
            lines) and ``row_count`` (number of data rows, header included).
        """
        divider_count = 0
        row_count = 0

        for raw_line in content.splitlines():
            line = raw_line.strip()
            if not line.startswith("|"):
                continue
            if self._TABLE_DIVIDER_RE.fullmatch(line):
                divider_count += 1
            elif line.count("|") >= 2 and any(
                cell.strip() for cell in line.split("|")
            ):
                row_count += 1

        return {
            "has_tables": divider_count >= 1 and row_count >= 2,
            "table_count": divider_count,
            "row_count": row_count,
        }

    def _check_bullet_insights(self, content: str) -> dict:
        """
        Check for bullet-pointed insights (- or *).

        Only bullets that start in the first column and are followed by a
        space or tab and some text are counted.

        Args:
            content: Markdown content

        Returns:
            Dict with ``has_bullets`` (True when at least two bullets were
            found) and ``bullet_count``.
        """
        bullet_count = len(self._BULLET_RE.findall(content))

        return {
            "has_bullets": bullet_count >= 2,
            "bullet_count": bullet_count,
        }

    def _check_numbered_summary(self, content: str) -> dict:
        """
        Check for numbered summary list (1., 2., 3.).

        The list is recognised when three consecutive numbered lines carry
        the numbers 1, 2, 3. The run may appear anywhere among the numbered
        lines, so an unrelated earlier numbered line does not hide a valid
        list.

        Args:
            content: Markdown content

        Returns:
            Dict with ``has_numbered_list`` and ``item_count`` (total number
            of numbered lines found in the content).
        """
        numbers = [int(m.group(1)) for m in self._NUMBERED_RE.finditer(content)]

        has_sequence = any(
            numbers[start:start + 3] == [1, 2, 3]
            for start in range(len(numbers) - 2)
        )

        return {
            "has_numbered_list": has_sequence,
            "item_count": len(numbers),
        }

    def _check_conclusion(self, content: str) -> dict:
        """
        Check for conclusion section with recommendation.

        A conclusion heading is a markdown heading (any level, # to ######)
        whose title contains one of the conclusion keywords. Decision language
        is matched on word boundaries anywhere in the content, so words that
        merely contain a keyword (e.g. "shareholder") do not count.

        Args:
            content: Markdown content

        Returns:
            Dict with ``has_conclusion`` (both of the following are True),
            ``has_conclusion_heading`` and ``has_decision_language``.
        """
        heading_titles = [
            title.lower() for title in self._ANY_HEADING_RE.findall(content)
        ]
        has_conclusion_heading = any(
            keyword in title
            for title in heading_titles
            for keyword in self._CONCLUSION_KEYWORDS
        )

        has_decision_language = self._DECISION_RE.search(content) is not None

        return {
            "has_conclusion": has_conclusion_heading and has_decision_language,
            "has_conclusion_heading": has_conclusion_heading,
            "has_decision_language": has_decision_language,
        }


def validate_agent_response(
    content: str, agent_name: str, strict: bool = False
) -> ValidationResult:
    """
    Validate an agent's markdown response and log the outcome.

    The agent type is derived from case-insensitive keywords in the agent
    name; the first matching group wins and an unmatched name yields no type.

    Args:
        content: Agent response markdown content
        agent_name: Name of the agent (for specialized validation)
        strict: Whether to use strict validation mode

    Returns:
        ValidationResult with validation details
    """
    name_lc = agent_name.lower()

    # Ordered (agent type, trigger keywords) pairs; earlier entries take priority.
    type_keywords = (
        ("fundamental", ("fundamental",)),
        ("technical", ("indicator", "pattern", "trend", "technical")),
        ("manager", ("portfolio", "risk", "manager")),
        ("research", ("research",)),
    )
    agent_type = next(
        (
            kind
            for kind, words in type_keywords
            if any(word in name_lc for word in words)
        ),
        None,
    )

    result = MarkdownValidator(strict_mode=strict).validate(
        content, agent_type=agent_type
    )

    # Failures are reported at warning level together with each issue;
    # successes get a single info line.
    if not result.is_valid:
        logger.warning(
            f"✗ {agent_name} response validation failed (score: {result.score:.1f}%)"
        )
        for issue in result.issues:
            logger.warning(f"  - Issue: {issue}")
    else:
        logger.info(
            f"✓ {agent_name} response validated successfully (score: {result.score:.1f}%)"
        )

    # Advisory findings are always emitted, but only at debug level.
    for warning in result.warnings:
        logger.debug(f"  - Warning: {warning}")

    return result