"""Markdown validation utility for agent response quality assurance.

This module validates that agent responses follow the structured markdown format
with proper sections, tables, bullet points, and numbered summaries.

Feature 004 - User Story 3: Enhanced Agent Dialog Content Quality
"""

import logging
import re
from dataclasses import dataclass
from typing import List, Optional

logger = logging.getLogger(__name__)


@dataclass
class ValidationResult:
    """Outcome of validating one markdown document.

    Field order is part of the constructor signature; do not reorder.
    """

    # Overall verdict for the document.
    is_valid: bool
    # Aggregate score as a percentage in the range 0-100.
    score: float  # 0-100 percentage score
    # Problems recorded as issues by the validator.
    issues: list[str]
    # Softer findings recorded as warnings by the validator.
    warnings: list[str]
    # Titles of the sections detected in the document.
    sections_found: list[str]
    # Per-check flags: True when the corresponding structure was detected.
    has_tables: bool
    has_bullets: bool
    has_numbered_list: bool
    has_conclusion: bool


class MarkdownValidator:
    """
    Validates agent response markdown structure and content quality.

    Checks for:
    - Structured sections with ## headings
    - Data tables with markdown table syntax (|)
    - Bullet-pointed insights (- or *)
    - Numbered summary (1., 2., 3.)
    - Conclusion section with recommendation
    """

    def __init__(self, strict_mode: bool = False):
        """
        Initialize the markdown validator.

        Args:
            strict_mode: If True, all checks must pass for validation to succeed.
                        If False, validation passes with warnings for minor issues.
        """
        self.strict_mode = strict_mode

    def validate(
        self, content: str, agent_type: str | None = None
    ) -> ValidationResult:
        """
        Validate markdown content structure and format.

        Args:
            content: Markdown content to validate
            agent_type: Optional agent type for specialized validation
                       (fundamental, technical, manager, research)

        Returns:
            ValidationResult with validation status and details
        """
        if not content or not content.strip():
            return ValidationResult(
                is_valid=False,
                score=0.0,
                issues=["Empty or whitespace-only content"],
                warnings=[],
                sections_found=[],
                has_tables=False,
                has_bullets=False,
                has_numbered_list=False,
                has_conclusion=False,
            )

        issues = []
        warnings = []
        score_components = []

        # Check structured sections
        sections_result = self._check_structured_sections(content)
        sections_found = sections_result["sections"]
        if sections_result["has_sections"]:
            score_components.append(25.0)
        else:
            issues.append("Missing structured sections with ## headings")

        if len(sections_found) < 3:
            warnings.append(
                f"Only {len(sections_found)} sections found. Expected at least 3-4 major sections."
            )

        # Check data tables
        tables_result = self._check_data_tables(content)
        has_tables = tables_result["has_tables"]
        if has_tables:
            score_components.append(25.0)
            if tables_result["table_count"] < 2:
                warnings.append(
                    f"Only {tables_result['table_count']} table(s) found. Multiple tables recommended for comprehensive analysis."
                )
        else:
            if agent_type in ["fundamental", "technical", "manager"]:
                issues.append(
                    "No markdown tables found. Tables required for data presentation."
                )
            else:
                warnings.append(
                    "No markdown tables found. Consider using tables for structured data."
                )

        # Check bullet insights
        bullets_result = self._check_bullet_insights(content)
        has_bullets = bullets_result["has_bullets"]
        if has_bullets:
            score_components.append(20.0)
            if bullets_result["bullet_count"] < 3:
                warnings.append(
                    f"Only {bullets_result['bullet_count']} bullet point(s) found. More insights recommended."
                )
        else:
            warnings.append(
                "No bullet-pointed insights found. Bullet points improve readability."
            )

        # Check numbered summary
        numbered_result = self._check_numbered_summary(content)
        has_numbered_list = numbered_result["has_numbered_list"]
        if has_numbered_list:
            score_components.append(15.0)
            if numbered_result["item_count"] < 3:
                warnings.append(
                    f"Only {numbered_result['item_count']} numbered item(s) in summary. 3-5 items recommended."
                )
        else:
            warnings.append(
                "No numbered summary list found. Numbered summaries aid comprehension."
            )

        # Check conclusion
        conclusion_result = self._check_conclusion(content)
        has_conclusion = conclusion_result["has_conclusion"]
        if has_conclusion:
            score_components.append(15.0)
        else:
            issues.append("Missing conclusion section with clear recommendation.")

        # Calculate overall score
        score = sum(score_components)

        # Determine if valid
        is_valid = True
        if self.strict_mode:
            is_valid = len(issues) == 0
        else:
            # Non-strict mode: valid if score >= 60% and no critical issues
            is_valid = score >= 60.0 and len(issues) <= 2

        logger.info(
            f"Markdown validation complete: score={score:.1f}%, "
            f"sections={len(sections_found)}, tables={has_tables}, "
            f"bullets={has_bullets}, numbered={has_numbered_list}, "
            f"conclusion={has_conclusion}, issues={len(issues)}, warnings={len(warnings)}"
        )

        return ValidationResult(
            is_valid=is_valid,
            score=score,
            issues=issues,
            warnings=warnings,
            sections_found=sections_found,
            has_tables=has_tables,
            has_bullets=has_bullets,
            has_numbered_list=has_numbered_list,
            has_conclusion=has_conclusion,
        )

    def _check_structured_sections(self, content: str) -> dict:
        """
        Check for structured sections with ## markdown headings.

        Args:
            content: Markdown content

        Returns:
            Dict with has_sections bool and list of section titles
        """
        # Match ## headings (level 2)
        heading_pattern = r"^##\s+(.+)$"
        matches = re.findall(heading_pattern, content, re.MULTILINE)

        sections = [match.strip() for match in matches]

        return {
            "has_sections": len(sections) >= 2,
            "sections": sections,
            "section_count": len(sections),
        }

    def _check_data_tables(self, content: str) -> dict:
        """
        Check for markdown tables with pipes (|).

        Args:
            content: Markdown content

        Returns:
            Dict with has_tables bool and table count
        """
        # Match markdown table rows (must have at least 2 pipes per line)
        # Table header: | Col1 | Col2 | Col3 |
        # Table divider: |------|------|------|
        # Table row: | Val1 | Val2 | Val3 |

        # Find table dividers (|---|---|)
        divider_pattern = r"^\|[\s\-:]+\|[\s\-:|]+$"
        divider_matches = re.findall(divider_pattern, content, re.MULTILINE)

        # Find table rows with actual data (not just dashes)
        row_pattern = r"^\|[^\-\n][^\n]*\|[^\n]*$"
        row_matches = re.findall(row_pattern, content, re.MULTILINE)

        # Consider it a valid table if we have both dividers and data rows
        has_tables = len(divider_matches) >= 1 and len(row_matches) >= 2

        return {
            "has_tables": has_tables,
            "table_count": len(divider_matches),
            "row_count": len(row_matches),
        }

    def _check_bullet_insights(self, content: str) -> dict:
        """
        Check for bullet-pointed insights (- or *).

        Args:
            content: Markdown content

        Returns:
            Dict with has_bullets bool and bullet count
        """
        # Match bullet points (- or * at start of line, followed by content)
        bullet_pattern = r"^[\-\*]\s+(.+)$"
        matches = re.findall(bullet_pattern, content, re.MULTILINE)

        return {
            "has_bullets": len(matches) >= 2,
            "bullet_count": len(matches),
        }

    def _check_numbered_summary(self, content: str) -> dict:
        """
        Check for numbered summary list (1., 2., 3.).

        Args:
            content: Markdown content

        Returns:
            Dict with has_numbered_list bool and item count
        """
        # Match numbered list items (1., 2., 3., etc.)
        numbered_pattern = r"^\d+\.\s+(.+)$"
        matches = re.findall(numbered_pattern, content, re.MULTILINE)

        # Check for sequential numbering (1, 2, 3, ...)
        has_sequence = False
        if len(matches) >= 3:
            # Extract numbers from the full content to verify sequence
            number_pattern = r"^(\d+)\.\s+"
            numbers = [
                int(m.group(1))
                for m in re.finditer(number_pattern, content, re.MULTILINE)
            ]
            # Check if we have at least 3 consecutive numbers starting from 1
            has_sequence = len(numbers) >= 3 and numbers[0] == 1 and numbers[1] == 2

        return {
            "has_numbered_list": has_sequence,
            "item_count": len(matches),
        }

    def _check_conclusion(self, content: str) -> dict:
        """
        Check for conclusion section with recommendation.

        Args:
            content: Markdown content

        Returns:
            Dict with has_conclusion bool and details
        """
        # Check for conclusion-related section headings
        conclusion_keywords = [
            "conclusion",
            "recommendation",
            "final decision",
            "summary",
            "investment decision",
            "trading implication",
        ]

        content_lower = content.lower()
        has_conclusion_heading = any(
            keyword in content_lower for keyword in conclusion_keywords
        )

        # Check for decision-related terms in the content
        decision_keywords = [
            "buy",
            "sell",
            "hold",
            "bullish",
            "bearish",
            "neutral",
            "recommend",
            "advise",
            "suggest",
        ]

        has_decision_language = any(
            keyword in content_lower for keyword in decision_keywords
        )

        return {
            "has_conclusion": has_conclusion_heading and has_decision_language,
            "has_conclusion_heading": has_conclusion_heading,
            "has_decision_language": has_decision_language,
        }


def validate_agent_response(
    content: str, agent_name: str, strict: bool = False
) -> ValidationResult:
    """
    Validate an agent's markdown response and log the outcome.

    The agent type is derived from case-insensitive keywords in the agent
    name; the first matching group wins and an unmatched name yields no
    agent type.

    Args:
        content: Agent response markdown content
        agent_name: Name of the agent (for specialized validation)
        strict: Whether to use strict validation mode

    Returns:
        ValidationResult with validation details
    """
    # Ordered (agent_type, keywords) pairs; order matters because the first
    # group with a keyword contained in the name decides the type.
    type_keywords = (
        ("fundamental", ("fundamental",)),
        ("technical", ("indicator", "pattern", "trend", "technical")),
        ("manager", ("portfolio", "risk", "manager")),
        ("research", ("research",)),
    )
    lowered_name = agent_name.lower()
    agent_type = next(
        (
            candidate
            for candidate, keywords in type_keywords
            if any(keyword in lowered_name for keyword in keywords)
        ),
        None,
    )

    result = MarkdownValidator(strict_mode=strict).validate(
        content, agent_type=agent_type
    )

    # Failures are logged at warning level together with each issue;
    # warnings are always logged at debug level.
    if not result.is_valid:
        logger.warning(
            f"✗ {agent_name} response validation failed (score: {result.score:.1f}%)"
        )
        for issue in result.issues:
            logger.warning(f"  - Issue: {issue}")
    else:
        logger.info(
            f"✓ {agent_name} response validated successfully (score: {result.score:.1f}%)"
        )

    for warning in result.warnings:
        logger.debug(f"  - Warning: {warning}")

    return result