"""
Aclarador Analyzer Wrapper
Uses Groq API to analyze text clarity based on Aclarador principles
"""

import logging
import os
from typing import Dict, Any, List
from pathlib import Path

logger = logging.getLogger(__name__)

# Try to import Groq
try:
    from groq import Groq
    GROQ_AVAILABLE = True
except ImportError:
    GROQ_AVAILABLE = False
    logger.warning("Groq library not available - install with: pip install groq")


class AclaradorAnalyzer:
    """
    Clarity analyzer using Groq API
    Based on Aclarador's system prompt for Spanish clarity analysis
    """

    def __init__(self):
        """
        Load the system prompt and, when possible, create the Groq client.

        ``self.groq_client`` stays ``None`` when the ``groq`` package could not
        be imported or when the ``GROQ_API_KEY`` environment variable is unset
        or empty.
        """
        self.system_prompt = self._load_system_prompt()
        self.groq_client = None

        # Guard 1: the optional dependency is missing.
        if not GROQ_AVAILABLE:
            logger.warning("⚠️  Groq not available - using fallback analyzer")
            return

        # Guard 2: the library is present but no credentials were provided.
        key_from_env = os.getenv('GROQ_API_KEY')
        if not key_from_env:
            logger.warning("⚠️  GROQ_API_KEY not found - using fallback analyzer")
            return

        self.groq_client = Groq(api_key=key_from_env)
        logger.info("✅ Aclarador analyzer initialized with Groq API")

    def _load_system_prompt(self) -> str:
        """
        Read the system prompt from ``aclarador/system_prompt.md`` next to this module.

        When the file contains at least one complete fenced block, the stripped
        text between the first pair of fence markers is returned; otherwise the
        whole file content is returned. Any error while locating or reading the
        file is logged and the built-in default prompt is returned instead.
        """
        fence = '```'
        try:
            prompt_file = Path(__file__).parent / 'aclarador' / 'system_prompt.md'
            raw_content = prompt_file.read_text(encoding='utf-8')

            # Two fence markers yield three or more pieces; piece 1 is the
            # text enclosed by the first pair.
            pieces = raw_content.split(fence)
            if len(pieces) >= 3:
                return pieces[1].strip()

            # No complete fenced block: use the file as-is.
            return raw_content

        except Exception as e:
            logger.error(f"Error loading system prompt: {e}")
            return self._get_default_system_prompt()

    def _get_default_system_prompt(self) -> str:
        """Return the built-in Spanish prompt used when the prompt file cannot be loaded."""
        prompt_lines = (
            "Eres un experto en lenguaje claro especializado en la mejora de textos en español.",
            "Analiza el texto y proporciona:",
            "1. Una versión mejorada más clara",
            "2. Explicación de las mejoras realizadas",
            "3. Identificación de problemas de claridad",
        )
        return "\n".join(prompt_lines)

    def analyze(self, text: str, title: str = None) -> Dict[str, Any]:
        """
        Analyze text clarity, using the Groq API when a client is configured.

        Args:
            text: The text to analyze. ``None``, empty, or whitespace-only
                input yields the empty result without contacting the API.
            title: Accepted for interface compatibility; not used by this
                method.

        Returns:
            A result dictionary with clarity scores, sentence/vocabulary
            statistics, jargon terms and suggestions. When no Groq client is
            configured, the API call fails, or the API returns no content,
            the heuristic fallback result is returned instead.
        """
        # Blank input needs neither an API round-trip nor the fallback
        # heuristics (which would also end up producing the empty result).
        if not text or not text.strip():
            return self._get_empty_result()

        # Use Groq if available, otherwise fall back to simple heuristics.
        if not self.groq_client:
            return self._fallback_analysis(text)

        try:
            response = self.groq_client.chat.completions.create(
                model="llama-3.3-70b-versatile",
                messages=[
                    {"role": "system", "content": self.system_prompt},
                    {"role": "user", "content": text}
                ],
                temperature=0.3,
                max_tokens=2000
            )

            analysis_text = response.choices[0].message.content
            if not analysis_text:
                # Nothing to parse: treat it like an unavailable API rather
                # than reporting scores derived from an empty analysis.
                logger.warning("Groq returned an empty response - using fallback analysis")
                return self._fallback_analysis(text)

            # Parse response and calculate scores
            return self._parse_groq_response(analysis_text, text)

        except Exception as e:
            # logger.exception records the traceback through the logging
            # system instead of printing it straight to stderr.
            logger.exception("Error calling Groq API: %s", e)
            return self._fallback_analysis(text)

    def _parse_groq_response(self, analysis_text: str, original_text: str) -> Dict[str, Any]:
        """
        Turn the model's markdown answer into the analyzer result dictionary.

        The answer is expected to contain sections such as:
        ### TEXTO CORREGIDO
        ### EXPLICACIÓN DE MEJORAS
        ### PRINCIPIOS APLICADOS

        Issues and suggestions are derived from the explanation section, while
        sentence, vocabulary and jargon statistics are computed from
        ``original_text``. The overall score is the equal-weight average of
        the readability and complexity scores.
        """
        parsed_sections = self._extract_sections(analysis_text)
        explanation = parsed_sections.get('explicacion', '')

        # Raw material for the statistics: '.'-delimited sentences and
        # whitespace-delimited words of the original text.
        sentence_list = [chunk.strip() for chunk in original_text.split('.') if chunk.strip()]
        tokens = original_text.split()

        # Issues mentioned by the model drive both scores.
        detected_issues = self._extract_issues_from_explanation(explanation)
        readability = self._calculate_readability_from_analysis(original_text, detected_issues)
        complexity = self._calculate_complexity_from_analysis(detected_issues)

        tips = self._extract_suggestions(explanation)
        jargon_terms = self._detect_jargon(tokens)
        per_sentence = self._get_sentence_stats(sentence_list)
        per_word = self._get_vocabulary_stats(tokens)

        readability_metrics = {
            'issues_detected': detected_issues,
            'corrected_text': parsed_sections.get('corregido', ''),
        }
        grammar_stats = {'issues_count': len(detected_issues)}

        return {
            'overall_score': readability * 0.5 + complexity * 0.5,
            'readability_score': readability,
            'complexity_score': complexity,
            'sentence_stats': per_sentence,
            'vocabulary_stats': per_word,
            'readability_metrics': readability_metrics,
            'grammar_stats': grammar_stats,
            'jargon_count': len(jargon_terms),
            'jargon_words': jargon_terms,
            'long_sentences_count': per_sentence.get('long_sentences', 0),
            'suggestions': tips,
        }

    def _extract_sections(self, text: str) -> Dict[str, str]:
        """
        Split a model answer into its ``###``-headed sections.

        Args:
            text: The raw answer. ``None`` or an empty string yields ``{}``.

        Returns:
            A dictionary with any of the keys ``'corregido'``,
            ``'explicacion'`` and ``'principios'``, each mapped to the
            stripped body of the matching section. Sections are recognised by
            keywords in their header line only (case-insensitive), so a body
            that merely mentions "texto corregido" or "explicación" cannot be
            mistaken for another section. If a header repeats, the last
            occurrence wins. A header without a body maps to ``''``.
        """
        sections: Dict[str, str] = {}
        if not text:
            return sections

        # Checked in order; the first keyword found in the header decides.
        header_keywords = (
            ('texto corregido', 'corregido'),
            ('explicación', 'explicacion'),
            ('explicacion', 'explicacion'),
            ('principios', 'principios'),
        )

        # Everything before the first '###' is preamble, not a section.
        for part in text.split('###')[1:]:
            header, _, body = part.partition('\n')
            header_lower = header.lower()
            for keyword, key in header_keywords:
                if keyword in header_lower:
                    sections[key] = body.strip()
                    break

        return sections

    def _extract_issues_from_explanation(self, explanation: str) -> List[str]:
        """
        Derive issue tags from the keywords mentioned in the explanation.

        Matching is case-insensitive and done on word stems so that plural
        and unaccented spellings are recognised too (for example "oraciones",
        which does not contain the accented singular "oración").

        Args:
            explanation: Explanation text from the model; ``None`` or empty
                yields no issues.

        Returns:
            A list, in this fixed order, of those tags that apply:
            ``'long_sentences'``, ``'complex_vocabulary'``,
            ``'passive_voice'``, ``'redundancy'``.
        """
        if not explanation:
            return []

        lowered = explanation.lower()

        def mentions(*stems: str) -> bool:
            """Return True when any of the stems occurs in the explanation."""
            return any(stem in lowered for stem in stems)

        issues = []

        # 'oraci' covers oración / oracion / oraciones.
        if mentions('oraci') and mentions('larga', 'compleja'):
            issues.append('long_sentences')

        if mentions('vocabulario', 'tecnicismo', 'jerga'):
            issues.append('complex_vocabulary')

        # 'pasiva' also covers the phrase "voz pasiva".
        if mentions('pasiva'):
            issues.append('passive_voice')

        # Stems cover redundancia(s) / redundante(s) and repetición / repeticiones.
        if mentions('redundan', 'repetici'):
            issues.append('redundancy')

        return issues

    def _extract_suggestions(self, explanation: str) -> List[str]:
        """
        Collect up to five suggestions from list items in the explanation.

        A line counts as a list item when it starts with ``-`` or ``*``, or
        with a digit followed by a ``.`` within its first three characters.
        Only the list marker is removed, so a suggestion that itself begins
        with a number (e.g. "- 3 oraciones ...") keeps that number. Items of
        ten characters or fewer are ignored.

        Args:
            explanation: Explanation text from the model.

        Returns:
            At most five suggestion strings. When none are found, a single
            generic suggestion is returned.
        """
        suggestions = []

        for raw_line in explanation.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            if line.startswith(('-', '*')):
                remainder = line
            elif line[0].isdigit() and '.' in line[:3]:
                # Drop the "N." marker; everything after the first dot is content.
                remainder = line.split('.', 1)[1]
            else:
                continue

            # Remove bullet characters (and markdown emphasis stars) that
            # precede the text, but nothing that belongs to the text itself.
            clean_line = remainder.lstrip('-* ').strip()
            if len(clean_line) > 10:  # Meaningful suggestion
                suggestions.append(clean_line)

        # If no suggestions found, add a general one
        if not suggestions:
            suggestions.append('Texto analizado con principios de lenguaje claro')

        return suggestions[:5]  # Limit to 5

    def _calculate_readability_from_analysis(self, text: str, issues: List[str]) -> float:
        """
        Score readability from the average sentence length and the issue count.

        The text is split on ``.``; the score starts at 100, loses 2 points
        for every word the average sentence length deviates from 20, loses 8
        points per detected issue, and is clamped to the 0-100 range. A text
        without any non-blank sentence scores 50.0.
        """
        word_counts = [
            len(fragment.split())
            for fragment in text.split('.')
            if fragment.strip()
        ]
        if not word_counts:
            return 50.0

        mean_words = sum(word_counts) / len(word_counts)
        structure_score = 100 - abs(mean_words - 20) * 2
        penalised_score = structure_score - len(issues) * 8

        return max(0, min(100, penalised_score))

    def _calculate_complexity_from_analysis(self, issues: List[str]) -> float:
        """
        Score simplicity: 100 minus 12 points per detected issue, clamped to 0-100.

        A higher value means fewer detected issues.
        """
        points_per_issue = 12
        remaining = 100.0 - points_per_issue * len(issues)
        return max(0, min(100, remaining))

    def _get_sentence_stats(self, sentences: List[str]) -> Dict[str, Any]:
        """
        Summarise sentence lengths, measured in whitespace-separated words.

        Args:
            sentences: Sentences to measure.

        Returns:
            A dictionary with ``count``, ``avg_length``, ``max_length``,
            ``min_length`` and ``long_sentences`` (sentences longer than 30
            words). The same keys are present, all zero, for empty input so
            callers can index the result without checking for emptiness.
        """
        if not sentences:
            return {
                'count': 0,
                'avg_length': 0,
                'max_length': 0,
                'min_length': 0,
                'long_sentences': 0
            }

        # Split each sentence once and derive every figure from the lengths.
        sentence_lengths = [len(s.split()) for s in sentences]

        return {
            'count': len(sentence_lengths),
            'avg_length': sum(sentence_lengths) / len(sentence_lengths),
            'max_length': max(sentence_lengths),
            'min_length': min(sentence_lengths),
            'long_sentences': sum(1 for length in sentence_lengths if length > 30)
        }

    def _get_vocabulary_stats(self, words: List[str]) -> Dict[str, Any]:
        """
        Summarise vocabulary usage for a list of words.

        Words are compared case-insensitively when counting unique entries;
        punctuation attached to a word is not removed.

        Args:
            words: Whitespace-separated tokens of the text.

        Returns:
            A dictionary with ``total_words``, ``unique_words``,
            ``lexical_diversity`` (unique / total) and ``avg_word_length``.
            The same keys are present, all zero, for empty input so callers
            can index the result without checking for emptiness.
        """
        if not words:
            return {
                'total_words': 0,
                'unique_words': 0,
                'lexical_diversity': 0,
                'avg_word_length': 0
            }

        total = len(words)
        unique_words = {w.lower() for w in words}

        return {
            'total_words': total,
            'unique_words': len(unique_words),
            'lexical_diversity': len(unique_words) / total,
            'avg_word_length': sum(len(w) for w in words) / total
        }

    def _detect_jargon(self, words: List[str]) -> List[str]:
        """
        List up to ten candidate jargon terms found in ``words``.

        Each word is lower-cased and stripped of surrounding ``.,;:¿?¡!``.
        Words longer than 12 characters come first (in order of appearance),
        followed by known Spanish administrative terms not already listed.
        Duplicates are reported once.
        """
        known_terms = (
            'normativa', 'procedimiento', 'expediente', 'tramitación',
            'reglamento', 'disposición', 'resolución', 'acreditación',
            'competencias', 'subsanación', 'notificación', 'administrativo',
        )

        normalised = [token.lower().strip('.,;:¿?¡!') for token in words]

        # Long words are treated as likely technical terms.
        lengthy = [term for term in normalised if len(term) > 12]
        administrative = [term for term in normalised if term in known_terms]

        # dict.fromkeys keeps the first occurrence of each term, in order.
        ordered_unique = list(dict.fromkeys(lengthy + administrative))

        return ordered_unique[:10]  # Limit to 10 terms

    def _fallback_analysis(self, text: str) -> Dict[str, Any]:
        """
        Heuristic analysis used when the Groq API cannot be used.

        Readability is 100 minus 2 points for every word the average sentence
        length deviates from 20; complexity is 100 minus 10 points per
        sentence longer than 30 words; both are floored at 0 and averaged
        into the overall score.

        Args:
            text: The text to analyze.

        Returns:
            A result dictionary with the same keys as the Groq-based result
            (``corrected_text`` is an empty string because no rewrite is
            produced), or the empty result when the text has no sentences or
            no words.
        """
        logger.warning("Using fallback analysis - Groq API not available")

        sentences = [s.strip() for s in text.split('.') if s.strip()]
        words = text.split()

        if not sentences or not words:
            return self._get_empty_result()

        # Compute each derived value once and reuse it below.
        sentence_stats = self._get_sentence_stats(sentences)
        jargon_words = self._detect_jargon(words)
        long_sentences_count = sentence_stats['long_sentences']

        # Simple scoring
        readability_score = max(0, 100 - abs(sentence_stats['avg_length'] - 20) * 2)
        complexity_score = max(0, 100 - long_sentences_count * 10)
        overall_score = (readability_score + complexity_score) / 2

        return {
            'overall_score': overall_score,
            'readability_score': readability_score,
            'complexity_score': complexity_score,
            'sentence_stats': sentence_stats,
            'vocabulary_stats': self._get_vocabulary_stats(words),
            'readability_metrics': {'issues_detected': [], 'corrected_text': ''},
            'grammar_stats': {'issues_count': 0},
            'jargon_count': len(jargon_words),
            'jargon_words': jargon_words,
            'long_sentences_count': long_sentences_count,
            'suggestions': [
                'Groq API no disponible - usando análisis simple',
                'Configurar GROQ_API_KEY para análisis completo'
            ]
        }

    def _get_empty_result(self) -> Dict[str, Any]:
        """
        Return the all-zero result used for empty or invalid text.

        The nested dictionaries carry the same keys that the regular results
        populate, so callers can read e.g.
        ``result['grammar_stats']['issues_count']`` without special-casing
        empty input.
        """
        return {
            'overall_score': 0,
            'readability_score': 0,
            'complexity_score': 0,
            'sentence_stats': {
                'count': 0,
                'avg_length': 0,
                'max_length': 0,
                'min_length': 0,
                'long_sentences': 0
            },
            'vocabulary_stats': {
                'total_words': 0,
                'unique_words': 0,
                'lexical_diversity': 0,
                'avg_word_length': 0
            },
            'readability_metrics': {'issues_detected': [], 'corrected_text': ''},
            'grammar_stats': {'issues_count': 0},
            'jargon_count': 0,
            'jargon_words': [],
            'long_sentences_count': 0,
            'suggestions': ['Texto vacío o inválido']
        }