##________generate natural language insights from analysis_________##

import os
import json
from typing import Dict, Any


##___________________________
class InsightGenerator:
    """Turn precomputed analysis results into short, human-readable insights.

    Two modes are supported:

    * template mode (default): insights are built from fixed sentence
      templates filled in with values from the ``analysis`` dict;
    * OpenAI mode: a prompt is sent to the OpenAI chat API, falling back to
      template mode if the call fails or OpenAI was not configured.

    The ``analysis`` dict may contain the sections ``descriptive_stats``,
    ``correlations``, ``trends``, ``group_analysis``, ``outliers`` and
    ``distributions``. A missing (or ``None``) section is treated as empty.
    """

    # Relative gap between mean and median above which a column is
    # reported as skewed (0.2 == 20%).
    _SKEW_TOLERANCE = 0.2
    # Outliers are reported only when they make up less than this
    # percentage of the data.
    _OUTLIER_PERCENT_LIMIT = 5

    def __init__(self, use_openai=False, api_key=None):
        """Configure the generator.

        Args:
            use_openai: request OpenAI-backed insights.
            api_key: OpenAI API key. OpenAI mode is only enabled when both
                ``use_openai`` and ``api_key`` are truthy; otherwise the
                generator runs in template mode.
        """
        # Always define the attribute so later code can test it safely.
        self.openai = None
        # Without a key the client is never configured, so the flag must not
        # claim OpenAI mode is active.
        self.use_openai = bool(use_openai and api_key)
        if self.use_openai:
            # Imported lazily so template mode does not require the SDK.
            import openai
            openai.api_key = api_key
            self.openai = openai
        else:
            print(" Using template-based insight generation")

    def generate_insights(self, df, schema, analysis):
        """Generate human-readable insights for a dataset.

        Args:
            df: the dataset; must support ``len()`` and expose ``.columns``
                (presumably a pandas DataFrame - confirm against callers).
            schema: currently unused; kept for interface compatibility.
            analysis: dict of analysis sections (see class docstring).

        Returns:
            A list of insight strings, starting with the dataset overview and
            ending with the recommendations.
        """
        return self._build_template_insights(len(df), len(df.columns), analysis)

    def _build_template_insights(self, n_rows, n_cols, analysis: Dict[str, Any]):
        """Assemble all template insights from row/column counts and analysis."""
        analysis = analysis or {}

        # 1. Dataset overview
        insights = [f" **Dataset Overview**: Your dataset has {n_rows} rows and {n_cols} columns."]

        # 2. Key statistics
        insights.extend(self._generate_statistical_insights(analysis.get('descriptive_stats') or {}))
        # 3. Correlation insights
        insights.extend(self._generate_correlation_insights(analysis.get('correlations') or []))
        # 4. Trend insights
        insights.extend(self._generate_trend_insights(analysis.get('trends') or []))
        # 5. Group analysis insights
        insights.extend(self._generate_group_insights(analysis.get('group_analysis') or {}))
        # 6. Outlier insights
        insights.extend(self._generate_outlier_insights(analysis.get('outliers') or {}))
        # 7. Distribution insights
        insights.extend(self._generate_distribution_insights(analysis.get('distributions') or {}))
        # 8. Actionable recommendations
        insights.extend(self._generate_recommendations(analysis))

        return insights

    def _generate_statistical_insights(self, stats):
        """Report columns whose mean and median differ by more than 20%.

        Args:
            stats: mapping of column name -> mapping with ``mean`` and
                ``median`` entries. Columns missing either value are skipped.

        Returns:
            At most three skewness insight strings.
        """
        insights = []

        for col, values in stats.items():
            mean = values.get('mean')
            median = values.get('median')
            if mean is None or median is None:
                continue

            # Compare the gap against the magnitude of the reference value.
            # For non-negative data this equals `mean > median * 1.2`, but it
            # stays correct for negative values, where multiplying by 1.2
            # moves the threshold in the wrong direction.
            gap = mean - median
            if gap > self._SKEW_TOLERANCE * abs(median):
                insights.append(f" **{col}** is right-skewed (mean {mean:.2f} > median {median:.2f}), suggesting some high values pulling the average up.")
            elif -gap > self._SKEW_TOLERANCE * abs(mean):
                insights.append(f" **{col}** is left-skewed (median {median:.2f} > mean {mean:.2f}).")

        return insights[:3]  # limit to top 3

    def _generate_correlation_insights(self, correlations):
        """Describe the first three correlation entries.

        Args:
            correlations: sequence of dicts with ``col1``, ``col2``,
                ``strength`` (``'positive'`` or anything else, treated as
                negative) and ``correlation`` (numeric coefficient).

        Returns:
            Two strings per correlation: the headline and its interpretation.
        """
        insights = []

        for corr in correlations[:3]:  # Top 3 correlations
            is_positive = corr['strength'] == 'positive'
            strength = "strong positive" if is_positive else "strong negative"
            insights.append(f" **{corr['col1']}** and **{corr['col2']}** show a {strength} correlation ({corr['correlation']:.2f}).")

            if is_positive:
                insights.append(f"  → When {corr['col1']} increases, {corr['col2']} tends to increase as well.")
            else:
                insights.append(f"  → When {corr['col1']} increases, {corr['col2']} tends to decrease.")

        return insights

    def _generate_trend_insights(self, trends):
        """Describe trends whose absolute percent change exceeds 5%.

        Args:
            trends: iterable of dicts with ``column``, ``direction`` and
                ``percent_change``. Any direction other than ``'increasing'``
                is worded as a decrease.

        Returns:
            One string per trend above the 5% threshold; changes above 20%
            are worded as significant.
        """
        insights = []

        for trend in trends:
            direction = "increased" if trend['direction'] == 'increasing' else "decreased"
            change_abs = abs(trend['percent_change'])

            if change_abs > 20:
                insights.append(f" **{trend['column']}** has {direction} significantly by {change_abs:.1f}% over time.")
            elif change_abs > 5:
                insights.append(f" **{trend['column']}** has {direction} by {change_abs:.1f}% over the period.")

        return insights

    def _generate_group_insights(self, group_analysis):
        """Name the top category for each (categorical, numeric) column pair.

        Args:
            group_analysis: mapping of categorical column -> mapping of
                numeric column -> dict with ``top_category`` and
                ``top_value``. Entries with a falsy ``top_category`` are
                skipped.

        Returns:
            At most three insight strings.
        """
        insights = []

        for cat_col, per_numeric in group_analysis.items():
            for num_col, result in per_numeric.items():
                if result['top_category']:
                    insights.append(f" **{result['top_category']}** is the top performer in {cat_col} for {num_col} with {result['top_value']:.2f}.")

        return insights[:3]

    def _generate_outlier_insights(self, outliers):
        """Report columns that contain a small share of outliers.

        Args:
            outliers: mapping of column name -> dict with ``count`` and
                ``percentage``.

        Returns:
            One string per column that has at least one outlier and whose
            outlier share is below 5%.
        """
        insights = []

        for col, data in outliers.items():
            # Skip columns without outliers: "contains 0 outliers ... worth
            # investigating" is not a useful insight.
            if data['count'] > 0 and data['percentage'] < self._OUTLIER_PERCENT_LIMIT:
                insights.append(f" **{col}** contains {data['count']} outliers ({data['percentage']:.1f}% of data). These might be worth investigating.")

        return insights

    def _generate_distribution_insights(self, distributions):
        """Report columns whose distribution is not approximately normal.

        Args:
            distributions: mapping of column name -> dict with ``shape`` and
                ``skewness``.

        Returns:
            At most two insight strings.
        """
        insights = []

        for col, dist in distributions.items():
            if dist['shape'] != 'approximately normal':
                insights.append(f" **{col}** has a {dist['shape']} distribution (skewness: {dist['skewness']:.2f}).")

        return insights[:2]

    def _generate_recommendations(self, analysis: Dict[str, Any]):
        """Derive recommendations from correlations and trends.

        Args:
            analysis: dict of analysis sections; only ``correlations`` and
                ``trends`` are read, and either may be missing.

        Returns:
            A non-empty list: recommendations if any apply, otherwise a single
            "no urgent issues" status line.
        """
        recommendations = []

        # Check for opportunities: only the first correlation is considered.
        correlations = analysis.get('correlations') or []
        if correlations:
            strong_corr = correlations[0]
            if strong_corr['strength'] == 'positive':
                recommendations.append(f" **Recommendation**: Focus on increasing {strong_corr['col1']} to potentially boost {strong_corr['col2']}.")

        # Check for declining trends: report only the first one found.
        for trend in analysis.get('trends') or []:
            if trend['direction'] == 'decreasing' and abs(trend['percent_change']) > 10:
                recommendations.append(f" **Action Required**: {trend['column']} is declining. Consider investigating causes.")
                break

        if not recommendations:
            recommendations.append(" **Status**: No urgent issues detected. Continue monitoring key metrics.")

        return recommendations

    def _insights_from_summary(self, df_summary, analysis):
        """Build template insights from a summary dict instead of a dataset."""
        return self._build_template_insights(df_summary['rows'], df_summary['columns'], analysis)

    def generate_openai_insights(self, df_summary, analysis):
        """Generate insights with OpenAI, falling back to templates.

        Args:
            df_summary: dict with ``rows``, ``columns`` and ``column_names``
                describing the dataset.
            analysis: dict of analysis sections (see class docstring).

        Returns:
            A one-element list holding the model's reply, or the template
            insights when OpenAI mode is off, unconfigured, or the API call
            raises.
        """
        if not self.use_openai or getattr(self, 'openai', None) is None:
            return self._insights_from_summary(df_summary, analysis)

        prompt = f"""
        You are a data analyst. Analyze this dataset and provide key business insights:
        
        Dataset: {df_summary['rows']} rows, {df_summary['columns']} columns
        Columns: {df_summary['column_names']}
        
        Key Statistics: {analysis.get('descriptive_stats', {})}
        Correlations: {analysis.get('correlations', [])}
        Trends: {analysis.get('trends', [])}

        Provide:
        1. Top 3 key findings
        2. One actionable recommendation
        3. One question the user should explore further
        
        Keep it concise and business-friendly.
        """

        try:
            # NOTE(review): ChatCompletion looks like the pre-1.0 openai SDK
            # interface; with a newer SDK this call is expected to raise and
            # take the template fallback below - confirm the pinned version.
            response = self.openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                max_tokens=300
            )
            return [response.choices[0].message.content]
        except Exception as e:
            # Best-effort: any API failure degrades to template insights.
            print(f"OpenAI error: {e}")
            return self._insights_from_summary(df_summary, analysis)