# Source: Hugging Face Space "pcreem/sdgToPic" (status: Running; file size: 34,107 bytes)
"""
優化的 AI 報告生成引擎 - 支援 SDG 儀表板應用程式
Enhanced AI Report Generation Engine for SDG Dashboard Application

作者: Kilo Code
版本: 2025.1
功能: 
- 支援 4 種報告類型
- 多語言支援 (繁體中文/English)  
- 可調節報告長度
- 智能數據分析整合
- 報告模板系統
- 錯誤處理和容錯機制
- 性能優化和快取
- API 配置管理
- 報告品質保證
"""

import json
import time
import hashlib
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass, asdict
from enum import Enum
import pandas as pd
import re
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
from src.config_manager import get_config

# Configure the root logger at WARNING so routine output stays quiet
logging.basicConfig(level=logging.WARNING)

# Pin the HTTP / LLM client loggers to WARNING as well so their per-request
# messages do not clutter the UI
for _noisy_logger in ('httpx', 'openai'):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)

# Module-level logger used throughout this file
logger = logging.getLogger(__name__)

class ReportType(Enum):
    """Enumeration of the report types the engine can produce."""
    SUMMARY = "summary"  # Overall national progress summary (for the general public)
    PROFESSIONAL = "professional"  # Professional report for SDG practitioners (data citations, priority areas)
    POLICY = "policy"  # Policy-brief version (bullet-point style, for decision makers)
    FORECAST = "forecast"  # 2030 outlook and recommendations (on-track forecast + call to action)

class ReportLanguage(Enum):
    """Enumeration of report output languages.

    Each value is the language name that gets interpolated into the prompts.
    """
    TRADITIONAL_CHINESE = "繁體中文"
    CHINESE = "Chinese"
    ENGLISH = "English"
    JAPANESE = "Japanese"

class ReportLength(Enum):
    """Enumeration of target report lengths; each value is the approximate size requested."""
    SHORT = 300  # Short (about 300 words/characters)
    STANDARD = 800  # Standard (about 800 words/characters)
    DETAILED = 1500  # Detailed (about 1500 words/characters)

@dataclass
class ReportMetadata:
    """Metadata describing one report request.

    The first six fields are required; the remaining fields are optional
    context values that default to ``None``.
    """
    country: str
    start_year: int
    end_year: int
    report_type: ReportType
    language: ReportLanguage
    length: ReportLength
    latest_score: Optional[float] = None
    rank: Optional[int] = None
    total_countries: Optional[int] = None
    global_avg: Optional[float] = None
    generated_at: Optional[str] = None
    
    def to_dict(self) -> Dict[str, Any]:
        """Return the metadata as a plain dictionary (via ``dataclasses.asdict``)."""
        return asdict(self)

@dataclass
class QualityMetrics:
    """Quality indicators for a generated report."""
    data_coverage: float  # Data coverage (0-1)
    logical_consistency: float  # Logical consistency (0-1)
    language_quality: float  # Language quality (0-1)
    format_compliance: float  # Format compliance (0-1)
    overall_score: float  # Overall score (0-1)

class ReportTemplate:
    """Report template system.

    ``TEMPLATES`` maps each report type to its ordered section titles
    ("structure") plus the style attributes "tone", "focus" and
    "data_density". The accessors return copies so that callers cannot
    accidentally modify the shared class-level templates.
    """

    TEMPLATES = {
        ReportType.SUMMARY: {
            "structure": [
                "執行摘要",
                "整體表現概覽",
                "主要成就",
                "挑戰領域",
                "未來展望"
            ],
            "tone": "friendly_expert",
            "focus": "public_communication",
            "data_density": "medium"
        },
        ReportType.PROFESSIONAL: {
            "structure": [
                "專業執行摘要",
                "數據分析與方法論",
                "詳細績效評估",
                "優先領域識別",
                "國際比較分析",
                "技術建議與最佳實踐"
            ],
            "tone": "technical_expert",
            "focus": "professional_analysis",
            "data_density": "high"
        },
        ReportType.POLICY: {
            "structure": [
                "政策執行摘要",
                "關鍵績效指標",
                "戰略重點領域",
                "政策建議",
                "實施路線圖"
            ],
            "tone": "policy_decision",
            "focus": "decision_making",
            "data_density": "medium"
        },
        ReportType.FORECAST: {
            "structure": [
                "2030 年展望執行摘要",
                "當前軌跡分析",
                "情境建模",
                "關鍵風險與機遇",
                "2030 年目標達成評估",
                "加速行動建議"
            ],
            "tone": "strategic_analyst",
            "focus": "future_planning",
            "data_density": "high"
        }
    }

    @classmethod
    def get_structure(cls, report_type: ReportType) -> List[str]:
        """Return the ordered section titles for *report_type*.

        A new list is returned on every call, so mutating the result does not
        affect the shared template.

        Raises:
            KeyError: if *report_type* has no entry in ``TEMPLATES``.
        """
        return list(cls.TEMPLATES[report_type]["structure"])

    @classmethod
    def get_style_guide(cls, report_type: ReportType) -> Dict[str, Any]:
        """Return the full template entry for *report_type*.

        The result holds the "structure" list as well as the string-valued
        "tone", "focus" and "data_density" attributes. It is a copy (including
        the nested list), so callers may modify it freely.

        Raises:
            KeyError: if *report_type* has no entry in ``TEMPLATES``.
        """
        template = cls.TEMPLATES[report_type]
        guide = dict(template)
        guide["structure"] = list(template["structure"])
        return guide

class CacheManager:
    """In-memory report cache with time-based expiry.

    Entries are stored in ``self.cache`` as ``key -> (content, timestamp)`` and
    every access to that dict made through this class happens under
    ``self.lock``.
    """

    # Metadata fields that differ on every request without changing what the
    # report should contain. They are left out of the cache key; otherwise the
    # per-call ``generated_at`` timestamp would make every key unique and the
    # cache would practically never hit.
    _VOLATILE_FIELDS = ("generated_at",)

    def __init__(self, ttl_hours: int = 24):
        """Create an empty cache whose entries live for *ttl_hours* hours."""
        self.cache = {}
        self.ttl_seconds = ttl_hours * 3600
        self.lock = threading.Lock()

    def _generate_key(self, meta: "ReportMetadata", data_hash: str) -> str:
        """Build the cache key from the stable metadata fields and the data hash."""
        stable_fields = {
            name: value
            for name, value in meta.to_dict().items()
            if name not in self._VOLATILE_FIELDS
        }
        content = f"{stable_fields}_{data_hash}"
        # MD5 is used only to get a compact dictionary key, not for security
        return hashlib.md5(content.encode()).hexdigest()

    def _is_expired(self, timestamp: float) -> bool:
        """Return True when an entry stored at *timestamp* is older than the TTL."""
        return time.time() - timestamp > self.ttl_seconds

    def get(self, meta: "ReportMetadata", data_hash: str) -> Optional[str]:
        """Return the cached content for this request, or None.

        An entry that has expired is removed as a side effect of the lookup.
        """
        with self.lock:
            key = self._generate_key(meta, data_hash)
            entry = self.cache.get(key)
            if entry is None:
                return None
            content, timestamp = entry
            if self._is_expired(timestamp):
                del self.cache[key]
                return None
            return content

    def set(self, meta: "ReportMetadata", data_hash: str, content: str):
        """Store *content* for this request, stamped with the current time."""
        with self.lock:
            key = self._generate_key(meta, data_hash)
            self.cache[key] = (content, time.time())

    def clear_expired(self):
        """Remove every entry whose age exceeds the TTL."""
        with self.lock:
            # Collect first: deleting while iterating the dict would raise
            expired_keys = [
                key for key, (_, timestamp) in self.cache.items()
                if self._is_expired(timestamp)
            ]
            for key in expired_keys:
                del self.cache[key]

class DataAnalyzer:
    """Derives trend and ranking statistics from the SDG score DataFrame.

    The methods read the columns ``country``, ``year`` and ``sdg_index_score``
    and, when present, ``goal_1_score`` ... ``goal_17_score``.
    """

    @staticmethod
    def _rate_per_year(change: float, years: pd.Series) -> float:
        """Return *change* divided by the number of years spanned by *years*.

        *years* must be in chronological order. The divisor is the difference
        between the last and the first year, so gaps in the series do not
        inflate the rate. Returns 0.0 when the span is not positive.
        """
        elapsed = float(years.iloc[-1] - years.iloc[0])
        return change / elapsed if elapsed > 0 else 0.0

    @staticmethod
    def analyze_trends(df: pd.DataFrame, country: str) -> Dict[str, Any]:
        """Summarise how *country*'s scores changed over time.

        Args:
            df: score table with the columns described on the class.
            country: value to match in the ``country`` column.

        Returns:
            A dict with ``overall_trend``, ``goal_trends``, ``data_points`` and
            ``year_range``; or ``{"error": <message>}`` when the country has no
            rows or any exception occurs (the exception is logged, not raised).
        """
        try:
            country_data = df[df['country'] == country].sort_values('year')

            if country_data.empty:
                return {"error": "No data available"}

            # Overall index: first vs. last observation
            scores = country_data['sdg_index_score']
            latest_score = float(scores.iloc[-1])
            earliest_score = float(scores.iloc[0])
            total_change = latest_score - earliest_score

            # Average change per elapsed calendar year
            annual_change = DataAnalyzer._rate_per_year(total_change, country_data['year'])

            # Recent trend: change across the last (up to) five rows
            recent_scores = scores.tail(5)
            if len(recent_scores) >= 2:
                recent_trend = float(recent_scores.iloc[-1] - recent_scores.iloc[0])
            else:
                recent_trend = 0

            if recent_trend > 0:
                status = 'improving'
            elif recent_trend < 0:
                status = 'declining'
            else:
                status = 'stable'

            # Per-goal trends, using only the rows where the goal has a value
            goal_trends = {}
            for i in range(1, 18):
                goal_col = f'goal_{i}_score'
                if goal_col not in country_data.columns:
                    continue
                observed = country_data[country_data[goal_col].notna()]
                if len(observed) < 2:
                    continue
                goal_values = observed[goal_col]
                goal_change = float(goal_values.iloc[-1] - goal_values.iloc[0])
                goal_trends[f'goal_{i}'] = {
                    'latest': float(goal_values.iloc[-1]),
                    'change': goal_change,
                    'annual_rate': float(DataAnalyzer._rate_per_year(goal_change, observed['year']))
                }

            return {
                'overall_trend': {
                    'latest_score': latest_score,
                    'total_change': total_change,
                    'annual_change': annual_change,
                    'recent_trend': recent_trend,
                    'status': status
                },
                'goal_trends': goal_trends,
                'data_points': len(country_data),
                'year_range': f"{int(country_data['year'].min())}-{int(country_data['year'].max())}"
            }

        except Exception as e:
            # Best effort: the caller embeds the result in a prompt, so report
            # the failure as data instead of raising
            logger.error(f"Error in trend analysis: {str(e)}")
            return {"error": str(e)}

    @staticmethod
    def get_regional_comparison(df: pd.DataFrame, country: str, region: str = "global") -> Dict[str, Any]:
        """Compare *country* with all countries in the most recent year of *df*.

        Args:
            df: score table with the columns described on the class.
            country: value to match in the ``country`` column.
            region: accepted for interface compatibility; currently not used,
                the comparison is always made against every row of the latest year.

        Returns:
            A dict with ``ranking``, ``global_context`` and ``goal_comparison``;
            or ``{"error": <message>}`` when the country has no row in the
            latest year or any exception occurs (logged, not raised).
        """
        try:
            latest_data = df[df['year'] == df['year'].max()]
            country_rows = latest_data[latest_data['country'] == country]

            country_scores = country_rows['sdg_index_score'].values
            if len(country_scores) == 0:
                return {"error": "Country not found"}

            country_score = float(country_scores[0])

            # Rank = number of strictly higher scores + 1
            ranking = int((latest_data['sdg_index_score'] > country_score).sum() + 1)
            total_countries = len(latest_data)

            # Rank expressed as a share of all rows (a smaller value means a better rank).
            # NOTE(review): the per-goal 'percentile' below counts the share of rows
            # scoring lower, i.e. it points the opposite way - confirm this is intended.
            percentile = float((ranking / total_countries) * 100)

            quartiles = latest_data['sdg_index_score'].quantile([0.25, 0.5, 0.75])

            # Per-goal comparison against the mean of the latest year
            goal_comparison = {}
            for i in range(1, 18):
                goal_col = f'goal_{i}_score'
                if goal_col not in latest_data.columns:
                    continue
                country_goal = country_rows[goal_col].values
                if len(country_goal) == 0 or pd.isna(country_goal[0]):
                    continue
                goal_score = country_goal[0]
                global_avg = float(latest_data[goal_col].mean())
                goal_comparison[f'goal_{i}'] = {
                    'country_score': float(goal_score),
                    'global_average': global_avg,
                    'difference': float(goal_score - global_avg),
                    'percentile': float(((latest_data[goal_col] < goal_score).sum() / len(latest_data)) * 100)
                }

            return {
                'ranking': {
                    'global_rank': ranking,
                    'total_countries': total_countries,
                    'percentile': percentile,
                    'score': country_score
                },
                'global_context': {
                    'global_average': float(latest_data['sdg_index_score'].mean()),
                    'global_median': float(quartiles[0.5]),
                    'top_quartile': float(quartiles[0.75]),
                    'bottom_quartile': float(quartiles[0.25])
                },
                'goal_comparison': goal_comparison
            }

        except Exception as e:
            logger.error(f"Error in regional comparison: {str(e)}")
            return {"error": str(e)}

class QualityAssurance:
    """Heuristic quality checks for a generated report.

    Every check returns a score between 0 and 1. Checks never raise: on an
    unexpected error they log it and return a fixed fallback score.
    """

    @staticmethod
    def validate_data_coverage(df: pd.DataFrame, meta: "ReportMetadata") -> float:
        """Score how completely *df* covers the requested country and period.

        The score is the mean of two ratios, each within 0..1:

        * year coverage - distinct years inside ``start_year..end_year`` that
          have a row for the country, divided by the length of that range;
        * goal coverage - how many of the 17 ``goal_<i>_score`` columns exist
          and hold at least one non-null value for the country.

        Returns 0.0 when the country has no rows or when an error occurs.
        """
        try:
            country_data = df[df['country'] == meta.country]
            if country_data.empty:
                return 0.0

            # Year coverage: only rows inside the requested range count, and the
            # ratio is capped on its own so surplus rows cannot hide missing goals
            expected_years = meta.end_year - meta.start_year + 1
            if expected_years <= 0:
                year_coverage = 0.0
            else:
                if 'year' in country_data.columns:
                    in_range = country_data['year'].between(meta.start_year, meta.end_year)
                    covered_years = country_data.loc[in_range, 'year'].nunique()
                else:
                    # Without a year column the row count is the only available proxy
                    covered_years = len(country_data)
                year_coverage = min(covered_years / expected_years, 1.0)

            # Goal coverage: a column that is entirely empty carries no data
            goal_columns = [f'goal_{i}_score' for i in range(1, 18)]
            available_goals = sum(
                1 for col in goal_columns
                if col in country_data.columns and country_data[col].notna().any()
            )
            goal_coverage = available_goals / 17

            overall_coverage = (year_coverage + goal_coverage) / 2
            return min(overall_coverage, 1.0)

        except Exception as e:
            logger.error(f"Error validating data coverage: {str(e)}")
            return 0.0

    @staticmethod
    def check_logical_consistency(report_content: str, meta: "ReportMetadata") -> float:
        """Score whether the report text agrees with the request metadata.

        Starting from 1.0, points are deducted when the country name is absent
        (0.3), when neither the year range nor the end year appears (0.2), when
        the integer part of ``latest_score`` is absent (0.2), and when fewer
        than 60% of the template's section titles are found (0.3).

        Returns 0.5 when an error occurs.
        """
        try:
            score = 1.0

            # Country name must be mentioned
            if meta.country not in report_content:
                score -= 0.3

            # Either the full range or at least the end year must be mentioned
            year_range_str = f"{meta.start_year}-{meta.end_year}"
            if year_range_str not in report_content and str(meta.end_year) not in report_content:
                score -= 0.2

            # Latest score must be quoted; only the integer part is matched so
            # that differently rounded renderings still count. A score of 0 is
            # a real value, so test against None rather than truthiness.
            if meta.latest_score is not None:
                score_str = str(float(meta.latest_score)).split('.')[0]
                if score_str not in report_content:
                    score -= 0.2

            # At least 60% of the expected section titles must be present
            expected_sections = ReportTemplate.get_structure(meta.report_type)
            found_sections = sum(1 for section in expected_sections if section in report_content)
            if found_sections < len(expected_sections) * 0.6:
                score -= 0.3

            return max(score, 0.0)

        except Exception as e:
            logger.error(f"Error checking logical consistency: {str(e)}")
            return 0.5

    @staticmethod
    def assess_language_quality(report_content: str, language: "ReportLanguage",
                                length: Optional["ReportLength"] = None) -> float:
        """Score length, heading presence and language of the report text.

        Args:
            report_content: the report text.
            language: requested report language.
            length: requested report length; when omitted the STANDARD minimum
                is used, which matches the previous two-argument behaviour.

        Text size is measured as whitespace-separated tokens plus CJK
        characters. For text without CJK characters this equals the plain token
        count; for Chinese/Japanese text, which is written without spaces, it
        approximates the character count (a token containing CJK characters is
        counted once in addition to its characters).

        Returns 0.5 when an error occurs.
        """
        try:
            score = 1.0

            # Minimum size per requested length
            min_length = {
                ReportLength.SHORT: 200,
                ReportLength.STANDARD: 600,
                ReportLength.DETAILED: 1200
            }
            expected_units = min_length.get(length, min_length[ReportLength.STANDARD])

            word_count = len(report_content.split())
            # Hiragana/katakana and CJK unified ideographs
            cjk_chars = len(re.findall(r'[\u3040-\u30ff\u4e00-\u9fff]', report_content))
            text_units = word_count + cjk_chars

            if text_units < expected_units * 0.7:
                score -= 0.3

            # At least one line must start with a Markdown heading marker
            if not re.search(r'^#', report_content, re.MULTILINE):
                score -= 0.2

            # A Traditional Chinese report should consist of at least 30% Chinese characters
            if language == ReportLanguage.TRADITIONAL_CHINESE:
                chinese_chars = len(re.findall(r'[\u4e00-\u9fff]', report_content))
                if chinese_chars < text_units * 0.3:
                    score -= 0.2

            return max(score, 0.0)

        except Exception as e:
            logger.error(f"Error assessing language quality: {str(e)}")
            return 0.5

    @staticmethod
    def check_format_compliance(report_content: str, report_type: "ReportType") -> float:
        """Score basic formatting of the report text.

        Starting from 1.0, points are deducted when no ``#`` character occurs
        (0.3), when fewer than two of the key terms are present (0.3), and when
        fewer than two of the data-reference patterns match (0.2).
        *report_type* is accepted for interface compatibility and does not
        influence the score.

        Returns 0.5 when an error occurs.
        """
        try:
            score = 1.0

            # Basic Markdown: some heading marker must be present
            if not re.search(r'#+', report_content):
                score -= 0.3

            # Key terms (summary / wrap-up / recommendation / conclusion)
            required_elements = ['摘要', '總結', '建議', '結論']
            found_elements = sum(1 for element in required_elements if element in report_content)

            if found_elements < 2:
                score -= 0.3

            # Data references: decimals, percentages, "rank", "score"
            data_patterns = [r'\d+\.\d+', r'\d+%', r'排名', r'分數']
            found_patterns = sum(1 for pattern in data_patterns if re.search(pattern, report_content))

            if found_patterns < 2:
                score -= 0.2

            return max(score, 0.0)

        except Exception as e:
            logger.error(f"Error checking format compliance: {str(e)}")
            return 0.5

class EnhancedAIReportEngine:
    """Enhanced AI report generation engine.

    Builds prompts from the analysed SDG data, requests the report from an
    OpenAI-compatible chat endpoint (or produces a mock report when no client
    is available) and caches finished reports.
    """

    def __init__(self, base_url: Optional[str] = None, api_key: Optional[str] = None):
        """
        Initialise the AI report generation engine.

        Args:
            base_url: API base URL (falls back to the configured value when not given)
            api_key: API key (falls back to the configured value when not given)
        """
        # Get configuration values
        config = get_config()
        self.base_url = base_url or config.get('ai_engine.base_url')
        self.api_key = api_key or config.get('ai_engine.api_key')
        self.model = config.get('ai_engine.default_model', 'azure/gpt-4o')

        # Cache TTL from config
        cache_ttl = config.get('ai_engine.cache_ttl_hours', 24)
        self.cache_manager = CacheManager(ttl_hours=cache_ttl)
        self.data_analyzer = DataAnalyzer()
        self.quality_assurance = QualityAssurance()

        # Try to set up the OpenAI client; without it the engine runs in mock mode
        try:
            if self.api_key:  # Azure doesn't strictly need base_url if it's in the key or handled by LiteLLM environment
                from openai import OpenAI
                self.client = OpenAI(base_url=self.base_url, api_key=self.api_key)
                self.available = True
            else:
                self.client = None
                self.available = False
                logger.info("AI engine initialized in mock mode (no credentials)")
        except ImportError:
            logger.warning("OpenAI client not available. Using mock mode.")
            self.client = None
            self.available = False

    def _get_data_hash(self, df: pd.DataFrame) -> str:
        """Return an MD5 hex digest identifying the contents of *df*.

        All rows of the key columns that are present are hashed, so a change
        anywhere in the data yields a different digest and a cached report is
        not reused for modified data. If none of the key columns exist the
        digest is derived from the row count. On error a time-based digest is
        returned, which effectively disables caching for that call.
        """
        try:
            key_columns = ['country', 'year', 'sdg_index_score']
            available_columns = [col for col in key_columns if col in df.columns]

            if not available_columns:
                return hashlib.md5(str(len(df)).encode()).hexdigest()

            # One vectorised 64-bit hash per row; cheap even for large frames
            row_hashes = pd.util.hash_pandas_object(df[available_columns], index=False)
            digest = hashlib.md5(','.join(available_columns).encode())
            digest.update(row_hashes.to_numpy().tobytes())
            return digest.hexdigest()
        except Exception as e:
            logger.error(f"Error generating data hash: {str(e)}")
            return hashlib.md5(str(time.time()).encode()).hexdigest()

    def _prepare_prompt(self, df: pd.DataFrame, meta: "ReportMetadata") -> Tuple[str, str]:
        """Build the (system_prompt, user_prompt) pair for one report request.

        The user prompt embeds the trend analysis and the comparison as JSON.
        Any exception is logged and re-raised.
        """
        try:
            # Data analysis
            trends = self.data_analyzer.analyze_trends(df, meta.country)
            comparison = self.data_analyzer.get_regional_comparison(df, meta.country)

            def or_na(value):
                # Only a missing value is shown as N/A; a legitimate 0 is kept
                return 'N/A' if value is None else value

            # System prompt
            system_prompt = f"""
您是聯合國永續發展解決方案網路（SDSN）的首席環境經濟學家與頂級 AI 策略專家。
您的任務是為 {meta.country} 撰寫一份數據驅動且具備戰略前瞻性的 SDG 評估報告。

報告要求：
1. **深度與專業度**：分析總體得分，並針對各項目標進行深入探討。
2. **數據驅動**：必須廣泛引用提供的趨勢數據、排名以及與全球平均的對比進行量化分析。
3. **專業口吻**：使用權威性的政策分析術語（如：Decoupling, Circular Economy, Carbon Neutrality 等）。
4. **結構化**：使用 Markdown 標題、清單、表格。**長度必須與要求相符，不可敷衍。**
5. **長度要求**：這是一份約 {meta.length.value} 字/字符的報告。
   - 如果是 1500 字「Detailed」報告，請務必提供極具深度的細節分析，涵蓋多個學科視角。
   - 如果是 300 字「Short」報告，請保持極度精煉。
6. **語言**：完全使用 {meta.language.value} 撰寫。
7. **結尾標記**：請在報告最後一行加上「【報告結束】」以示完整。

您的分析應根據報告類型 ({meta.report_type.value}) 提供相應的深度，特別是針對最新的 SDR 2025 數據進行解讀。
"""

            # User prompt
            user_prompt = f"""
請為 {meta.country} 生成 {meta.report_type.value} 類型的 SDG 評估報告。

## 基本信息
- 國家：{meta.country}
- 數據年份：{meta.start_year} - {meta.end_year}
- 最新得分：{or_na(meta.latest_score)}
- 全球排名：{or_na(meta.rank)} / {or_na(meta.total_countries)}
- 全球平均：{or_na(meta.global_avg)}

## 數據分析結果
### 趨勢分析
{json.dumps(trends, indent=2, ensure_ascii=False)}

### 區域比較
{json.dumps(comparison, indent=2, ensure_ascii=False)}

## 報告要求
- 結構：{', '.join(ReportTemplate.get_structure(meta.report_type))}
- 語言：{meta.language.value}
- 目標長度：**嚴格限制在 {meta.length.value} 字以內**。
- 如果是 300 字版本，請合併部分章節，保持簡潔明瞭，避免冗長描述。

請開始生成報告，並以「【報告結束】」結尾。
"""

            return system_prompt, user_prompt

        except Exception as e:
            logger.error(f"Error preparing prompt: {str(e)}")
            raise

    def _generate_with_retry(self, system_prompt: str, user_prompt: str, meta: "ReportMetadata", max_retries: int = 3) -> str:
        """Generate the report text, retrying failed attempts with exponential backoff.

        Returns:
            The generated text; in mock mode the mock report. When every
            attempt fails, a message starting with
            "Failed to generate report after" is returned instead of raising,
            and the caller is expected to detect it.
        """
        last_error = None

        for attempt in range(max_retries):
            try:
                if not self.available:
                    # Mock mode
                    return self._generate_mock_report(system_prompt, user_prompt, meta)

                # Scale max_tokens with the target length (author's estimate: one
                # Chinese character costs roughly 2.5-3 tokens) and leave generous
                # headroom so the report is not cut off
                dynamic_max_tokens = min(16384, max(4096, meta.length.value * 6))

                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt}
                    ],
                    temperature=0.7,
                    max_tokens=dynamic_max_tokens,
                    timeout=300  # Increased timeout (5 min) for detailed reports
                )

                content = response.choices[0].message.content

                # Treat an empty or very short answer as a failed attempt
                if content and len(content.strip()) > 50:
                    return content
                raise ValueError("Generated content is too short or empty")

            except Exception as e:
                last_error = e
                logger.warning(f"Attempt {attempt + 1} failed: {str(e)}")
                if attempt < max_retries - 1:
                    time.sleep(2 ** attempt)  # exponential backoff

        # Every attempt failed
        error_msg = f"Failed to generate report after {max_retries} attempts. Last error: {str(last_error)}"
        logger.error(error_msg)
        return error_msg  # Return the error string to be handled by the caller

    def _generate_mock_report(self, system_prompt: str, user_prompt: str, meta: "ReportMetadata") -> str:
        """Return a static mock report (used when no API client is available).

        The prompts are accepted for signature parity with real generation and
        are not used.
        """
        # The line is labelled "gap from the global average", so show the
        # difference rather than the average itself
        try:
            gap = f"{float(meta.latest_score) - float(meta.global_avg):+.1f}"
        except (TypeError, ValueError):
            gap = "N/A"

        return f"""
# {meta.country} SDG 評估報告

## 執行摘要

本報告基於最新數據對 {meta.country} 的永續發展目標（SDG）表現進行了全面分析。

## 主要發現

- 當前 SDG 指數得分：{meta.latest_score}
- 全球排名：{meta.rank} / {meta.total_countries}
- 與全球平均的差距：{gap}

## 建議

1. 繼續加強在環境保護領域的努力
2. 提高教育和健康指標
3. 加強國際合作

*注意：此為模擬報告，實際 API 調用時將生成真實報告*
"""

    def _generate_fallback_report(self, error_msg: str, meta: Optional["ReportMetadata"] = None) -> str:
        """Return the report shown to the user when generation failed.

        Args:
            error_msg: error text to display.
            meta: request metadata when available; its country, period and
                latest score are listed, with "N/A" for anything missing.
        """
        country = meta.country if meta else "N/A"
        years = f"{meta.start_year} - {meta.end_year}" if meta else "N/A"
        score = meta.latest_score if meta and meta.latest_score is not None else "N/A"

        return f"""
# SDG 評估報告生成失敗

很抱歉，在生成報告時遇到了技術問題。

## 錯誤詳情
{error_msg}

## 建議解決方案
1. 檢查 API 連接狀態
2. 確認 API 密鑰有效性
3. 稍後重試

## 基本數據摘要
- 國家：{country}
- 數據年份：{years}
- 最新得分：{score}

---
*本報告由 SDG AI 引擎自動生成*
"""

    def _assess_quality(self, report_content: str, meta: "ReportMetadata", df: pd.DataFrame) -> "QualityMetrics":
        """Run all quality checks and return their scores plus the plain average.

        When an unexpected error occurs every metric is reported as 0.5.
        """
        try:
            data_coverage = self.quality_assurance.validate_data_coverage(df, meta)
            logical_consistency = self.quality_assurance.check_logical_consistency(report_content, meta)
            language_quality = self.quality_assurance.assess_language_quality(report_content, meta.language)
            format_compliance = self.quality_assurance.check_format_compliance(report_content, meta.report_type)

            # Overall score: unweighted mean of the four checks
            overall_score = (data_coverage + logical_consistency + language_quality + format_compliance) / 4

            return QualityMetrics(
                data_coverage=data_coverage,
                logical_consistency=logical_consistency,
                language_quality=language_quality,
                format_compliance=format_compliance,
                overall_score=overall_score
            )

        except Exception as e:
            logger.error(f"Error assessing quality: {str(e)}")
            return QualityMetrics(0.5, 0.5, 0.5, 0.5, 0.5)

    def generate_report(self, df: pd.DataFrame, meta_info: Dict[str, Any], language: str = "繁體中文") -> str:
        """
        Generate an SDG assessment report.

        Args:
            df: DataFrame containing the SDG data
            meta_info: report metadata; read keys are country, start_year,
                end_year, report_type, length, latest_score, rank,
                total_countries and global_avg
            language: report language (must be a ReportLanguage value)

        Returns:
            The report text. This method does not raise: on any failure a
            fallback report describing the error is returned instead.
        """
        meta = None  # stays None if building the metadata itself fails
        try:
            # Validate input
            if df is None or df.empty:
                raise ValueError("DataFrame is empty or None")

            # Build the metadata object
            meta = ReportMetadata(
                country=meta_info.get('country', 'Unknown'),
                start_year=int(meta_info.get('start_year', 2020)),
                end_year=int(meta_info.get('end_year', 2025)),
                report_type=ReportType(meta_info.get('report_type', 'summary')),
                language=ReportLanguage(language),
                length=ReportLength(int(meta_info.get('length', 800))),
                latest_score=meta_info.get('latest_score'),
                rank=meta_info.get('rank'),
                total_countries=meta_info.get('total_countries'),
                global_avg=meta_info.get('global_avg'),
                generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            )

            logger.info(f"Generating {meta.report_type.value} report for {meta.country}")

            # Check the cache
            data_hash = self._get_data_hash(df)
            cached_report = self.cache_manager.get(meta, data_hash)
            if cached_report:
                logger.info("Returning cached report")
                return cached_report

            # Generate the report
            system_prompt, user_prompt = self._prepare_prompt(df, meta)
            report_content = self._generate_with_retry(system_prompt, user_prompt, meta)

            # _generate_with_retry signals total failure with a message that
            # starts with this prefix; matching the prefix (not any occurrence)
            # avoids misclassifying a real report that merely contains the phrase
            if report_content.startswith("Failed to generate report after"):
                return self._generate_fallback_report(report_content, meta)

            # Completeness check: the prompts ask the model to end with this marker
            end_marker = "【報告結束】"
            if end_marker not in report_content:
                logger.warning(f"Report for {meta.country} may be truncated (marker not found)")
                # Only warn the reader when the text is far longer than requested
                # (3x the target), to avoid false alarms on reports that simply
                # omitted the marker
                if len(report_content) > meta.length.value * 3:
                    truncation_notice = "\n\n> ⚠️ **（註：因模型輸出長度限制，報告內容可能未完整顯示，請選擇較短報告類型或聯繫管理員）**"
                    report_content += truncation_notice
            else:
                # Strip the marker so it does not show up in the final report
                report_content = report_content.replace(end_marker, "").strip()

            # Cache the report
            self.cache_manager.set(meta, data_hash, report_content)

            return report_content

        except Exception as e:
            logger.error(f"Error in generate_report: {str(e)}")
            # Pass along whatever metadata was built so the fallback can show it
            return self._generate_fallback_report(str(e), meta)

    def get_available_models(self) -> Dict[str, str]:
        """Return a static mapping of model identifier to display label."""
        return {
            # High-performance models
            "gemini-2.5-flash": "Gemini 2.5 Flash (快速可靠)",
            "gpt-4o-mini": "GPT-4o Mini (成本效益)",
            "claude-3.5-sonnet": "Claude 3.5 Sonnet (細緻分析)",

            # Leading intelligence
            "gemini-2.0-pro": "Gemini 2.0 Pro (高智能)",
            "gpt-4o": "GPT-4o (標準選擇)",
            "claude-3-opus": "Claude 3 Opus (高級推理)",

            # Specialised use
            "gemini-1.5-pro": "Gemini 1.5 Pro (長上下文)",
            "gpt-4-turbo": "GPT-4 Turbo (平衡性能)",
            "claude-3-haiku": "Claude 3 Haiku (快速處理)",

            # Open-source options
            "llama-3.1-70b": "Llama 3.1 70B (開源)",
            "llama-3.1-8b": "Llama 3.1 8B (輕量級)",
            "mistral-large": "Mistral Large (歐洲模型)",

            # Reasoning models
            "o1-preview": "O1 Preview (深度推理)",
            "o1-mini": "O1 Mini (高效推理)",

            # Other reliable options
            "qwen-turbo": "Qwen Turbo (阿里巴巴)",
            "deepseek-chat": "DeepSeek Chat (高級邏輯)"
        }

    def clear_cache(self):
        """Evict expired cache entries (entries still within the TTL are kept)."""
        self.cache_manager.clear_expired()
        logger.info("Cache cleared")

    def get_cache_stats(self) -> Dict[str, Any]:
        """Return entry counts (total / expired / active) and the TTL in hours."""
        manager = self.cache_manager
        # Hold the cache lock while iterating: worker threads of
        # batch_generate_reports may insert or delete entries concurrently
        with manager.lock:
            total_entries = len(manager.cache)
            expired_entries = sum(
                1 for _, timestamp in manager.cache.values()
                if manager._is_expired(timestamp)
            )

        return {
            "total_entries": total_entries,
            "expired_entries": expired_entries,
            "active_entries": total_entries - expired_entries,
            "ttl_hours": manager.ttl_seconds / 3600
        }

    def batch_generate_reports(self, report_configs: List[Dict[str, Any]], df: pd.DataFrame, max_workers: int = 3) -> List[Tuple[str, str]]:
        """
        Generate several reports in parallel.

        Args:
            report_configs: list of report configurations; each is passed to
                generate_report as meta_info, and its optional 'language' key
                selects the report language
            df: data DataFrame
            max_workers: maximum number of worker threads

        Returns:
            A list of (report type, report content) tuples in the same order
            as report_configs, regardless of which report finishes first.
        """
        configs = list(report_configs)
        results = [None] * len(configs)

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Submit every task, remembering its position in the input
            future_to_index = {
                executor.submit(self.generate_report, df, config, config.get('language', '繁體中文')): index
                for index, config in enumerate(configs)
            }

            # Collect results as they finish, storing each at its input position
            for future in as_completed(future_to_index):
                index = future_to_index[future]
                config = configs[index]
                report_type = config.get('report_type', 'unknown')
                try:
                    results[index] = (report_type, future.result())
                except Exception as e:
                    logger.error(f"Batch generation failed for config {config}: {str(e)}")
                    results[index] = (report_type, f"Generation failed: {str(e)}")

        return results

# Backward compatibility: keep the engine available under its original class name
SDG_AI_Report_Engine = EnhancedAIReportEngine

# Usage example
if __name__ == "__main__":
    # Build an engine with placeholder connection settings
    engine = EnhancedAIReportEngine(base_url="your_base_url", api_key="your_api_key")

    # Example request metadata; its keys match those read by generate_report /
    # batch_generate_reports. It is only defined here, not submitted.
    sample_meta = dict(
        country='Taiwan',
        start_year=2020,
        end_year=2025,
        report_type='summary',
        language='繁體中文',
        length=800,
        latest_score=75.2,
        rank=15,
        total_countries=166,
        global_avg=68.5,
    )

    print("Enhanced AI Report Engine initialized successfully!")
    model_catalogue = engine.get_available_models()
    print(f"Available models: {len(model_catalogue)}")
    cache_stats = engine.get_cache_stats()
    print(f"Cache stats: {cache_stats}")