|
|
""" |
|
|
優化的 AI 報告生成引擎 - 支援 SDG 儀表板應用程式 |
|
|
Enhanced AI Report Generation Engine for SDG Dashboard Application |
|
|
|
|
|
作者: Kilo Code |
|
|
版本: 2025.1 |
|
|
功能: |
|
|
- 支援 4 種報告類型 |
|
|
- 多語言支援 (繁體中文/English) |
|
|
- 可調節報告長度 |
|
|
- 智能數據分析整合 |
|
|
- 報告模板系統 |
|
|
- 錯誤處理和容錯機制 |
|
|
- 性能優化和快取 |
|
|
- API 配置管理 |
|
|
- 報告品質保證 |
|
|
""" |
|
|
|
|
|
import json |
|
|
import time |
|
|
import hashlib |
|
|
import logging |
|
|
from datetime import datetime, timedelta |
|
|
from typing import Dict, List, Any, Optional, Tuple |
|
|
from dataclasses import dataclass, asdict |
|
|
from enum import Enum |
|
|
import pandas as pd |
|
|
import re |
|
|
import os |
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed |
|
|
import threading |
|
|
from src.config_manager import get_config |
|
|
|
|
|
|
|
|
# Configure the root logger at import time so only WARNING and above is emitted.
logging.basicConfig(level=logging.WARNING)

# Hold the HTTP client and OpenAI SDK loggers at WARNING as well.
for _quiet_logger_name in ('httpx', 'openai'):
    logging.getLogger(_quiet_logger_name).setLevel(logging.WARNING)
del _quiet_logger_name

# Module-level logger used by every class in this file.
logger = logging.getLogger(__name__)
|
|
|
|
|
class ReportType(Enum):
    """Kinds of report the engine can produce.

    Each member's value is the lowercase string used to look the member up
    via ``ReportType(value)``.
    """

    # General-audience overview.
    SUMMARY = "summary"
    # Technical / analytical report.
    PROFESSIONAL = "professional"
    # Report aimed at policy decisions.
    POLICY = "policy"
    # Forward-looking outlook report.
    FORECAST = "forecast"
|
|
|
|
|
class ReportLanguage(Enum):
    """Output languages a report can be requested in.

    The member value is the human-readable language name; it is the string
    accepted by ``ReportLanguage(value)``.
    """

    TRADITIONAL_CHINESE = "繁體中文"
    CHINESE = "Chinese"
    ENGLISH = "English"
    JAPANESE = "Japanese"
|
|
|
|
|
class ReportLength(Enum):
    """Target report sizes.

    The integer value is the requested length; members are looked up with
    ``ReportLength(300)``, ``ReportLength(800)`` or ``ReportLength(1500)``.
    """

    SHORT = 300
    STANDARD = 800
    DETAILED = 1500
|
|
|
|
|
@dataclass
class ReportMetadata:
    """Parameters and headline figures describing one report request.

    The first six fields are required; the remaining fields are optional
    headline numbers and default to ``None``.
    """

    # --- required request parameters ---
    country: str
    start_year: int
    end_year: int
    report_type: ReportType
    language: ReportLanguage
    length: ReportLength

    # --- optional headline figures ---
    latest_score: Optional[float] = None
    rank: Optional[int] = None
    total_countries: Optional[int] = None
    global_avg: Optional[float] = None
    generated_at: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields of this instance as a plain dictionary."""
        field_map = asdict(self)
        return field_map
|
|
|
|
|
@dataclass
class QualityMetrics:
    """Container for the individual quality scores of a report and their aggregate."""

    # Score for how much of the requested data was available.
    data_coverage: float
    # Score for agreement between the report text and its metadata.
    logical_consistency: float
    # Score for length / heading / language checks on the text.
    language_quality: float
    # Score for structural and formatting checks on the text.
    format_compliance: float
    # Aggregate of the four scores above.
    overall_score: float
|
|
|
|
|
class ReportTemplate:
    """Static catalogue of report templates, one per ``ReportType``.

    Every template carries the ordered section titles (``structure``) plus
    three style hints: ``tone``, ``focus`` and ``data_density``.
    """

    TEMPLATES = {
        ReportType.SUMMARY: {
            "structure": ["執行摘要", "整體表現概覽", "主要成就", "挑戰領域", "未來展望"],
            "tone": "friendly_expert",
            "focus": "public_communication",
            "data_density": "medium",
        },
        ReportType.PROFESSIONAL: {
            "structure": [
                "專業執行摘要",
                "數據分析與方法論",
                "詳細績效評估",
                "優先領域識別",
                "國際比較分析",
                "技術建議與最佳實踐",
            ],
            "tone": "technical_expert",
            "focus": "professional_analysis",
            "data_density": "high",
        },
        ReportType.POLICY: {
            "structure": ["政策執行摘要", "關鍵績效指標", "戰略重點領域", "政策建議", "實施路線圖"],
            "tone": "policy_decision",
            "focus": "decision_making",
            "data_density": "medium",
        },
        ReportType.FORECAST: {
            "structure": [
                "2030 年展望執行摘要",
                "當前軌跡分析",
                "情境建模",
                "關鍵風險與機遇",
                "2030 年目標達成評估",
                "加速行動建議",
            ],
            "tone": "strategic_analyst",
            "focus": "future_planning",
            "data_density": "high",
        },
    }

    @classmethod
    def get_structure(cls, report_type: ReportType) -> List[str]:
        """Return the ordered section titles for ``report_type``.

        Raises:
            KeyError: if ``report_type`` has no entry in ``TEMPLATES``.
        """
        template = cls.TEMPLATES[report_type]
        return template["structure"]

    @classmethod
    def get_style_guide(cls, report_type: ReportType) -> Dict[str, str]:
        """Return the full template entry for ``report_type``.

        The returned mapping is the stored entry itself (not a copy) and
        also contains the ``structure`` list alongside the style hints.

        Raises:
            KeyError: if ``report_type`` has no entry in ``TEMPLATES``.
        """
        template = cls.TEMPLATES[report_type]
        return template
|
|
|
|
|
class CacheManager:
    """In-memory report cache with time-based expiry, guarded by a lock.

    Entries are stored in ``self.cache`` as ``key -> (content, stored_at)``
    where ``stored_at`` is a ``time.time()`` timestamp.
    """

    # Metadata fields that change on every request and therefore must not be
    # part of the cache key; otherwise identical requests would never match.
    _VOLATILE_FIELDS = frozenset({"generated_at"})

    def __init__(self, ttl_hours: int = 24):
        """Create an empty cache whose entries live for ``ttl_hours`` hours."""
        self.cache = {}
        self.ttl_seconds = ttl_hours * 3600
        self.lock = threading.Lock()

    def _generate_key(self, meta: "ReportMetadata", data_hash: str) -> str:
        """Build the cache key from the stable metadata fields and ``data_hash``.

        ``meta`` only needs a ``to_dict()`` method. Fields listed in
        ``_VOLATILE_FIELDS`` (the generation timestamp) are dropped so that
        two requests differing only in when they were made share a key.
        """
        stable_fields = {
            name: value
            for name, value in meta.to_dict().items()
            if name not in self._VOLATILE_FIELDS
        }
        content = f"{stable_fields}_{data_hash}"
        # MD5 is used purely as a compact fingerprint, not for security.
        return hashlib.md5(content.encode()).hexdigest()

    def _is_expired(self, timestamp: float) -> bool:
        """Return True when an entry stored at ``timestamp`` is older than the TTL."""
        return time.time() - timestamp > self.ttl_seconds

    def get(self, meta: "ReportMetadata", data_hash: str) -> Optional[str]:
        """Return the cached content for this request, or ``None``.

        An expired entry is deleted on access and reported as a miss.
        """
        with self.lock:
            key = self._generate_key(meta, data_hash)
            entry = self.cache.get(key)
            if entry is None:
                return None
            content, timestamp = entry
            if self._is_expired(timestamp):
                del self.cache[key]
                return None
            return content

    def set(self, meta: "ReportMetadata", data_hash: str, content: str) -> None:
        """Store ``content`` for this request, stamped with the current time."""
        with self.lock:
            key = self._generate_key(meta, data_hash)
            self.cache[key] = (content, time.time())

    def clear_expired(self) -> None:
        """Delete every entry whose age exceeds the TTL."""
        with self.lock:
            # Collect first: deleting while iterating the dict would raise.
            expired_keys = [
                key for key, (_, timestamp) in self.cache.items()
                if self._is_expired(timestamp)
            ]
            for key in expired_keys:
                del self.cache[key]
|
|
|
|
|
class DataAnalyzer:
    """Derives trend and ranking statistics from an SDG score DataFrame.

    Both methods read the columns ``country``, ``year`` and
    ``sdg_index_score`` and, when present, ``goal_1_score`` ..
    ``goal_17_score``. Any exception is logged and reported to the caller
    as ``{"error": <message>}`` instead of being raised.
    """

    @staticmethod
    def _per_year(change: float, first_year: Any, last_year: Any) -> float:
        """Return ``change`` divided by the elapsed calendar years (0 if none)."""
        elapsed_years = int(last_year) - int(first_year)
        return float(change / elapsed_years) if elapsed_years > 0 else 0.0

    @staticmethod
    def analyze_trends(df: pd.DataFrame, country: str) -> Dict[str, Any]:
        """Summarise how ``country``'s scores changed over the available years.

        Args:
            df: score table (see class docstring for the columns read).
            country: value matched exactly against the ``country`` column.

        Returns:
            A dict with ``overall_trend`` (latest score, total change,
            per-year change, change over the last five rows and a
            ``status`` of ``improving`` / ``declining`` / ``stable``),
            ``goal_trends`` (per goal with at least two non-null values),
            ``data_points`` and ``year_range``; or ``{"error": ...}``.
        """
        try:
            country_data = df[df['country'] == country].sort_values('year')
            if country_data.empty:
                return {"error": "No data available"}

            scores = country_data['sdg_index_score']
            years = country_data['year']

            latest_score = float(scores.iloc[-1])
            earliest_score = float(scores.iloc[0])
            total_change = latest_score - earliest_score

            # Divide by elapsed calendar years, not by row count: with gaps
            # or duplicate years the row count misstates the time span.
            annual_change = DataAnalyzer._per_year(total_change, years.iloc[0], years.iloc[-1])

            # Short-term trend: first vs. last of the five most recent rows.
            recent_scores = scores.tail(5)
            if len(recent_scores) >= 2:
                recent_trend = float(recent_scores.iloc[-1] - recent_scores.iloc[0])
            else:
                recent_trend = 0

            goal_trends = {}
            for i in range(1, 18):
                goal_col = f'goal_{i}_score'
                if goal_col not in country_data.columns:
                    continue
                # Keep each goal value paired with its year so the rate uses
                # the span of the years that actually have a value.
                goal_rows = country_data[['year', goal_col]].dropna(subset=[goal_col])
                if len(goal_rows) < 2:
                    continue
                goal_values = goal_rows[goal_col]
                goal_change = goal_values.iloc[-1] - goal_values.iloc[0]
                goal_trends[f'goal_{i}'] = {
                    'latest': float(goal_values.iloc[-1]),
                    'change': float(goal_change),
                    'annual_rate': DataAnalyzer._per_year(
                        goal_change, goal_rows['year'].iloc[0], goal_rows['year'].iloc[-1]
                    ),
                }

            if recent_trend > 0:
                status = 'improving'
            elif recent_trend < 0:
                status = 'declining'
            else:
                status = 'stable'

            return {
                'overall_trend': {
                    'latest_score': latest_score,
                    'total_change': total_change,
                    'annual_change': annual_change,
                    'recent_trend': recent_trend,
                    'status': status,
                },
                'goal_trends': goal_trends,
                'data_points': len(country_data),
                'year_range': f"{int(years.min())}-{int(years.max())}",
            }

        except Exception as e:
            logger.error(f"Error in trend analysis: {str(e)}")
            return {"error": str(e)}

    @staticmethod
    def get_regional_comparison(df: pd.DataFrame, country: str, region: str = "global") -> Dict[str, Any]:
        """Compare ``country`` with all rows of the most recent year in ``df``.

        Args:
            df: score table (see class docstring for the columns read).
            country: value matched exactly against the ``country`` column.
            region: accepted for interface compatibility; the current
                implementation does not read it and always compares
                against every row of the latest year.

        Returns:
            A dict with ``ranking``, ``global_context`` and
            ``goal_comparison``; or ``{"error": ...}``.

            Note the two ``percentile`` values use different conventions:
            ``ranking['percentile']`` is ``rank / total * 100`` (smaller is
            better), whereas each goal's ``percentile`` is the share of
            rows scoring strictly lower (larger is better).
        """
        try:
            latest_data = df[df['year'] == df['year'].max()]
            country_rows = latest_data[latest_data['country'] == country]

            country_scores = country_rows['sdg_index_score'].values
            if len(country_scores) == 0:
                return {"error": "Country not found"}
            country_score = float(country_scores[0])

            all_scores = latest_data['sdg_index_score']
            # Rank = 1 + number of rows with a strictly higher score.
            ranking = int((all_scores > country_score).sum() + 1)
            total_countries = len(latest_data)
            percentile = float((ranking / total_countries) * 100)

            quartiles = all_scores.quantile([0.25, 0.5, 0.75])

            goal_comparison = {}
            for i in range(1, 18):
                goal_col = f'goal_{i}_score'
                if goal_col not in latest_data.columns:
                    continue
                country_goal = country_rows[goal_col].values
                if len(country_goal) == 0 or pd.isna(country_goal[0]):
                    continue
                global_avg = float(latest_data[goal_col].mean())
                goal_comparison[f'goal_{i}'] = {
                    'country_score': float(country_goal[0]),
                    'global_average': global_avg,
                    'difference': float(country_goal[0] - global_avg),
                    'percentile': float(
                        ((latest_data[goal_col] < country_goal[0]).sum() / len(latest_data)) * 100
                    ),
                }

            return {
                'ranking': {
                    'global_rank': ranking,
                    'total_countries': total_countries,
                    'percentile': percentile,
                    'score': country_score,
                },
                'global_context': {
                    'global_average': float(all_scores.mean()),
                    'global_median': float(quartiles[0.5]),
                    'top_quartile': float(quartiles[0.75]),
                    'bottom_quartile': float(quartiles[0.25]),
                },
                'goal_comparison': goal_comparison,
            }

        except Exception as e:
            logger.error(f"Error in regional comparison: {str(e)}")
            return {"error": str(e)}
|
|
|
|
|
class QualityAssurance:
    """Heuristic quality checks for generated reports.

    Every check returns a score in ``[0.0, 1.0]``. Each method catches its
    own exceptions, logs them, and returns a neutral fallback score
    (``0.0`` for data coverage, ``0.5`` for the text checks).
    """

    # Unicode block used throughout to recognise Chinese characters.
    _CJK_PATTERN = r'[\u4e00-\u9fff]'

    @staticmethod
    def validate_data_coverage(df: pd.DataFrame, meta: "ReportMetadata") -> float:
        """Score how much of the requested data exists for ``meta.country``.

        The score is the mean of two parts, each capped at 1.0:
        the share of years in ``[meta.start_year, meta.end_year]`` that
        have at least one row, and the share of the 17 ``goal_N_score``
        columns present in ``df``.

        Returns ``0.0`` when the country has no rows, when the year range
        is empty or inverted, or when an error occurs.
        """
        try:
            country_data = df[df['country'] == meta.country]
            if country_data.empty:
                return 0.0

            expected_years = meta.end_year - meta.start_year + 1
            if expected_years <= 0:
                return 0.0

            if 'year' in country_data.columns:
                # Only distinct years inside the requested window count;
                # rows outside it must not inflate the coverage.
                in_window = country_data['year'].between(meta.start_year, meta.end_year)
                covered_years = int(country_data.loc[in_window, 'year'].nunique())
            else:
                # Without a year column fall back to the raw row count.
                covered_years = len(country_data)
            # Cap before averaging so surplus rows cannot mask missing goals.
            year_coverage = min(covered_years / expected_years, 1.0)

            goal_columns = [f'goal_{i}_score' for i in range(1, 18)]
            available_goals = sum(1 for col in goal_columns if col in country_data.columns)
            goal_coverage = available_goals / 17

            overall_coverage = (year_coverage + goal_coverage) / 2
            return min(overall_coverage, 1.0)

        except Exception as e:
            logger.error(f"Error validating data coverage: {str(e)}")
            return 0.0

    @staticmethod
    def check_logical_consistency(report_content: str, meta: "ReportMetadata") -> float:
        """Score whether the report text mentions the facts in ``meta``.

        Starting from 1.0, deducts 0.3 if the country name is absent,
        0.2 if neither ``"start-end"`` nor the end year appears, 0.2 if
        the integer part of ``meta.latest_score`` is absent, and 0.3 if
        fewer than 60% of the template's section titles are found.
        All checks are plain substring matches.
        """
        try:
            score = 1.0

            if meta.country not in report_content:
                score -= 0.3

            year_range_str = f"{meta.start_year}-{meta.end_year}"
            if year_range_str not in report_content and str(meta.end_year) not in report_content:
                score -= 0.2

            # `is not None` so that a genuine score of 0 is still verified.
            if meta.latest_score is not None:
                # Only the integer part is matched, e.g. "75" for 75.2.
                score_str = str(float(meta.latest_score)).split('.')[0]
                if score_str not in report_content:
                    score -= 0.2

            expected_sections = ReportTemplate.get_structure(meta.report_type)
            found_sections = sum(1 for section in expected_sections if section in report_content)
            if found_sections < len(expected_sections) * 0.6:
                score -= 0.3

            return max(score, 0.0)

        except Exception as e:
            logger.error(f"Error checking logical consistency: {str(e)}")
            return 0.5

    @staticmethod
    def assess_language_quality(
        report_content: str,
        language: "ReportLanguage",
        length: Optional["ReportLength"] = None,
    ) -> float:
        """Score length, heading presence and (for Chinese) script usage.

        Args:
            report_content: the report text.
            language: requested report language.
            length: requested report length; ``None`` (the default) is
                treated as ``ReportLength.STANDARD``.

        Starting from 1.0, deducts 0.3 if the text is shorter than 70% of
        the minimum for ``length``, 0.2 if no line starts with ``#``, and
        0.2 if a Traditional Chinese report is less than 30% Chinese.

        Length is measured as Chinese characters plus whitespace-separated
        tokens of the remaining text, because Chinese is not
        space-delimited; for text without Chinese characters this equals
        ``len(report_content.split())``.
        """
        try:
            score = 1.0

            min_length = {
                ReportLength.SHORT: 200,
                ReportLength.STANDARD: 600,
                ReportLength.DETAILED: 1200,
            }
            target_length = ReportLength.STANDARD if length is None else length
            expected_words = min_length.get(target_length, 600)

            cjk = QualityAssurance._CJK_PATTERN
            chinese_chars = len(re.findall(cjk, report_content))
            other_tokens = re.sub(cjk, ' ', report_content).split()
            word_count = chinese_chars + len(other_tokens)

            if word_count < expected_words * 0.7:
                score -= 0.3

            if not re.search(r'^#', report_content, re.MULTILINE):
                score -= 0.2

            if language == ReportLanguage.TRADITIONAL_CHINESE:
                if chinese_chars < word_count * 0.3:
                    score -= 0.2

            return max(score, 0.0)

        except Exception as e:
            logger.error(f"Error assessing language quality: {str(e)}")
            return 0.5

    @staticmethod
    def check_format_compliance(report_content: str, report_type: "ReportType") -> float:
        """Score basic formatting of the report text.

        Args:
            report_content: the report text.
            report_type: accepted for interface compatibility; the checks
                below do not depend on it.

        Starting from 1.0, deducts 0.3 if the text contains no ``#``,
        0.3 if fewer than two of the keywords 摘要 / 總結 / 建議 / 結論
        appear, and 0.2 if fewer than two of the data patterns (decimal
        number, percentage, 排名, 分數) appear.
        """
        try:
            score = 1.0

            if not re.search(r'#+', report_content):
                score -= 0.3

            required_elements = ['摘要', '總結', '建議', '結論']
            found_elements = sum(1 for element in required_elements if element in report_content)
            if found_elements < 2:
                score -= 0.3

            data_patterns = [r'\d+\.\d+', r'\d+%', r'排名', r'分數']
            found_patterns = sum(1 for pattern in data_patterns if re.search(pattern, report_content))
            if found_patterns < 2:
                score -= 0.2

            return max(score, 0.0)

        except Exception as e:
            logger.error(f"Error checking format compliance: {str(e)}")
            return 0.5
|
|
|
|
|
class EnhancedAIReportEngine:
    """Generates SDG assessment reports through an OpenAI-compatible API.

    The engine analyses the input data, builds prompts, calls the chat
    completion endpoint with retries, and caches successful reports. When
    no API key is configured or the ``openai`` package is missing it runs
    in mock mode and returns a canned report instead.
    """

    # Prefix of the message returned by ``_generate_with_retry`` when every
    # attempt failed; ``generate_report`` matches on it to switch to the
    # fallback report.
    _FAILURE_PREFIX = "Failed to generate report"

    def __init__(self, base_url: Optional[str] = None, api_key: Optional[str] = None):
        """Set up configuration, helpers and (if possible) the API client.

        Args:
            base_url: API base URL; falls back to ``ai_engine.base_url``
                from the configuration when not given.
            api_key: API key; falls back to ``ai_engine.api_key`` from the
                configuration when not given.
        """
        config = get_config()
        self.base_url = base_url or config.get('ai_engine.base_url')
        self.api_key = api_key or config.get('ai_engine.api_key')
        self.model = config.get('ai_engine.default_model', 'azure/gpt-4o')

        cache_ttl = config.get('ai_engine.cache_ttl_hours', 24)
        self.cache_manager = CacheManager(ttl_hours=cache_ttl)
        self.data_analyzer = DataAnalyzer()
        self.quality_assurance = QualityAssurance()

        try:
            if self.api_key:
                # Imported lazily so the module loads without the SDK.
                from openai import OpenAI
                self.client = OpenAI(base_url=self.base_url, api_key=self.api_key)
                self.available = True
            else:
                self.client = None
                self.available = False
                logger.info("AI engine initialized in mock mode (no credentials)")
        except ImportError:
            logger.warning("OpenAI client not available. Using mock mode.")
            self.client = None
            self.available = False

    def _get_data_hash(self, df: pd.DataFrame) -> str:
        """Return a fingerprint of the score data used as part of the cache key.

        All rows of the ``country`` / ``year`` / ``sdg_index_score``
        columns (those that exist) are hashed, so a change anywhere in
        those columns invalidates cached reports. If none of the columns
        exist, only the row count is hashed. On error a time-based value
        is returned, which effectively disables caching for that call.
        """
        try:
            key_columns = ['country', 'year', 'sdg_index_score']
            available_columns = [col for col in key_columns if col in df.columns]

            if not available_columns:
                return hashlib.md5(str(len(df)).encode()).hexdigest()

            # One deterministic uint64 per row; the index is ignored so
            # only the cell values matter.
            row_hashes = pd.util.hash_pandas_object(df[available_columns], index=False)
            digest = hashlib.md5(",".join(available_columns).encode())
            digest.update(row_hashes.values.tobytes())
            return digest.hexdigest()
        except Exception as e:
            logger.error(f"Error generating data hash: {str(e)}")
            return hashlib.md5(str(time.time()).encode()).hexdigest()

    @staticmethod
    def _or_na(value: Any) -> Any:
        """Return ``value``, or the placeholder ``'N/A'`` when it is ``None``."""
        return 'N/A' if value is None else value

    def _prepare_prompt(self, df: pd.DataFrame, meta: "ReportMetadata") -> Tuple[str, str]:
        """Build the ``(system_prompt, user_prompt)`` pair for ``meta``.

        The user prompt embeds the trend analysis and comparison results
        as JSON together with the section structure for the report type.

        Raises:
            Exception: any error is logged and re-raised unchanged.
        """
        try:
            trends = self.data_analyzer.analyze_trends(df, meta.country)
            comparison = self.data_analyzer.get_regional_comparison(df, meta.country)

            trends_json = json.dumps(trends, indent=2, ensure_ascii=False)
            comparison_json = json.dumps(comparison, indent=2, ensure_ascii=False)
            structure_text = ', '.join(ReportTemplate.get_structure(meta.report_type))

            # Use an explicit None check so a real value of 0 is shown.
            latest_score_text = self._or_na(meta.latest_score)
            rank_text = self._or_na(meta.rank)
            total_text = self._or_na(meta.total_countries)
            global_avg_text = self._or_na(meta.global_avg)

            system_prompt = f"""
您是聯合國永續發展解決方案網路(SDSN)的首席環境經濟學家與頂級 AI 策略專家。
您的任務是為 {meta.country} 撰寫一份數據驅動且具備戰略前瞻性的 SDG 評估報告。

報告要求:
1. **深度與專業度**:分析總體得分,並針對各項目標進行深入探討。
2. **數據驅動**:必須廣泛引用提供的趨勢數據、排名以及與全球平均的對比進行量化分析。
3. **專業口吻**:使用權威性的政策分析術語(如:Decoupling, Circular Economy, Carbon Neutrality 等)。
4. **結構化**:使用 Markdown 標題、清單、表格。**長度必須與要求相符,不可敷衍。**
5. **長度要求**:這是一份約 {meta.length.value} 字/字符的報告。
- 如果是 1500 字「Detailed」報告,請務必提供極具深度的細節分析,涵蓋多個學科視角。
- 如果是 300 字「Short」報告,請保持極度精煉。
6. **語言**:完全使用 {meta.language.value} 撰寫。
7. **結尾標記**:請在報告最後一行加上「【報告結束】」以示完整。

您的分析應根據報告類型 ({meta.report_type.value}) 提供相應的深度,特別是針對最新的 SDR 2025 數據進行解讀。
"""

            user_prompt = f"""
請為 {meta.country} 生成 {meta.report_type.value} 類型的 SDG 評估報告。

## 基本信息
- 國家:{meta.country}
- 數據年份:{meta.start_year} - {meta.end_year}
- 最新得分:{latest_score_text}
- 全球排名:{rank_text} / {total_text}
- 全球平均:{global_avg_text}

## 數據分析結果
### 趨勢分析
{trends_json}

### 區域比較
{comparison_json}

## 報告要求
- 結構:{structure_text}
- 語言:{meta.language.value}
- 目標長度:**嚴格限制在 {meta.length.value} 字以內**。
- 如果是 300 字版本,請合併部分章節,保持簡潔明瞭,避免冗長描述。

請開始生成報告,並以「【報告結束】」結尾。
"""

            return system_prompt, user_prompt

        except Exception as e:
            logger.error(f"Error preparing prompt: {str(e)}")
            raise

    def _generate_with_retry(self, system_prompt: str, user_prompt: str, meta: "ReportMetadata", max_retries: int = 3) -> str:
        """Call the chat completion API, retrying with exponential back-off.

        In mock mode the canned mock report is returned. A response is
        accepted only if it has more than 50 non-blank characters. When
        every attempt fails, a message starting with ``_FAILURE_PREFIX``
        is returned instead of raising.
        """
        last_error = None

        for attempt in range(max_retries):
            try:
                if not self.available:
                    return self._generate_mock_report(system_prompt, user_prompt, meta)

                # Token budget scales with the requested length, bounded
                # to the range 4096..16384.
                dynamic_max_tokens = min(16384, max(4096, meta.length.value * 6))

                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt},
                    ],
                    temperature=0.7,
                    max_tokens=dynamic_max_tokens,
                    timeout=300,
                )

                content = response.choices[0].message.content
                if content and len(content.strip()) > 50:
                    return content
                raise Exception("Generated content is too short or empty")

            except Exception as e:
                last_error = e
                logger.warning(f"Attempt {attempt + 1} failed: {str(e)}")
                if attempt < max_retries - 1:
                    # Back off 1s, 2s, 4s, ... between attempts.
                    time.sleep(2 ** attempt)

        error_msg = f"{self._FAILURE_PREFIX} after {max_retries} attempts. Last error: {str(last_error)}"
        logger.error(error_msg)
        return error_msg

    def _generate_mock_report(self, system_prompt: str, user_prompt: str, meta: "ReportMetadata") -> str:
        """Return a canned report built from ``meta`` (used when no API is available).

        ``system_prompt`` and ``user_prompt`` are accepted for signature
        parity with the real generation path and are not used.
        """
        # The last bullet prints the global average itself, so it is
        # labelled as the average rather than as a gap.
        return f"""
# {meta.country} SDG 評估報告

## 執行摘要

本報告基於最新數據對 {meta.country} 的永續發展目標(SDG)表現進行了全面分析。

## 主要發現

- 當前 SDG 指數得分:{meta.latest_score}
- 全球排名:{meta.rank} / {meta.total_countries}
- 全球平均:{meta.global_avg}

## 建議

1. 繼續加強在環境保護領域的努力
2. 提高教育和健康指標
3. 加強國際合作

*注意:此為模擬報告,實際 API 調用時將生成真實報告*
"""

    def _generate_fallback_report(self, error_msg: str, meta: Optional["ReportMetadata"] = None) -> str:
        """Return a Markdown error report describing ``error_msg``.

        Basic figures from ``meta`` are included when it is provided;
        otherwise they are shown as ``N/A``.
        """
        country = meta.country if meta else "N/A"
        years = f"{meta.start_year} - {meta.end_year}" if meta else "N/A"
        score = meta.latest_score if meta and meta.latest_score is not None else "N/A"

        return f"""
# SDG 評估報告生成失敗

很抱歉,在生成報告時遇到了技術問題。

## 錯誤詳情
{error_msg}

## 建議解決方案
1. 檢查 API 連接狀態
2. 確認 API 密鑰有效性
3. 稍後重試

## 基本數據摘要
- 國家:{country}
- 數據年份:{years}
- 最新得分:{score}

---
*本報告由 SDG AI 引擎自動生成*
"""

    def _assess_quality(self, report_content: str, meta: "ReportMetadata", df: pd.DataFrame) -> "QualityMetrics":
        """Run all quality checks and return them with their unweighted mean.

        On any error every metric is reported as ``0.5``.
        """
        try:
            data_coverage = self.quality_assurance.validate_data_coverage(df, meta)
            logical_consistency = self.quality_assurance.check_logical_consistency(report_content, meta)
            language_quality = self.quality_assurance.assess_language_quality(report_content, meta.language)
            format_compliance = self.quality_assurance.check_format_compliance(report_content, meta.report_type)

            overall_score = (data_coverage + logical_consistency + language_quality + format_compliance) / 4

            return QualityMetrics(
                data_coverage=data_coverage,
                logical_consistency=logical_consistency,
                language_quality=language_quality,
                format_compliance=format_compliance,
                overall_score=overall_score,
            )

        except Exception as e:
            logger.error(f"Error assessing quality: {str(e)}")
            return QualityMetrics(0.5, 0.5, 0.5, 0.5, 0.5)

    def generate_report(self, df: pd.DataFrame, meta_info: Dict[str, Any], language: str = "繁體中文") -> str:
        """Generate (or fetch from cache) an SDG assessment report.

        Args:
            df: DataFrame holding the SDG data; must be non-empty.
            meta_info: request parameters. Keys read: ``country``,
                ``start_year``, ``end_year``, ``report_type``, ``length``,
                ``latest_score``, ``rank``, ``total_countries``,
                ``global_avg``.
            language: report language; must be a ``ReportLanguage`` value.

        Returns:
            The report text. This method does not raise: on any error,
            including invalid input, a fallback error report is returned.
        """
        try:
            if df is None or df.empty:
                raise ValueError("DataFrame is empty or None")

            meta = ReportMetadata(
                country=meta_info.get('country', 'Unknown'),
                start_year=int(meta_info.get('start_year', 2020)),
                end_year=int(meta_info.get('end_year', 2025)),
                report_type=ReportType(meta_info.get('report_type', 'summary')),
                language=ReportLanguage(language),
                length=ReportLength(int(meta_info.get('length', 800))),
                latest_score=meta_info.get('latest_score'),
                rank=meta_info.get('rank'),
                total_countries=meta_info.get('total_countries'),
                global_avg=meta_info.get('global_avg'),
                generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            )

            logger.info(f"Generating {meta.report_type.value} report for {meta.country}")

            data_hash = self._get_data_hash(df)
            cached_report = self.cache_manager.get(meta, data_hash)
            if cached_report:
                logger.info("Returning cached report")
                return cached_report

            system_prompt, user_prompt = self._prepare_prompt(df, meta)
            report_content = self._generate_with_retry(system_prompt, user_prompt, meta)

            # Match the sentinel at the start only, so a real report that
            # merely mentions the phrase is not mistaken for a failure.
            if report_content.startswith(self._FAILURE_PREFIX):
                return self._generate_fallback_report(report_content, meta)

            end_marker = "【報告結束】"
            if end_marker not in report_content:
                # The model was asked to finish with the marker; its
                # absence suggests the output was cut off.
                logger.warning(f"Report for {meta.country} may be truncated (marker not found)")
                if len(report_content) > meta.length.value * 3:
                    truncation_notice = "\n\n> ⚠️ **(註:因模型輸出長度限制,報告內容可能未完整顯示,請選擇較短報告類型或聯繫管理員)**"
                    report_content += truncation_notice
            else:
                report_content = report_content.replace(end_marker, "").strip()

            self.cache_manager.set(meta, data_hash, report_content)

            return report_content

        except Exception as e:
            logger.error(f"Error in generate_report: {str(e)}")
            return self._generate_fallback_report(str(e))

    def get_available_models(self) -> Dict[str, str]:
        """Return a static mapping of model identifier to display label."""
        return {
            "gemini-2.5-flash": "Gemini 2.5 Flash (快速可靠)",
            "gpt-4o-mini": "GPT-4o Mini (成本效益)",
            "claude-3.5-sonnet": "Claude 3.5 Sonnet (細緻分析)",

            "gemini-2.0-pro": "Gemini 2.0 Pro (高智能)",
            "gpt-4o": "GPT-4o (標準選擇)",
            "claude-3-opus": "Claude 3 Opus (高級推理)",

            "gemini-1.5-pro": "Gemini 1.5 Pro (長上下文)",
            "gpt-4-turbo": "GPT-4 Turbo (平衡性能)",
            "claude-3-haiku": "Claude 3 Haiku (快速處理)",

            "llama-3.1-70b": "Llama 3.1 70B (開源)",
            "llama-3.1-8b": "Llama 3.1 8B (輕量級)",
            "mistral-large": "Mistral Large (歐洲模型)",

            "o1-preview": "O1 Preview (深度推理)",
            "o1-mini": "O1 Mini (高效推理)",

            "qwen-turbo": "Qwen Turbo (阿里巴巴)",
            "deepseek-chat": "DeepSeek Chat (高級邏輯)",
        }

    def clear_cache(self):
        """Remove expired entries from the cache (live entries are kept)."""
        self.cache_manager.clear_expired()
        logger.info("Expired cache entries cleared")

    def get_cache_stats(self) -> Dict[str, Any]:
        """Return total / expired / active entry counts and the TTL in hours."""
        # Hold the cache lock while iterating so a concurrent set/get in
        # another thread cannot resize the dict mid-iteration.
        with self.cache_manager.lock:
            total_entries = len(self.cache_manager.cache)
            expired_entries = sum(
                1 for _, timestamp in self.cache_manager.cache.values()
                if self.cache_manager._is_expired(timestamp)
            )

        return {
            "total_entries": total_entries,
            "expired_entries": expired_entries,
            "active_entries": total_entries - expired_entries,
            "ttl_hours": self.cache_manager.ttl_seconds / 3600,
        }

    def batch_generate_reports(self, report_configs: List[Dict[str, Any]], df: pd.DataFrame, max_workers: int = 3) -> List[Tuple[str, str]]:
        """Generate several reports concurrently.

        Args:
            report_configs: one ``meta_info`` dict per report; its
                optional ``language`` key selects the report language.
            df: DataFrame shared by all reports.
            max_workers: maximum number of worker threads.

        Returns:
            ``(report_type, report_content)`` tuples in the same order as
            ``report_configs``.
        """
        results = []

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Submit everything first so the work runs in parallel, then
            # collect in submission order to keep the output deterministic.
            submitted = [
                (
                    config,
                    executor.submit(
                        self.generate_report, df, config, config.get('language', '繁體中文')
                    ),
                )
                for config in report_configs
            ]

            for config, future in submitted:
                report_type = config.get('report_type', 'unknown')
                try:
                    results.append((report_type, future.result()))
                except Exception as e:
                    logger.error(f"Batch generation failed for config {config}: {str(e)}")
                    results.append((report_type, f"Generation failed: {str(e)}"))

        return results
|
|
|
|
|
|
|
|
# Alias exposing the engine class under a second name.
SDG_AI_Report_Engine = EnhancedAIReportEngine


if __name__ == "__main__":
    # Manual smoke check: build an engine with placeholder credentials and
    # print a few facts about it. No report is generated here.
    engine = EnhancedAIReportEngine(base_url="your_base_url", api_key="your_api_key")

    # Example request payload, shown for reference; it is not passed to
    # the engine in this script.
    sample_meta = dict(
        country='Taiwan',
        start_year=2020,
        end_year=2025,
        report_type='summary',
        language='繁體中文',
        length=800,
        latest_score=75.2,
        rank=15,
        total_countries=166,
        global_avg=68.5,
    )

    print("Enhanced AI Report Engine initialized successfully!")
    model_catalogue = engine.get_available_models()
    print(f"Available models: {len(model_catalogue)}")
    cache_stats = engine.get_cache_stats()
    print(f"Cache stats: {cache_stats}")
|
|
|