import os
import re
import asyncio
import logging
from datetime import datetime
from typing import List, Dict, Optional
from dataclasses import dataclass
from contextlib import asynccontextmanager

import httpx
from fastapi import FastAPI, Request, HTTPException
from slack_bolt import App
from slack_bolt.adapter.fastapi import SlackRequestHandler
from newspaper import Article
import uvicorn
from dotenv import load_dotenv

# Pull environment variables from a local .env file, if one exists.
load_dotenv()

# Configure logging for the whole application.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


@dataclass
class Config:
    """Application settings, resolved from the environment at import time."""
    slack_bot_token: Optional[str] = os.getenv('SLACK_BOT_TOKEN')
    slack_signing_secret: Optional[str] = os.getenv('SLACK_SIGNING_SECRET')
    azure_openai_endpoint: Optional[str] = os.getenv('AZURE_OPENAI_ENDPOINT')
    azure_openai_api_key: Optional[str] = os.getenv('AZURE_OPENAI_API_KEY')
    azure_openai_deployment_name: str = os.getenv('AZURE_OPENAI_DEPLOYMENT_NAME', 'gpt-4')
    azure_openai_api_version: str = os.getenv('AZURE_OPENAI_API_VERSION', '2025-01-01')
    max_content_length: int = 10000  # max characters of article text sent to the model
    processing_timeout: int = 30


config = Config()

# Early validation: report missing required secrets (without failing yet).
required_vars = {
    'SLACK_BOT_TOKEN': config.slack_bot_token,
    'SLACK_SIGNING_SECRET': config.slack_signing_secret,
    'AZURE_OPENAI_ENDPOINT': config.azure_openai_endpoint,
    'AZURE_OPENAI_API_KEY': config.azure_openai_api_key,
}
missing = [name for name, value in required_vars.items() if not value]
if missing:
    error_msg = f"Missing required environment variables: {', '.join(missing)}. Please set these in Hugging Face Space Secrets."
if missing:
    # Don't raise here yet so the failure stays visible in the logs;
    # lifespan() will raise at startup if variables are still missing.
    logger.error(error_msg)


@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan handler.

    Validates the required environment variables on startup and closes the
    module-level processor's HTTP client on shutdown.
    """
    # Startup
    logger.info("Starting Slack URL Summarizer Bot")
    required_vars = [
        'SLACK_BOT_TOKEN',
        'SLACK_SIGNING_SECRET',
        'AZURE_OPENAI_ENDPOINT',
        'AZURE_OPENAI_API_KEY'
    ]
    missing_vars = [var for var in required_vars if not os.getenv(var)]
    if missing_vars:
        logger.error(f"Missing required environment variables: {', '.join(missing_vars)}")
        raise Exception(f"Missing required environment variables: {', '.join(missing_vars)}")
    logger.info("Bot started successfully")
    yield
    # Shutdown: release pooled HTTP connections.
    logger.info("Shutting down Slack URL Summarizer Bot")
    if hasattr(processor, 'http_client'):
        await processor.http_client.aclose()


# Initialize Slack app
try:
    slack_app = App(
        token=config.slack_bot_token,
        signing_secret=config.slack_signing_secret,
        process_before_response=True,
        # Temporarily disable signature verification for testing.
        request_verification_enabled=False
    )
except Exception as e:
    logger.error(f"Failed to initialize Slack App: {str(e)}")
    if missing:
        logger.error(f"CRITICAL: The following environment variables are MISSING: {', '.join(missing)}")
    # We still need a slack_app object for the handler, but it will be broken.
    slack_app = None

# Initialize FastAPI with lifespan
api = FastAPI(title="Slack URL Summarizer Bot", lifespan=lifespan)
handler = SlackRequestHandler(slack_app)


class URLProcessor:
    """Core URL processing: content extraction, AI summarization, formatting."""

    def __init__(self, config: Config):
        self.config = config
        self.http_client = httpx.AsyncClient(
            timeout=httpx.Timeout(30.0),
            follow_redirects=True
        )

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.http_client.aclose()

    def extract_urls(self, text: str) -> List[str]:
        """Extract all URLs from message text."""
        pattern = r'https?://[^\s<>"{\[\]|\\^`]+'
        urls = re.findall(pattern, text)
        logger.info(f"Extracted {len(urls)} URLs from message")
        return urls

    async def extract_content(self, url: str) -> Dict:
        """Extract the main content from a URL.

        Tries httpx + newspaper parsing first, then newspaper's own
        downloader, then a crude HTML-stripping fallback. Always returns a
        dict with 'title', 'text', 'authors', 'publish_date' and 'url' —
        on total failure the dict carries a placeholder title/text so the
        AI stage can still respond.
        """
        try:
            logger.info(f"Extracting content from: {url}")
            # Use a browser-like User-Agent to avoid being blocked.
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }

            # First try fetching the page directly with httpx.
            try:
                response = await self.http_client.get(url, headers=headers)
                response.raise_for_status()
                # Parse the fetched HTML with newspaper4k.
                article = Article(url)
                article.set_html(response.text)
                article.parse()
            except Exception as e:
                # If httpx fails, fall back to newspaper4k's own downloader.
                logger.warning(f"Direct HTTP request failed, trying newspaper4k: {str(e)}")
                article = Article(url)
                article.config.browser_user_agent = headers['User-Agent']
                article.download()
                article.parse()

            # Validate the extracted content.
            if not article.text or len(article.text.strip()) < 50:
                # Extraction yielded too little text; try a crude HTML strip
                # of the raw response body if we fetched one.
                if 'response' in locals() and response.text:
                    from html import unescape
                    # Remove HTML tags and collapse whitespace.
                    text = re.sub(r'<[^>]+>', '', response.text)
                    text = unescape(text)
                    text = re.sub(r'\s+', ' ', text).strip()
                    if len(text) > 100:
                        # Keep at most the first 3000 characters as the content.
                        text = text[:3000] + "..." if len(text) > 3000 else text
                        result = {
                            'title': url.split('/')[-1].replace('-', ' ').title(),
                            'text': text,
                            'authors': [],
                            'publish_date': None,
                            'url': url
                        }
                        logger.info(f"Successfully extracted content using fallback method from {url}")
                        return result
                raise Exception("Insufficient content extracted")

            # Truncate overly long content before sending it to the model.
            text = article.text
            if len(text) > self.config.max_content_length:
                text = text[:self.config.max_content_length] + "..."

            result = {
                'title': article.title or "No title available",
                'text': text,
                'authors': article.authors,
                'publish_date': article.publish_date,
                'url': url
            }
            logger.info(f"Successfully extracted content from {url}")
            return result

        except Exception as e:
            logger.error(f"Error extracting content from {url}: {str(e)}")
            # Last resort: return basic info so the AI stage can still reply.
            fallback_result = {
                'title': f"無法完全提取內容的網頁: {url}",
                'text': f"由於網站限制,無法提取完整內容。網址: {url}. 請嘗試直接訪問該網站查看內容。",
                'authors': [],
                'publish_date': None,
                'url': url
            }
            logger.info(f"Using fallback content for {url}")
            return fallback_result

    async def _chat_completion(self, system_message: str, user_prompt: str, max_tokens: int):
        """Call the Azure OpenAI chat-completions REST API (shared helper).

        Returns a (text, token_stats) tuple, where token_stats carries
        'prompt_tokens', 'completion_tokens' and 'total_tokens' (0 when the
        API response has no usage section).

        Raises httpx.HTTPStatusError on non-2xx responses.
        """
        url = f"{self.config.azure_openai_endpoint}/openai/deployments/{self.config.azure_openai_deployment_name}/chat/completions?api-version={self.config.azure_openai_api_version}"
        headers = {
            "Content-Type": "application/json",
            "api-key": self.config.azure_openai_api_key,
        }
        body = {
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": user_prompt}
            ],
            "temperature": 0.3,
            "max_tokens": max_tokens
        }
        response = await self.http_client.post(url, headers=headers, json=body)
        response.raise_for_status()
        result = response.json()
        text = result["choices"][0]["message"]["content"].strip()
        # Pull token usage information out of the API response.
        usage_info = result.get("usage", {})
        token_stats = {
            "prompt_tokens": usage_info.get("prompt_tokens", 0),
            "completion_tokens": usage_info.get("completion_tokens", 0),
            "total_tokens": usage_info.get("total_tokens", 0)
        }
        return text, token_stats

    async def summarize_and_translate(self, content: Dict) -> tuple:
        """Summarize content and translate it to Traditional Chinese via Azure OpenAI.

        Always returns a (summary, token_stats) tuple. FIX: the normal
        (non-fallback) path previously returned a bare string, silently
        dropping token usage; the caller already accepts tuples, so this is
        backward compatible.
        """
        try:
            logger.info(f"Summarizing content for: {content['url']}")

            # Fallback content (extraction failed) gets a short friendly reply.
            if "無法完全提取內容" in content['title']:
                prompt = f"""這個網址因為網站限制無法完全提取內容:{content['url']}
請用繁體中文回覆一個友善的訊息,說明:
1. 由於網站的保護機制,無法自動提取該網頁的完整內容
2. 建議用戶直接點擊連結查看完整內容
3. 如果是知名網站,可以簡單說明該網站的性質(如新聞、技術等)
請保持簡潔友善的語調。"""
                summary, token_stats = await self._chat_completion(
                    "你是一個友善的助手,會提供實用的建議。",
                    prompt,
                    max_tokens=300
                )
                logger.info(f"Generated fallback response for: {content['url']}")
                logger.info(f"Token usage - Prompt: {token_stats['prompt_tokens']}, Completion: {token_stats['completion_tokens']}, Total: {token_stats['total_tokens']}")
                return summary, token_stats

            # Normal summarization path.
            prompt = f"""請將以下文章摘要成 3-5 句重點,並翻譯為繁體中文。請確保摘要簡潔明瞭且包含最重要的資訊:
標題:{content['title']}
內容:{content['text']}
請用繁體中文回覆摘要。"""
            summary, token_stats = await self._chat_completion(
                "你是一個專業的技術文章摘要與翻譯專家,精通各種技術領域,能夠準確保留技術術語、專有名詞、數據細節,並將內容翻譯成自然流暢的繁體中文。你特別擅長處理科技、醫療、商業和學術文章,能夠識別並保留重要的技術細節。",
                prompt,
                max_tokens=800
            )
            logger.info(f"Successfully generated summary for: {content['url']}")
            return summary, token_stats

        except Exception as e:
            logger.error(f"Error in summarization: {str(e)}")
            # Keep the (summary, token_stats) tuple shape on errors as well.
            error_summary = f"抱歉,AI 處理時發生錯誤。錯誤訊息:{str(e)}"
            error_token_stats = {
                "prompt_tokens": 0,
                "completion_tokens": 0,
                "total_tokens": 0
            }
            return error_summary, error_token_stats

    def format_response(self, url: str, title: str, summary: str, token_stats: dict = None) -> str:
        """Format the Slack response message, optionally with token usage stats."""
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        response = f"""🔗 原始網址: {url}
📰 標題: {title}
{summary}
---
⏰ 處理時間: {timestamp}"""
        # Append token usage statistics when available.
        if token_stats:
            response += f"""
📊 Token 使用量: 輸入 {token_stats['prompt_tokens']} + 輸出 {token_stats['completion_tokens']} = 總計 {token_stats['total_tokens']} tokens"""
        return response

    def format_error_response(self, url: str, error_message: str) -> str:
        """Format an error message for Slack."""
        return f"""❌ 處理失敗: {url}
🔍 錯誤原因: {error_message}
💡 建議: 請檢查網址是否正確或稍後再試"""
# Global processor instance and deduplication cache
processor = URLProcessor(config)
processing_cache = set()  # tracks in-flight URL/channel pairs for de-duplication


async def process_url_async(url: str, channel: str, say):
    """Asynchronous URL processing pipeline: extract -> summarize -> post.

    Deduplicates concurrent requests for the same URL/channel within the
    same minute via the module-level `processing_cache` set.
    """
    # Unique processing ID; the //60 bucket resets the key every minute.
    process_id = f"{url}:{channel}:{int(datetime.now().timestamp())//60}"

    # Skip if this URL is already being processed.
    if process_id in processing_cache:
        logger.info(f"URL {url} is already being processed, skipping duplicate")
        return

    processing_cache.add(process_id)

    try:
        logger.info(f"Starting to process URL: {url}")
        async with URLProcessor(config) as proc:
            # Step 1: Extract content
            logger.info(f"Step 1: Extracting content from {url}")
            content = await proc.extract_content(url)
            logger.info(f"Content extracted successfully. Title: {content.get('title', 'N/A')}")

            # Step 2: Summarize and translate
            logger.info(f"Step 2: Summarizing and translating content for {url}")
            try:
                result = await proc.summarize_and_translate(content)
                # The result may be a (summary, token_stats) tuple or a bare string.
                if isinstance(result, tuple):
                    summary, token_stats = result
                else:
                    summary = result
                    token_stats = None
                logger.info(f"Summary generated successfully for {url}")
            except Exception as e:
                logger.error(f"Error in summarization, trying fallback: {str(e)}")
                # If AI processing failed, provide a basic reply.
                summary = f"抱歉,由於技術問題無法生成摘要。請直接查看原始網址:{url}"
                token_stats = None

            # Step 3: Format and send response
            logger.info(f"Step 3: Formatting and sending response for {url}")
            response = proc.format_response(url, content['title'], summary, token_stats)

            # Send to Slack (say is a synchronous callable supplied by Bolt).
            say(channel=channel, text=response)
            logger.info(f"Successfully processed and sent response for: {url}")

    except Exception as e:
        logger.error(f"Error processing URL {url}: {str(e)}", exc_info=True)
        error_message = processor.format_error_response(url, str(e))
        say(channel=channel, text=error_message)
    finally:
        # Evict from the dedup cache after a 5-second delay.
        # FIX: mark the cleanup thread as a daemon so a sleeping cleanup
        # thread can never block interpreter shutdown.
        import threading

        def remove_from_cache():
            import time
            time.sleep(5)
            processing_cache.discard(process_id)

        threading.Thread(target=remove_from_cache, daemon=True).start()


# Slack event handlers
@slack_app.event("message")
def handle_message(event, say, ack):
    """Handle incoming Slack messages: extract URLs and process each one."""
    ack()  # Acknowledge the event immediately.
    try:
        logger.info(f"Received message event: {event}")

        # Skip bot messages
        if event.get('bot_id'):
            logger.info("Skipping bot message")
            return

        # Skip app_mention events (those are handled by handle_app_mention)
        if event.get('type') == 'app_mention':
            logger.info("Skipping app_mention in message handler")
            return

        # Skip messages without text
        if 'text' not in event:
            logger.info("Skipping message without text")
            return

        message_text = event.get('text', '')
        channel = event.get('channel')
        user = event.get('user')

        # Skip messages that mention the bot, to avoid double processing.
        # FIX: the bot user ID is now configurable via SLACK_BOT_USER_ID
        # (defaulting to the previously hard-coded value).
        bot_user_id = os.getenv('SLACK_BOT_USER_ID', 'U094J502LLC')
        if f'<@{bot_user_id}>' in message_text:
            logger.info("Skipping mention message in message handler (will be handled by app_mention)")
            return

        logger.info(f"Processing message from user {user} in channel {channel}: {message_text}")

        # Extract URLs from message
        urls = processor.extract_urls(message_text)
        if not urls:
            logger.info("No URLs found in message")
            return

        logger.info(f"Found {len(urls)} URLs: {urls}")

        # Send initial acknowledgment for multiple URLs
        if len(urls) > 1:
            say(
                channel=channel,
                text=f"🔄 正在處理 {len(urls)} 個網址,請稍候..."
            )

        # Process each URL asynchronously, one thread/event-loop per URL.
        import threading
        for url in urls:
            logger.info(f"Creating thread for URL: {url}")
            thread = threading.Thread(
                target=lambda u=url: asyncio.run(process_url_async(u, channel, say))
            )
            thread.start()

    except Exception as e:
        logger.error(f"Error in message handler: {str(e)}", exc_info=True)
        say(
            channel=event.get('channel'),
            text="❌ 處理訊息時發生錯誤,請稍後再試"
        )


@slack_app.event("app_mention")
def handle_app_mention(event, say, ack):
    """Handle app mentions: process any URLs in the text, otherwise greet."""
    ack()  # Acknowledge the event.
    logger.info(f"Received app mention: {event}")

    # Check whether the mention contains any URL.
    message_text = event.get('text', '')
    urls = processor.extract_urls(message_text)

    if urls:
        logger.info(f"App mention contains URLs: {urls}")
        # Send initial acknowledgment
        say(
            channel=event['channel'],
            text=f"🔄 收到!正在處理 {len(urls)} 個網址..."
        )
        # Process URLs in background threads.
        import threading
        for url in urls:
            logger.info(f"Creating thread for app mention URL: {url}")
            thread = threading.Thread(
                target=lambda u=url: asyncio.run(process_url_async(u, event['channel'], say))
            )
            thread.start()
    else:
        # No URL: reply with a welcome message.
        say(
            channel=event["channel"],
            text="👋 你好!我是網址摘要機器人。只要在頻道中貼上網址,我就會自動為你生成繁體中文摘要!"
        )


# FastAPI routes
@api.get("/")
async def root():
    """Health check endpoint."""
    return {"status": "healthy", "service": "Slack URL Summarizer Bot"}


@api.get("/health")
async def health_check():
    """Detailed health check."""
    return {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        "config": {
            "slack_configured": bool(config.slack_bot_token),
            "azure_openai_configured": bool(config.azure_openai_endpoint),
        }
    }


@api.get("/slack/events")
async def slack_events_get():
    """Handle GET requests to the Slack events endpoint."""
    return {"message": "Slack events endpoint is ready", "methods": ["POST"]}


@api.post("/slack/events")
async def slack_events(request: Request):
    """Handle Slack events (URL verification challenge + regular events)."""
    # FIX: import json before the try block so that the
    # `except json.JSONDecodeError` clause can never raise NameError when an
    # earlier statement in the try fails.
    import json
    try:
        # Get the request body
        body = await request.body()
        data = json.loads(body)

        # Handle Slack's URL verification challenge.
        if data.get("type") == "url_verification":
            challenge = data.get("challenge")
            logger.info(f"Received URL verification challenge: {challenge}")
            return {"challenge": challenge}

        # Delegate regular Slack events to the Bolt adapter.
        logger.info(f"Received Slack event: {data.get('type')}")
        return await handler.handle(request)

    except json.JSONDecodeError:
        logger.error("Invalid JSON in Slack request")
        raise HTTPException(status_code=400, detail="Invalid JSON")
    except Exception as e:
        logger.error(f"Error handling Slack event: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")


# Error handling middleware
@api.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Global last-resort exception handler.

    FIX: an exception handler must RETURN a response; raising HTTPException
    from inside the handler is itself an unhandled error, producing a raw
    ASGI failure instead of a clean JSON 500.
    """
    from fastapi.responses import JSONResponse
    logger.error(f"Unhandled exception: {str(exc)}")
    return JSONResponse(status_code=500, content={"detail": "Internal server error"})


if __name__ == "__main__":
    # Run the FastAPI application
    uvicorn.run(
        "main:api",
        host="0.0.0.0",
        port=int(os.getenv("PORT", 7860)),
        log_level="info",
        reload=os.getenv("ENVIRONMENT") == "development"
    )