Spaces:

fongci
/

slack_url_bot

Runtime error

App Files Files Community

tonebeta committed on Feb 9

Commit

6468574

1 Parent(s): c529e22

feat: Set up initial project structure for slack_url_bot including core application, dependencies, documentation, and build automation.

Browse files

Files changed (9) hide show

.python-version +1 -0
Dockerfile +62 -0
Makefile +60 -0
PRD.md +313 -0
README.md +80 -11
main.py +569 -0
pyproject.toml +25 -0
requirements.txt +9 -0
uv.lock +0 -0

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.13

Dockerfile ADDED Viewed

	@@ -0,0 +1,62 @@

+# Dockerfile for Slack URL Summarizer Bot - Hugging Face Spaces
+# Hugging Face Spaces requires port 7860
+# --- Stage 1: Builder ---
+FROM python:3.12-slim AS builder
+WORKDIR /app
+# Install uv package manager
+RUN pip install uv
+# Create virtual environment
+RUN uv venv /opt/venv
+# Copy dependency files
+COPY pyproject.toml uv.lock ./
+# Install dependencies
+# `uv pip sync` expects an already-compiled requirements file that lists every
+# transitive dependency; given pyproject.toml it installs only the packages named
+# there, which leaves the image without e.g. FastAPI's own dependencies.
+# `uv pip install -r pyproject.toml` resolves and installs the full tree.
+RUN . /opt/venv/bin/activate && uv pip install --no-cache -r pyproject.toml
+# --- Stage 2: Final Image ---
+FROM python:3.12-slim
+# Install curl for healthcheck
+RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/*
+# Create non-privileged user
+RUN useradd --create-home --shell /bin/bash appuser
+WORKDIR /home/appuser/app
+# Copy virtual environment from builder
+# (both stages use the same base image, so the venv's interpreter path stays valid)
+COPY --from=builder --chown=appuser:appuser /opt/venv /opt/venv
+# Copy application code
+COPY --chown=appuser:appuser main.py .
+# Switch to non-privileged user
+USER appuser
+# Set environment variables
+# Hugging Face Spaces requires port 7860
+ENV PATH="/opt/venv/bin:$PATH"
+ENV PYTHONUNBUFFERED=1
+ENV PORT=7860
+# Expose Hugging Face Spaces required port
+EXPOSE 7860
+# --- Runtime Configuration ---
+# Required environment variables (set in HF Space Secrets):
+# - SLACK_BOT_TOKEN
+# - SLACK_SIGNING_SECRET
+# - AZURE_OPENAI_ENDPOINT
+# - AZURE_OPENAI_API_KEY
+# Healthcheck
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+  CMD curl -f http://localhost:7860/health || exit 1
+# Run the application on port 7860
+CMD ["uvicorn", "main:api", "--host", "0.0.0.0", "--port", "7860"]

Makefile ADDED Viewed

	@@ -0,0 +1,60 @@

+# Makefile for managing the Slack URL Summarizer Bot Docker container
+# --- Variables ---
+IMAGE_NAME := slack-crawler-summary-bot
+CONTAINER_NAME := slack-crawler-summary-bot_container
+ENV_FILE := .env
+# Port the app listens on inside the container; must match EXPOSE/CMD in the Dockerfile
+APP_PORT := 7860
+# Host port published by `deploy` and `debug`; override with e.g. `make deploy HOST_PORT=8000`
+HOST_PORT ?= $(APP_PORT)
+# Hugging Face variables - **EDIT THESE**
+HF_USERNAME := your-hf-username
+HF_SPACE_NAME := your-hf-space-name
+HF_IMAGE_NAME := registry.hf.space/$(HF_USERNAME)/$(HF_SPACE_NAME):latest
+# --- Local Docker Management ---
+# Build the Docker image
+build:
+	docker build -t $(IMAGE_NAME) .
+# Run the Docker container in detached mode
+deploy:
+	docker run -d --name $(CONTAINER_NAME) --env-file $(ENV_FILE) -p $(HOST_PORT):$(APP_PORT) $(IMAGE_NAME)
+# Stop the Docker container
+stop:
+	docker stop $(CONTAINER_NAME)
+# Remove the Docker container
+rm:
+	docker rm $(CONTAINER_NAME)
+# Restart the Docker container
+# The steps run as sequential sub-makes so their order holds even under `make -j`
+restart:
+	$(MAKE) stop
+	$(MAKE) rm
+	$(MAKE) deploy
+# Run the container in interactive mode for debugging
+debug:
+	docker run -it --rm --name $(CONTAINER_NAME)-debug --env-file $(ENV_FILE) -p $(HOST_PORT):$(APP_PORT) $(IMAGE_NAME) /bin/bash
+# Show logs of the running container
+logs:
+	docker logs -f $(CONTAINER_NAME)
+# --- Hugging Face Deployment ---
+# Log in to Hugging Face Docker registry
+hf-login:
+	@echo "You will be prompted for your Hugging Face username and a User Access Token with write permissions."
+	docker login registry.hf.space
+# Tag the Docker image for Hugging Face
+hf-tag:
+	docker tag $(IMAGE_NAME) $(HF_IMAGE_NAME)
+# Push the Docker image to Hugging Face Spaces
+hf-push:
+	docker push $(HF_IMAGE_NAME)
+# Build, tag, and push to Hugging Face
+# Sequential sub-makes: tagging must follow the build and pushing must follow the tag
+hf-deploy:
+	$(MAKE) build
+	$(MAKE) hf-tag
+	$(MAKE) hf-push
+.PHONY: build deploy stop rm restart debug logs hf-login hf-tag hf-push hf-deploy

PRD.md ADDED Viewed

	@@ -0,0 +1,313 @@

+# Slack URL Summarizer Bot - Technical Specification
+## 1. Project Overview
+### 1.1 Purpose
+Develop a Slack bot that automatically detects URLs in messages, extracts content from those URLs, generates summaries, translates them to Traditional Chinese, and posts the results back to the channel.
+### 1.2 Key Features
+- Automatic URL detection in Slack messages
+- Web content extraction and parsing
+- Content summarization using AI
+- Translation to Traditional Chinese
+- Automated response posting to Slack channels
+## 2. Functional Requirements
+### 2.1 Core Functionality
+| Feature | Description | Priority |
+|---------|-------------|----------|
+| URL Detection | Detect and extract URLs from Slack messages | High |
+| Content Extraction | Extract main content from web pages | High |
+| Content Summarization | Generate concise summaries of extracted content | High |
+| Translation | Translate summaries to Traditional Chinese | High |
+| Slack Integration | Post results back to originating channel | High |
+### 2.2 User Stories
+- **As a Slack user**, I want to paste a URL and automatically receive a Chinese summary, so I can quickly understand the content without reading the full article
+- **As a team member**, I want summaries posted in the same channel, so everyone can benefit from the content
+- **As a user**, I want the bot to handle multiple URLs in one message, so I can share multiple resources efficiently
+## 3. Technical Architecture
+### 3.1 System Architecture
+```
+┌─────────────────┐    ┌─────────────────┐    ┌─────────────────┐
+│   Slack App     │    │   Web Server    │    │   AI Services   │
+│                 │    │   (FastAPI)     │    │                 │
+│ • Events API    │◄──►│ • URL Extract   │◄──►│ • OpenAI API    │
+│ • Bot Token     │    │ • Content Parse │    │ • Summarization │
+│ • Webhooks      │    │ • Response Send │    │ • Translation   │
+└─────────────────┘    └─────────────────┘    └─────────────────┘
+```
+### 3.2 Technology Stack
+- **Backend Framework**: Python + FastAPI
+- **Slack Integration**: Slack Bolt SDK for Python
+- **Content Extraction**: newspaper3k / readability-lxml
+- **AI Services**: Azure OpenAI API
+- **HTTP Client**: httpx
+- **Deployment**: Docker + Cloud hosting (AWS/GCP/Azure)
+- **Database**: Redis (for caching) - Optional
+## 4. Implementation Details
+### 4.1 Slack Bot Setup
+#### Required Scopes
+- `chat:write` - Post messages to channels
+- `channels:read` - Read channel information
+- `app_mentions:read` - Read mentions
+- `channels:history` - Read channel messages
+#### Event Subscriptions
+- `message.channels` - Listen to channel messages
+- `app_mention` - Listen to bot mentions
+### 4.2 Core Components
+#### 4.2.1 URL Detection Module
+```python
+import re
+def extract_urls(text: str) -> List[str]:
+    """Extract all URLs from message text"""
+    pattern = r'https?://[^\s<>"{\[\]|\\^`]+'
+    return re.findall(pattern, text)
+```
+#### 4.2.2 Content Extraction Module
+```python
+from newspaper import Article
+def extract_content(url: str) -> dict:
+    """Extract main content from URL"""
+    try:
+        article = Article(url)
+        article.download()
+        article.parse()
+        return {
+            'title': article.title,
+            'text': article.text,
+            'authors': article.authors,
+            'publish_date': article.publish_date
+        }
+    except Exception as e:
+        return {'error': str(e)}
+```
+#### 4.2.3 AI Processing Module
+```python
+import httpx
+import os
+from dotenv import load_dotenv
+load_dotenv()
+def summarize_and_translate(text: str) -> str:
+    """Summarize content and translate to Traditional Chinese using Azure OpenAI"""
+    url = f"{os.getenv('AZURE_OPENAI_ENDPOINT')}/openai/deployments/{os.getenv('AZURE_OPENAI_DEPLOYMENT_NAME')}/chat/completions?api-version={os.getenv('AZURE_OPENAI_API_VERSION')}"
+    headers = {
+        "Content-Type": "application/json",
+        "api-key": os.getenv("AZURE_OPENAI_API_KEY"),
+    }
+    body = {
+        "messages": [
+            {
+                "role": "user",
+                "content": f"請將以下文章摘要成 3–5 句，並翻譯為繁體中文：\n\n{text}"
+            }
+        ],
+        "temperature": 0.7
+    }
+    resp = httpx.post(url, headers=headers, json=body)
+    resp.raise_for_status()
+    return resp.json()["choices"][0]["message"]["content"].strip()
+```
+## 5. API Specifications
+### 5.1 Slack Event Handler
+```python
+@app.event("message")
+def handle_message(event, say):
+    """Handle incoming Slack messages"""
+    # Extract URLs from message
+    urls = extract_urls(event.get('text', ''))
+    if not urls:
+        return
+    # Process each URL
+    for url in urls:
+        process_url_async(url, event['channel'], say)
+```
+### 5.2 URL Processing Pipeline
+```python
+async def process_url_async(url: str, channel: str, say):
+    """Asynchronous URL processing pipeline"""
+    try:
+        # Step 1: Extract content
+        content = extract_content(url)
+        # Step 2: Summarize and translate
+        summary = summarize_and_translate(content['text'])
+        # Step 3: Format and send response
+        response = format_response(url, content['title'], summary)
+        say(channel=channel, text=response)
+    except Exception as e:
+        error_message = f"❌ 處理網址時發生錯誤: {url}"
+        say(channel=channel, text=error_message)
+```
+## 6. Error Handling & Edge Cases
+### 6.1 URL Validation
+- Invalid URLs (malformed, unreachable)
+- Protected content (login required, paywalls)
+- Unsupported content types (PDFs, images, videos)
+- Rate limiting from target websites
+### 6.2 Content Processing
+- Empty or insufficient content
+- Non-text content (images, videos)
+- Multiple languages in source content
+- Extremely long articles (token limits)
+### 6.3 API Failures
+- Azure OpenAI API rate limits
+- Network timeouts
+- Slack API failures
+- Service unavailability
+## 7. Response Format
+### 7.1 Successful Response Template
+```
+🔗 **原始網址**: {url}
+📰 **標題**: {title}
+📝 **中文摘要**:
+{summary}
+---
+⏰ 處理時間: {timestamp}
+```
+### 7.2 Error Response Template
+```
+❌ **處理失敗**: {url}
+🔍 **錯誤原因**: {error_message}
+💡 **建議**: 請檢查網址是否正確或稍後再試
+```
+## 8. Performance Requirements
+### 8.1 Response Time
+- URL processing: < 30 seconds
+- Simple pages: < 10 seconds
+- Complex pages: < 20 seconds
+### 8.2 Throughput
+- Support 100 concurrent requests
+- Handle 1000 URLs per hour
+- Rate limiting: 5 requests per user per minute
+## 9. Security Considerations
+### 9.1 Input Validation
+- URL sanitization and validation
+- Content length limits
+- Malicious URL detection
+### 9.2 API Security
+- Secure storage of API keys
+- Rate limiting implementation
+- Request logging and monitoring
+## 10. Deployment Strategy
+### 10.1 Environment Setup
+- Development: Local Docker containers
+- Staging: Cloud-based testing environment
+- Production: Container orchestration (Kubernetes/ECS)
+### 10.2 Configuration Management
+```python
+# Environment variables
+SLACK_BOT_TOKEN = os.getenv('SLACK_BOT_TOKEN')
+SLACK_SIGNING_SECRET = os.getenv('SLACK_SIGNING_SECRET')
+AZURE_OPENAI_ENDPOINT = os.getenv('AZURE_OPENAI_ENDPOINT')
+AZURE_OPENAI_API_KEY = os.getenv('AZURE_OPENAI_API_KEY')
+AZURE_OPENAI_DEPLOYMENT_NAME = os.getenv('AZURE_OPENAI_DEPLOYMENT_NAME')
+AZURE_OPENAI_API_VERSION = os.getenv('AZURE_OPENAI_API_VERSION')
+```
+## 11. Testing Strategy
+### 11.1 Unit Tests
+- URL extraction logic
+- Content parsing functions
+- AI API integration
+- Response formatting
+### 11.2 Integration Tests
+- End-to-end Slack workflow
+- External API interactions
+- Error handling scenarios
+## 12. Monitoring & Logging
+### 12.1 Metrics to Track
+- Response times
+- Success/failure rates
+- API usage costs
+- User engagement
+### 12.2 Logging Requirements
+- All URL processing attempts
+- API call results
+- Error occurrences
+- Performance metrics
+## 13. Future Enhancements
+### 13.1 Phase 2 Features
+- Multiple language support
+- Custom summary length options
+- Content caching for repeated URLs
+- User preference settings
+### 13.2 Advanced Features
+- Batch URL processing
+- Scheduled summary delivery
+- Content categorization
+- Analytics dashboard
+## 14. Acceptance Criteria
+### 14.1 MVP Success Criteria
+- [x] Bot responds to URLs in Slack messages
+- [x] Successfully extracts content from common websites
+- [x] Generates coherent Chinese summaries
+- [x] Posts formatted responses to correct channels
+- [x] Handles basic error scenarios gracefully
+### 14.2 Quality Gates
+- 95% uptime requirement
+- <20 second average response time
+- <5% error rate for valid URLs
+- Positive user feedback (>4.0/5.0)
+## 15. Timeline & Milestones
+| Phase | Duration | Deliverables |
+|-------|----------|-------------|
+| Setup & Planning | 1 week | Project setup, Slack app creation |
+| Core Development | 2 weeks | Basic URL processing pipeline |
+| AI Integration | 1 week | Summarization and translation |
+| Testing & Debugging | 1 week | Unit tests, integration tests |
+| Deployment | 1 week | Production deployment, monitoring |
+| **Total** | **6 weeks** | **Production-ready MVP** |

README.md CHANGED Viewed

@@ -1,11 +1,80 @@
----
-title: Slack Url Bot
-emoji: 🌍
-colorFrom: red
-colorTo: purple
-sdk: docker
-pinned: false
-license: mit
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Slack URL Bot
+emoji: 🔗
+colorFrom: blue
+colorTo: purple
+sdk: docker
+pinned: false
+license: mit
+---
+# Slack URL Summarizer Bot
+A Slack bot that automatically summarizes URLs shared in channels, translating content to Traditional Chinese using Azure OpenAI.
+## Features
+- 🔗 **Automatic URL Detection**: Detects URLs in Slack messages
+- 📰 **Content Extraction**: Extracts main content from web pages using newspaper3k
+- 🤖 **AI Summarization**: Summarizes content using Azure OpenAI GPT-4
+- 🌏 **Traditional Chinese**: All summaries are in Traditional Chinese
+- ⚡ **Real-time Processing**: Async processing for fast responses
+## Configuration
+This Space requires the following secrets to be set in your Hugging Face Space settings:
+| Secret | Description |
+|--------|-------------|
+| `SLACK_BOT_TOKEN` | Your Slack Bot OAuth Token (xoxb-...) |
+| `SLACK_SIGNING_SECRET` | Your Slack App Signing Secret |
+| `AZURE_OPENAI_ENDPOINT` | Azure OpenAI endpoint URL |
+| `AZURE_OPENAI_API_KEY` | Azure OpenAI API key |
+### Optional Environment Variables
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `AZURE_OPENAI_DEPLOYMENT_NAME` | gpt-4 | Azure OpenAI deployment name |
+| `AZURE_OPENAI_API_VERSION` | 2025-01-01 | API version |
+## Slack App Setup
+1. Create a new Slack App at [api.slack.com/apps](https://api.slack.com/apps)
+2. Add Bot Token Scopes:
+   - `chat:write` - Send messages
+   - `channels:read` - Read channel info
+   - `channels:history` - Read message history
+   - `app_mentions:read` - Read mentions
+3. Enable Event Subscriptions with URL: `https://fongci-slack-url-bot.hf.space/slack/events`
+4. Subscribe to bot events:
+   - `message.channels`
+   - `app_mention`
+5. Install the app to your workspace
+## API Endpoints
+- `GET /` - Health check
+- `GET /health` - Detailed health status
+- `POST /slack/events` - Slack events webhook
+## Local Development
+```bash
+# Install dependencies
+pip install uv
+uv sync
+# Set environment variables
+export SLACK_BOT_TOKEN=xoxb-...
+export SLACK_SIGNING_SECRET=...
+export AZURE_OPENAI_ENDPOINT=...
+export AZURE_OPENAI_API_KEY=...
+# Run the app
+python main.py
+```
+## License
+MIT

main.py ADDED Viewed

	@@ -0,0 +1,569 @@

+import os
+import re
+import asyncio
+import logging
+from datetime import datetime
+from typing import List, Dict, Optional
+from dataclasses import dataclass
+from contextlib import asynccontextmanager
+import httpx
+from fastapi import FastAPI, Request, HTTPException
+from slack_bolt import App
+from slack_bolt.adapter.fastapi import SlackRequestHandler
+from newspaper import Article
+import uvicorn
+from dotenv import load_dotenv
+# Load environment variables
+# Runs before Config is defined so that the os.getenv() defaults below can see
+# whatever load_dotenv() adds to the environment.
+load_dotenv()
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+# Configuration
+@dataclass
+class Config:
+    """Runtime settings for the bot.
+    The os.getenv() calls are field defaults, so they are evaluated once when this
+    class body executes at import time; later environment changes are not seen.
+    """
+    # Slack credentials; empty string when the variable is unset
+    slack_bot_token: str = os.getenv('SLACK_BOT_TOKEN', '')
+    slack_signing_secret: str = os.getenv('SLACK_SIGNING_SECRET', '')
+    # Azure OpenAI connection settings; endpoint and key default to empty strings
+    azure_openai_endpoint: str = os.getenv('AZURE_OPENAI_ENDPOINT', '')
+    azure_openai_api_key: str = os.getenv('AZURE_OPENAI_API_KEY', '')
+    azure_openai_deployment_name: str = os.getenv('AZURE_OPENAI_DEPLOYMENT_NAME', 'gpt-4')
+    # NOTE(review): confirm '2025-01-01' is an api-version the target Azure resource accepts
+    azure_openai_api_version: str = os.getenv('AZURE_OPENAI_API_VERSION', '2025-01-01')
+    # Maximum number of characters of article text kept before truncation
+    max_content_length: int = 10000
+    # NOTE(review): not referenced anywhere in the visible code; presumably seconds - confirm
+    processing_timeout: int = 30
+config = Config()
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """FastAPI lifespan event handler.
+    Before the yield it logs startup and raises if any required environment
+    variable is unset or empty; after the yield it closes the HTTP client of the
+    module-level ``processor``.
+    """
+    # Startup
+    logger.info("Starting Slack URL Summarizer Bot")
+    # Validate configuration
+    required_vars = [
+        'SLACK_BOT_TOKEN',
+        'SLACK_SIGNING_SECRET',
+        'AZURE_OPENAI_ENDPOINT',
+        'AZURE_OPENAI_API_KEY'
+    ]
+    # The check reads the live environment, not the already-built ``config`` object
+    missing_vars = [var for var in required_vars if not os.getenv(var)]
+    if missing_vars:
+        logger.error(f"Missing required environment variables: {', '.join(missing_vars)}")
+        raise Exception(f"Missing required environment variables: {', '.join(missing_vars)}")
+    logger.info("Bot started successfully")
+    yield
+    # Shutdown
+    logger.info("Shutting down Slack URL Summarizer Bot")
+    # ``processor`` is the module-level URLProcessor created further down the file
+    if hasattr(processor, 'http_client'):
+        await processor.http_client.aclose()
+# Initialize Slack app
+slack_app = App(
+    token=config.slack_bot_token,
+    signing_secret=config.slack_signing_secret,
+    process_before_response=True,
+    # 暫時停用簽名驗證進行測試
+    request_verification_enabled=False
+)
+# Initialize FastAPI with lifespan
+api = FastAPI(title="Slack URL Summarizer Bot", lifespan=lifespan)
+handler = SlackRequestHandler(slack_app)
+class URLProcessor:
+    """Core URL processing functionality"""
+    def __init__(self, config: Config):
+        self.config = config
+        self.http_client = httpx.AsyncClient(
+            timeout=httpx.Timeout(30.0),
+            follow_redirects=True
+        )
+    async def __aenter__(self):
+        return self
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        await self.http_client.aclose()
+    def extract_urls(self, text: str) -> List[str]:
+        """Extract all URLs from message text"""
+        pattern = r'https?://[^\s<>"{\[\]|\\^`]+'
+        urls = re.findall(pattern, text)
+        logger.info(f"Extracted {len(urls)} URLs from message")
+        return urls
+    async def extract_content(self, url: str) -> Dict:
+        """Extract main content from URL"""
+        try:
+            logger.info(f"Extracting content from: {url}")
+            # 設定更好的用戶代理來避免被阻擋
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+            }
+            # 先嘗試使用 httpx 直接獲取內容
+            try:
+                response = await self.http_client.get(url, headers=headers)
+                response.raise_for_status()
+                # 使用 newspaper3k 解析 HTML 內容
+                article = Article(url)
+                article.set_html(response.text)
+                article.parse()
+            except Exception as e:
+                # 如果 httpx 失敗，嘗試 newspaper3k 的原始方法
+                logger.warning(f"Direct HTTP request failed, trying newspaper3k: {str(e)}")
+                article = Article(url)
+                # 設定用戶代理
+                article.config.browser_user_agent = headers['User-Agent']
+                article.download()
+                article.parse()
+            # 驗證內容
+            if not article.text or len(article.text.strip()) < 50:
+                # 如果提取的內容太少，嘗試使用基本的網頁內容
+                if 'response' in locals() and response.text:
+                    # 簡單的 HTML 解析
+                    import re
+                    from html import unescape
+                    # 移除 HTML 標籤
+                    text = re.sub(r'<[^>]+>', '', response.text)
+                    text = unescape(text)
+                    text = re.sub(r'\s+', ' ', text).strip()
+                    if len(text) > 100:
+                        # 取前 3000 字符作為內容
+                        text = text[:3000] + "..." if len(text) > 3000 else text
+                        result = {
+                            'title': url.split('/')[-1].replace('-', ' ').title(),
+                            'text': text,
+                            'authors': [],
+                            'publish_date': None,
+                            'url': url
+                        }
+                        logger.info(f"Successfully extracted content using fallback method from {url}")
+                        return result
+                raise Exception("Insufficient content extracted")
+            # 截斷內容如果太長
+            text = article.text
+            if len(text) > self.config.max_content_length:
+                text = text[:self.config.max_content_length] + "..."
+            result = {
+                'title': article.title or "No title available",
+                'text': text,
+                'authors': article.authors,
+                'publish_date': article.publish_date,
+                'url': url
+            }
+            logger.info(f"Successfully extracted content from {url}")
+            return result
+        except Exception as e:
+            logger.error(f"Error extracting content from {url}: {str(e)}")
+            # 最後的備用方案：返回基本信息讓 AI 處理
+            fallback_result = {
+                'title': f"無法完全提取內容的網頁: {url}",
+                'text': f"由於網站限制，無法提取完整內容。網址: {url}. 請嘗試直接訪問該網站查看內容。",
+                'authors': [],
+                'publish_date': None,
+                'url': url
+            }
+            logger.info(f"Using fallback content for {url}")
+            return fallback_result
+    async def summarize_and_translate(self, content: Dict) -> str:
+        """Summarize content and translate to Traditional Chinese using Azure OpenAI"""
+        try:
+            logger.info(f"Summarizing content for: {content['url']}")
+            # 檢查是否為備用內容
+            if "無法完全提取內容" in content['title']:
+                prompt = f"""這個網址因為網站限制無法完全提取內容：{content['url']}
+請用繁體中文回覆一個友善的訊息，說明：
+1. 由於網站的保護機制，無法自動提取該網頁的完整內容
+2. 建議用戶直接點擊連結查看完整內容
+3. 如果是知名網站，可以簡單說明該網站的性質（如新聞、技術等）
+請保持簡潔友善的語調。"""
+                # 對於備用內容，使用簡化的處理
+                url = f"{self.config.azure_openai_endpoint}/openai/deployments/{self.config.azure_openai_deployment_name}/chat/completions?api-version={self.config.azure_openai_api_version}"
+                headers = {
+                    "Content-Type": "application/json",
+                    "api-key": self.config.azure_openai_api_key,
+                }
+                body = {
+                    "messages": [
+                        {
+                            "role": "system",
+                            "content": "你是一個友善的助手，會提供實用的建議。"
+                        },
+                        {
+                            "role": "user",
+                            "content": prompt
+                        }
+                    ],
+                    "temperature": 0.3,
+                    "max_tokens": 300
+                }
+                response = await self.http_client.post(url, headers=headers, json=body)
+                response.raise_for_status()
+                result = response.json()
+                summary = result["choices"][0]["message"]["content"].strip()
+                # 提取 token 使用量資訊
+                usage_info = result.get("usage", {})
+                token_stats = {
+                    "prompt_tokens": usage_info.get("prompt_tokens", 0),
+                    "completion_tokens": usage_info.get("completion_tokens", 0),
+                    "total_tokens": usage_info.get("total_tokens", 0)
+                }
+                logger.info(f"Generated fallback response for: {content['url']}")
+                logger.info(f"Token usage - Prompt: {token_stats['prompt_tokens']}, Completion: {token_stats['completion_tokens']}, Total: {token_stats['total_tokens']}")
+                return summary, token_stats
+            else:
+                # 正常的摘要處理
+                prompt = f"""請將以下文章摘要成 3-5 句重點，並翻譯為繁體中文。請確保摘要簡潔明瞭且包含最重要的資訊：
+標題：{content['title']}
+內容：{content['text']}
+請用繁體中文回覆摘要。"""
+            # Azure OpenAI API call
+            url = f"{self.config.azure_openai_endpoint}/openai/deployments/{self.config.azure_openai_deployment_name}/chat/completions?api-version={self.config.azure_openai_api_version}"
+            headers = {
+                "Content-Type": "application/json",
+                "api-key": self.config.azure_openai_api_key,
+            }
+            body = {
+                "messages": [
+                    {
+                        "role": "system",
+                        "content": "你是一個專業的技術文章摘要與翻譯專家，精通各種技術領域，能夠準確保留技術術語、專有名詞、數據細節，並將內容翻譯成自然流暢的繁體中文。你特別擅長處理科技、醫療、商業和學術文章，能夠識別並保留重要的技術細節。"
+                    },
+                    {
+                        "role": "user",
+                        "content": prompt
+                    }
+                ],
+                "temperature": 0.3,
+                "max_tokens": 800
+            }
+            response = await self.http_client.post(url, headers=headers, json=body)
+            response.raise_for_status()
+            result = response.json()
+            summary = result["choices"][0]["message"]["content"].strip()
+            logger.info(f"Successfully generated summary for: {content['url']}")
+            return summary
+        except Exception as e:
+            logger.error(f"Error in summarization: {str(e)}")
+            # 回傳錯誤時也要保持 tuple 格式
+            error_summary = f"抱歉，AI 處理時發生錯誤。錯誤訊息：{str(e)}"
+            error_token_stats = {
+                "prompt_tokens": 0,
+                "completion_tokens": 0,
+                "total_tokens": 0
+            }
+            return error_summary, error_token_stats
+    def format_response(self, url: str, title: str, summary: str, token_stats: dict = None) -> str:
+        """Format the response message"""
+        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        response = f"""🔗 原始網址: {url}
+📰 標題: {title}
+{summary}
+---
+⏰ 處理時間: {timestamp}"""
+        # 加入 token 使用統計
+        if token_stats:
+            response += f"""
+📊 Token 使用量: 輸入 {token_stats['prompt_tokens']} + 輸出 {token_stats['completion_tokens']} = 總計 {token_stats['total_tokens']} tokens"""
+        return response
+    def format_error_response(self, url: str, error_message: str) -> str:
+        """Format error response message"""
+        return f"""❌ 處理失敗: {url}
+🔍 錯誤原因: {error_message}
+💡 建議: 請檢查網址是否正確或稍後再試"""
+# Global processor instance and deduplication cache
+processor = URLProcessor(config)
+processing_cache = set()  # 用於去重的快取
+async def process_url_async(url: str, channel: str, say):
+    """Asynchronous URL processing pipeline"""
+    # 建立唯一的處理 ID
+    process_id = f"{url}:{channel}:{int(datetime.now().timestamp())//60}"  # 每分鐘重置
+    # 檢查是否已經在處理中
+    if process_id in processing_cache:
+        logger.info(f"URL {url} is already being processed, skipping duplicate")
+        return
+    # 添加到處理快取
+    processing_cache.add(process_id)
+    try:
+        logger.info(f"Starting to process URL: {url}")
+        async with URLProcessor(config) as proc:
+            # Step 1: Extract content
+            logger.info(f"Step 1: Extracting content from {url}")
+            content = await proc.extract_content(url)
+            logger.info(f"Content extracted successfully. Title: {content.get('title', 'N/A')}")
+            # Step 2: Summarize and translate
+            logger.info(f"Step 2: Summarizing and translating content for {url}")
+            try:
+                result = await proc.summarize_and_translate(content)
+                # 處理回傳值 - 可能是 tuple 或只是 string
+                if isinstance(result, tuple):
+                    summary, token_stats = result
+                else:
+                    summary = result
+                    token_stats = None
+                logger.info(f"Summary generated successfully for {url}")
+            except Exception as e:
+                logger.error(f"Error in summarization, trying fallback: {str(e)}")
+                # 如果 AI 處理失敗，提供基本回應
+                summary = f"抱歉，由於技術問題無法生成摘要。請直接查看原始網址：{url}"
+                token_stats = None
+            # Step 3: Format and send response
+            logger.info(f"Step 3: Formatting and sending response for {url}")
+            response = proc.format_response(url, content['title'], summary, token_stats)
+            # Send to Slack (使用同步的 say 函數)
+            say(channel=channel, text=response)
+            logger.info(f"Successfully processed and sent response for: {url}")
+    except Exception as e:
+        logger.error(f"Error processing URL {url}: {str(e)}", exc_info=True)
+        error_message = processor.format_error_response(url, str(e))
+        say(channel=channel, text=error_message)
+    finally:
+        # 處理完成後從快取中移除（延遲5秒）
+        import threading
+        def remove_from_cache():
+            import time
+            time.sleep(5)
+            processing_cache.discard(process_id)
+        threading.Thread(target=remove_from_cache).start()
+# Slack event handlers
+@slack_app.event("message")
+def handle_message(event, say, ack):
+    """Handle incoming Slack messages"""
+    ack()  # 確認收到事件
+    try:
+        logger.info(f"Received message event: {event}")
+        # Skip bot messages
+        if event.get('bot_id'):
+            logger.info("Skipping bot message")
+            return
+        # Skip app_mention events (這些會由 handle_app_mention 處理)
+        if event.get('type') == 'app_mention':
+            logger.info("Skipping app_mention in message handler")
+            return
+        # Skip messages without text
+        if 'text' not in event:
+            logger.info("Skipping message without text")
+            return
+        message_text = event.get('text', '')
+        channel = event.get('channel')
+        user = event.get('user')
+        # 檢查是否為提及機器人的訊息 (避免重複處理)
+        if '<@U094J502LLC>' in message_text:
+            logger.info("Skipping mention message in message handler (will be handled by app_mention)")
+            return
+        logger.info(f"Processing message from user {user} in channel {channel}: {message_text}")
+        # Extract URLs from message
+        urls = processor.extract_urls(message_text)
+        if not urls:
+            logger.info("No URLs found in message")
+            return
+        logger.info(f"Found {len(urls)} URLs: {urls}")
+        # Send initial acknowledgment for multiple URLs
+        if len(urls) > 1:
+            say(
+                channel=channel,
+                text=f"🔄 正在處理 {len(urls)} 個網址，請稍候..."
+            )
+        # Process each URL asynchronously
+        import threading
+        for url in urls:
+            logger.info(f"Creating thread for URL: {url}")
+            thread = threading.Thread(
+                target=lambda u=url: asyncio.run(process_url_async(u, channel, say))
+            )
+            thread.start()
+    except Exception as e:
+        logger.error(f"Error in message handler: {str(e)}", exc_info=True)
+        say(
+            channel=event.get('channel'),
+            text="❌ 處理訊息時發生錯誤，請稍後再試"
+        )
+@slack_app.event("app_mention")
+def handle_app_mention(event, say, ack):
+    """Handle app mentions.
+    If the mention text contains URLs, each one is processed in its own thread;
+    otherwise a welcome message is posted to the channel.
+    """
+    ack()  # Acknowledge receipt of the event
+    logger.info(f"Received app mention: {event}")
+    # Check whether the message contains a URL
+    message_text = event.get('text', '')
+    urls = processor.extract_urls(message_text)
+    if urls:
+        # URLs present: process them
+        logger.info(f"App mention contains URLs: {urls}")
+        # Send initial acknowledgment
+        say(
+            channel=event['channel'],
+            text=f"🔄 收到！正在處理 {len(urls)} 個網址..."
+        )
+        # Process URLs in threads
+        import threading
+        for url in urls:
+            logger.info(f"Creating thread for app mention URL: {url}")
+            thread = threading.Thread(
+                # u=url binds the current URL at lambda-creation time
+                target=lambda u=url: asyncio.run(process_url_async(u, event['channel'], say))
+            )
+            thread.start()
+    else:
+        # No URL: reply with a welcome message
+        say(
+            channel=event["channel"],
+            text="👋 你好！我是網址摘要機器人。只要在頻道中貼上網址，我就會自動為你生成繁體中文摘要！"
+        )
+# FastAPI routes
+@api.get("/")
+async def root():
+    """Health check endpoint; always returns a static "healthy" payload."""
+    return {"status": "healthy", "service": "Slack URL Summarizer Bot"}
+@api.get("/health")
+async def health_check():
+    """Detailed health check.
+    Reports the current time and whether the Slack token and Azure OpenAI
+    endpoint are non-empty in ``config``; it does not contact either service.
+    """
+    return {
+        "status": "healthy",
+        "timestamp": datetime.now().isoformat(),
+        "config": {
+            "slack_configured": bool(config.slack_bot_token),
+            "azure_openai_configured": bool(config.azure_openai_endpoint),
+        }
+    }
+@api.get("/slack/events")
+async def slack_events_get():
+    """Handle GET requests to slack events endpoint with a static notice; events themselves arrive via POST."""
+    return {"message": "Slack events endpoint is ready", "methods": ["POST"]}
+@api.post("/slack/events")
+async def slack_events(request: Request):
+    """Handle Slack events"""
+    try:
+        # Get the request body
+        body = await request.body()
+        # Parse JSON
+        import json
+        data = json.loads(body)
+        # Handle URL verification challenge
+        if data.get("type") == "url_verification":
+            challenge = data.get("challenge")
+            logger.info(f"Received URL verification challenge: {challenge}")
+            return {"challenge": challenge}
+        # Handle regular Slack events
+        logger.info(f"Received Slack event: {data.get('type')}")
+        return await handler.handle(request)
+    except json.JSONDecodeError:
+        logger.error("Invalid JSON in Slack request")
+        raise HTTPException(status_code=400, detail="Invalid JSON")
+    except Exception as e:
+        logger.error(f"Error handling Slack event: {str(e)}")
+        raise HTTPException(status_code=500, detail="Internal server error")
+# Error handling middleware
+@api.exception_handler(Exception)
+async def global_exception_handler(request: Request, exc: Exception):
+    """Log any unhandled exception and answer with a generic JSON 500 response."""
+    # Function-scope import: JSONResponse is not among the module's imports
+    from fastapi.responses import JSONResponse
+    logger.error(f"Unhandled exception: {str(exc)}")
+    # An exception handler must return a response; raising here would escape as
+    # a second, unhandled error. The body mirrors what HTTPException produces.
+    return JSONResponse(status_code=500, content={"detail": "Internal server error"})
+if __name__ == "__main__":
+    # Run the FastAPI application
+    # PORT falls back to 7860 when unset; auto-reload is enabled only when
+    # ENVIRONMENT is exactly "development".
+    uvicorn.run(
+        "main:api",
+        host="0.0.0.0",
+        port=int(os.getenv("PORT", 7860)),
+        log_level="info",
+        reload=os.getenv("ENVIRONMENT") == "development"
+    )

pyproject.toml ADDED Viewed

	@@ -0,0 +1,25 @@

+[project]
+name = "slack-url-bot"
+version = "0.1.0"
+description = "Slack bot that summarizes shared URLs into Traditional Chinese using Azure OpenAI"
+readme = "README.md"
+requires-python = ">=3.12"
+# Runtime dependencies only; test and lint tooling lives in the dev group below
+# so it is not installed into the production image.
+dependencies = [
+    "aiohttp>=3.12.13",
+    "fastapi>=0.115.14",
+    "httpx>=0.28.1",
+    "lxml[html-clean]>=6.0.0",
+    "newspaper3k>=0.2.8",
+    "python-dotenv>=1.1.1",
+    "slack-bolt>=1.23.0",
+    "slack-sdk>=3.35.0",
+    "uvicorn>=0.35.0",
+]
+# After editing this file, refresh uv.lock with `uv lock`
+[dependency-groups]
+dev = [
+    "pytest>=8.4.1",
+    "pytest-asyncio>=1.0.0",
+    "ruff>=0.12.2",
+]

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+aiohttp>=3.12.13
+fastapi>=0.115.14
+httpx>=0.28.1
+lxml[html-clean]>=6.0.0
+newspaper3k>=0.2.8
+python-dotenv>=1.1.1
+slack-bolt>=1.23.0
+slack-sdk>=3.35.0
+uvicorn>=0.35.0

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff