# -*- coding: utf-8 -*-
"""
AI 뉴스 & 허깅페이스 트렌딩 분석 시스템
- AI Times 뉴스 크롤링 및 카테고리 분류
- 허깅페이스 모델/스페이스 트렌딩 정보 수집
- Fireworks AI (Qwen) 를 통한 뉴스 분석
- Brave Search를 통한 팩트 체크
"""

import json
import os
import re
import time
from datetime import datetime
from typing import List, Dict, Optional
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


class AINewsAnalyzer:
    """Collect AI Times headlines and Hugging Face trending items into a text report.

    The analyzer scrapes article links from AI Times list pages, tags each
    headline with a keyword-based category, pulls trending models/spaces from
    Hugging Face, optionally asks a Fireworks-hosted Qwen model to explain the
    top headlines and looks them up on Brave Search, and finally renders
    everything as a plain-text report.
    """

    # Browser-like User-Agent sent with the HTML scraping requests.
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    # Seconds to wait for the scraping / Hugging Face / Brave requests.
    _REQUEST_TIMEOUT = 10
    # Base used to resolve space links found on the Hugging Face spaces page.
    _HUGGINGFACE_BASE_URL = "https://huggingface.co"
    # Prompt prefix sent to the LLM for every analysed headline (runtime text).
    _ANALYSIS_INSTRUCTION = (
        "이 뉴스를 다음 형식으로 분석해주세요:\n"
        "1. 핵심 내용 (2-3문장, 초등학생 수준)\n"
        "2. 왜 중요한가? (1-2문장)\n"
        "3. 당신이 해야 할 행동 (1-2개 항목)\n"
        "\n"
        "간결하고 명확하게 작성해주세요."
    )

    def __init__(self, fireworks_api_key: str, brave_api_key: str):
        """Store the API credentials and initialise empty result containers.

        Args:
            fireworks_api_key: Fireworks AI API key (sent as a Bearer token).
            brave_api_key: Brave Search API key (sent as X-Subscription-Token).
        """
        self.fireworks_api_key = fireworks_api_key
        self.brave_api_key = brave_api_key

        # Category name -> keywords; a headline gets the first category (in
        # this insertion order) that has any keyword occurring in the title.
        self.categories = {
            "산업동향": ["산업", "기업", "투자", "인수", "파트너십", "시장"],
            "기술혁신": ["기술", "모델", "알고리즘", "개발", "연구", "논문"],
            "제품출시": ["출시", "공개", "발표", "서비스", "제품"],
            "정책규제": ["규제", "정책", "법", "정부", "제재"],
            "보안이슈": ["보안", "취약점", "해킹", "위험", "프라이버시"],
        }

        # Filled by fetch_huggingface_trending(); read by generate_report().
        self.huggingface_data = {
            "models": [],
            "spaces": []
        }

        self.news_data = []

    def fetch_aitimes_news(self, urls: List[str]) -> List[Dict]:
        """Scrape article links from the given AI Times list pages.

        For every page, anchors whose href contains '/news/articleView.html'
        and whose text is longer than 10 characters are kept; at most 10
        articles are taken per page. An article URL already collected (from
        this or an earlier page) is skipped so the result has no duplicates.

        Args:
            urls: List-page URLs to crawl.

        Returns:
            A list of dicts with the keys 'title', 'url', 'date' and 'source'.
            'date' is an empty string when no date-like text was found.
            Pages that fail to download or parse are reported on stdout and
            skipped; the method itself does not raise for them.
        """
        all_news = []
        seen_urls = set()
        date_pattern = re.compile(r'\d{2}-\d{2}')

        for url in urls:
            try:
                print(f"📰 뉴스 크롤링 중: {url}")
                # A timeout keeps an unresponsive server from hanging the run.
                response = requests.get(
                    url,
                    headers={'User-Agent': self._USER_AGENT},
                    timeout=self._REQUEST_TIMEOUT,
                )
                # Do not try to parse an HTTP error page as a news list.
                response.raise_for_status()
                soup = BeautifulSoup(response.content, 'html.parser')

                # NOTE: the selector logic may need adjusting to the real page structure.
                articles = []
                for link in soup.find_all('a', href=True):
                    href = link['href']
                    if '/news/articleView.html' not in href:
                        continue

                    title = link.get_text(strip=True)
                    if not title or len(title) <= 10:
                        continue

                    # Resolve relative links against the page they came from.
                    article_url = urljoin(url, href)
                    if article_url in seen_urls:
                        continue
                    seen_urls.add(article_url)

                    # Look for date-like text (NN-NN) under the link's parent element.
                    date_text = ""
                    parent = link.parent
                    if parent:
                        date_elem = parent.find(string=date_pattern)
                        if date_elem:
                            date_text = date_elem.strip()

                    articles.append({
                        'title': title,
                        'url': article_url,
                        'date': date_text,
                        'source': 'AI Times'
                    })

                all_news.extend(articles[:10])  # keep only the first 10 per page
                time.sleep(1)  # be polite to the site between pages

            except Exception as e:
                # Best-effort crawl: one broken page must not abort the others.
                print(f"❌ 크롤링 오류: {e}")

        return all_news

    def fetch_huggingface_trending(self) -> Dict:
        """Collect trending Hugging Face models and spaces.

        Each successful fetch replaces the corresponding list in
        ``self.huggingface_data`` (so calling this repeatedly does not
        accumulate duplicates); a failed fetch is reported on stdout and
        leaves the previously stored list untouched.

        Returns:
            ``self.huggingface_data``, a dict with the keys 'models' and 'spaces'.
        """
        print("🤗 허깅페이스 트렌딩 정보 수집 중...")
        self._fetch_trending_models()
        self._fetch_trending_spaces()
        return self.huggingface_data

    def _fetch_trending_models(self, limit: int = 30) -> None:
        """Query the Hugging Face models API and store up to ``limit`` entries."""
        try:
            response = requests.get(
                f"{self._HUGGINGFACE_BASE_URL}/api/models",
                params={'sort': 'trending', 'limit': limit},
                timeout=self._REQUEST_TIMEOUT,
            )
            if response.status_code != 200:
                print(f"❌ 모델 수집 실패 (상태 코드: {response.status_code})")
                return

            self.huggingface_data['models'] = [
                {
                    'name': model.get('id', 'Unknown'),
                    # `or 0` also covers an explicit null so the report's
                    # thousands-separator formatting cannot fail on None.
                    'downloads': model.get('downloads') or 0,
                    'likes': model.get('likes') or 0,
                    'task': model.get('pipeline_tag', 'N/A'),
                    'url': f"https://huggingface.co/{model.get('id', '')}"
                }
                for model in response.json()[:limit]
            ]
            print(f"✅ {len(self.huggingface_data['models'])}개 트렌딩 모델 수집 완료")

        except Exception as e:
            # Best-effort: report and keep whatever was stored before.
            print(f"❌ 모델 수집 오류: {e}")

    def _fetch_trending_spaces(self, limit: int = 30) -> None:
        """Scrape the Hugging Face spaces page and store up to ``limit`` entries."""
        try:
            response = requests.get(
                f"{self._HUGGINGFACE_BASE_URL}/spaces",
                headers={'User-Agent': self._USER_AGENT},
                timeout=self._REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

            spaces = []
            seen_names = set()
            for link in soup.find_all('a', href=True):
                if len(spaces) >= limit:
                    break

                href = link['href']
                if '/spaces/' not in href:
                    continue

                # Everything after '/spaces/' is expected to look like 'owner/name'.
                space_name = href.split('/spaces/', 1)[1]
                if '/' not in space_name or len(space_name) <= 3:
                    continue

                title = link.get_text(strip=True)
                if not title or space_name in seen_names:
                    continue
                seen_names.add(space_name)

                spaces.append({
                    'name': space_name,
                    'title': title[:100],
                    # urljoin keeps already-absolute hrefs intact.
                    'url': urljoin(self._HUGGINGFACE_BASE_URL, href)
                })

            self.huggingface_data['spaces'] = spaces
            print(f"✅ {len(self.huggingface_data['spaces'])}개 트렌딩 스페이스 수집 완료")

        except Exception as e:
            # Best-effort: report and keep whatever was stored before.
            print(f"❌ 스페이스 수집 오류: {e}")

    def categorize_news(self, news_list: List[Dict]) -> List[Dict]:
        """Tag every news dict in place with a 'category' key.

        The lower-cased title is matched against ``self.categories``; the
        first category with any keyword contained in the title wins, and
        "기타" is used when nothing matches.

        Args:
            news_list: News dicts; each must have a 'title' key.

        Returns:
            The same list object, with 'category' set on every item.
        """
        for news in news_list:
            title = news['title'].lower()
            news['category'] = next(
                (
                    category
                    for category, keywords in self.categories.items()
                    if any(keyword in title for keyword in keywords)
                ),
                "기타",
            )

        return news_list

    def analyze_with_qwen(self, text: str, instruction: str) -> str:
        """Send ``instruction`` plus ``text`` to the Fireworks chat-completions API.

        Args:
            text: The news text (the callers in this class pass the headline).
            instruction: Prompt describing the desired analysis format.

        Returns:
            The content of the first returned choice on HTTP 200; otherwise a
            Korean failure message containing the status code or the exception
            text. This method does not raise.
        """
        url = "https://api.fireworks.ai/inference/v1/chat/completions"

        payload = {
            "model": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
            "max_tokens": 4096,
            "top_p": 1,
            "top_k": 40,
            "presence_penalty": 0,
            "frequency_penalty": 0,
            "temperature": 0.6,
            "messages": [
                {
                    "role": "system",
                    "content": "당신은 AI 뉴스를 초등학생도 이해할 수 있게 쉽게 설명하는 전문가입니다."
                },
                {
                    "role": "user",
                    "content": f"{instruction}\n\n뉴스: {text}"
                }
            ]
        }

        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.fireworks_api_key}"
        }

        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=30)

            if response.status_code != 200:
                return f"분석 실패 (상태 코드: {response.status_code})"

            return response.json()['choices'][0]['message']['content']

        except Exception as e:
            # Covers network errors as well as an unexpected response shape.
            return f"분석 오류: {str(e)}"

    def fact_check_with_brave(self, query: str) -> List[Dict]:
        """Look up ``query`` on Brave Search and return the top web results.

        Args:
            query: Search query (the caller in this class passes a headline).

        Returns:
            Up to three dicts with the keys 'title', 'description' and 'url'.
            An empty list is returned on a non-200 response, when the response
            has no web results, or on any exception.
        """
        url = "https://api.search.brave.com/res/v1/web/search"

        headers = {
            "Accept": "application/json",
            "X-Subscription-Token": self.brave_api_key
        }

        params = {
            "q": query,
            "count": 5,
            # requests would serialise the Python bool False as "False";
            # send the conventional lowercase query-string form instead.
            "text_decorations": "false",
            "search_lang": "ko"
        }

        try:
            response = requests.get(
                url, headers=headers, params=params, timeout=self._REQUEST_TIMEOUT
            )

            if response.status_code != 200:
                print(f"❌ Brave Search 실패 (상태 코드: {response.status_code})")
                return []

            items = response.json().get('web', {}).get('results', [])
            return [
                {
                    'title': item.get('title', ''),
                    'description': item.get('description', ''),
                    'url': item.get('url', '')
                }
                for item in items[:3]
            ]

        except Exception as e:
            print(f"❌ Brave Search 오류: {e}")
            return []

    def _analyze_article(self, title: str) -> List[str]:
        """Return the report lines for one headline's LLM analysis and fact check."""
        print(f"🤖 LLM 분석 중: {title[:50]}...")

        analysis = self.analyze_with_qwen(title, self._ANALYSIS_INSTRUCTION)
        lines = ["\n   🤖 AI 분석:"]
        lines.extend(
            f"      {line.strip()}" for line in analysis.split('\n') if line.strip()
        )

        # The fact-check section is emitted only when the search returned something.
        fact_check = self.fact_check_with_brave(title[:100])
        if fact_check:
            lines.append("\n   ✅ 팩트 체크 (Brave Search):")
            for fc in fact_check[:2]:
                lines.append(f"      • {fc['title']}")
                lines.append(f"        {fc['url']}")

        return lines

    def generate_report(self, news_list: List[Dict], analyze_news: bool = True) -> str:
        """Render the collected news and Hugging Face data as a text report.

        Args:
            news_list: News dicts with 'title' and 'url'; 'category' and
                'date' are optional (missing category -> "기타", missing or
                empty date -> "N/A").
            analyze_news: When True, the first two articles of every category
                are sent to the LLM and to Brave Search, with a 2-second
                pause after each analysed article.

        Returns:
            The report as one newline-joined string.
        """
        heavy_rule = "=" * 80
        light_rule = "-" * 80
        models = self.huggingface_data['models']
        spaces = self.huggingface_data['spaces']

        # Group articles by category, preserving first-seen category order.
        categorized_news = {}
        for news in news_list:
            categorized_news.setdefault(news.get('category', '기타'), []).append(news)

        report = [
            heavy_rule,
            "📊 AI 뉴스 & 허깅페이스 트렌딩 종합 리포트",
            f"📅 생성일시: {datetime.now().strftime('%Y년 %m월 %d일 %H:%M')}",
            heavy_rule,
            "",
        ]

        # 1. News grouped by category
        report.append("📰 === AI TIMES 뉴스 분석 ===")
        report.append("")

        for category, articles in categorized_news.items():
            report.append(f"📌 [{category}] ({len(articles)}건)")
            report.append(light_rule)

            for i, article in enumerate(articles[:5], 1):  # at most 5 per category
                report.append(f"{i}. {article['title']}")
                report.append(f"   🔗 {article['url']}")
                # The scraper stores '' when no date was found, so fall back
                # on any falsy value rather than only on a missing key.
                report.append(f"   📅 {article.get('date') or 'N/A'}")

                if analyze_news and i <= 2:  # analyse only the top 2 per category
                    report.extend(self._analyze_article(article['title']))
                    time.sleep(2)  # stay under the API rate limits

                report.append("")

            report.append("")

        # 2. Hugging Face trending
        report.append("🤗 === 허깅페이스 트렌딩 TOP 30 ===")
        report.append("")

        report.append("🔥 트렌딩 모델 TOP 30")
        report.append(light_rule)
        for i, model in enumerate(models[:30], 1):
            report.append(f"{i:2d}. {model['name']}")
            report.append(f"    📊 다운로드: {model['downloads']:,} | ❤️ 좋아요: {model['likes']:,}")
            report.append(f"    🏷️  Task: {model['task']}")
            report.append(f"    🔗 {model['url']}")
            report.append("")

        report.append("")

        report.append("🚀 트렌딩 스페이스 TOP 30")
        report.append(light_rule)
        for i, space in enumerate(spaces[:30], 1):
            report.append(f"{i:2d}. {space['name']}")
            report.append(f"    📝 {space['title']}")
            report.append(f"    🔗 {space['url']}")
            report.append("")

        # 3. Summary
        report.extend([
            heavy_rule,
            "📈 종합 요약",
            heavy_rule,
            f"• 총 뉴스 수집: {len(news_list)}건",
            f"• 카테고리 수: {len(categorized_news)}개",
            f"• 트렌딩 모델: {len(models)}개",
            f"• 트렌딩 스페이스: {len(spaces)}개",
            "",
        ])

        return '\n'.join(report)

    def run_full_analysis(self, news_urls: List[str], analyze_with_llm: bool = True) -> str:
        """Run the whole pipeline: crawl, categorise, fetch trending, render.

        Args:
            news_urls: AI Times list-page URLs passed to fetch_aitimes_news().
            analyze_with_llm: Forwarded to generate_report() as ``analyze_news``.

        Returns:
            The generated report text.
        """
        print("🚀 AI 뉴스 & 허깅페이스 트렌딩 분석 시작...")
        print("")

        # 1. Collect news
        news_list = self.fetch_aitimes_news(news_urls)
        print(f"✅ 총 {len(news_list)}건의 뉴스 수집 완료")
        print("")

        # 2. Categorise
        categorized_news = self.categorize_news(news_list)
        print("✅ 뉴스 카테고리 분류 완료")
        print("")

        # 3. Hugging Face trending
        self.fetch_huggingface_trending()
        print("")

        # 4. Render the report
        print("📝 리포트 생성 중...")
        report = self.generate_report(categorized_news, analyze_news=analyze_with_llm)

        print("")
        print("✅ 분석 완료!")

        return report

    def save_report(self, report: str, filename: Optional[str] = None) -> str:
        """Write the report to a UTF-8 text file.

        Args:
            report: Report text to write.
            filename: Target path; when None, a name of the form
                'ai_news_report_YYYYMMDD_HHMMSS.txt' is generated from the
                current local time.

        Returns:
            The filename that was written.
        """
        if filename is None:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            filename = f"ai_news_report_{timestamp}.txt"

        with open(filename, 'w', encoding='utf-8') as f:
            f.write(report)

        print(f"💾 리포트 저장 완료: {filename}")
        return filename


# ==================== 사용 예시 ====================

def main():
    """Run the example pipeline: analyse, print the report, and save it to a file.

    API keys are read from the FIREWORKS_API_KEY and BRAVE_API_KEY environment
    variables so real secrets need not be written into this file; when a
    variable is unset the original placeholder string is used instead.
    """
    # API keys: prefer the environment, fall back to the placeholders.
    fireworks_api_key = os.environ.get("FIREWORKS_API_KEY", "YOUR_FIREWORKS_API_KEY")
    brave_api_key = os.environ.get("BRAVE_API_KEY", "YOUR_BRAVE_API_KEY")

    # AI Times list pages to crawl.
    news_urls = [
        "https://www.aitimes.com/news/articleList.html?sc_multi_code=S2&view_type=sm",  # AI industry
        "https://www.aitimes.com/news/articleList.html?sc_section_code=S1N24&view_type=sm",  # AI technology
    ]

    analyzer = AINewsAnalyzer(
        fireworks_api_key=fireworks_api_key,
        brave_api_key=brave_api_key,
    )

    # Pass analyze_with_llm=False to skip the LLM step and only collect data
    # (much faster); with True every analysed article costs API calls and a pause.
    report = analyzer.run_full_analysis(
        news_urls=news_urls,
        analyze_with_llm=True,
    )

    # Show the result on stdout.
    print("\n" + "=" * 80)
    print(report)

    # Persist the report to a timestamped file.
    analyzer.save_report(report)


# Run the example pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


# ==================== 사용 팁 ====================
"""
1. API 키 설정:
   - Fireworks AI: https://fireworks.ai/
   - Brave Search: https://brave.com/search/api/

2. 빠른 테스트 (LLM 분석 없이):
   analyzer.run_full_analysis(news_urls, analyze_with_llm=False)

3. 특정 카테고리만 분석:
   categorized_news에서 원하는 카테고리 필터링

4. 크롤링 주기 조정:
   time.sleep() 값을 조정하여 속도/안정성 균형

5. 결과 활용:
   - JSON으로 저장: json.dumps(analyzer.huggingface_data, ensure_ascii=False, indent=2)
   - 데이터베이스 저장
   - 대시보드 연동
"""