# -*- coding: utf-8 -*-
"""
AI news & Hugging Face trending analysis web app (full edition v3.2).

File name: app_advanced.py

Main features:
1. Persistent storage in a SQLite database
2. Crawling of AI Times news lists (2 sections)
3. Hugging Face trending API integration (top 30 models / spaces)
4. Fireworks AI (Qwen3-235B) LLM analysis
   - model card (README.md) analysis
   - space code (app.py) analysis
   - news items are explained with keyword-based templates
5. Tabbed UI (news / models / spaces)

How to run:
1. pip install Flask requests beautifulsoup4 huggingface_hub
2. export FIREWORKS_API_KEY="your-api-key-here"  # optional (template mode if unset)
3. python app_advanced.py
4. Open http://localhost:7860 in a browser

Environment variables:
- FIREWORKS_API_KEY: Fireworks AI API key (optional, enables LLM analysis)
- PORT: server port (default: 7860)
"""

import html
import json
import os
import re
import sqlite3
import sys
import time
import traceback
from contextlib import closing
from datetime import datetime
from typing import Dict, List, Optional

import requests
from bs4 import BeautifulSoup
from flask import Flask, jsonify, render_template_string, request
from huggingface_hub import HfApi

# Flask application setup
app = Flask(__name__)
# Legacy switch: older Flask versions read this to emit non-ASCII JSON as-is.
app.config['JSON_AS_ASCII'] = False
# Newer Flask versions ignore JSON_AS_ASCII; the JSON provider holds the flag.
if hasattr(app, 'json'):
    app.json.ensure_ascii = False

# Path of the SQLite database file
DB_PATH = 'ai_news_analysis.db'

# ============================================
# HTML template (tabbed UI)
# ============================================
# NOTE(review): only text and Jinja tags are present in this template; the
# HTML markup looks like it was stripped somewhere upstream -- confirm against
# version control before shipping.
HTML_TEMPLATE = """ AI 뉴스 & 허깅페이스 LLM 분석 시스템

🤖 AI 뉴스 & 허깅페이스 LLM 분석

초등학생도 이해하는 AI 트렌드 분석 시스템 🎓

{{ stats.total_news }}

📰 분석된 뉴스

{{ stats.hf_models }}

🤗 트렌딩 모델

{{ stats.hf_spaces }}

🚀 인기 스페이스

{{ stats.llm_analyses }}

🧠 LLM 분석

{% for article in analyzed_news %}

{{ loop.index }}. {{ article.title }}

📅 {{ article.date }} 📰 {{ article.source }}

🎯 쉬운 요약

{{ article.analysis.summary }}

💡 왜 중요할까?

{{ article.analysis.significance }}

📊 영향도 {{ article.analysis.impact_text }}

{{ article.analysis.impact_description }}

✅ 우리가 할 수 있는 것

{{ article.analysis.action }}

🔗 전체 기사 읽어보기

{% endfor %}

{% for model in analyzed_models %}

{{ model.rank }}

{{ model.name }}

🏷️ {{ model.task }}

📥 다운로드
{{ "{:,}".format(model.downloads) }}

❤️ 좋아요
{{ "{:,}".format(model.likes) }}

🧠 AI 분석:
{{ model.analysis }}

🔗 모델 페이지 방문

{% endfor %}

{% if analyzed_models|length == 0 %}

⚠️ 모델 데이터를 불러오는 중...

{% endif %}

{% for space in analyzed_spaces %}

{{ space.rank }}. {{ space.name }}

트렌딩 {{ space.rank }}위

📝 설명: {{ space.description }}

🎓 초등학생 설명:
{{ space.simple_explanation }}

{% if space.tech_stack %}

🛠️ 사용 기술: {% for tech in space.tech_stack %} {{ tech }} {% endfor %}

{% endif %} 🔗 스페이스 체험하기

{% endfor %} {% if analyzed_spaces|length == 0 %}

⚠️ 스페이스 데이터를 불러오는 중...

{% endif %}

⏰ 마지막 업데이트: {{ timestamp }}

"""


# ============================================
# Database helpers
# ============================================
def init_database():
    """Create the news, models and spaces tables if they do not exist."""
    # closing() guarantees the connection is released even if a statement fails.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        cursor = conn.cursor()

        # News table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS news (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                title TEXT NOT NULL,
                url TEXT NOT NULL UNIQUE,
                date TEXT,
                source TEXT,
                category TEXT,
                summary TEXT,
                significance TEXT,
                impact_level TEXT,
                impact_text TEXT,
                impact_description TEXT,
                action TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        # Models table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS models (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL UNIQUE,
                downloads INTEGER,
                likes INTEGER,
                task TEXT,
                url TEXT,
                analysis TEXT,
                rank INTEGER,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        # Spaces table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS spaces (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                space_id TEXT NOT NULL UNIQUE,
                name TEXT NOT NULL,
                author TEXT,
                title TEXT,
                likes INTEGER,
                url TEXT,
                sdk TEXT,
                simple_explanation TEXT,
                tech_stack TEXT,
                rank INTEGER,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        conn.commit()

    print("✅ 데이터베이스 초기화 완료")


def save_news_to_db(news_list: List[Dict]):
    """Insert or replace analysed news rows, keyed by their unique URL.

    Each item must carry 'title', 'url' and an 'analysis' dict with the keys
    summary, significance, impact_level, impact_text, impact_description and
    action; 'date', 'source' and 'category' default to ''.
    """
    saved_count = 0
    with closing(sqlite3.connect(DB_PATH)) as conn:
        cursor = conn.cursor()
        for news in news_list:
            analysis = news['analysis']
            try:
                cursor.execute('''
                    INSERT OR REPLACE INTO news
                    (title, url, date, source, category, summary, significance,
                     impact_level, impact_text, impact_description, action)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    news['title'],
                    news['url'],
                    news.get('date', ''),
                    news.get('source', ''),
                    news.get('category', ''),
                    analysis['summary'],
                    analysis['significance'],
                    analysis['impact_level'],
                    analysis['impact_text'],
                    analysis['impact_description'],
                    analysis['action'],
                ))
                saved_count += 1
            except sqlite3.IntegrityError as e:
                # Skip the offending row but report it, like the other savers do.
                print(f"⚠️ 뉴스 저장 오류: {e}")
        conn.commit()

    print(f"✅ {saved_count}개 뉴스 DB 저장 완료")


def save_models_to_db(models_list: List[Dict]):
    """Insert or replace trending model rows, keyed by the unique model name."""
    saved_count = 0
    with closing(sqlite3.connect(DB_PATH)) as conn:
        cursor = conn.cursor()
        for model in models_list:
            try:
                cursor.execute('''
                    INSERT OR REPLACE INTO models
                    (name, downloads, likes, task, url, analysis, rank, updated_at)
                    VALUES (?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
                ''', (
                    model['name'],
                    model['downloads'],
                    model['likes'],
                    model['task'],
                    model['url'],
                    model['analysis'],
                    model['rank'],
                ))
                saved_count += 1
            except Exception as e:
                # Best effort: one bad row must not prevent saving the others.
                print(f"⚠️ 모델 저장 오류: {e}")
        conn.commit()

    print(f"✅ {saved_count}개 모델 DB 저장 완료")


def save_spaces_to_db(spaces_list: List[Dict]):
    """Insert or replace trending space rows, keyed by the unique space_id.

    The 'tech_stack' list is stored as a JSON string.
    """
    saved_count = 0
    with closing(sqlite3.connect(DB_PATH)) as conn:
        cursor = conn.cursor()
        for space in spaces_list:
            try:
                cursor.execute('''
                    INSERT OR REPLACE INTO spaces
                    (space_id, name, author, title, likes, url, sdk,
                     simple_explanation, tech_stack, rank, updated_at)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
                ''', (
                    space['space_id'],
                    space['name'],
                    space.get('author', ''),
                    space.get('title', ''),
                    space.get('likes', 0),
                    space['url'],
                    space.get('sdk', ''),
                    space['simple_explanation'],
                    json.dumps(space.get('tech_stack', [])),
                    space['rank'],
                ))
                saved_count += 1
            except Exception as e:
                # Best effort: one bad row must not prevent saving the others.
                print(f"⚠️ 스페이스 저장 오류: {e}")
        conn.commit()

    print(f"✅ {saved_count}개 스페이스 DB 저장 완료")


def load_news_from_db() -> List[Dict]:
    """Return up to 50 stored news items, newest first, in the analysed shape."""
    with closing(sqlite3.connect(DB_PATH)) as conn:
        cursor = conn.cursor()
        cursor.execute('''
            SELECT title, url, date, source, category, summary, significance,
                   impact_level, impact_text, impact_description, action
            FROM news
            ORDER BY created_at DESC
            LIMIT 50
        ''')
        rows = cursor.fetchall()

    return [
        {
            'title': row[0],
            'url': row[1],
            'date': row[2],
            'source': row[3],
            'category': row[4],
            'analysis': {
                'summary': row[5],
                'significance': row[6],
                'impact_level': row[7],
                'impact_text': row[8],
                'impact_description': row[9],
                'action': row[10],
            },
        }
        for row in rows
    ]


def load_models_from_db() -> List[Dict]:
    """Return up to 30 stored models ordered by ascending rank."""
    with closing(sqlite3.connect(DB_PATH)) as conn:
        cursor = conn.cursor()
        cursor.execute('''
            SELECT name, downloads, likes, task, url, analysis, rank
            FROM models
            ORDER BY rank ASC
            LIMIT 30
        ''')
        rows = cursor.fetchall()

    return [
        {
            'name': row[0],
            'downloads': row[1],
            'likes': row[2],
            'task': row[3],
            'url': row[4],
            'analysis': row[5],
            'rank': row[6],
        }
        for row in rows
    ]


def load_spaces_from_db() -> List[Dict]:
    """Return up to 30 stored spaces ordered by ascending rank.

    The JSON-encoded 'tech_stack' column is decoded back into a list; an empty
    or undecodable value yields [].
    """
    with closing(sqlite3.connect(DB_PATH)) as conn:
        cursor = conn.cursor()
        cursor.execute('''
            SELECT space_id, name, author, title, likes, url, sdk,
                   simple_explanation, tech_stack, rank
            FROM spaces
            ORDER BY rank ASC
            LIMIT 30
        ''')
        rows = cursor.fetchall()

    spaces_list = []
    for row in rows:
        try:
            tech_stack = json.loads(row[8]) if row[8] else []
        except (TypeError, ValueError):
            # A corrupt cell should not make the whole page fail.
            tech_stack = []
        spaces_list.append({
            'space_id': row[0],
            'name': row[1],
            'author': row[2],
            'title': row[3],
            'likes': row[4],
            'url': row[5],
            'sdk': row[6],
            'simple_explanation': row[7],
            'tech_stack': tech_stack,
            'rank': row[9],
            'description': row[3],  # the title doubles as the description
        })
    return spaces_list


# ============================================
# LLM analyzer
# ============================================
class LLMAnalyzer:
    """Analyzer backed by the Fireworks AI chat-completions API.

    When FIREWORKS_API_KEY is not set, every analysis falls back to canned
    template text ("template mode").
    """

    # Keyword -> canned analysis used by analyze_news_simple().
    _NEWS_TEMPLATES = {
        "챗GPT": {
            "summary": "마이크로소프트(MS)라는 큰 회사가 챗GPT라는 AI를 너무 많은 사람들이 사용해서, 컴퓨터를 보관하는 큰 건물(데이터센터)이 부족하다고 말했어요.",
            "significance": "챗GPT가 정말 인기가 많다는 뜻이에요. 마치 너무 많은 친구들이 한 게임기를 쓰려고 하는 것과 비슷해요.",
            "impact_level": "high",
            "impact_text": "높음",
            "impact_description": "AI 기술이 빠르게 발전하고 있고, 많은 사람들이 사용하고 있다는 중요한 신호예요.",
            "action": "챗GPT 같은 AI 도구를 배워보세요. 숙제를 도와달라고 하거나, 모르는 것을 물어볼 수 있어요!",
        },
        "GPU": {
            "summary": "미국이 아랍에미리트(UAE)라는 나라에 GPU라는 특별한 컴퓨터 부품을 팔 수 있게 허락했어요. GPU는 AI를 만드는 데 꼭 필요한 부품이에요.",
            "significance": "GPU는 AI의 두뇌 같은 거예요. 이걸 팔 수 있게 되면 더 많은 나라에서 AI를 만들 수 있어요.",
            "impact_level": "medium",
            "impact_text": "중간",
            "impact_description": "AI 기술이 더 많은 나라로 퍼질 수 있게 되었어요.",
            "action": "컴퓨터가 어떻게 작동하는지 관심을 가져보세요. GPU가 무엇인지 검색해보는 것도 좋아요!",
        },
        "소라": {
            "summary": "오픈AI가 만든 '소라'라는 AI 앱이 엄청 빠르게 인기를 얻었어요. 100만 명이 다운로드하는 데 챗GPT보다 더 빨랐대요!",
            "significance": "사람들이 비디오를 만드는 AI에 정말 관심이 많다는 뜻이에요.",
            "impact_level": "high",
            "impact_text": "높음",
            "impact_description": "앞으로 누구나 쉽게 멋진 비디오를 만들 수 있게 될 거예요.",
            "action": "소라를 써보고, 상상한 것을 비디오로 만들어보세요. 창의력을 발휘할 수 있어요!",
        },
    }

    # Pipeline tag -> short description used by the analyze_model() fallback.
    _TASK_EXPLANATIONS = {
        "text-generation": "글을 자동으로 만들어주는",
        "image-to-text": "사진을 보고 설명을 써주는",
        "text-to-image": "글을 읽고 그림을 그려주는",
        "translation": "다른 언어로 번역해주는",
        "question-answering": "질문에 답해주는",
        "summarization": "긴 글을 짧게 요약해주는",
        "text-classification": "글을 분류해주는",
        "token-classification": "단어를 분석해주는",
        "fill-mask": "빈칸을 채워주는",
    }

    # (lower-case marker searched in app.py, label shown in the UI), in display
    # order. 'torch' also matches 'pytorch'.
    _TECH_MARKERS = (
        ('gradio', 'Gradio'),
        ('streamlit', 'Streamlit'),
        ('transformers', 'Transformers'),
        ('torch', 'PyTorch'),
        ('tensorflow', 'TensorFlow'),
        ('diffusers', 'Diffusers'),
    )

    def __init__(self):
        self.api_key = os.environ.get('FIREWORKS_API_KEY', '')
        self.api_url = "https://api.fireworks.ai/inference/v1/chat/completions"
        self.api_available = bool(self.api_key)

        if not self.api_available:
            print("⚠️ FIREWORKS_API_KEY 환경변수가 설정되지 않았습니다. 템플릿 모드로 동작합니다.")

    def call_llm(self, messages: List[Dict], max_tokens: int = 2000) -> Optional[str]:
        """Send chat messages to Fireworks AI and return the reply text.

        Returns None when no API key is configured or when the request or the
        response parsing fails (the error is printed).
        """
        if not self.api_available:
            return None

        try:
            payload = {
                "model": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
                "max_tokens": max_tokens,
                "top_p": 1,
                "top_k": 40,
                "presence_penalty": 0,
                "frequency_penalty": 0,
                "temperature": 0.6,
                "messages": messages,
            }
            headers = {
                "Accept": "application/json",
                "Content-Type": "application/json",
                "Authorization": f"Bearer {self.api_key}",
            }

            response = requests.post(self.api_url, headers=headers, json=payload, timeout=30)
            response.raise_for_status()

            result = response.json()
            return result['choices'][0]['message']['content']
        except Exception as e:
            print(f" ⚠️ LLM API 호출 오류: {e}")
            return None

    def fetch_model_card(self, model_id: str) -> Optional[str]:
        """Download a model's README.md from Hugging Face.

        Returns the text (cut to 3000 characters plus a truncation marker when
        longer), or None on a non-200 response or a request error.
        """
        try:
            url = f"https://huggingface.co/{model_id}/raw/main/README.md"
            response = requests.get(url, timeout=10)
            if response.status_code != 200:
                return None

            content = response.text
            # Keep only the head of long cards to bound the prompt size.
            if len(content) > 3000:
                content = content[:3000] + "\n...(후략)"
            return content
        except Exception as e:
            print(f" ⚠️ 모델 카드 가져오기 오류: {e}")
            return None

    def fetch_space_code(self, space_id: str) -> Optional[str]:
        """Download a space's app.py from Hugging Face.

        Returns the text (cut to 2000 characters plus a truncation marker when
        longer), or None on a non-200 response or a request error.
        """
        try:
            url = f"https://huggingface.co/spaces/{space_id}/raw/main/app.py"
            response = requests.get(url, timeout=10)
            if response.status_code != 200:
                return None

            content = response.text
            # Keep only the head of long files to bound the prompt size.
            if len(content) > 2000:
                content = content[:2000] + "\n...(후략)"
            return content
        except Exception as e:
            print(f" ⚠️ 스페이스 코드 가져오기 오류: {e}")
            return None

    def analyze_news_simple(self, title: str, content: str = "") -> Dict:
        """Return a child-friendly analysis dict for a news title.

        The first template whose keyword occurs in the title (case-insensitive)
        is used; otherwise a generic analysis is built from the title.
        `content` is accepted but not used.
        """
        lowered_title = title.lower()
        for keyword, template in self._NEWS_TEMPLATES.items():
            if keyword.lower() in lowered_title:
                # Hand out a copy so callers cannot mutate the shared template.
                return dict(template)

        # Generic analysis
        return {
            "summary": f"'{title}'라는 AI 관련 뉴스가 나왔어요. AI 기술이 계속 발전하고 있다는 소식이에요.",
            "significance": "AI는 우리 생활을 더 편리하게 만들어주는 기술이에요.",
            "impact_level": "medium",
            "impact_text": "중간",
            "impact_description": "AI 기술의 발전은 우리 미래에 중요한 영향을 줄 거예요.",
            "action": "AI에 대해 더 알아보고, AI를 활용하는 방법을 배워보세요!",
        }

    def analyze_model(self, model_name: str, task: str, downloads: int) -> str:
        """Explain a Hugging Face model in simple terms.

        Uses the LLM on the model card when both are available; otherwise
        returns a template sentence built from the task and download count.
        """
        # 1. Fetch the model card
        model_card = self.fetch_model_card(model_name)

        # 2. Analyse with the LLM
        if model_card and self.api_available:
            try:
                messages = [
                    {
                        "role": "system",
                        "content": "당신은 초등학생도 이해할 수 있게 AI 모델을 쉽게 설명하는 전문가입니다. 한국어로 답변하세요.",
                    },
                    {
                        "role": "user",
                        "content": f"""다음은 허깅페이스 모델 '{model_name}'의 모델 카드입니다:

{model_card}

이 모델을 초등학생이 이해할 수 있도록 3-4문장으로 쉽게 설명해주세요. 다음 내용을 포함하세요:
1. 이 모델이 무엇을 하는지
2. 어떤 특징이 있는지
3. 누가 사용하면 좋은지

답변은 반드시 3-4문장의 한국어로만 작성하세요.""",
                    },
                ]

                result = self.call_llm(messages, max_tokens=500)
                if result:
                    return result.strip()
            except Exception as e:
                print(f" ⚠️ 모델 분석 LLM 오류: {e}")

        # 3. Fallback: template-based explanation
        task_desc = self._TASK_EXPLANATIONS.get(task, "특별한 기능을 하는")

        if downloads > 10000000:
            popularity = "엄청나게 많은"
        elif downloads > 1000000:
            popularity = "아주 많은"
        elif downloads > 100000:
            popularity = "많은"
        else:
            popularity = "어느 정도"

        return f"이 모델은 {task_desc} AI예요. {popularity} 사람들이 다운로드해서 사용하고 있어요. {model_name.split('/')[-1]}라는 이름으로 유명해요!"

    def analyze_space(self, space_name: str, space_id: str, description: str) -> Dict:
        """Explain a Hugging Face space in simple terms.

        Returns a dict with 'simple_explanation' (str) and 'tech_stack' (list of
        str). Uses the LLM on the space's app.py when both are available;
        otherwise returns a canned explanation. `description` is accepted but
        not used.
        """
        # 1. Fetch app.py
        app_code = self.fetch_space_code(space_id)

        # 2. Analyse with the LLM
        if app_code and self.api_available:
            try:
                messages = [
                    {
                        "role": "system",
                        "content": "당신은 초등학생도 이해할 수 있게 AI 애플리케이션을 쉽게 설명하는 전문가입니다. 한국어로 답변하세요.",
                    },
                    {
                        "role": "user",
                        "content": f"""다음은 허깅페이스 스페이스 '{space_name}'의 app.py 코드입니다:

{app_code}

이 앱을 초등학생이 이해할 수 있도록 3-4문장으로 쉽게 설명해주세요. 다음 내용을 포함하세요:
1. 이 앱이 무엇을 하는지
2. 어떤 기술을 사용하는지
3. 어떻게 활용할 수 있는지

답변은 반드시 3-4문장의 한국어로만 작성하세요.""",
                    },
                ]

                result = self.call_llm(messages, max_tokens=500)
                if result:
                    # Guess the tech stack from library names found in the code.
                    lowered_code = app_code.lower()
                    tech_stack = [
                        label
                        for marker, label in self._TECH_MARKERS
                        if marker in lowered_code
                    ]
                    if not tech_stack:
                        tech_stack = ['Python', 'AI']

                    return {
                        "simple_explanation": result.strip(),
                        "tech_stack": tech_stack,
                    }
            except Exception as e:
                print(f" ⚠️ 스페이스 분석 LLM 오류: {e}")

        # 3. Fallback: template-based explanation
        return {
            "simple_explanation": f"{space_name}는 웹브라우저에서 바로 AI를 체험해볼 수 있는 곳이에요. 설치 없이도 사용할 수 있어서 편리해요! 마치 온라인 게임처럼 바로 접속해서 AI를 사용할 수 있답니다.",
            "tech_stack": ["Python", "Gradio", "Transformers", "PyTorch"],
        }


# ============================================
# Advanced analyzer
# ============================================
class AdvancedAIAnalyzer:
    """Collects news, trending models and trending spaces and attaches analyses."""

    def __init__(self):
        self.llm_analyzer = LLMAnalyzer()
        self.huggingface_data = {
            "models": [],
            "spaces": [],
        }
        self.news_data = []

    def fetch_aitimes_news(self) -> List[Dict]:
        """Crawl today's articles from two AI Times list pages.

        Returns at most 20 items with the keys title, url, date, source and
        category, de-duplicated by URL. When fewer than 3 articles are found,
        hard-coded sample articles are appended.
        """
        print("📰 AI Times 뉴스 수집 중...")

        # List pages to crawl
        urls = [
            'https://www.aitimes.com/news/articleList.html?sc_multi_code=S2&view_type=sm',
            'https://www.aitimes.com/news/articleList.html?sc_section_code=S1N24&view_type=sm',
        ]
        # Compiled once; both are reused for every link on every page.
        link_pattern = re.compile(r'/news/articleView\.html\?idxno=\d+')
        date_pattern = re.compile(r'(\d{2}-\d{2}\s+\d{2}:\d{2})')

        all_news = []
        today = datetime.now().strftime('%m-%d')  # e.g. '10-10'

        for url_idx, url in enumerate(urls, 1):
            try:
                print(f" 🔍 [{url_idx}/{len(urls)}] 수집 중: {url}")
                response = requests.get(url, timeout=15, headers={
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                })
                response.raise_for_status()
                response.encoding = 'utf-8'

                soup = BeautifulSoup(response.text, 'html.parser')

                # Every anchor that points at an article page
                articles = soup.find_all('a', href=link_pattern)
                print(f" → {len(articles)}개 링크 발견")

                articles_found = 0
                for article_tag in articles:
                    try:
                        # Title and link
                        title = article_tag.get_text(strip=True)
                        link = article_tag.get('href', '')

                        # Make relative links absolute
                        if link and not link.startswith('http'):
                            if link.startswith('/'):
                                link = 'https://www.aitimes.com' + link
                            else:
                                link = 'https://www.aitimes.com/' + link

                        # Skip anchors whose text is too short to be a headline
                        if not title or len(title) < 10:
                            continue

                        # Look for a 'MM-DD HH:MM' stamp in the parent's text
                        parent = article_tag.parent
                        date_text = ''
                        if parent:
                            date_match = date_pattern.search(parent.get_text())
                            if date_match:
                                date_text = date_match.group(1)

                        # Otherwise check the following sibling elements
                        if not date_text:
                            for sibling in article_tag.find_next_siblings():
                                date_match = date_pattern.search(sibling.get_text())
                                if date_match:
                                    date_text = date_match.group(1)
                                    break

                        # Still nothing: assume the article is from today
                        if not date_text:
                            date_text = today

                        # Keep only today's articles
                        if today not in date_text:
                            continue

                        news_item = {
                            'title': title,
                            'url': link,
                            'date': date_text,
                            'source': 'AI Times',
                            'category': 'AI',
                        }
                        all_news.append(news_item)
                        articles_found += 1
                        print(f" ✓ 추가: {title[:60]}... ({date_text})")
                    except Exception as e:
                        # One malformed entry must not abort the page, but say so.
                        print(f" ⚠️ 기사 파싱 오류: {e}")
                        continue

                print(f" → {articles_found}개 오늘자 기사 수집\n")
                time.sleep(1)  # be gentle with the server
            except Exception as e:
                print(f" ⚠️ URL 수집 오류: {e}\n")
                continue

        # De-duplicate by URL, keeping first occurrences
        unique_news = []
        seen_urls = set()
        for news in all_news:
            if news['url'] not in seen_urls:
                unique_news.append(news)
                seen_urls.add(news['url'])

        print(f"✅ 총 {len(unique_news)}개 중복 제거된 오늘자 뉴스\n")

        # Guarantee some content: pad with sample articles when too few were found
        if len(unique_news) < 3:
            print("⚠️ 뉴스가 부족하여 최근 샘플 추가\n")
            sample_news = [
                {
                    'title': 'MS "챗GPT 수요 폭증으로 데이터센터 부족...2026년까지 지속"',
                    'url': 'https://www.aitimes.com/news/articleView.html?idxno=203055',
                    'date': '10-10 15:10',
                    'source': 'AI Times',
                    'category': 'AI',
                },
                {
                    'title': '미국, UAE에 GPU 판매 일부 승인...엔비디아 시총 5조달러 눈앞',
                    'url': 'https://www.aitimes.com/news/articleView.html?idxno=203053',
                    'date': '10-10 14:46',
                    'source': 'AI Times',
                    'category': 'AI',
                },
                {
                    'title': '소라, 챗GPT보다 빨리 100만 다운로드 돌파',
                    'url': 'https://www.aitimes.com/news/articleView.html?idxno=203045',
                    'date': '10-10 12:55',
                    'source': 'AI Times',
                    'category': 'AI',
                },
            ]
            for sample in sample_news:
                if sample['url'] not in seen_urls:
                    unique_news.append(sample)

        return unique_news[:20]  # at most 20 items

    def fetch_huggingface_models(self, limit: int = 30) -> List[Dict]:
        """Fetch trending models from the Hugging Face API and analyse each one.

        Successful results are saved to the DB. If the listing call itself
        fails, previously stored models are returned instead.
        """
        print(f"🤗 허깅페이스 트렌딩 모델 {limit}개 수집 중...")

        models_list = []

        try:
            api = HfApi()

            # Models ordered by trending score
            models = list(api.list_models(
                sort="trending_score",
                direction=-1,
                limit=limit
            ))

            print(f"📊 API에서 {len(models)}개 모델 받음")

            for idx, model in enumerate(models[:limit], 1):
                try:
                    model_info = {
                        'name': model.id,
                        'downloads': getattr(model, 'downloads', 0) or 0,
                        'likes': getattr(model, 'likes', 0) or 0,
                        'task': getattr(model, 'pipeline_tag', 'N/A') or 'N/A',
                        'url': f"https://huggingface.co/{model.id}",
                        'rank': idx,
                    }

                    # Attach the model-card analysis
                    print(f" 🔍 {idx}. {model.id} 분석 중...")
                    model_info['analysis'] = self.llm_analyzer.analyze_model(
                        model_info['name'],
                        model_info['task'],
                        model_info['downloads']
                    )

                    models_list.append(model_info)

                    # Short pause to stay clear of API rate limits
                    time.sleep(0.5)

                    # Progress report
                    if idx % 5 == 0:
                        print(f" ✓ {idx}개 모델 처리 완료...")
                except Exception as e:
                    print(f" ⚠️ 모델 {idx} 처리 오류: {e}")
                    continue

            print(f"✅ {len(models_list)}개 트렌딩 모델 수집 완료")

            # Persist
            if models_list:
                save_models_to_db(models_list)

            return models_list
        except Exception as e:
            print(f"❌ 모델 수집 오류: {e}")
            print("💾 DB에서 이전 데이터 로드 시도...")
            return load_models_from_db()

    def fetch_huggingface_spaces(self, limit: int = 30) -> List[Dict]:
        """Fetch trending spaces from the Hugging Face API and analyse each one.

        Successful results are saved to the DB. If the listing call itself
        fails, previously stored spaces are returned instead.
        """
        print(f"🚀 허깅페이스 트렌딩 스페이스 {limit}개 수집 중...")

        spaces_list = []

        try:
            api = HfApi()

            # Spaces ordered by trending score
            spaces = list(api.list_spaces(
                sort="trending_score",
                direction=-1,
                limit=limit
            ))

            print(f"📊 API에서 {len(spaces)}개 스페이스 받음")

            for idx, space in enumerate(spaces[:limit], 1):
                try:
                    space_info = {
                        'space_id': space.id,
                        'name': space.id.split('/')[-1] if '/' in space.id else space.id,
                        'author': space.author,
                        'title': getattr(space, 'title', space.id) or space.id,
                        'likes': getattr(space, 'likes', 0) or 0,
                        'url': f"https://huggingface.co/spaces/{space.id}",
                        'sdk': getattr(space, 'sdk', 'gradio') or 'gradio',
                        'rank': idx,
                    }

                    # Attach the app.py analysis
                    print(f" 🔍 {idx}. {space.id} 분석 중...")
                    space_analysis = self.llm_analyzer.analyze_space(
                        space_info['name'],
                        space_info['space_id'],
                        space_info['title']
                    )

                    space_info['simple_explanation'] = space_analysis['simple_explanation']
                    space_info['tech_stack'] = space_analysis['tech_stack']
                    space_info['description'] = space_info['title']

                    spaces_list.append(space_info)

                    # Short pause to stay clear of API rate limits
                    time.sleep(0.5)

                    # Progress report
                    if idx % 5 == 0:
                        print(f" ✓ {idx}개 스페이스 처리 완료...")
                except Exception as e:
                    print(f" ⚠️ 스페이스 {idx} 처리 오류: {e}")
                    continue

            print(f"✅ {len(spaces_list)}개 트렌딩 스페이스 수집 완료")

            # Persist
            if spaces_list:
                save_spaces_to_db(spaces_list)

            return spaces_list
        except Exception as e:
            print(f"❌ 스페이스 수집 오류: {e}")
            print("💾 DB에서 이전 데이터 로드 시도...")
            return load_spaces_from_db()

    def analyze_all_news(self) -> List[Dict]:
        """Crawl the news, attach an 'analysis' dict to each item and save them."""
        print("📰 뉴스 LLM 분석 시작...")

        # Collect the articles from the live site
        news = self.fetch_aitimes_news()

        if not news:
            print("⚠️ 수집된 뉴스가 없습니다.")
            return []

        analyzed_news = []
        for idx, article in enumerate(news, 1):
            print(f" 🧠 {idx}/{len(news)}: {article['title'][:50]}... 분석 중")

            analysis = self.llm_analyzer.analyze_news_simple(
                article['title'],
                ""
            )

            article['analysis'] = analysis
            analyzed_news.append(article)

        print(f"✅ {len(analyzed_news)}개 뉴스 분석 완료")

        # Persist
        if analyzed_news:
            save_news_to_db(analyzed_news)

        return analyzed_news

    def get_all_data(self, force_refresh: bool = False) -> Dict:
        """Collect and analyse everything the page needs.

        Args:
            force_refresh: when True, re-collect everything; when False, load
                each category from the DB and collect only the empty ones.

        Returns:
            A dict with analyzed_news, analyzed_models, analyzed_spaces, stats
            and a formatted timestamp.
        """
        print("\n" + "="*60)
        print("🚀 AI 뉴스 & 허깅페이스 LLM 분석 시작")
        print("="*60 + "\n")

        if force_refresh:
            print("🔄 강제 새로고침 모드: 모든 데이터 새로 수집")
            analyzed_news = self.analyze_all_news()
            analyzed_models = self.fetch_huggingface_models(30)
            analyzed_spaces = self.fetch_huggingface_spaces(30)
        else:
            print("💾 DB 우선 로드 모드")
            # Try the DB first
            analyzed_news = load_news_from_db()
            if not analyzed_news:
                print("📰 DB에 뉴스 없음 → 새로 수집")
                analyzed_news = self.analyze_all_news()
            else:
                print(f"✅ DB에서 {len(analyzed_news)}개 뉴스 로드")

            analyzed_models = load_models_from_db()
            if not analyzed_models:
                print("🤗 DB에 모델 없음 → 새로 수집")
                analyzed_models = self.fetch_huggingface_models(30)
            else:
                print(f"✅ DB에서 {len(analyzed_models)}개 모델 로드")

            analyzed_spaces = load_spaces_from_db()
            if not analyzed_spaces:
                print("🚀 DB에 스페이스 없음 → 새로 수집")
                analyzed_spaces = self.fetch_huggingface_spaces(30)
            else:
                print(f"✅ DB에서 {len(analyzed_spaces)}개 스페이스 로드")

        # Statistics
        stats = {
            'total_news': len(analyzed_news),
            'hf_models': len(analyzed_models),
            'hf_spaces': len(analyzed_spaces),
            'llm_analyses': len(analyzed_news) + len(analyzed_models) + len(analyzed_spaces),
        }

        print(f"\n✅ 전체 분석 완료: {stats['llm_analyses']}개 항목")
        print(f" 📰 뉴스: {stats['total_news']}개")
        print(f" 🤗 모델: {stats['hf_models']}개")
        print(f" 🚀 스페이스: {stats['hf_spaces']}개")

        return {
            'analyzed_news': analyzed_news,
            'analyzed_models': analyzed_models,
            'analyzed_spaces': analyzed_spaces,
            'stats': stats,
            'timestamp': datetime.now().strftime('%Y년 %m월 %d일 %H:%M:%S'),
        }


# ============================================
# Flask routes
# ============================================
@app.route('/')
def index():
    """Render the main page; `?refresh=true` forces a full re-collection."""
    try:
        force_refresh = request.args.get('refresh', 'false').lower() == 'true'

        analyzer = AdvancedAIAnalyzer()
        data = analyzer.get_all_data(force_refresh=force_refresh)
        return render_template_string(HTML_TEMPLATE, **data)
    except Exception as e:
        # The message and traceback are interpolated into an HTML response, so
        # they are escaped to keep exception text from being parsed as markup.
        # NOTE(review): showing the traceback to visitors leaks internals;
        # consider logging it server-side instead.
        error_message = html.escape(str(e))
        error_detail = html.escape(traceback.format_exc())
        return f"""

⚠️ 오류 발생

{error_message}

{error_detail}

""", 500


@app.route('/api/data')
def api_data():
    """Return all collected data as JSON; `?refresh=true` forces re-collection."""
    try:
        force_refresh = request.args.get('refresh', 'false').lower() == 'true'

        analyzer = AdvancedAIAnalyzer()
        data = analyzer.get_all_data(force_refresh=force_refresh)
        return jsonify({
            'success': True,
            'data': data
        })
    except Exception as e:
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500


@app.route('/api/refresh')
def api_refresh():
    """Force a full re-collection and return the resulting statistics."""
    try:
        analyzer = AdvancedAIAnalyzer()
        data = analyzer.get_all_data(force_refresh=True)
        return jsonify({
            'success': True,
            'message': '데이터가 성공적으로 갱신되었습니다',
            'stats': data['stats']
        })
    except Exception as e:
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500


@app.route('/health')
def health():
    """Report service health, including row counts of the three DB tables."""
    try:
        # Verify the DB is reachable and the tables exist
        with closing(sqlite3.connect(DB_PATH)) as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT COUNT(*) FROM news")
            news_count = cursor.fetchone()[0]
            cursor.execute("SELECT COUNT(*) FROM models")
            models_count = cursor.fetchone()[0]
            cursor.execute("SELECT COUNT(*) FROM spaces")
            spaces_count = cursor.fetchone()[0]

        return jsonify({
            "status": "healthy",
            "service": "AI News LLM Analyzer",
            "version": "3.2.0",
            "database": {
                "connected": True,
                "news_count": news_count,
                "models_count": models_count,
                "spaces_count": spaces_count
            },
            "fireworks_api": {
                "configured": bool(os.environ.get('FIREWORKS_API_KEY'))
            },
            "timestamp": datetime.now().isoformat()
        })
    except Exception as e:
        return jsonify({
            "status": "unhealthy",
            "error": str(e)
        }), 500


# ============================================
# Entry point
# ============================================
if __name__ == '__main__':
    port = int(os.environ.get('PORT', 7860))
    key_status = "✅ 설정됨" if os.environ.get('FIREWORKS_API_KEY') else "❌ 미설정 (템플릿 모드)"

    print(f"""
╔════════════════════════════════════════════════════════════╗
║                                                            ║
║   🤖 AI 뉴스 & 허깅페이스 LLM 분석 웹앱 v3.2               ║
║                                                            ║
╚════════════════════════════════════════════════════════════╝

✨ 주요 기능:
 • 💾 SQLite DB 영구 스토리지
 • 🌐 AI Times 실시간 뉴스 크롤링 (2개 섹션)
 • 📰 뉴스 초등학생 수준 분석
 • 🤗 허깅페이스 트렌딩 모델 TOP 30 (모델 카드 분석)
 • 🚀 허깅페이스 트렌딩 스페이스 TOP 30 (app.py 분석)
 • 🧠 Fireworks AI (Qwen3-235B) 실시간 LLM 분석
 • 🎨 탭 UI (뉴스/모델/스페이스)

🔑 API 설정:
 FIREWORKS_API_KEY: {key_status}

🚀 서버 정보:
 📍 메인: http://localhost:{port}
 🔄 강제갱신: http://localhost:{port}/?refresh=true
 📊 API: http://localhost:{port}/api/data
 🔥 새로고침 API: http://localhost:{port}/api/refresh
 💚 Health: http://localhost:{port}/health

💾 데이터베이스: {DB_PATH}

초기화 중...
""")

    # Create the tables before serving any request
    try:
        init_database()
    except Exception as e:
        print(f"❌ DB 초기화 오류: {e}")
        sys.exit(1)

    print("\n✅ 서버 준비 완료!")
    print("브라우저에서 위 URL을 열어주세요!")
    print("종료: Ctrl+C\n")

    try:
        app.run(
            host='0.0.0.0',
            port=port,
            debug=False,
            threaded=True
        )
    except KeyboardInterrupt:
        print("\n\n👋 서버 종료!")
        sys.exit(0)
    except Exception as e:
        print(f"\n❌서버 오류: {e}")
        sys.exit(1)