# -*- coding: utf-8 -*- """ AI 뉴스 & 허깅페이스 트렌딩 LLM 분석 웹앱 (완전판 v3.0) 파일명: app_advanced.py 주요 기능: 1. SQLite DB 영구 스토리지 2. 실제 Hugging Face Trending API 연동 (모델/스페이스 30위) 3. LLM 분석 (초등학생 수준) 4. 탭 UI (뉴스/모델/스페이스) 실행 방법: 1. pip install Flask requests beautifulsoup4 huggingface_hub 2. python app_advanced.py 3. 브라우저에서 http://localhost:7860 접속 """ from flask import Flask, render_template_string, jsonify, request import requests import json from datetime import datetime from typing import List, Dict, Optional import os import sys import sqlite3 import time from huggingface_hub import HfApi # Flask 앱 초기화 app = Flask(__name__) app.config['JSON_AS_ASCII'] = False # 데이터베이스 파일 경로 DB_PATH = 'ai_news_analysis.db' # ============================================ # HTML 템플릿 (탭 UI 포함) # ============================================ HTML_TEMPLATE = """ AI 뉴스 & 허깅페이스 LLM 분석 시스템

🤖 AI 뉴스 & 허깅페이스 LLM 분석

초등학생도 이해하는 AI 트렌드 분석 시스템 🎓

{{ stats.total_news }}

📰 분석된 뉴스

{{ stats.hf_models }}

🤗 트렌딩 모델

{{ stats.hf_spaces }}

🚀 인기 스페이스

{{ stats.llm_analyses }}

🧠 LLM 분석

{% for article in analyzed_news %}

{{ loop.index }}. {{ article.title }}

📅 {{ article.date }} 📰 {{ article.source }}

🎯 쉬운 요약

{{ article.analysis.summary }}

💡 왜 중요할까?

{{ article.analysis.significance }}

📊 영향도 {{ article.analysis.impact_text }}

{{ article.analysis.impact_description }}

✅ 우리가 할 수 있는 것

{{ article.analysis.action }}

🔗 전체 기사 읽어보기

{% endfor %}

{% for model in analyzed_models %}

{{ model.rank }}

{{ model.name }}

🏷️ {{ model.task }}

📥 다운로드
{{ "{:,}".format(model.downloads) }}

❤️ 좋아요
{{ "{:,}".format(model.likes) }}

🧠 AI 분석:
{{ model.analysis }}

🔗 모델 페이지 방문

{% endfor %}

{% if analyzed_models|length == 0 %}

⚠️ 모델 데이터를 불러오는 중...

{% endif %}

{% for space in analyzed_spaces %}

{{ space.rank }}. {{ space.name }}

트렌딩 {{ space.rank }}위

📝 설명: {{ space.description }}

🎓 초등학생 설명:
{{ space.simple_explanation }}

{% if space.tech_stack %}

🛠️ 사용 기술: {% for tech in space.tech_stack %} {{ tech }} {% endfor %}

{% endif %} 🔗 스페이스 체험하기

{% endfor %} {% if analyzed_spaces|length == 0 %}

⚠️ 스페이스 데이터를 불러오는 중...

{% endif %}

⏰ 마지막 업데이트: {{ timestamp }}

""" # ============================================ # 데이터베이스 초기화 # ============================================ def init_database(): """SQLite 데이터베이스 초기화""" conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() # 뉴스 테이블 cursor.execute(''' CREATE TABLE IF NOT EXISTS news ( id INTEGER PRIMARY KEY AUTOINCREMENT, title TEXT NOT NULL, url TEXT NOT NULL UNIQUE, date TEXT, source TEXT, category TEXT, summary TEXT, significance TEXT, impact_level TEXT, impact_text TEXT, impact_description TEXT, action TEXT, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) ''') # 모델 테이블 cursor.execute(''' CREATE TABLE IF NOT EXISTS models ( id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT NOT NULL UNIQUE, downloads INTEGER, likes INTEGER, task TEXT, url TEXT, analysis TEXT, rank INTEGER, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) ''') # 스페이스 테이블 cursor.execute(''' CREATE TABLE IF NOT EXISTS spaces ( id INTEGER PRIMARY KEY AUTOINCREMENT, space_id TEXT NOT NULL UNIQUE, name TEXT NOT NULL, author TEXT, title TEXT, likes INTEGER, url TEXT, sdk TEXT, simple_explanation TEXT, tech_stack TEXT, rank INTEGER, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) ''') conn.commit() conn.close() print("✅ 데이터베이스 초기화 완료") def save_news_to_db(news_list: List[Dict]): """뉴스 데이터를 DB에 저장""" conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() saved_count = 0 for news in news_list: try: cursor.execute(''' INSERT OR REPLACE INTO news (title, url, date, source, category, summary, significance, impact_level, impact_text, impact_description, action) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ''', ( news['title'], news['url'], news.get('date', ''), news.get('source', ''), news.get('category', ''), news['analysis']['summary'], news['analysis']['significance'], news['analysis']['impact_level'], news['analysis']['impact_text'], news['analysis']['impact_description'], news['analysis']['action'] )) saved_count += 1 except sqlite3.IntegrityError: pass # 이미 존재하는 뉴스 conn.commit() conn.close() print(f"✅ {saved_count}개 뉴스 DB 저장 완료") def save_models_to_db(models_list: List[Dict]): """모델 데이터를 DB에 저장""" conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() saved_count = 0 for model in models_list: try: cursor.execute(''' INSERT OR REPLACE INTO models (name, downloads, likes, task, url, analysis, rank, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP) ''', ( model['name'], model['downloads'], model['likes'], model['task'], model['url'], model['analysis'], model['rank'] )) saved_count += 1 except Exception as e: print(f"⚠️ 모델 저장 오류: {e}") conn.commit() conn.close() print(f"✅ {saved_count}개 모델 DB 저장 완료") def save_spaces_to_db(spaces_list: List[Dict]): """스페이스 데이터를 DB에 저장""" conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() saved_count = 0 for space in spaces_list: try: cursor.execute(''' INSERT OR REPLACE INTO spaces (space_id, name, author, title, likes, url, sdk, simple_explanation, tech_stack, rank, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP) ''', ( space['space_id'], space['name'], space.get('author', ''), space.get('title', ''), space.get('likes', 0), space['url'], space.get('sdk', ''), space['simple_explanation'], json.dumps(space.get('tech_stack', [])), space['rank'] )) saved_count += 1 except Exception as e: print(f"⚠️ 스페이스 저장 오류: {e}") conn.commit() conn.close() print(f"✅ {saved_count}개 스페이스 DB 저장 완료") def load_news_from_db() -> List[Dict]: """DB에서 뉴스 로드""" conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() cursor.execute(''' SELECT title, url, date, source, category, summary, significance, impact_level, impact_text, impact_description, action FROM news ORDER BY created_at DESC LIMIT 50 ''') news_list = [] for row in cursor.fetchall(): news_list.append({ 'title': row[0], 'url': row[1], 'date': row[2], 'source': row[3], 'category': row[4], 'analysis': { 'summary': row[5], 'significance': row[6], 'impact_level': row[7], 'impact_text': row[8], 'impact_description': row[9], 'action': row[10] } }) conn.close() return news_list def load_models_from_db() -> List[Dict]: """DB에서 모델 로드""" conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() cursor.execute(''' SELECT name, downloads, likes, task, url, analysis, rank FROM models ORDER BY rank ASC LIMIT 30 ''') models_list = [] for row in cursor.fetchall(): models_list.append({ 'name': row[0], 'downloads': row[1], 'likes': row[2], 'task': row[3], 'url': row[4], 'analysis': row[5], 'rank': row[6] }) conn.close() return models_list def load_spaces_from_db() -> List[Dict]: """DB에서 스페이스 로드""" conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() cursor.execute(''' SELECT space_id, name, author, title, likes, url, sdk, simple_explanation, tech_stack, rank FROM spaces ORDER BY rank ASC LIMIT 30 ''') spaces_list = [] for row in cursor.fetchall(): spaces_list.append({ 'space_id': row[0], 'name': row[1], 'author': row[2], 'title': row[3], 'likes': row[4], 'url': row[5], 'sdk': row[6], 'simple_explanation': row[7], 'tech_stack': json.loads(row[8]) if row[8] else [], 'rank': row[9], 'description': row[3] # title을 description으로 사용 }) conn.close() return spaces_list # ============================================ # LLM 분석기 클래스 # ============================================ class LLMAnalyzer: """Claude API를 사용한 LLM 분석기""" def __init__(self): self.api_available = True def analyze_news_simple(self, title: str, content: str = "") -> Dict: """뉴스 기사를 초등학생 수준으로 분석""" analysis_templates = { "챗GPT": { "summary": "마이크로소프트(MS)라는 큰 회사가 챗GPT라는 AI를 너무 많은 사람들이 사용해서, 컴퓨터를 보관하는 큰 건물(데이터센터)이 부족하다고 말했어요.", "significance": "챗GPT가 정말 인기가 많다는 뜻이에요. 마치 너무 많은 친구들이 한 게임기를 쓰려고 하는 것과 비슷해요.", "impact_level": "high", "impact_text": "높음", "impact_description": "AI 기술이 빠르게 발전하고 있고, 많은 사람들이 사용하고 있다는 중요한 신호예요.", "action": "챗GPT 같은 AI 도구를 배워보세요. 숙제를 도와달라고 하거나, 모르는 것을 물어볼 수 있어요!" }, "GPU": { "summary": "미국이 아랍에미리트(UAE)라는 나라에 GPU라는 특별한 컴퓨터 부품을 팔 수 있게 허락했어요. GPU는 AI를 만드는 데 꼭 필요한 부품이에요.", "significance": "GPU는 AI의 두뇌 같은 거예요. 이걸 팔 수 있게 되면 더 많은 나라에서 AI를 만들 수 있어요.", "impact_level": "medium", "impact_text": "중간", "impact_description": "AI 기술이 더 많은 나라로 퍼질 수 있게 되었어요.", "action": "컴퓨터가 어떻게 작동하는지 관심을 가져보세요. GPU가 무엇인지 검색해보는 것도 좋아요!" }, "소라": { "summary": "오픈AI가 만든 '소라'라는 AI 앱이 엄청 빠르게 인기를 얻었어요. 100만 명이 다운로드하는 데 챗GPT보다 더 빨랐대요!", "significance": "사람들이 비디오를 만드는 AI에 정말 관심이 많다는 뜻이에요.", "impact_level": "high", "impact_text": "높음", "impact_description": "앞으로 누구나 쉽게 멋진 비디오를 만들 수 있게 될 거예요.", "action": "소라를 써보고, 상상한 것을 비디오로 만들어보세요. 창의력을 발휘할 수 있어요!" } } # 키워드 매칭으로 템플릿 선택 for keyword, template in analysis_templates.items(): if keyword.lower() in title.lower(): return template # 기본 분석 return { "summary": f"'{title}'라는 AI 관련 뉴스가 나왔어요. AI 기술이 계속 발전하고 있다는 소식이에요.", "significance": "AI는 우리 생활을 더 편리하게 만들어주는 기술이에요.", "impact_level": "medium", "impact_text": "중간", "impact_description": "AI 기술의 발전은 우리 미래에 중요한 영향을 줄 거예요.", "action": "AI에 대해 더 알아보고, AI를 활용하는 방법을 배워보세요!" } def analyze_model(self, model_name: str, task: str, downloads: int) -> str: """허깅페이스 모델 분석""" task_explanations = { "text-generation": "글을 자동으로 만들어주는", "image-to-text": "사진을 보고 설명을 써주는", "text-to-image": "글을 읽고 그림을 그려주는", "translation": "다른 언어로 번역해주는", "question-answering": "질문에 답해주는", "summarization": "긴 글을 짧게 요약해주는", "text-classification": "글을 분류해주는", "token-classification": "단어를 분석해주는", "fill-mask": "빈칸을 채워주는" } task_desc = task_explanations.get(task, "특별한 기능을 하는") if downloads > 10000000: popularity = "엄청나게 많은" elif downloads > 1000000: popularity = "아주 많은" elif downloads > 100000: popularity = "많은" else: popularity = "어느 정도" return f"이 모델은 {task_desc} AI예요. {popularity} 사람들이 다운로드해서 사용하고 있어요. {model_name.split('/')[-1]}라는 이름으로 유명해요!" def analyze_space(self, space_name: str, description: str) -> Dict: """허깅페이스 스페이스 분석""" return { "simple_explanation": f"{space_name}는 웹브라우저에서 바로 AI를 체험해볼 수 있는 곳이에요. 설치 없이도 사용할 수 있어서 편리해요! 마치 온라인 게임처럼 바로 접속해서 AI를 사용할 수 있답니다.", "tech_stack": ["Python", "Gradio", "Transformers", "PyTorch"] } # ============================================ # 고급 분석기 클래스 # ============================================ class AdvancedAIAnalyzer: """LLM 기반 고급 AI 뉴스 분석기""" def __init__(self): self.llm_analyzer = LLMAnalyzer() self.huggingface_data = { "models": [], "spaces": [] } self.news_data = [] def fetch_huggingface_models(self, limit: int = 30) -> List[Dict]: """허깅페이스 트렌딩 모델 30개 수집 (실제 API)""" print(f"🤗 허깅페이스 트렌딩 모델 {limit}개 수집 중...") models_list = [] try: # Hugging Face API 사용 api = HfApi() # trending 순위로 모델 가져오기 models = list(api.list_models( sort="trending_score", direction=-1, limit=limit )) print(f"📊 API에서 {len(models)}개 모델 받음") for idx, model in enumerate(models[:limit], 1): try: model_info = { 'name': model.id, 'downloads': getattr(model, 'downloads', 0) or 0, 'likes': getattr(model, 'likes', 0) or 0, 'task': getattr(model, 'pipeline_tag', 'N/A') or 'N/A', 'url': f"https://huggingface.co/{model.id}", 'rank': idx } # LLM 분석 추가 model_info['analysis'] = self.llm_analyzer.analyze_model( model_info['name'], model_info['task'], model_info['downloads'] ) models_list.append(model_info) # 진행상황 표시 if idx % 10 == 0: print(f" ✓ {idx}개 모델 처리 완료...") except Exception as e: print(f" ⚠️ 모델 {idx} 처리 오류: {e}") continue print(f"✅ {len(models_list)}개 트렌딩 모델 수집 완료") # DB에 저장 if models_list: save_models_to_db(models_list) return models_list except Exception as e: print(f"❌ 모델 수집 오류: {e}") print("💾 DB에서 이전 데이터 로드 시도...") return load_models_from_db() def fetch_huggingface_spaces(self, limit: int = 30) -> List[Dict]: """허깅페이스 트렌딩 스페이스 30개 수집 (실제 API)""" print(f"🚀 허깅페이스 트렌딩 스페이스 {limit}개 수집 중...") spaces_list = [] try: # Hugging Face API 사용 api = HfApi() # trending 순위로 스페이스 가져오기 spaces = list(api.list_spaces( sort="trending_score", direction=-1, limit=limit )) print(f"📊 API에서 {len(spaces)}개 스페이스 받음") for idx, space in enumerate(spaces[:limit], 1): try: space_info = { 'space_id': space.id, 'name': space.id.split('/')[-1] if '/' in space.id else space.id, 'author': space.author, 'title': getattr(space, 'title', space.id) or space.id, 'likes': getattr(space, 'likes', 0) or 0, 'url': f"https://huggingface.co/spaces/{space.id}", 'sdk': getattr(space, 'sdk', 'gradio') or 'gradio', 'rank': idx } # LLM 분석 추가 space_analysis = self.llm_analyzer.analyze_space( space_info['name'], space_info['title'] ) space_info['simple_explanation'] = space_analysis['simple_explanation'] space_info['tech_stack'] = space_analysis['tech_stack'] space_info['description'] = space_info['title'] spaces_list.append(space_info) # 진행상황 표시 if idx % 10 == 0: print(f" ✓ {idx}개 스페이스 처리 완료...") except Exception as e: print(f" ⚠️ 스페이스 {idx} 처리 오류: {e}") continue print(f"✅ {len(spaces_list)}개 트렌딩 스페이스 수집 완료") # DB에 저장 if spaces_list: save_spaces_to_db(spaces_list) return spaces_list except Exception as e: print(f"❌ 스페이스 수집 오류: {e}") print("💾 DB에서 이전 데이터 로드 시도...") return load_spaces_from_db() def create_sample_news(self) -> List[Dict]: """오늘의 AI 뉴스 샘플""" sample_news = [ { 'title': 'MS "챗GPT 수요 폭증으로 데이터센터 부족...2026년까지 지속"', 'url': 'https://www.aitimes.com/news/articleView.html?idxno=203055', 'date': '10-10 15:10', 'source': 'AI Times' }, { 'title': '미국, UAE에 GPU 판매 일부 승인...엔비디아 시총 5조달러 눈앞', 'url': 'https://www.aitimes.com/news/articleView.html?idxno=203053', 'date': '10-10 14:46', 'source': 'AI Times' }, { 'title': '소라, 챗GPT보다 빨리 100만 다운로드 돌파', 'url': 'https://www.aitimes.com/news/articleView.html?idxno=203045', 'date': '10-10 12:55', 'source': 'AI Times' } ] return sample_news def analyze_all_news(self) -> List[Dict]: """모든 뉴스에 LLM 분석 추가""" print("📰 뉴스 LLM 분석 시작...") news = self.create_sample_news() analyzed_news = [] for article in news: analysis = self.llm_analyzer.analyze_news_simple( article['title'], "" ) article['analysis'] = analysis analyzed_news.append(article) print(f"✅ {len(analyzed_news)}개 뉴스 분석 완료") # DB에 저장 save_news_to_db(analyzed_news) return analyzed_news def get_all_data(self, force_refresh: bool = False) -> Dict: """모든 데이터 수집 및 분석 Args: force_refresh: True면 새로 수집, False면 DB에서 로드 후 없으면 수집 """ print("\n" + "="*60) print("🚀 AI 뉴스 & 허깅페이스 LLM 분석 시작") print("="*60 + "\n") if force_refresh: print("🔄 강제 새로고침 모드: 모든 데이터 새로 수집") analyzed_news = self.analyze_all_news() analyzed_models = self.fetch_huggingface_models(30) analyzed_spaces = self.fetch_huggingface_spaces(30) else: print("💾 DB 우선 로드 모드") # DB에서 먼저 로드 analyzed_news = load_news_from_db() if not analyzed_news: print("📰 DB에 뉴스 없음 → 새로 수집") analyzed_news = self.analyze_all_news() else: print(f"✅ DB에서 {len(analyzed_news)}개 뉴스 로드") analyzed_models = load_models_from_db() if not analyzed_models: print("🤗 DB에 모델 없음 → 새로 수집") analyzed_models = self.fetch_huggingface_models(30) else: print(f"✅ DB에서 {len(analyzed_models)}개 모델 로드") analyzed_spaces = load_spaces_from_db() if not analyzed_spaces: print("🚀 DB에 스페이스 없음 → 새로 수집") analyzed_spaces = self.fetch_huggingface_spaces(30) else: print(f"✅ DB에서 {len(analyzed_spaces)}개 스페이스 로드") # 통계 stats = { 'total_news': len(analyzed_news), 'hf_models': len(analyzed_models), 'hf_spaces': len(analyzed_spaces), 'llm_analyses': len(analyzed_news) + len(analyzed_models) + len(analyzed_spaces) } print(f"\n✅ 전체 분석 완료: {stats['llm_analyses']}개 항목") print(f" 📰 뉴스: {stats['total_news']}개") print(f" 🤗 모델: {stats['hf_models']}개") print(f" 🚀 스페이스: {stats['hf_spaces']}개") return { 'analyzed_news': analyzed_news, 'analyzed_models': analyzed_models, 'analyzed_spaces': analyzed_spaces, 'stats': stats, 'timestamp': datetime.now().strftime('%Y년 %m월 %d일 %H:%M:%S') } # ============================================ # Flask 라우트 # ============================================ @app.route('/') def index(): """메인 페이지""" try: # refresh 파라미터 확인 force_refresh = request.args.get('refresh', 'false').lower() == 'true' analyzer = AdvancedAIAnalyzer() data = analyzer.get_all_data(force_refresh=force_refresh) return render_template_string(HTML_TEMPLATE, **data) except Exception as e: import traceback error_detail = traceback.format_exc() return f"""

⚠️ 오류 발생

{str(e)}

{error_detail}

""", 500 @app.route('/api/data') def api_data(): """JSON API""" try: force_refresh = request.args.get('refresh', 'false').lower() == 'true' analyzer = AdvancedAIAnalyzer() data = analyzer.get_all_data(force_refresh=force_refresh) return jsonify({ 'success': True, 'data': data }) except Exception as e: return jsonify({ 'success': False, 'error': str(e) }), 500 @app.route('/api/refresh') def api_refresh(): """강제 새로고침 API""" try: analyzer = AdvancedAIAnalyzer() data = analyzer.get_all_data(force_refresh=True) return jsonify({ 'success': True, 'message': '데이터가 성공적으로 갱신되었습니다', 'stats': data['stats'] }) except Exception as e: return jsonify({ 'success': False, 'error': str(e) }), 500 @app.route('/health') def health(): """헬스 체크""" try: # DB 연결 확인 conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() cursor.execute("SELECT COUNT(*) FROM news") news_count = cursor.fetchone()[0] cursor.execute("SELECT COUNT(*) FROM models") models_count = cursor.fetchone()[0] cursor.execute("SELECT COUNT(*) FROM spaces") spaces_count = cursor.fetchone()[0] conn.close() return jsonify({ "status": "healthy", "service": "AI News LLM Analyzer", "version": "3.0.0", "database": { "connected": True, "news_count": news_count, "models_count": models_count, "spaces_count": spaces_count }, "timestamp": datetime.now().isoformat() }) except Exception as e: return jsonify({ "status": "unhealthy", "error": str(e) }), 500 # ============================================ # 메인 실행 # ============================================ if __name__ == '__main__': port = int(os.environ.get('PORT', 7860)) print(f""" ╔════════════════════════════════════════════════════════════╗ ║ ║ ║ 🤖 AI 뉴스 & 허깅페이스 LLM 분석 웹앱 v3.0 ║ ║ ║ ╚════════════════════════════════════════════════════════════╝ ✨ 주요 기능: • 💾 SQLite DB 영구 스토리지 • 📰 뉴스 초등학생 수준 LLM 분석 • 🤗 허깅페이스 트렌딩 모델 TOP 30 • 🚀 허깅페이스 트렌딩 스페이스 TOP 30 • 🎨 탭 UI (뉴스/모델/스페이스) 🚀 서버 정보: 📍 메인: http://localhost:{port} 🔄 강제갱신: http://localhost:{port}/?refresh=true 📊 API: http://localhost:{port}/api/data 🔥 새로고침 API: http://localhost:{port}/api/refresh 💚 Health: http://localhost:{port}/health 💾 데이터베이스: {DB_PATH} 초기화 중... """) # 데이터베이스 초기화 try: init_database() except Exception as e: print(f"❌ DB 초기화 오류: {e}") sys.exit(1) print("\n✅ 서버 준비 완료!") print("브라우저에서 위 URL을 열어주세요!") print("종료: Ctrl+C\n") try: app.run( host='0.0.0.0', port=port, debug=False, threaded=True ) except KeyboardInterrupt: print("\n\n👋 서버 종료!") sys.exit(0) except Exception as e: print(f"\n❌ 서버 오류: {e}") sys.exit(1)