Today / app.py
ginipick's picture
Create app.py
6085299 verified
raw
history blame
17.1 kB
# -*- coding: utf-8 -*-
"""
AI ๋‰ด์Šค & ํ—ˆ๊น…ํŽ˜์ด์Šค ํŠธ๋ Œ๋”ฉ ๋ถ„์„ ์‹œ์Šคํ…œ
- AI Times ๋‰ด์Šค ํฌ๋กค๋ง ๋ฐ ์นดํ…Œ๊ณ ๋ฆฌ ๋ถ„๋ฅ˜
- ํ—ˆ๊น…ํŽ˜์ด์Šค ๋ชจ๋ธ/์ŠคํŽ˜์ด์Šค ํŠธ๋ Œ๋”ฉ ์ •๋ณด ์ˆ˜์ง‘
- Fireworks AI (Qwen) ๋ฅผ ํ†ตํ•œ ๋‰ด์Šค ๋ถ„์„
- Brave Search๋ฅผ ํ†ตํ•œ ํŒฉํŠธ ์ฒดํฌ
"""
import requests
from bs4 import BeautifulSoup
import json
from datetime import datetime
from typing import List, Dict, Optional
import time
import re
class AINewsAnalyzer:
    """AI news & Hugging Face trending analysis system.

    Pipeline:
      1. Crawl AI Times article listing pages.
      2. Classify headlines into keyword-based categories.
      3. Collect trending Hugging Face models (hub API) and spaces (scrape).
      4. Optionally analyze headlines with Fireworks AI (Qwen) and
         fact-check them via Brave Search.
      5. Render everything into a plain-text report.
    """

    def __init__(self, fireworks_api_key: str, brave_api_key: str):
        """
        Args:
            fireworks_api_key: Fireworks AI API key.
            brave_api_key: Brave Search API key.
        """
        self.fireworks_api_key = fireworks_api_key
        self.brave_api_key = brave_api_key
        # Category name -> substring keywords, consumed by categorize_news().
        # First matching category (in insertion order) wins.
        self.categories = {
            "์‚ฐ์—…๋™ํ–ฅ": ["์‚ฐ์—…", "๊ธฐ์—…", "ํˆฌ์ž", "์ธ์ˆ˜", "ํŒŒํŠธ๋„ˆ์‹ญ", "์‹œ์žฅ"],
            "๊ธฐ์ˆ ํ˜์‹ ": ["๊ธฐ์ˆ ", "๋ชจ๋ธ", "์•Œ๊ณ ๋ฆฌ์ฆ˜", "๊ฐœ๋ฐœ", "์—ฐ๊ตฌ", "๋…ผ๋ฌธ"],
            "์ œํ’ˆ์ถœ์‹œ": ["์ถœ์‹œ", "๊ณต๊ฐœ", "๋ฐœํ‘œ", "์„œ๋น„์Šค", "์ œํ’ˆ"],
            "์ •์ฑ…๊ทœ์ œ": ["๊ทœ์ œ", "์ •์ฑ…", "๋ฒ•", "์ •๋ถ€", "์ œ์žฌ"],
            "๋ณด์•ˆ์ด์Šˆ": ["๋ณด์•ˆ", "์ทจ์•ฝ์ ", "ํ•ดํ‚น", "์œ„ํ—˜", "ํ”„๋ผ์ด๋ฒ„์‹œ"],
        }
        # Accumulated trending data, filled by fetch_huggingface_trending().
        self.huggingface_data = {
            "models": [],
            "spaces": []
        }
        self.news_data = []

    def fetch_aitimes_news(self, urls: List[str]) -> List[Dict]:
        """Crawl AI Times listing pages and extract article entries.

        Args:
            urls: Listing-page URLs to crawl.

        Returns:
            List of dicts with 'title', 'url', 'date', 'source' keys,
            at most 10 articles per listing page. Pages that fail to
            load are logged and skipped (best-effort).
        """
        all_news = []
        # Compile the MM-DD date pattern once, outside the loops.
        date_pattern = re.compile(r'\d{2}-\d{2}')
        for url in urls:
            try:
                print(f"๐Ÿ“ฐ ๋‰ด์Šค ํฌ๋กค๋ง ์ค‘: {url}")
                # Bug fix: timeout added so a stalled server cannot hang
                # the whole run (every other request in this file has one).
                response = requests.get(url, headers={
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                }, timeout=10)
                soup = BeautifulSoup(response.content, 'html.parser')
                # Extract articles (selectors may need tuning for the real markup).
                articles = []
                # Article links on AI Times point at /news/articleView.html.
                for link in soup.find_all('a', href=True):
                    if '/news/articleView.html' in link['href']:
                        title = link.get_text(strip=True)
                        article_url = link['href']
                        if not article_url.startswith('http'):
                            article_url = 'https://www.aitimes.com' + article_url
                        # Pull an MM-DD date string from a sibling element, if any.
                        # Bug fix: 'string=' replaces the deprecated 'text='
                        # keyword of BeautifulSoup.find().
                        date_text = ""
                        parent = link.parent
                        if parent:
                            date_elem = parent.find(string=date_pattern)
                            if date_elem:
                                date_text = date_elem.strip()
                        # Very short anchor texts are navigation, not headlines.
                        if title and len(title) > 10:
                            articles.append({
                                'title': title,
                                'url': article_url,
                                'date': date_text,
                                'source': 'AI Times'
                            })
                all_news.extend(articles[:10])  # keep only the top 10 per page
                time.sleep(1)  # crawl politely
            except Exception as e:
                print(f"โŒ ํฌ๋กค๋ง ์˜ค๋ฅ˜: {e}")
        return all_news

    def fetch_huggingface_trending(self) -> Dict:
        """Collect trending Hugging Face models and spaces.

        Models come from the public hub API ('sort=trending'); spaces are
        scraped from the HTML landing page. Results are appended into
        self.huggingface_data, which is also returned.
        """
        print("๐Ÿค— ํ—ˆ๊น…ํŽ˜์ด์Šค ํŠธ๋ Œ๋”ฉ ์ •๋ณด ์ˆ˜์ง‘ ์ค‘...")
        # Trending models via the hub API.
        try:
            models_url = "https://huggingface.co/api/models"
            params = {
                'sort': 'trending',
                'limit': 30
            }
            response = requests.get(models_url, params=params, timeout=10)
            if response.status_code == 200:
                models = response.json()
                for model in models[:30]:
                    self.huggingface_data['models'].append({
                        'name': model.get('id', 'Unknown'),
                        'downloads': model.get('downloads', 0),
                        'likes': model.get('likes', 0),
                        'task': model.get('pipeline_tag', 'N/A'),
                        'url': f"https://huggingface.co/{model.get('id', '')}"
                    })
                print(f"โœ… {len(self.huggingface_data['models'])}๊ฐœ ํŠธ๋ Œ๋”ฉ ๋ชจ๋ธ ์ˆ˜์ง‘ ์™„๋ฃŒ")
        except Exception as e:
            print(f"โŒ ๋ชจ๋ธ ์ˆ˜์ง‘ ์˜ค๋ฅ˜: {e}")
        # Trending spaces via HTML scraping (no public trending API).
        try:
            spaces_url = "https://huggingface.co/spaces"
            response = requests.get(spaces_url, headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            }, timeout=10)
            soup = BeautifulSoup(response.content, 'html.parser')
            # Extract space links of the form /spaces/<owner>/<name>.
            space_count = 0
            for link in soup.find_all('a', href=True):
                if '/spaces/' in link['href'] and space_count < 30:
                    space_name = link['href'].replace('/spaces/', '')
                    # "owner/name" style slugs only; skip bare nav links.
                    if '/' in space_name and len(space_name) > 3:
                        title = link.get_text(strip=True)
                        if title:
                            self.huggingface_data['spaces'].append({
                                'name': space_name,
                                'title': title[:100],
                                'url': f"https://huggingface.co{link['href']}"
                            })
                            space_count += 1
            print(f"โœ… {len(self.huggingface_data['spaces'])}๊ฐœ ํŠธ๋ Œ๋”ฉ ์ŠคํŽ˜์ด์Šค ์ˆ˜์ง‘ ์™„๋ฃŒ")
        except Exception as e:
            print(f"โŒ ์ŠคํŽ˜์ด์Šค ์ˆ˜์ง‘ ์˜ค๋ฅ˜: {e}")
        return self.huggingface_data

    def categorize_news(self, news_list: List[Dict]) -> List[Dict]:
        """Assign a 'category' key to each news item in place.

        The title is lower-cased and the first category (in declaration
        order) with any keyword appearing as a substring wins; items with
        no match get the fallback category.

        Returns:
            The same list, with each dict mutated to carry 'category'.
        """
        for news in news_list:
            title = news['title'].lower()
            news['category'] = "๊ธฐํƒ€"
            for category, keywords in self.categories.items():
                if any(keyword in title for keyword in keywords):
                    news['category'] = category
                    break
        return news_list

    def analyze_with_qwen(self, text: str, instruction: str) -> str:
        """Analyze news text with the Fireworks AI Qwen chat model.

        Args:
            text: News text (headline) to analyze.
            instruction: Task prompt prepended to the text.

        Returns:
            The model reply, or a human-readable error string on failure
            (this method never raises).
        """
        url = "https://api.fireworks.ai/inference/v1/chat/completions"
        payload = {
            "model": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
            "max_tokens": 4096,
            "top_p": 1,
            "top_k": 40,
            "presence_penalty": 0,
            "frequency_penalty": 0,
            "temperature": 0.6,
            "messages": [
                {
                    "role": "system",
                    "content": "๋‹น์‹ ์€ AI ๋‰ด์Šค๋ฅผ ์ดˆ๋“ฑํ•™์ƒ๋„ ์ดํ•ดํ•  ์ˆ˜ ์žˆ๊ฒŒ ์‰ฝ๊ฒŒ ์„ค๋ช…ํ•˜๋Š” ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค."
                },
                {
                    "role": "user",
                    "content": f"{instruction}\n\n๋‰ด์Šค: {text}"
                }
            ]
        }
        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.fireworks_api_key}"
        }
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=30)
            if response.status_code == 200:
                result = response.json()
                return result['choices'][0]['message']['content']
            else:
                return f"๋ถ„์„ ์‹คํŒจ (์ƒํƒœ ์ฝ”๋“œ: {response.status_code})"
        except Exception as e:
            return f"๋ถ„์„ ์˜ค๋ฅ˜: {str(e)}"

    def fact_check_with_brave(self, query: str) -> List[Dict]:
        """Fact-check a claim via Brave web search.

        Args:
            query: Search query (typically a headline).

        Returns:
            Up to 3 result dicts with 'title', 'description', 'url';
            empty list on any failure (this method never raises).
        """
        url = "https://api.search.brave.com/res/v1/web/search"
        headers = {
            "Accept": "application/json",
            "X-Subscription-Token": self.brave_api_key
        }
        params = {
            "q": query,
            "count": 5,
            "text_decorations": False,
            "search_lang": "ko"
        }
        try:
            response = requests.get(url, headers=headers, params=params, timeout=10)
            if response.status_code == 200:
                data = response.json()
                results = []
                if 'web' in data and 'results' in data['web']:
                    for item in data['web']['results'][:3]:
                        results.append({
                            'title': item.get('title', ''),
                            'description': item.get('description', ''),
                            'url': item.get('url', '')
                        })
                return results
            else:
                return []
        except Exception as e:
            print(f"โŒ Brave Search ์˜ค๋ฅ˜: {e}")
            return []

    def generate_report(self, news_list: List[Dict], analyze_news: bool = True) -> str:
        """Build the combined plain-text report.

        Args:
            news_list: Categorized news items ('title'/'url' required).
            analyze_news: When True, run LLM analysis and Brave fact-check
                on the top 2 articles of each category (slow, network-bound).

        Returns:
            The report as one newline-joined string.
        """
        report = []
        report.append("=" * 80)
        report.append("๐Ÿ“Š AI ๋‰ด์Šค & ํ—ˆ๊น…ํŽ˜์ด์Šค ํŠธ๋ Œ๋”ฉ ์ข…ํ•ฉ ๋ฆฌํฌํŠธ")
        report.append(f"๐Ÿ“… ์ƒ์„ฑ์ผ์‹œ: {datetime.now().strftime('%Y๋…„ %m์›” %d์ผ %H:%M')}")
        report.append("=" * 80)
        report.append("")
        # 1. News grouped by category
        report.append("๐Ÿ“ฐ === AI TIMES ๋‰ด์Šค ๋ถ„์„ ===")
        report.append("")
        categorized_news = {}
        for news in news_list:
            category = news.get('category', '๊ธฐํƒ€')
            if category not in categorized_news:
                categorized_news[category] = []
            categorized_news[category].append(news)
        for category, articles in categorized_news.items():
            report.append(f"๐Ÿ“Œ [{category}] ({len(articles)}๊ฑด)")
            report.append("-" * 80)
            for i, article in enumerate(articles[:5], 1):  # at most 5 per category
                report.append(f"{i}. {article['title']}")
                report.append(f" ๐Ÿ”— {article['url']}")
                report.append(f" ๐Ÿ“… {article.get('date', 'N/A')}")
                # Optional LLM analysis for the top 2 articles of each category.
                if analyze_news and i <= 2:
                    print(f"๐Ÿค– LLM ๋ถ„์„ ์ค‘: {article['title'][:50]}...")
                    instruction = """์ด ๋‰ด์Šค๋ฅผ ๋‹ค์Œ ํ˜•์‹์œผ๋กœ ๋ถ„์„ํ•ด์ฃผ์„ธ์š”:
1. ํ•ต์‹ฌ ๋‚ด์šฉ (2-3๋ฌธ์žฅ, ์ดˆ๋“ฑํ•™์ƒ ์ˆ˜์ค€)
2. ์™œ ์ค‘์š”ํ•œ๊ฐ€? (1-2๋ฌธ์žฅ)
3. ๋‹น์‹ ์ด ํ•ด์•ผ ํ•  ํ–‰๋™ (1-2๊ฐœ ํ•ญ๋ชฉ)
๊ฐ„๊ฒฐํ•˜๊ณ  ๋ช…ํ™•ํ•˜๊ฒŒ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”."""
                    analysis = self.analyze_with_qwen(article['title'], instruction)
                    report.append(f"\n ๐Ÿค– AI ๋ถ„์„:")
                    for line in analysis.split('\n'):
                        if line.strip():
                            report.append(f" {line.strip()}")
                    # Optional fact-check via Brave Search.
                    fact_check = self.fact_check_with_brave(article['title'][:100])
                    if fact_check:
                        report.append(f"\n โœ… ํŒฉํŠธ ์ฒดํฌ (Brave Search):")
                        for fc in fact_check[:2]:
                            report.append(f" โ€ข {fc['title']}")
                            report.append(f" {fc['url']}")
                    time.sleep(2)  # respect API rate limits
                report.append("")
            report.append("")
        # 2. Hugging Face trending
        report.append("๐Ÿค— === ํ—ˆ๊น…ํŽ˜์ด์Šค ํŠธ๋ Œ๋”ฉ TOP 30 ===")
        report.append("")
        # Models
        report.append("๐Ÿ”ฅ ํŠธ๋ Œ๋”ฉ ๋ชจ๋ธ TOP 30")
        report.append("-" * 80)
        for i, model in enumerate(self.huggingface_data['models'][:30], 1):
            report.append(f"{i:2d}. {model['name']}")
            report.append(f" ๐Ÿ“Š ๋‹ค์šด๋กœ๋“œ: {model['downloads']:,} | โค๏ธ ์ข‹์•„์š”: {model['likes']:,}")
            report.append(f" ๐Ÿท๏ธ Task: {model['task']}")
            report.append(f" ๐Ÿ”— {model['url']}")
            report.append("")
        report.append("")
        # Spaces
        report.append("๐Ÿš€ ํŠธ๋ Œ๋”ฉ ์ŠคํŽ˜์ด์Šค TOP 30")
        report.append("-" * 80)
        for i, space in enumerate(self.huggingface_data['spaces'][:30], 1):
            report.append(f"{i:2d}. {space['name']}")
            report.append(f" ๐Ÿ“ {space['title']}")
            report.append(f" ๐Ÿ”— {space['url']}")
            report.append("")
        # 3. Summary
        report.append("=" * 80)
        report.append("๐Ÿ“ˆ ์ข…ํ•ฉ ์š”์•ฝ")
        report.append("=" * 80)
        report.append(f"โ€ข ์ด ๋‰ด์Šค ์ˆ˜์ง‘: {len(news_list)}๊ฑด")
        report.append(f"โ€ข ์นดํ…Œ๊ณ ๋ฆฌ ์ˆ˜: {len(categorized_news)}๊ฐœ")
        report.append(f"โ€ข ํŠธ๋ Œ๋”ฉ ๋ชจ๋ธ: {len(self.huggingface_data['models'])}๊ฐœ")
        report.append(f"โ€ข ํŠธ๋ Œ๋”ฉ ์ŠคํŽ˜์ด์Šค: {len(self.huggingface_data['spaces'])}๊ฐœ")
        report.append("")
        return '\n'.join(report)

    def run_full_analysis(self, news_urls: List[str], analyze_with_llm: bool = True) -> str:
        """Run the end-to-end pipeline: crawl, classify, collect, report.

        Args:
            news_urls: AI Times listing URLs to crawl.
            analyze_with_llm: Forwarded to generate_report(); disable for
                a fast, collection-only run.

        Returns:
            The full report text.
        """
        print("๐Ÿš€ AI ๋‰ด์Šค & ํ—ˆ๊น…ํŽ˜์ด์Šค ํŠธ๋ Œ๋”ฉ ๋ถ„์„ ์‹œ์ž‘...")
        print("")
        # 1. Collect news
        news_list = self.fetch_aitimes_news(news_urls)
        print(f"โœ… ์ด {len(news_list)}๊ฑด์˜ ๋‰ด์Šค ์ˆ˜์ง‘ ์™„๋ฃŒ")
        print("")
        # 2. Categorize
        categorized_news = self.categorize_news(news_list)
        print("โœ… ๋‰ด์Šค ์นดํ…Œ๊ณ ๋ฆฌ ๋ถ„๋ฅ˜ ์™„๋ฃŒ")
        print("")
        # 3. Hugging Face trending
        self.fetch_huggingface_trending()
        print("")
        # 4. Report
        print("๐Ÿ“ ๋ฆฌํฌํŠธ ์ƒ์„ฑ ์ค‘...")
        report = self.generate_report(categorized_news, analyze_news=analyze_with_llm)
        print("")
        print("โœ… ๋ถ„์„ ์™„๋ฃŒ!")
        return report

    def save_report(self, report: str, filename: Optional[str] = None):
        """Write the report to a UTF-8 text file.

        Args:
            report: Report text to write.
            filename: Target path; defaults to a timestamped name.
        """
        if filename is None:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            filename = f"ai_news_report_{timestamp}.txt"
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(report)
        # Bug fix: the message printed a literal placeholder instead of
        # interpolating the actual file name.
        print(f"๐Ÿ’พ ๋ฆฌํฌํŠธ ์ €์žฅ ์™„๋ฃŒ: {filename}")
# ==================== ์‚ฌ์šฉ ์˜ˆ์‹œ ====================
def main():
    """Entry point: run the full analysis and persist the report."""
    # Fill in your own credentials before running.
    fireworks_key = "YOUR_FIREWORKS_API_KEY"
    brave_key = "YOUR_BRAVE_API_KEY"

    # AI Times listing pages to crawl (AI industry + AI technology sections).
    listing_pages = [
        "https://www.aitimes.com/news/articleList.html?sc_multi_code=S2&view_type=sm",
        "https://www.aitimes.com/news/articleList.html?sc_section_code=S1N24&view_type=sm"
    ]

    # Build the analyzer with both API credentials.
    analyzer = AINewsAnalyzer(
        fireworks_api_key=fireworks_key,
        brave_api_key=brave_key
    )

    # Pass analyze_with_llm=False to skip LLM analysis for a quick,
    # collection-only run; True is slower but produces full analysis.
    report = analyzer.run_full_analysis(
        news_urls=listing_pages,
        analyze_with_llm=True
    )

    # Echo the report to stdout, then save it to disk.
    print("\n" + "=" * 80)
    print(report)
    analyzer.save_report(report)


if __name__ == "__main__":
    main()
# ==================== Usage tips ====================
"""
1. API key setup:
- Fireworks AI: https://fireworks.ai/
- Brave Search: https://brave.com/search/api/
2. Quick test (without LLM analysis):
analyzer.run_full_analysis(news_urls, analyze_with_llm=False)
3. Analyzing only specific categories:
filter the desired categories out of categorized_news
4. Adjusting the crawl cadence:
tune the time.sleep() values to balance speed and stability
5. Using the results:
- Save as JSON: json.dumps(analyzer.huggingface_data)
- Store in a database
- Feed a dashboard
"""