"""
AI news & Hugging Face trending analysis system.

- Crawls AI Times news and classifies the articles by category
- Collects trending Hugging Face models and Spaces
- Analyzes news with Fireworks AI (Qwen)
- Fact-checks headlines with Brave Search
"""

import json
import re
import time
from datetime import datetime
from typing import List, Dict, Optional
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


class AINewsAnalyzer:
    """Collect AI Times headlines and Hugging Face trending data and build a text report.

    The pipeline (see ``run_full_analysis``) crawls news list pages, assigns each
    headline a keyword-based category, fetches trending Hugging Face models and
    Spaces, optionally asks a Fireworks-hosted Qwen model to explain the top
    headlines and looks them up on Brave Search, and renders everything as one
    plain-text report.

    NOTE(review): the Korean literals and emoji in this class were reconstructed
    from a mis-encoded copy of the file. Spots where the reconstruction was
    ambiguous are marked below; the amount of leading whitespace inside the
    report lines was also lost and has been chosen for readability.
    """

    # Browser-like User-Agent sent with the HTML page requests.
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    # Timeout (seconds) for the scraping / search requests.
    _HTTP_TIMEOUT = 10
    # Prompt sent together with each headline to the LLM.
    _ANALYSIS_INSTRUCTION = (
        "이 뉴스를 다음 형식으로 분석해주세요:\n"
        "1. 핵심 내용 (2-3문장, 초등학생 수준)\n"
        "2. 왜 중요한가? (1-2문장)\n"
        "3. 당신이 해야 할 행동 (1-2개 항목)\n"
        "\n"
        "간결하고 명확하게 작성해주세요."
    )

    def __init__(self, fireworks_api_key: str, brave_api_key: str):
        """Store the API keys and initialise the category table and caches.

        Args:
            fireworks_api_key: Fireworks AI API key.
            brave_api_key: Brave Search API key.
        """
        self.fireworks_api_key = fireworks_api_key
        self.brave_api_key = brave_api_key

        # Category name -> keywords looked for in the lower-cased headline.
        # Dict order matters: the first category with a matching keyword wins.
        # NOTE(review): "발표", "제재" and "위험" could not be recovered
        # unambiguously from the damaged source (e.g. "위험" may have been
        # "위협") - confirm against the original file.
        self.categories = {
            "산업동향": ["산업", "기업", "투자", "인수", "파트너십", "시장"],
            "기술혁신": ["기술", "모델", "알고리즘", "개발", "연구", "논문"],
            "제품출시": ["출시", "공개", "발표", "서비스", "제품"],
            "정책규제": ["규제", "정책", "법", "정부", "제재"],
            "보안이슈": ["보안", "취약점", "해킹", "위험", "프라이버시"],
        }

        # Filled by fetch_huggingface_trending().
        self.huggingface_data = {
            "models": [],
            "spaces": [],
        }

        # Filled by run_full_analysis() with the categorized news list.
        self.news_data = []

    def fetch_aitimes_news(self, urls: List[str]) -> List[Dict]:
        """Crawl AI Times list pages and collect article links.

        Args:
            urls: List-page URLs to scrape.

        Returns:
            A list of dicts with keys ``title``, ``url``, ``date`` and
            ``source``; at most 10 entries per list page. A page that fails to
            download or parse is reported on stdout and skipped.
        """
        all_news = []
        seen_urls = set()  # the same article is often linked more than once
        date_pattern = re.compile(r'\d{2}-\d{2}')

        for url in urls:
            try:
                print(f"📰 뉴스 크롤링 중: {url}")
                response = requests.get(
                    url,
                    headers={'User-Agent': self._USER_AGENT},
                    timeout=self._HTTP_TIMEOUT,  # never hang on a stalled server
                )
                response.raise_for_status()  # do not parse an error page as news
                soup = BeautifulSoup(response.content, 'html.parser')

                articles = []
                for link in soup.find_all('a', href=True):
                    href = link['href']
                    if '/news/articleView.html' not in href:
                        continue

                    # Very short link texts are navigation/thumbnail links.
                    title = link.get_text(strip=True)
                    if not title or len(title) <= 10:
                        continue

                    # Resolves relative, root-relative and absolute hrefs alike.
                    article_url = urljoin(url, href)
                    if article_url in seen_urls:
                        continue
                    seen_urls.add(article_url)

                    # Best effort: a "MM-DD"-looking text node next to the link.
                    date_text = ""
                    parent = link.parent
                    if parent:
                        date_elem = parent.find(string=date_pattern)
                        if date_elem:
                            date_text = date_elem.strip()

                    articles.append({
                        'title': title,
                        'url': article_url,
                        'date': date_text,
                        'source': 'AI Times',
                    })

                all_news.extend(articles[:10])
                time.sleep(1)  # be polite between list pages

            except Exception as e:
                # Crawling is best effort; keep going with the next page.
                print(f"❌ 크롤링 오류: {e}")

        return all_news

    def fetch_huggingface_trending(self) -> Dict:
        """Collect trending Hugging Face models (API) and Spaces (HTML page).

        Returns:
            ``self.huggingface_data``: ``{"models": [...], "spaces": [...]}``
            with up to 30 entries each. The cache is rebuilt on every call.
        """
        print("🤗 허깅페이스 트렌딩 정보 수집 중...")

        # Start from a clean slate so repeated calls do not duplicate entries.
        self.huggingface_data['models'] = []
        self.huggingface_data['spaces'] = []

        try:
            models_url = "https://huggingface.co/api/models"
            # NOTE(review): confirm that 'trending' is a sort key the Hub API
            # accepts; kept exactly as in the original code.
            params = {
                'sort': 'trending',
                'limit': 30,
            }

            response = requests.get(models_url, params=params, timeout=self._HTTP_TIMEOUT)
            if response.status_code == 200:
                for model in response.json()[:30]:
                    self.huggingface_data['models'].append({
                        'name': model.get('id', 'Unknown'),
                        # "or 0" keeps the ":," formatting in the report safe
                        # if the API returns null for a counter.
                        'downloads': model.get('downloads', 0) or 0,
                        'likes': model.get('likes', 0) or 0,
                        'task': model.get('pipeline_tag', 'N/A'),
                        'url': f"https://huggingface.co/{model.get('id', '')}",
                    })

                print(f"✅ {len(self.huggingface_data['models'])}개 트렌딩 모델 수집 완료")
            else:
                print(f"❌ 모델 수집 오류: HTTP {response.status_code}")

        except Exception as e:
            print(f"❌ 모델 수집 오류: {e}")

        try:
            spaces_url = "https://huggingface.co/spaces"
            response = requests.get(
                spaces_url,
                headers={'User-Agent': self._USER_AGENT},
                timeout=self._HTTP_TIMEOUT,
            )
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')

            for link in soup.find_all('a', href=True):
                if len(self.huggingface_data['spaces']) >= 30:
                    break
                href = link['href']
                if '/spaces/' not in href:
                    continue

                # Only "owner/name" style paths are actual Spaces.
                space_name = href.replace('/spaces/', '')
                if '/' not in space_name or len(space_name) <= 3:
                    continue

                title = link.get_text(strip=True)
                if title:
                    self.huggingface_data['spaces'].append({
                        'name': space_name,
                        'title': title[:100],
                        'url': f"https://huggingface.co{href}",
                    })

            print(f"✅ {len(self.huggingface_data['spaces'])}개 트렌딩 스페이스 수집 완료")

        except Exception as e:
            print(f"❌ 스페이스 수집 오류: {e}")

        return self.huggingface_data

    def categorize_news(self, news_list: List[Dict]) -> List[Dict]:
        """Tag every news dict (in place) with a ``category`` key.

        The first category in ``self.categories`` with a keyword contained in
        the lower-cased title wins; headlines without a match get "기타".

        Args:
            news_list: News dicts, each with a ``title`` key.

        Returns:
            The same list object, with ``category`` set on every item.
        """
        for news in news_list:
            title = news['title'].lower()
            news['category'] = "기타"

            for category, keywords in self.categories.items():
                if any(keyword in title for keyword in keywords):
                    news['category'] = category
                    break

        return news_list

    def analyze_with_qwen(self, text: str, instruction: str) -> str:
        """Ask the Fireworks-hosted Qwen model to analyze ``text``.

        Args:
            text: The news text (the report passes the headline).
            instruction: Prompt describing the expected analysis format.

        Returns:
            The model's answer, or a human-readable failure message; this
            method never raises.
        """
        url = "https://api.fireworks.ai/inference/v1/chat/completions"

        payload = {
            "model": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
            "max_tokens": 4096,
            "top_p": 1,
            "top_k": 40,
            "presence_penalty": 0,
            "frequency_penalty": 0,
            "temperature": 0.6,
            "messages": [
                {
                    "role": "system",
                    "content": "당신은 AI 뉴스를 초등학생도 이해할 수 있게 쉽게 설명하는 전문가입니다.",
                },
                {
                    "role": "user",
                    "content": f"{instruction}\n\n뉴스: {text}",
                },
            ],
        }

        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.fireworks_api_key}",
        }

        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=30)

            if response.status_code == 200:
                result = response.json()
                return result['choices'][0]['message']['content']
            return f"분석 실패 (상태 코드: {response.status_code})"

        except Exception as e:
            # Network errors and unexpected response shapes both end up here.
            return f"분석 오류: {str(e)}"

    def fact_check_with_brave(self, query: str) -> List[Dict]:
        """Look ``query`` up on Brave Search.

        Args:
            query: Search query (the report passes the headline).

        Returns:
            Up to three dicts with ``title``, ``description`` and ``url``; an
            empty list on any error or non-200 response.
        """
        url = "https://api.search.brave.com/res/v1/web/search"

        headers = {
            "Accept": "application/json",
            "X-Subscription-Token": self.brave_api_key,
        }

        params = {
            "q": query,
            "count": 5,
            "text_decorations": False,
            "search_lang": "ko",
        }

        try:
            response = requests.get(url, headers=headers, params=params, timeout=self._HTTP_TIMEOUT)
            if response.status_code != 200:
                return []

            data = response.json()
            results = []
            if 'web' in data and 'results' in data['web']:
                for item in data['web']['results'][:3]:
                    results.append({
                        'title': item.get('title', ''),
                        'description': item.get('description', ''),
                        'url': item.get('url', ''),
                    })
            return results

        except Exception as e:
            print(f"❌ Brave Search 오류: {e}")
            return []

    def _append_article_analysis(self, report: List[str], article: Dict) -> None:
        """Append the LLM analysis and fact-check lines for one article to ``report``."""
        print(f"🤖 LLM 분석 중: {article['title'][:50]}...")

        analysis = self.analyze_with_qwen(article['title'], self._ANALYSIS_INSTRUCTION)
        report.append("\n   🤖 AI 분석:")
        for line in analysis.split('\n'):
            if line.strip():
                report.append(f"      {line.strip()}")

        fact_check = self.fact_check_with_brave(article['title'][:100])
        if fact_check:
            report.append("\n   ✅ 팩트 체크 (Brave Search):")
            for fc in fact_check[:2]:
                report.append(f"      • {fc['title']}")
                report.append(f"        {fc['url']}")

        time.sleep(2)  # throttle the two external APIs

    def generate_report(self, news_list: List[Dict], analyze_news: bool = True) -> str:
        """Render the combined news / Hugging Face report as plain text.

        Args:
            news_list: News dicts (``title``, ``url``; optional ``date`` and
                ``category``).
            analyze_news: When true, the first two articles of every category
                are sent to the LLM and to Brave Search (network calls).

        Returns:
            The report as a single newline-joined string.
        """
        rule = "=" * 80
        divider = "-" * 80
        now = datetime.now()

        report = []
        report.append(rule)
        report.append("📊 AI 뉴스 & 허깅페이스 트렌딩 종합 리포트")
        # Built field by field so strftime never receives a non-ASCII format.
        report.append(
            f"📅 생성일시: {now.year}년 {now.month:02d}월 {now.day:02d}일 {now:%H:%M}"
        )
        report.append(rule)
        report.append("")

        # --- News, grouped by category (insertion order is kept) ---
        report.append("📰 === AI TIMES 뉴스 분석 ===")
        report.append("")

        categorized_news = {}
        for news in news_list:
            categorized_news.setdefault(news.get('category', '기타'), []).append(news)

        for category, articles in categorized_news.items():
            report.append(f"📌 [{category}] ({len(articles)}건)")
            report.append(divider)

            for i, article in enumerate(articles[:5], 1):
                report.append(f"{i}. {article['title']}")
                report.append(f"   🔗 {article['url']}")
                report.append(f"   📅 {article.get('date', 'N/A')}")

                # Only the top two per category are analyzed to limit API use.
                if analyze_news and i <= 2:
                    self._append_article_analysis(report, article)

                report.append("")

            report.append("")

        # --- Hugging Face trending ---
        report.append("🤗 === 허깅페이스 트렌딩 TOP 30 ===")
        report.append("")

        report.append("🔥 트렌딩 모델 TOP 30")
        report.append(divider)
        for i, model in enumerate(self.huggingface_data['models'][:30], 1):
            report.append(f"{i:2d}. {model['name']}")
            report.append(f"    📊 다운로드: {model['downloads']:,} | ❤️ 좋아요: {model['likes']:,}")
            report.append(f"    🏷️ Task: {model['task']}")
            report.append(f"    🔗 {model['url']}")
            report.append("")

        report.append("")

        report.append("🚀 트렌딩 스페이스 TOP 30")
        report.append(divider)
        for i, space in enumerate(self.huggingface_data['spaces'][:30], 1):
            report.append(f"{i:2d}. {space['name']}")
            report.append(f"    📝 {space['title']}")
            report.append(f"    🔗 {space['url']}")
            report.append("")

        # --- Summary ---
        report.append(rule)
        report.append("📊 종합 요약")
        report.append(rule)
        report.append(f"• 총 뉴스 수집: {len(news_list)}건")
        report.append(f"• 카테고리 수: {len(categorized_news)}개")
        report.append(f"• 트렌딩 모델: {len(self.huggingface_data['models'])}개")
        report.append(f"• 트렌딩 스페이스: {len(self.huggingface_data['spaces'])}개")
        report.append("")

        return '\n'.join(report)

    def run_full_analysis(self, news_urls: List[str], analyze_with_llm: bool = True) -> str:
        """Run the whole pipeline and return the finished report.

        Args:
            news_urls: AI Times list-page URLs to crawl.
            analyze_with_llm: Forwarded to ``generate_report`` as
                ``analyze_news``; set to False for a quick run without the
                LLM / search calls.

        Returns:
            The report text.
        """
        print("🚀 AI 뉴스 & 허깅페이스 트렌딩 분석 시작...")
        print("")

        news_list = self.fetch_aitimes_news(news_urls)
        print(f"✅ 총 {len(news_list)}건의 뉴스 수집 완료")
        print("")

        categorized_news = self.categorize_news(news_list)
        self.news_data = categorized_news  # keep the raw data for later use
        print("✅ 뉴스 카테고리 분류 완료")
        print("")

        self.fetch_huggingface_trending()
        print("")

        print("📝 리포트 생성 중...")
        report = self.generate_report(categorized_news, analyze_news=analyze_with_llm)

        print("")
        print("✅ 분석 완료!")

        return report

    def save_report(self, report: str, filename: Optional[str] = None):
        """Write ``report`` to a UTF-8 text file.

        Args:
            report: Report text to save.
            filename: Target path; defaults to
                ``ai_news_report_<YYYYmmdd_HHMMSS>.txt`` in the current
                directory.

        Returns:
            The path that was written.
        """
        if filename is None:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            filename = f"ai_news_report_{timestamp}.txt"

        with open(filename, 'w', encoding='utf-8') as f:
            f.write(report)

        print(f"💾 리포트 저장 완료: {filename}")
        return filename


def main():
    """Run the full pipeline: crawl, analyze, print the report and save it to disk."""
    import os

    # API keys: taken from the environment when set, otherwise the original
    # placeholders are used (replace them or export the variables).
    fireworks_api_key = os.environ.get("FIREWORKS_API_KEY", "YOUR_FIREWORKS_API_KEY")
    brave_api_key = os.environ.get("BRAVE_API_KEY", "YOUR_BRAVE_API_KEY")

    # AI Times list pages to crawl.
    news_urls = [
        "https://www.aitimes.com/news/articleList.html?sc_multi_code=S2&view_type=sm",
        "https://www.aitimes.com/news/articleList.html?sc_section_code=S1N24&view_type=sm",
    ]

    analyzer = AINewsAnalyzer(
        fireworks_api_key=fireworks_api_key,
        brave_api_key=brave_api_key,
    )

    # analyze_with_llm=True also calls the Fireworks and Brave APIs.
    report = analyzer.run_full_analysis(
        news_urls=news_urls,
        analyze_with_llm=True,
    )

    print("\n" + "=" * 80)
    print(report)

    analyzer.save_report(report)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()


"""
1. API key setup:
   - Fireworks AI: https://fireworks.ai/
   - Brave Search: https://brave.com/search/api/

2. Quick test (without LLM analysis):
   analyzer.run_full_analysis(news_urls, analyze_with_llm=False)

3. Analyzing specific categories only:
   Filter categorized_news for the categories you want

4. Adjusting the crawl interval:
   Tune the time.sleep() values to balance speed and stability

5. Using the results:
   - Save as JSON: json.dumps(analyzer.huggingface_data)
   - Store in a database
   - Feed a dashboard
"""