3gghdf5 / trend_analysis.py
ssboost's picture
Upload 15 files
106555b verified
"""
ํŠธ๋ Œ๋“œ ๋ถ„์„ ๋ชจ๋“ˆ - ๋„ค์ด๋ฒ„ ๋ฐ์ดํ„ฐ๋žฉ API๋ฅผ ํ†ตํ•œ ๊ฒ€์ƒ‰ ํŠธ๋ Œ๋“œ ๋ถ„์„
- ์„ฑ์žฅ๋ฅ ์„ 3๋…„ ๊ธฐ์ค€์œผ๋กœ ๋ณ€๊ฒฝ
- ๋„ˆ๋น„ 100% ์ ์šฉ
"""
import requests
import json
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime, timedelta
import api_utils
import keyword_search
import logging
# Logging setup: module-level logger that emits INFO and above through its own
# stream handler, formatted as "timestamp - logger name - level - message".
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler = logging.StreamHandler()
handler.setFormatter(formatter)
# NOTE(review): the handler is attached unconditionally each time this module body runs.
logger.addHandler(handler)
def _trend_date_range(period, now=None):
    """Return (start, end) 'YYYY-MM-01' strings for the requested analysis window."""
    # The window is anchored on yesterday's month; anything other than "1year"
    # is treated as the 3-year window.
    yesterday = (now or datetime.now()) - timedelta(days=1)
    years_back = 1 if period == "1year" else 3
    start_date_str = f"{yesterday.year - years_back:04d}-{yesterday.month:02d}-01"
    end_date_str = f"{yesterday.year:04d}-{yesterday.month:02d}-01"
    return start_date_str, end_date_str


def _flatten_datalab_results(response_json):
    """Flatten a DataLab response into one row dict per (keyword, period) data point."""
    return [
        {
            'keyword': result['title'],
            'period': data_point['period'],
            'ratio': data_point['ratio'],
        }
        for result in response_json['results']
        for data_point in result['data']
    ]


def get_trend_data(keywords, period="1year"):
    """
    Fetch monthly search-trend data from the Naver DataLab API and chart it.

    Args:
        keywords (list): Keywords to analyse; only the first 5 are used.
        period (str): Analysis window, "1year" or "3year" (any value other
            than "1year" is treated as the 3-year window).

    Returns:
        dict: On success: "status" ("success"), "trend_data" (DataFrame with
        the absolute volume column added), "graph_html", "period" and
        "keywords" (the trimmed list). On failure: "status" ("error") and
        "message". Empty/None keywords, a missing API config, a non-200
        response, an empty result set and any exception raised while calling
        or processing the API all yield the error form.
    """
    # Reject empty/None input up front: nothing to analyse, and there is no
    # reason to fetch an API config or issue a request for it.
    if not keywords:
        logger.warning("Trend analysis skipped: no keywords provided")
        return {
            "status": "error",
            "message": "No keywords provided"
        }

    logger.info(f"ํŠธ๋ Œ๋“œ ๋ถ„์„ ์‹œ์ž‘: {len(keywords)}๊ฐœ ํ‚ค์›Œ๋“œ, ๊ธฐ๊ฐ„: {period}")

    # Month-granular window, both ends pinned to the first day of the month.
    start_date_str, end_date_str = _trend_date_range(period)
    logger.info(f"๋ถ„์„ ๊ธฐ๊ฐ„: {start_date_str} ~ {end_date_str} (์›”๊ฐ„ ๋ฐ์ดํ„ฐ)")

    # Only the first 5 keywords are processed.
    keywords = keywords[:5]

    api_config = api_utils.get_next_datalab_api_config()
    if not api_config:
        logger.error("๋ฐ์ดํ„ฐ๋žฉ API ์„ค์ •์„ ๊ฐ€์ ธ์˜ฌ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
        return {
            "status": "error",
            "message": "๋ฐ์ดํ„ฐ๋žฉ API ์„ค์ • ์—†์Œ"
        }

    # One single-keyword group per keyword. No 'device' field is sent; the
    # original author's note says this queries PC and mobile combined.
    body_dict = {
        'startDate': start_date_str,
        'endDate': end_date_str,
        'timeUnit': 'month',
        'keywordGroups': [
            {'groupName': keyword, 'keywords': [keyword]}
            for keyword in keywords
        ]
    }
    # json.dumps keeps its default ASCII escaping, so the body is plain ASCII.
    body = json.dumps(body_dict)

    url = "https://openapi.naver.com/v1/datalab/search"
    headers = {
        'X-Naver-Client-Id': api_config["CLIENT_ID"],
        'X-Naver-Client-Secret': api_config["CLIENT_SECRET"],
        'Content-Type': 'application/json'
    }

    try:
        response = requests.post(url, data=body, headers=headers, timeout=10)
        if response.status_code != 200:
            logger.error(f"๋ฐ์ดํ„ฐ๋žฉ API ์˜ค๋ฅ˜: {response.status_code} - {response.text}")
            return {
                "status": "error",
                "message": f"API ์˜ค๋ฅ˜: {response.status_code}"
            }

        trend_data = _flatten_datalab_results(response.json())
        if not trend_data:
            # Without rows the DataFrame has no 'keyword' column and the
            # downstream steps would fail with an opaque KeyError.
            logger.warning("DataLab API returned no data points")
            return {
                "status": "error",
                "message": "No trend data returned"
            }
        df_trend = pd.DataFrame(trend_data)

        # Current monthly search volumes, used to scale relative ratios.
        search_volumes = keyword_search.fetch_all_search_volumes(keywords)
        df_trend_with_volume = convert_to_absolute_volume(df_trend, search_volumes)

        graph_html = create_trend_graph(df_trend_with_volume, period)
        logger.info(f"ํŠธ๋ Œ๋“œ ๋ถ„์„ ์™„๋ฃŒ: {len(trend_data)}๊ฐœ ๋ฐ์ดํ„ฐ ํฌ์ธํŠธ")
        return {
            "status": "success",
            "trend_data": df_trend_with_volume,
            "graph_html": graph_html,
            "period": period,
            "keywords": keywords
        }
    except Exception as e:
        # Top-level boundary: callers get an error dict, never an exception.
        # logger.exception keeps the traceback that logger.error dropped.
        logger.exception(f"ํŠธ๋ Œ๋“œ ๋ถ„์„ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
        return {
            "status": "error",
            "message": f"๋ถ„์„ ์ค‘ ์˜ค๋ฅ˜: {str(e)}"
        }
def convert_to_absolute_volume(df_trend, search_volumes):
    """
    Convert relative trend ratios into estimated absolute search volumes.

    For each keyword, the last row's ratio is paired with the keyword's
    current volume to derive a "volume per ratio point" factor, which is then
    applied to every row of that keyword.

    Args:
        df_trend (DataFrame): Trend rows with 'keyword' and 'ratio' columns.
            The last row of each keyword is taken as the current month.
        search_volumes (dict): Maps a keyword (with spaces removed) to a dict
            holding the current total volume. None is treated as empty.

    Returns:
        DataFrame: Copy of df_trend with an integer 'absolute_volume' column.
        Keywords whose current ratio or volume is not positive keep 0.
    """
    total_volume_key = "์ด๊ฒ€์ƒ‰๋Ÿ‰"

    df_result = df_trend.copy()
    df_result['absolute_volume'] = 0

    # A frame built from zero rows has no 'keyword' column; nothing to convert.
    if 'keyword' not in df_result.columns:
        return df_result

    volumes_by_keyword = search_volumes or {}

    for keyword in df_trend['keyword'].unique():
        mask = df_result['keyword'] == keyword

        # Ratio of the current month (last row of this keyword).
        current_ratio = df_result.loc[mask, 'ratio'].iloc[-1]

        # Lookup keys carry no spaces; tolerate a missing or None entry.
        volume_data = volumes_by_keyword.get(keyword.replace(" ", "")) or {}
        current_volume = volume_data.get(total_volume_key, 0) or 0

        if current_ratio > 0 and current_volume > 0:
            volume_per_percent = current_volume / current_ratio
            # Round before the integer cast: truncation turns float error
            # such as 0.9999999999999999 into an off-by-one result.
            df_result.loc[mask, 'absolute_volume'] = (
                df_result.loc[mask, 'ratio'] * volume_per_percent
            ).round().astype('int64')
            logger.info(f"'{keyword}': ํ˜„์žฌ ๋น„์œจ {current_ratio}%, ๊ฒ€์ƒ‰๋Ÿ‰ {current_volume:,}, 1%๋‹น {volume_per_percent:.0f}")
        else:
            logger.warning(f"'{keyword}': ๊ฒ€์ƒ‰๋Ÿ‰ ๋ณ€ํ™˜ ๋ถˆ๊ฐ€ (๋น„์œจ: {current_ratio}, ๊ฒ€์ƒ‰๋Ÿ‰: {current_volume})")

    return df_result
def create_trend_graph(df_trend, period):
    """
    Build the keyword trend line chart and return it as HTML.

    Args:
        df_trend (DataFrame): Trend rows with 'keyword', 'period' and
            'absolute_volume' columns; one line is drawn per keyword.
        period (str): Analysis window; "1year" selects the 1-year title text,
            any other value the 3-year title text.

    Returns:
        str: HTML for the chart, as produced by Figure.to_html with
        plotly.js referenced from the CDN and the div id "trend-graph".
    """
    fig = go.Figure()

    # One line per keyword; the palette wraps around past five keywords.
    colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7']
    for i, keyword in enumerate(df_trend['keyword'].unique()):
        keyword_data = df_trend[df_trend['keyword'] == keyword]
        fig.add_trace(go.Scatter(
            x=keyword_data['period'],
            y=keyword_data['absolute_volume'],
            mode='lines+markers',
            name=keyword,
            line=dict(color=colors[i % len(colors)], width=3),
            marker=dict(size=6),
            hovertemplate='<b>%{fullData.name}</b><br>' +
                          '๊ธฐ๊ฐ„: %{x}<br>' +
                          '๊ฒ€์ƒ‰๋Ÿ‰: %{y:,}<br>' +
                          '<extra></extra>'
        ))

    # No fixed width plus autosize lets the chart fill its container.
    period_text = "์ตœ๊ทผ 1๋…„" if period == "1year" else "์ตœ๊ทผ 3๋…„"
    fig.update_layout(
        title=f'ํ‚ค์›Œ๋“œ๋ณ„ ์›”๋ณ„ ๊ฒ€์ƒ‰๋Ÿ‰ ํŠธ๋ Œ๋“œ ({period_text})',
        xaxis_title='๊ธฐ๊ฐ„',
        yaxis_title='์›”๋ณ„ ๊ฒ€์ƒ‰๋Ÿ‰',
        hovermode='x unified',
        template='plotly_white',
        height=500,
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        ),
        width=None,
        autosize=True
    )

    # Thousands separators on the y axis. The Figure method is update_yaxes;
    # the previous update_yaxis call does not exist and raised AttributeError.
    fig.update_yaxes(tickformat=',')

    graph_html = fig.to_html(
        include_plotlyjs='cdn',
        div_id="trend-graph",
        config={'responsive': True}
    )
    return graph_html
def calculate_3year_growth_rate(volumes):
    """
    Compute the growth rate between the first and last third of a series.

    The series is split into thirds; the average of the first third is
    compared with the average of the last third.

    Args:
        volumes (list): Monthly search volumes; the leading entries are
            treated as the early period and the trailing ones as recent.

    Returns:
        float: Percentage change from the early average to the recent
        average, rounded to one decimal place. 0 when fewer than 6 values
        are given or when the early average is not positive.
    """
    month_count = len(volumes)
    # At least six months of data are required.
    if month_count < 6:
        return 0

    # Size of one third of the series, never less than one month.
    window = max(1, month_count // 3)

    head = volumes[:window]
    baseline = sum(head) / len(head)

    tail = volumes[-window:]
    latest = sum(tail) / len(tail)

    # A non-positive baseline cannot serve as a growth denominator.
    if not baseline > 0:
        return 0
    return round(((latest - baseline) / baseline) * 100, 1)
def analyze_trend_insights(df_trend):
    """
    Summarise each keyword's volume history into a small insight record.

    Args:
        df_trend (DataFrame): Trend rows with 'keyword', 'period' and
            'absolute_volume' columns.

    Returns:
        dict: Maps each keyword to a dict with 'max_volume', 'max_period',
        'min_volume', 'min_period', 'total_avg', 'growth_rate' (from
        calculate_3year_growth_rate) and 'total_months'.
    """
    def _summarise(rows):
        # rows: one keyword's data, already ordered by period.
        volume_col = rows['absolute_volume']

        # Peak and trough, each with the first period where it occurs.
        peak = volume_col.max()
        trough = volume_col.min()
        peak_period = rows.loc[volume_col == peak, 'period'].iloc[0]
        trough_period = rows.loc[volume_col == trough, 'period'].iloc[0]

        # Average over the whole window.
        mean_volume = volume_col.mean()

        # Growth rate over the full ordered series.
        monthly = volume_col.tolist()
        growth = calculate_3year_growth_rate(monthly)

        return {
            'max_volume': int(peak),
            'max_period': peak_period,
            'min_volume': int(trough),
            'min_period': trough_period,
            'total_avg': int(mean_volume),
            'growth_rate': growth,
            'total_months': len(monthly)
        }

    keyword_col = df_trend['keyword']
    return {
        name: _summarise(df_trend[keyword_col == name].sort_values('period'))
        for name in keyword_col.unique()
    }