|
|
import os |
|
|
import pandas as pd |
|
|
from datetime import datetime, timedelta |
|
|
from pytrends.request import TrendReq |
|
|
import time |
|
|
import random |
|
|
|
|
|
|
|
|
# Search terms queried on Google Trends, one request per entry.
NICHES_TO_TRACK = [
    "fashion",
    "gaming",
    "fitness",
    "skincare",
    "finance",
    "travel",
]

# Size of the look-back window, in months.
MONTHS_TO_FETCH = 24

# Destination CSV: <directory of this file>/../data/thunderbird_market_trends.csv
OUTPUT_FILE = os.path.join(
    os.path.dirname(__file__),
    '..',
    'data',
    'thunderbird_market_trends.csv',
)
|
|
|
|
|
def _fetch_niche_trend(pytrends, niche: str, timeframe: str, max_attempts: int = 2):
    """Request the interest-over-time series for a single niche.

    A response whose error text contains "429" triggers a 2-minute pause and
    a retry, up to ``max_attempts`` requests in total. Any other error is
    reported and the niche is skipped.

    Args:
        pytrends: The ``TrendReq`` client used to issue the request.
        niche: Search term to query.
        timeframe: ``"YYYY-MM-DD YYYY-MM-DD"`` range passed to pytrends.
        max_attempts: Maximum number of requests made for this niche.

    Returns:
        A DataFrame with columns ``trend_score`` and ``niche`` (index as
        returned by pytrends), or ``None`` when no usable data was obtained.
    """
    for _ in range(max_attempts):
        try:
            pytrends.build_payload([niche], cat=0, timeframe=timeframe, geo='', gprop='')
            raw = pytrends.interest_over_time()

            frame = None
            if not raw.empty and niche in raw:
                # Keep only the score column (renamed) and tag rows with the niche.
                frame = raw[[niche]].rename(columns={niche: 'trend_score'}).assign(niche=niche)
        except Exception as e:
            # pytrends' exception classes are not imported in this file, so
            # rate limiting is recognised by the status code in the message.
            if "429" not in str(e):
                print(f" - β οΈ An error occurred for '{niche}': {e}")
                return None
            print(f" - π Hit rate limit hard for '{niche}'. Taking a long 2-minute break...")
            time.sleep(120)
            continue  # retry this niche instead of silently dropping it

        # Randomised pause after every successful request to stay under the
        # rate limit.
        sleep_time = random.uniform(5, 12)
        print(f" - π΄ Sleeping for {sleep_time:.2f} seconds...")
        time.sleep(sleep_time)
        return frame

    # Every attempt was rate limited.
    return None


def get_google_trends_data() -> pd.DataFrame:
    """Fetch Google Trends interest for each tracked niche and aggregate it monthly.

    One request is issued per entry of ``NICHES_TO_TRACK`` over a window of
    ``MONTHS_TO_FETCH * 30`` days ending now (a month is approximated as
    30 days). Niches that fail or return no data are skipped.

    Returns:
        A DataFrame with one row per (month, niche) and the columns
        ``month`` (monthly period), ``niche``, ``trend_score`` (mean score in
        that month) and ``successful_campaigns``. An empty DataFrame is
        returned when no niche yielded any data.

        ``successful_campaigns`` is NOT fetched data: it is ``trend_score``
        multiplied by a random factor drawn uniformly from [0.5, 1.5].
    """
    # NOTE(review): the leading glyphs in the status messages of this
    # function look mis-encoded (probably emoji) — confirm the file encoding.
    print("\nπ Fetching REAL historical market interest from Google Trends (Slow & Safe Mode)...")

    pytrends = TrendReq(hl='en-US', tz=360, retries=5, backoff_factor=1)

    end_date = datetime.now()
    start_date = end_date - timedelta(days=MONTHS_TO_FETCH * 30)
    timeframe = f"{start_date.strftime('%Y-%m-%d')} {end_date.strftime('%Y-%m-%d')}"

    # Collect per-niche frames and concatenate once at the end; growing a
    # DataFrame inside the loop re-copies all previous rows on every pass and
    # concatenating onto an empty frame is deprecated in recent pandas.
    frames = []
    for niche in NICHES_TO_TRACK:
        print(f" - Fetching trend data for '{niche}'...")
        frame = _fetch_niche_trend(pytrends, niche, timeframe)
        if frame is not None:
            frames.append(frame)

    if not frames:
        print("β CRITICAL: Could not fetch any data from Google Trends.")
        return pd.DataFrame()

    all_trends_df = pd.concat(frames)

    # Assumes pytrends returns a datetime index — TODO confirm.
    all_trends_df['month'] = all_trends_df.index.to_period('M')
    monthly_trends = (
        all_trends_df.groupby(['month', 'niche'])['trend_score'].mean().reset_index()
    )
    # Synthetic column: the monthly score scaled by a random factor per row.
    monthly_trends['successful_campaigns'] = monthly_trends['trend_score'].apply(
        lambda score: score * random.uniform(0.5, 1.5)
    )

    print("β Successfully fetched and processed Google Trends data.")
    return monthly_trends
|
|
|
|
|
def main() -> None:
    """Fetch the monthly trend table and write it to ``OUTPUT_FILE`` as CSV.

    Does nothing further when ``get_google_trends_data()`` returns an empty
    frame. The ``month`` column is converted to strings before writing, and
    the CSV is written without the index.
    """
    print("--- Starting Project Thunderbird REAL Data Export ---")

    trends_df = get_google_trends_data()
    if trends_df.empty:
        return

    trends_df['month'] = trends_df['month'].astype(str)

    # to_csv does not create missing parent directories; make sure the target
    # directory exists so a fresh checkout does not fail after the slow fetch.
    output_dir = os.path.dirname(OUTPUT_FILE)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    trends_df.to_csv(OUTPUT_FILE, index=False)

    # NOTE(review): the glyph after the newline looks mis-encoded (probably
    # an emoji) — confirm the file encoding.
    print(f"\nβ Success! Real training data saved to: {OUTPUT_FILE}")
|
|
|
|
|
# Script entry point: run the export only when executed directly, not on import.
if __name__ == "__main__":
    main()