"""Export monthly Google Trends interest per niche to a CSV file.

For every niche in ``NICHES_TO_TRACK`` the script requests the
interest-over-time series from Google Trends (via ``pytrends``), averages it
per calendar month and writes the result to ``OUTPUT_FILE``.
"""
import os
import random
import time
from datetime import datetime, timedelta

import pandas as pd
from pytrends.request import TrendReq

# --- CONFIGURATION ---
NICHES_TO_TRACK = ["fashion", "gaming", "fitness", "skincare", "finance", "travel"]
MONTHS_TO_FETCH = 24
OUTPUT_FILE = os.path.join(os.path.dirname(__file__), '..', 'data', 'thunderbird_market_trends.csv')

# How many times a single niche is requested before giving up on it. Only
# rate-limit (HTTP 429) failures are retried; any other error skips the niche.
MAX_ATTEMPTS_PER_NICHE = 2


def _fetch_niche_trend(pytrends, niche, timeframe):
    """Fetch the interest-over-time series for one niche.

    Args:
        pytrends: The ``TrendReq`` client used to issue the request.
        niche: Search term to query.
        timeframe: Timeframe string passed through to ``build_payload``.

    Returns:
        A DataFrame with the columns ``trend_score`` and ``niche`` (keeping
        the index returned by pytrends), or ``None`` when the response was
        empty, lacked a column for the niche, or the request failed.
    """
    for _ in range(MAX_ATTEMPTS_PER_NICHE):
        try:
            pytrends.build_payload([niche], cat=0, timeframe=timeframe, geo='', gprop='')
            interest_df = pytrends.interest_over_time()

            niche_df = None
            if not interest_df.empty and niche in interest_df:
                interest_df = interest_df.rename(columns={niche: 'trend_score'})
                interest_df['niche'] = niche
                niche_df = interest_df[['trend_score', 'niche']]

            # Long, randomized pause (5 to 12 seconds) after every completed
            # request so consecutive calls do not look like a burst.
            sleep_time = random.uniform(5, 12)
            print(f" - 😓 Sleeping for {sleep_time:.2f} seconds...")
            time.sleep(sleep_time)
            return niche_df
        except Exception as e:
            # Deliberately broad: one failing niche must not abort the export.
            if "429" in str(e):
                print(f" - 🛑 Hit rate limit hard for '{niche}'. Taking a long 2-minute break...")
                # Still blocked despite the client's own retries: wait two
                # minutes, then let the loop try this niche again instead of
                # silently dropping it.
                time.sleep(120)
            else:
                print(f" - ⚠️ An error occurred for '{niche}': {e}")
                return None
    # Every attempt was rate limited.
    return None


def get_google_trends_data() -> pd.DataFrame:
    """Fetch and aggregate Google Trends interest for all tracked niches.

    Returns:
        A DataFrame with one row per (month, niche) and the columns
        ``month``, ``niche``, ``trend_score`` (monthly mean) and
        ``successful_campaigns``. An empty DataFrame is returned when no
        niche yielded any data.

    Note:
        ``successful_campaigns`` is NOT fetched from anywhere: it is
        ``trend_score`` multiplied by a random factor in [0.5, 1.5], so it
        differs on every run.
    """
    print("\n🚀 Fetching REAL historical market interest from Google Trends (Slow & Safe Mode)...")

    # Increase retries for better resilience
    pytrends = TrendReq(hl='en-US', tz=360, retries=5, backoff_factor=1)

    # A month is approximated as 30 days, so the window is slightly shorter
    # than MONTHS_TO_FETCH calendar months.
    end_date = datetime.now()
    start_date = end_date - timedelta(days=MONTHS_TO_FETCH * 30)
    timeframe = f"{start_date.strftime('%Y-%m-%d')} {end_date.strftime('%Y-%m-%d')}"

    # Collect the per-niche frames and concatenate once at the end rather
    # than growing a DataFrame inside the loop.
    niche_frames = []
    for niche in NICHES_TO_TRACK:
        print(f" - Fetching trend data for '{niche}'...")
        niche_df = _fetch_niche_trend(pytrends, niche, timeframe)
        if niche_df is not None:
            niche_frames.append(niche_df)

    if not niche_frames:
        print("❌ CRITICAL: Could not fetch any data from Google Trends.")
        return pd.DataFrame()

    all_trends_df = pd.concat(niche_frames)

    # Relies on the index of the fetched frames being datetime-like.
    all_trends_df['month'] = all_trends_df.index.to_period('M')
    monthly_trends = all_trends_df.groupby(['month', 'niche'])['trend_score'].mean().reset_index()

    # Synthetic column: a random multiple of the trend score, not real data.
    monthly_trends['successful_campaigns'] = monthly_trends['trend_score'].apply(
        lambda x: x * random.uniform(0.5, 1.5)
    )

    print("✅ Successfully fetched and processed Google Trends data.")
    return monthly_trends


def main():
    """Run the export and write the aggregated trends to ``OUTPUT_FILE``."""
    print("--- Starting Project Thunderbird REAL Data Export ---")
    trends_df = get_google_trends_data()
    if trends_df.empty:
        return

    # Store the monthly period as plain text (e.g. "2024-01") in the CSV.
    trends_df['month'] = trends_df['month'].astype(str)

    # to_csv does not create missing parent directories.
    os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)
    trends_df.to_csv(OUTPUT_FILE, index=False)
    print(f"\n✅ Success! Real training data saved to: {OUTPUT_FILE}")


if __name__ == "__main__":
    main()