File size: 3,041 Bytes
01c71d2
 
 
 
 
 
 
8927482
 
fd09d9b
01c71d2
 
 
fd09d9b
01c71d2
fd09d9b
 
01c71d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8927482
fd09d9b
 
 
01c71d2
fd09d9b
01c71d2
 
fd09d9b
 
 
 
 
01c71d2
 
 
8927482
01c71d2
 
 
 
8927482
 
01c71d2
 
 
 
8927482
01c71d2
fd09d9b
8927482
fd09d9b
 
01c71d2
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import pandas as pd
from datetime import datetime, timedelta
from pytrends.request import TrendReq
import time
import random

# --- CONFIGURATION ---
# Search terms queried on Google Trends, one request per term.
NICHES_TO_TRACK = [
    "fashion",
    "gaming",
    "fitness",
    "skincare",
    "finance",
    "travel",
]

# Length of the history window requested from Google Trends, in months.
MONTHS_TO_FETCH = 24

# CSV destination: the 'data' directory that sits next to this script's directory.
OUTPUT_FILE = os.path.join(
    os.path.dirname(__file__),
    os.pardir,
    'data',
    'thunderbird_market_trends.csv',
)

def _fetch_niche_trend(pytrends, niche: str, timeframe: str):
    """Request the interest-over-time series for a single niche.

    Args:
        pytrends: The ``TrendReq`` client used to issue the request.
        niche: Search term to query.
        timeframe: ``"YYYY-MM-DD YYYY-MM-DD"`` window passed to ``build_payload``.

    Returns:
        A DataFrame with the columns ``trend_score`` and ``niche`` (index as
        returned by ``interest_over_time()``), or ``None`` when the response is
        empty or has no column for the niche.
    """
    pytrends.build_payload([niche], cat=0, timeframe=timeframe, geo='', gprop='')
    interest_df = pytrends.interest_over_time()

    if interest_df.empty or niche not in interest_df:
        return None

    interest_df = interest_df.rename(columns={niche: 'trend_score'})
    interest_df['niche'] = niche
    return interest_df[['trend_score', 'niche']]


def get_google_trends_data() -> pd.DataFrame:
    """Fetch Google Trends interest for every tracked niche and aggregate it by month.

    Each niche in ``NICHES_TO_TRACK`` is requested separately over the last
    ``MONTHS_TO_FETCH`` calendar months, with a random pause between requests.
    A request that fails with a "429" error is retried once after a two-minute
    break; any other failure is logged and that niche is skipped.

    Returns:
        A DataFrame with the columns ``month`` (monthly period), ``niche``,
        ``trend_score`` (mean score within the month) and
        ``successful_campaigns``. An empty DataFrame is returned when no niche
        yielded any data.

    Note:
        ``successful_campaigns`` is NOT fetched from anywhere: it is the
        trend score multiplied by a random factor in [0.5, 1.5], so it differs
        on every run.
    """
    print("\n🚀 Fetching REAL historical market interest from Google Trends (Slow & Safe Mode)...")

    # Increase retries for better resilience
    pytrends = TrendReq(hl='en-US', tz=360, retries=5, backoff_factor=1)

    end_date = datetime.now()
    # Calendar-month offset: "months * 30 days" comes up short of the intended window.
    start_date = pd.Timestamp(end_date) - pd.DateOffset(months=MONTHS_TO_FETCH)
    timeframe = f"{start_date.strftime('%Y-%m-%d')} {end_date.strftime('%Y-%m-%d')}"

    # Collect per-niche frames and concatenate once, instead of re-copying the
    # accumulated frame on every iteration.
    niche_frames = []
    last_position = len(NICHES_TO_TRACK) - 1

    for position, niche in enumerate(NICHES_TO_TRACK):
        print(f"   - Fetching trend data for '{niche}'...")

        # Two attempts at most: the second one only happens after a rate-limit break.
        for attempt in range(2):
            try:
                niche_frame = _fetch_niche_trend(pytrends, niche, timeframe)
            except Exception as e:
                if "429" in str(e) and attempt == 0:
                    print(f"   - 🛑 Hit rate limit hard for '{niche}'. Taking a long 2-minute break...")
                    time.sleep(120)  # still blocked: wait two minutes, then retry this niche
                    continue
                print(f"   - ⚠️ An error occurred for '{niche}': {e}")
                break

            if niche_frame is not None:
                niche_frames.append(niche_frame)
            break

        # Long, randomised pause (5 to 12 seconds) between requests to stay under
        # the rate limit; pointless after the final niche, so skipped there.
        if position < last_position:
            sleep_time = random.uniform(5, 12)
            print(f"   - 😴 Sleeping for {sleep_time:.2f} seconds...")
            time.sleep(sleep_time)

    if not niche_frames:
        print("❌ CRITICAL: Could not fetch any data from Google Trends.")
        return pd.DataFrame()

    all_trends_df = pd.concat(niche_frames)

    # Bucket every observation into its calendar month and average per niche.
    all_trends_df['month'] = all_trends_df.index.to_period('M')
    monthly_trends = all_trends_df.groupby(['month', 'niche'])['trend_score'].mean().reset_index()

    # Synthetic column: trend score scaled by a random factor, not real campaign data.
    monthly_trends['successful_campaigns'] = monthly_trends['trend_score'].apply(lambda x: x * random.uniform(0.5, 1.5))

    print("✅ Successfully fetched and processed Google Trends data.")
    return monthly_trends

def main():
    """Fetch the Google Trends data and write it to ``OUTPUT_FILE`` as CSV.

    Does nothing further when ``get_google_trends_data()`` returns an empty
    frame. The output directory is created if it does not exist yet, because
    ``DataFrame.to_csv`` does not create missing parent directories.
    """
    print("--- Starting Project Thunderbird REAL Data Export ---")
    trends_df = get_google_trends_data()
    if trends_df.empty:
        return

    # Store the month as a plain string so it serialises cleanly to CSV.
    trends_df['month'] = trends_df['month'].astype(str)

    # abspath collapses the '..' segment before the directory is created.
    output_dir = os.path.abspath(os.path.dirname(OUTPUT_FILE))
    os.makedirs(output_dir, exist_ok=True)

    trends_df.to_csv(OUTPUT_FILE, index=False)
    print(f"\n✅ Success! Real training data saved to: {OUTPUT_FILE}")

# Run the export only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()