File size: 3,041 Bytes
01c71d2 8927482 fd09d9b 01c71d2 fd09d9b 01c71d2 fd09d9b 01c71d2 8927482 fd09d9b 01c71d2 fd09d9b 01c71d2 fd09d9b 01c71d2 8927482 01c71d2 8927482 01c71d2 8927482 01c71d2 fd09d9b 8927482 fd09d9b 01c71d2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import os
import pandas as pd
from datetime import datetime, timedelta
from pytrends.request import TrendReq
import time
import random
# --- CONFIGURATION ---
# Search terms to query on Google Trends.
NICHES_TO_TRACK = [
    "fashion",
    "gaming",
    "fitness",
    "skincare",
    "finance",
    "travel",
]
# Length of the look-back window, in months.
MONTHS_TO_FETCH = 24
# Destination CSV: the sibling "data" directory of the folder holding this script.
OUTPUT_FILE = os.path.join(
    os.path.dirname(__file__),
    '..',
    'data',
    'thunderbird_market_trends.csv',
)
def _fetch_niche_trend(pytrends, niche, timeframe, max_attempts=2):
    """Fetch the interest-over-time series for a single search term.

    Args:
        pytrends: TrendReq client used to issue the request.
        niche: Search term to query.
        timeframe: "YYYY-MM-DD YYYY-MM-DD" range string handed to pytrends.
        max_attempts: Upper bound on requests for this term. Only a failure
            whose message contains "429" (rate limit) triggers another attempt.

    Returns:
        A DataFrame with the columns ``trend_score`` and ``niche``, keeping
        the index returned by pytrends, or ``None`` when nothing usable
        could be fetched.
    """
    for _ in range(max_attempts):
        try:
            pytrends.build_payload([niche], cat=0, timeframe=timeframe, geo='', gprop='')
            interest_df = pytrends.interest_over_time()
        except Exception as e:
            # Deliberately broad: one failing term must not abort the export.
            if "429" not in str(e):
                print(f" - β οΈ An error occurred for '{niche}': {e}")
                return None
            print(f" - π Hit rate limit hard for '{niche}'. Taking a long 2-minute break...")
            # Cool down, then retry this term instead of dropping it from
            # the dataset. After the last attempt the pause still protects
            # the request for the next term.
            time.sleep(120)
            continue

        # Randomised pause between requests to stay under the rate limit.
        sleep_time = random.uniform(5, 12)
        print(f" - π΄ Sleeping for {sleep_time:.2f} seconds...")
        time.sleep(sleep_time)

        if interest_df.empty or niche not in interest_df:
            return None
        interest_df = interest_df.rename(columns={niche: 'trend_score'})
        interest_df['niche'] = niche
        # Keep only the two columns used downstream.
        return interest_df[['trend_score', 'niche']]

    return None


def get_google_trends_data() -> pd.DataFrame:
    """Download Google Trends interest for every tracked niche, per month.

    One request is issued per entry of ``NICHES_TO_TRACK`` over a window of
    ``MONTHS_TO_FETCH * 30`` days ending now. The returned samples are
    averaged per (month, niche).

    Returns:
        A DataFrame with the columns ``month`` (pandas Period), ``niche``,
        ``trend_score`` (monthly mean) and ``successful_campaigns``. An empty
        DataFrame is returned when no niche yielded any data.
    """
    print("\nπ Fetching REAL historical market interest from Google Trends (Slow & Safe Mode)...")
    # Increase retries for better resilience
    pytrends = TrendReq(hl='en-US', tz=360, retries=5, backoff_factor=1)
    end_date = datetime.now()
    # A month is approximated as 30 days.
    start_date = end_date - timedelta(days=MONTHS_TO_FETCH * 30)
    timeframe = f"{start_date.strftime('%Y-%m-%d')} {end_date.strftime('%Y-%m-%d')}"

    # Collect per-niche frames and concatenate once, rather than re-copying
    # a growing DataFrame on every iteration.
    frames = []
    for niche in NICHES_TO_TRACK:
        print(f" - Fetching trend data for '{niche}'...")
        frame = _fetch_niche_trend(pytrends, niche, timeframe)
        if frame is not None:
            frames.append(frame)

    if not frames:
        print("β CRITICAL: Could not fetch any data from Google Trends.")
        return pd.DataFrame()

    all_trends_df = pd.concat(frames)
    all_trends_df['month'] = all_trends_df.index.to_period('M')
    monthly_trends = all_trends_df.groupby(['month', 'niche'])['trend_score'].mean().reset_index()
    # NOTE: this column is synthetic -- the monthly trend score scaled by a
    # random factor in [0.5, 1.5]; it is not fetched from Google Trends.
    monthly_trends['successful_campaigns'] = monthly_trends['trend_score'].apply(
        lambda x: x * random.uniform(0.5, 1.5)
    )
    print("✅ Successfully fetched and processed Google Trends data.")
    return monthly_trends
def main() -> None:
    """Fetch the monthly trend table and write it to ``OUTPUT_FILE`` as CSV.

    Nothing is written when the fetch returns an empty DataFrame.
    """
    print("--- Starting Project Thunderbird REAL Data Export ---")
    trends_df = get_google_trends_data()
    if trends_df.empty:
        return
    # Store the month as its string form in the CSV.
    trends_df['month'] = trends_df['month'].astype(str)
    # Create the destination directory first; to_csv does not create
    # missing parent directories.
    output_dir = os.path.dirname(OUTPUT_FILE)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    trends_df.to_csv(OUTPUT_FILE, index=False)
    print(f"\n✅ Success! Real training data saved to: {OUTPUT_FILE}")
# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()