Spaces:

amitbhatt6075
/

reachify-ai-service

Running

App Files Community

amitbhatt6075 committed 10 days ago

Commit

da61bf8

1 Parent(s): fd09d9b

feat: Implement real data sources for Pulse page

Browse files

Files changed (2) hide show

core/thunderbird_engine.py +59 -69
requirements.txt +0 -0

core/thunderbird_engine.py CHANGED Viewed

@@ -5,122 +5,112 @@ import json
 from datetime import datetime
 from newsapi import NewsApiClient
 from pytrends.request import TrendReq
 from typing import Dict, Any, Optional
-from core.utils import get_supabase_client # Assuming this helper exists
 # --- CONFIGURATION ---
 MODEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'models', 'thunderbird_market_predictor_v1.joblib')
 NEWS_API_KEY = os.getenv("NEWS_API_KEY")
 def get_external_trends() -> dict:
-    """
-    Attempts to fetch REAL data. Returns None for fields where data is unavailable.
-    """
     print("🚀 [Thunderbird Engine] Fetching REAL external trends...")
     results = { "news_headlines": [], "breakout_keyword": None, "trending_audio": None, "platform_shifts": None }
-    # NewsAPI
     if NEWS_API_KEY:
         try:
             newsapi = NewsApiClient(api_key=NEWS_API_KEY)
             top_headlines = newsapi.get_everything(q='("influencer marketing")', language='en', sort_by='relevancy', page_size=5)
             if top_headlines.get('articles'):
                 results["news_headlines"] = [{"title": a['title'], "url": a['url']} for a in top_headlines['articles']]
-        except Exception: pass
-    # Google Trends for Breakout Keyword
     try:
         pytrends = TrendReq(hl='en-US', tz=360)
         trending_df = pytrends.trending_searches(pn='united_states')
         if not trending_df.empty:
             results["breakout_keyword"] = trending_df.iloc[0,0]
-    except Exception: pass
-    # Platform Shifts (Real DB Call)
-    results["platform_shifts"] = get_platform_shifts()
     return results
-def get_platform_shifts() -> Dict[str, str]:
-    """
-    Calculates REAL 7-day shift in platform usage from Supabase.
-    """
-    print("   - Calculating REAL platform shifts from DB...")
-    shifts = { "instagram_shift": "0%", "tiktok_shift": "0%", "youtube_shift": "0%" }
-    try:
-        supabase = get_supabase_client()
-        response = supabase.rpc('get_platform_trend_data_last_14_days').execute()
-        if not response.data: return shifts
-        df = pd.DataFrame(response.data); df['date'] = pd.to_datetime(df['date'])
-        seven_days_ago = datetime.now() - pd.Timedelta(days=7)
-        recent = df[df['date'] >= seven_days_ago]; prev = df[df['date'] < seven_days_ago]
-        if prev.empty or recent.empty: return shifts
-        avg_recent = recent.groupby('platform')['usage_count'].mean()
-        avg_prev = prev.groupby('platform')['usage_count'].mean()
-        for p in ['instagram', 'tiktok', 'youtube']:
-            if p in avg_recent and p in avg_prev and avg_prev[p] > 0:
-                change = ((avg_recent[p] - avg_prev[p]) / avg_prev[p]) * 100
-                shifts[f"{p}_shift"] = f"{'+' if change > 0 else ''}{round(change)}%"
-        return shifts
-    except Exception:
-        return shifts
 def predict_niche_trends() -> dict:
-    """
-    Uses the REAL trained ML pipeline to predict future interest.
-    Handles feature name mismatch error.
-    """
     print("\n🚀 [Thunderbird Engine] Using REAL ML pipeline for predictions...")
     try:
-        # 1. Load the ENTIRE pipeline (preprocessor + model)
         pipeline = joblib.load(MODEL_PATH)
-        # 2. Get niche names from the encoder inside the pipeline
         encoder = pipeline.named_steps['preprocessor'].named_transformers_['cat']
         all_niches = [cat.replace('niche_', '') for cat in encoder.get_feature_names_out(['niche'])]
-        # 3. Prepare future dates for prediction
-        future_dates = pd.to_datetime(pd.date_range(start=datetime.now(), periods=12, freq='M'))
         predictions = {}
         for niche in all_niches:
-            # 4. Create a DataFrame WITH THE ORIGINAL FEATURE NAMES ('niche', 'month_of_year')
             future_df = pd.DataFrame({
-                'month_of_year': future_dates.month,
-                'niche': [niche] * 12,
-                'trend_score': 50  # Assume an average trend score for future prediction
             })
-            # 5. Use pipeline.predict(). It will handle the one-hot encoding internally.
-            predicted_values = pipeline.predict(future_df)
-            predictions[niche] = [
-                {"date": dt.strftime('%Y-%m'), "value": max(0, int(val))}
-                for dt, val in zip(future_dates, predicted_values)
-            ]
-        print(f"   - ✅ Successfully generated REAL predictions for niches: {list(predictions.keys())}")
         return {"trend_predictions": predictions}
     except Exception as e:
         print(f"   - ❌ REAL Prediction Failed: {e}. Chart will be empty.")
         return {"trend_predictions": {}}
-# --- LLM FUNCTION (No changes needed here) ---
 def decode_market_trend(topic: str, llm_instance) -> Dict[str, str]:
-    print(f"🧠 [Thunderbird] Decoding Trend with REAL AI: {topic}")
-    offline_response = {"summary": "AI Analyst is currently offline.", "impact": "Could not get real-time analysis.", "strategy": "Please try again in a few moments."}
     if not llm_instance: return offline_response
     today_date = datetime.now().strftime("%Y-%m-%d")
     prompt = f"[INST]You are PulseAI, a Strategy Director. Today is {today_date}. Analyze trend: \"{topic}\". Provide JSON with keys: \"summary\", \"impact\", \"strategy\".[/INST]"
     try:
         response = llm_instance(prompt, max_tokens=256, temperature=0.6, echo=False)
         text = response['choices'][0]['text'].strip()
-        start = text.find('{'); end = text.rfind('}') + 1
         if start != -1 and end != 0: return json.loads(text[start:end])
         else: raise ValueError("Invalid JSON from LLM")
     except Exception as e:

 from datetime import datetime
 from newsapi import NewsApiClient
 from pytrends.request import TrendReq
+from tiktok_scraper_without_watermark.scraper import Scraper
 from typing import Dict, Any, Optional
+from core.utils import get_supabase_client
 # --- CONFIGURATION ---
 MODEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'models', 'thunderbird_market_predictor_v1.joblib')
 NEWS_API_KEY = os.getenv("NEWS_API_KEY")
+def get_platform_shifts() -> Optional[Dict[str, str]]:
+    """Calculates REAL 7-day shift from Supabase."""
+    print("   - Calculating REAL platform shifts from DB...")
+    try:
+        supabase = get_supabase_client()
+        response = supabase.rpc('get_platform_trend_data_last_14_days').execute()
+        if not response.data or len(response.data) < 2: return None
+        df = pd.DataFrame(response.data); df['date'] = pd.to_datetime(df['date'])
+        seven_days_ago = datetime.now() - pd.Timedelta(days=7)
+        recent = df[df['date'] >= seven_days_ago]; prev = df[df['date'] < seven_days_ago]
+        if prev.empty or recent.empty: return None
+        avg_recent = recent.groupby('platform')['usage_count'].mean()
+        avg_prev = prev.groupby('platform')['usage_count'].mean()
+        shifts = {}
+        for p in ['instagram', 'tiktok', 'youtube']:
+            if p in avg_recent and p in avg_prev and avg_prev[p] > 0:
+                change = ((avg_recent[p] - avg_prev[p]) / avg_prev[p]) * 100
+                shifts[f"{p}_shift"] = f"{'+' if change > 0 else ''}{round(change)}%"
+        return shifts
+    except Exception as e:
+        print(f"   - ❌ DB Error calculating shifts: {e}")
+        return None
+def get_trending_audio_from_tiktok() -> Optional[Dict[str, str]]:
+    """BEST EFFORT: Scrapes TikTok to find a trending audio."""
+    print("   - Attempting to scrape REAL trending audio from TikTok...")
+    try:
+        scraper = Scraper()
+        trending_posts = scraper.trend(count=5)
+        for post in trending_posts:
+            if post.get('music'):
+                music = post['music']
+                return {
+                    "name": f"{music.get('title', 'Unknown')} - {music.get('author', 'Unknown')}",
+                    "cover_art_url": music.get('cover', 'https://via.placeholder.com/150')
+                }
+        return None
+    except Exception as e:
+        print(f"   - ⚠️ TikTok scraping failed: {e}")
+        return None
 def get_external_trends() -> dict:
+    """Fetches REAL data only."""
     print("🚀 [Thunderbird Engine] Fetching REAL external trends...")
     results = { "news_headlines": [], "breakout_keyword": None, "trending_audio": None, "platform_shifts": None }
     if NEWS_API_KEY:
         try:
             newsapi = NewsApiClient(api_key=NEWS_API_KEY)
             top_headlines = newsapi.get_everything(q='("influencer marketing")', language='en', sort_by='relevancy', page_size=5)
             if top_headlines.get('articles'):
                 results["news_headlines"] = [{"title": a['title'], "url": a['url']} for a in top_headlines['articles']]
+        except: pass
     try:
         pytrends = TrendReq(hl='en-US', tz=360)
         trending_df = pytrends.trending_searches(pn='united_states')
         if not trending_df.empty:
             results["breakout_keyword"] = trending_df.iloc[0,0]
+    except Exception as e:
+        print(f"   - ⚠️ Google Trends failed: {e}")
+    results["trending_audio"] = get_trending_audio_from_tiktok()
+    results["platform_shifts"] = get_platform_shifts()
     return results
 def predict_niche_trends() -> dict:
+    """Uses the REAL trained ML pipeline to predict future interest."""
     print("\n🚀 [Thunderbird Engine] Using REAL ML pipeline for predictions...")
     try:
         pipeline = joblib.load(MODEL_PATH)
         encoder = pipeline.named_steps['preprocessor'].named_transformers_['cat']
         all_niches = [cat.replace('niche_', '') for cat in encoder.get_feature_names_out(['niche'])]
+        future_dates = pd.to_datetime(pd.date_range(start=datetime.now(), periods=12, freq='ME')) # Fixed 'M' to 'ME'
         predictions = {}
         for niche in all_niches:
             future_df = pd.DataFrame({
+                'month_of_year': future_dates.month, 'niche': [niche] * 12, 'trend_score': 50
             })
+            predicted_values = pipeline.predict(future_df[['niche', 'trend_score', 'month_of_year']])
+            predictions[niche] = [{"date": dt.strftime('%Y-%m'), "value": max(0, int(val))} for dt, val in zip(future_dates, predicted_values)]
         return {"trend_predictions": predictions}
     except Exception as e:
         print(f"   - ❌ REAL Prediction Failed: {e}. Chart will be empty.")
         return {"trend_predictions": {}}
 def decode_market_trend(topic: str, llm_instance) -> Dict[str, str]:
+    """Decodes a keyword into a strategy with a clear failure message."""
+    offline_response = {"summary": "AI Analyst is currently offline.", "impact": "Could not get real-time analysis.", "strategy": "Please try again later."}
     if not llm_instance: return offline_response
     today_date = datetime.now().strftime("%Y-%m-%d")
     prompt = f"[INST]You are PulseAI, a Strategy Director. Today is {today_date}. Analyze trend: \"{topic}\". Provide JSON with keys: \"summary\", \"impact\", \"strategy\".[/INST]"
     try:
         response = llm_instance(prompt, max_tokens=256, temperature=0.6, echo=False)
         text = response['choices'][0]['text'].strip()
+        start, end = text.find('{'), text.rfind('}') + 1
         if start != -1 and end != 0: return json.loads(text[start:end])
         else: raise ValueError("Invalid JSON from LLM")
     except Exception as e:

requirements.txt CHANGED Viewed

Binary files a/requirements.txt and b/requirements.txt differ