# fusion_analysis_v3_diagnostic.py
"""Diagnostic script that fuses agent signals with news sentiment.

Both datasets are read from a Firebase Realtime Database, indexed by a UTC
timestamp, and joined with ``pandas.merge_asof`` so that every agent signal
is paired with the most recent news headline inside a tolerance window.
"""
import json
import os

import pandas as pd
import firebase_admin
from firebase_admin import credentials, db

# --- CONFIGURATION ---
SA_KEY_JSON = os.environ.get('FIRESTORE_SA_KEY')
DB_URL = os.environ.get('FIREBASE_DB_URL')
AGENT_SIGNALS_REF = 'signals_v2'
NEWS_SENTIMENT_REF = 'news_sentiment'


def initialize_firebase():
    """Initialize the default Firebase app if it is not already initialized.

    Credentials are parsed from the JSON text in ``SA_KEY_JSON`` and the
    database URL is taken from ``DB_URL`` (both read from the environment
    at import time).

    Returns:
        bool: True if an app already exists or initialization succeeded,
        False if configuration is missing or initialization raised.
    """
    if firebase_admin._apps:
        return True

    # Fail early with an actionable message instead of the opaque TypeError
    # json.loads(None) raises, or a later failure when the database URL is
    # first needed.
    if not SA_KEY_JSON or not DB_URL:
        print(
            "❌ CRITICAL ERROR - Firebase initialization failed: "
            "FIRESTORE_SA_KEY and FIREBASE_DB_URL environment variables "
            "must both be set."
        )
        return False

    try:
        cred_dict = json.loads(SA_KEY_JSON)
        cred = credentials.Certificate(cred_dict)
        firebase_admin.initialize_app(cred, {'databaseURL': DB_URL})
        print("✅ Firebase connection successful.")
        return True
    except Exception as e:
        print(f"❌ CRITICAL ERROR - Firebase initialization failed: {e}")
        return False


def fetch_data_from_firebase(ref_name):
    """Fetch a Firebase reference and return it as a time-indexed DataFrame.

    The records are loaded with one row per top-level key. The timestamp is
    taken from ``timestamp_entry`` when that column exists, otherwise from
    ``timestamp_published``. Rows whose timestamp is missing or cannot be
    parsed are dropped; the result is indexed by a UTC ``timestamp`` index
    and sorted ascending.

    Args:
        ref_name: Path of the database reference to read.

    Returns:
        pandas.DataFrame: The validated records, or an empty DataFrame when
        the reference holds no data, has no timestamp column, or any error
        occurs while fetching/processing.
    """
    try:
        ref = db.reference(ref_name)
        data = ref.get()
        if not data:
            print(f"⚠️ No data found in '{ref_name}'.")
            return pd.DataFrame()

        df = pd.DataFrame.from_dict(data, orient='index')

        timestamp_col = (
            'timestamp_entry'
            if 'timestamp_entry' in df.columns
            else 'timestamp_published'
        )
        if timestamp_col not in df.columns:
            print(f"❌ Timestamp column not found in '{ref_name}'.")
            return pd.DataFrame()

        df = df.dropna(subset=[timestamp_col])
        # errors='coerce' turns unparseable values into NaT so they can be
        # dropped instead of aborting the whole fetch.
        df['timestamp'] = pd.to_datetime(
            df[timestamp_col], utc=True, errors='coerce'
        )
        df = df.dropna(subset=['timestamp'])
        # merge_asof requires a sorted key, hence the sort_index().
        df = df.set_index('timestamp').sort_index()

        print(f"✅ Fetched and validated {len(df)} records from '{ref_name}'.")
        return df
    except Exception as e:
        print(f"❌ Error fetching data from '{ref_name}': {e}")
        return pd.DataFrame()


def run_fusion_analysis(tolerance=pd.Timedelta(hours=2)):
    """Fetch, fuse, and analyze agent signals against news sentiment.

    Each agent signal is matched (``direction='backward'``) with the latest
    news record at or before it, provided the gap does not exceed
    ``tolerance``. Signals without a match are discarded.

    Args:
        tolerance: Maximum allowed gap between a news record and the agent
            signal that follows it. Anything accepted by ``pandas.Timedelta``
            works. Defaults to 2 hours (increased from 30 minutes for
            diagnostic purposes).

    Returns:
        None. Progress and results are printed.
    """
    if not initialize_firebase():
        return

    agent_df = fetch_data_from_firebase(AGENT_SIGNALS_REF)
    news_df = fetch_data_from_firebase(NEWS_SENTIMENT_REF)

    if agent_df.empty or news_df.empty:
        print("\nCould not proceed. One or both data sources are empty.")
        return

    # Guard the column selection below: a missing column would otherwise
    # surface as an unhandled KeyError.
    news_columns = ['sentiment', 'confidence_score', 'headline']
    missing = [col for col in news_columns if col not in news_df.columns]
    if missing:
        print(
            f"❌ News data from '{NEWS_SENTIMENT_REF}' is missing required "
            f"column(s): {', '.join(missing)}."
        )
        return

    tolerance = pd.Timedelta(tolerance)
    # Renders the default as "2-hour", matching the original messages.
    window_label = f"{tolerance / pd.Timedelta(hours=1):g}-hour"

    # --- DIAGNOSTIC STEP ---
    print("\n--- Time Range Diagnostics ---")
    print(f"Agent Signals Span From: {agent_df.index.min()} to {agent_df.index.max()}")
    print(f"News Headlines Span From: {news_df.index.min()} to {news_df.index.max()}")
    # -----------------------

    print("\n--- Fusing Agent Decisions with News Sentiment ---")

    # A wider tolerance reveals whether there is any overlap at all; if it
    # finds matches, the events are simply not close enough together for a
    # tighter window.
    fused_df = pd.merge_asof(
        left=agent_df,
        right=news_df[news_columns],
        left_index=True,
        right_index=True,
        direction='backward',
        tolerance=tolerance,
    )

    # Rows with no news record inside the window have NaN sentiment.
    fused_df = fused_df.dropna(subset=['sentiment'])

    if fused_df.empty:
        print(
            f"\nAnalysis Result: Even with a {window_label} window, no agent "
            "signals were found to have occurred after a news headline."
        )
        print(
            "Recommendation: Let the news sentiment agent run for a longer "
            "period to collect more data points."
        )
        return

    print(
        f"✅ Fusion complete. Matched {len(fused_df)} agent signals with "
        f"news headlines within a {window_label} window."
    )

    # --- THE INTELLIGENCE LABORATORY (No changes below) ---
    print("\n--- Advanced Analysis Results ---")
    # ... (The rest of the analysis code is the same) ...


if __name__ == "__main__":
    run_fusion_analysis()