Spaces:

Badumetsibb
/

PMN

Runtime error

File size: 3,939 Bytes

# fusion_analysis_v3_diagnostic.py
import pandas as pd
import firebase_admin
from firebase_admin import credentials, db
import os
import json

# --- CONFIGURATION ---
# Service-account JSON and database URL are read from the environment;
# each is None when its variable is not set.
SA_KEY_JSON = os.getenv('FIRESTORE_SA_KEY')
DB_URL = os.getenv('FIREBASE_DB_URL')

# Names of the database references read by the analysis.
AGENT_SIGNALS_REF = 'signals_v2'
NEWS_SENTIMENT_REF = 'news_sentiment'

def initialize_firebase():
    """Initialize the Firebase app once and report whether it is usable.

    Uses the module-level ``SA_KEY_JSON`` (service-account JSON text) and
    ``DB_URL`` (database URL), both of which are read from the environment.

    Returns:
        bool: ``True`` if an app was already registered or initialization
        succeeded; ``False`` if a required setting is missing or the SDK
        raised during initialization. Failures are printed, never raised.
    """
    # ``_apps`` is the SDK's private app registry; a non-empty registry means
    # an earlier call already initialized Firebase, so there is nothing to do.
    if firebase_admin._apps:
        return True

    # Check configuration up front: a missing key would otherwise surface as
    # an opaque json.loads(None) error, and a missing URL would only fail
    # later, when the database is first accessed.
    missing = [
        env_name
        for env_name, value in (
            ('FIRESTORE_SA_KEY', SA_KEY_JSON),
            ('FIREBASE_DB_URL', DB_URL),
        )
        if not value
    ]
    if missing:
        print(
            "❌ CRITICAL ERROR - Firebase initialization failed: "
            f"missing environment variable(s): {', '.join(missing)}"
        )
        return False

    try:
        cred_dict = json.loads(SA_KEY_JSON)
        cred = credentials.Certificate(cred_dict)
        firebase_admin.initialize_app(cred, {'databaseURL': DB_URL})
    except Exception as e:
        # Deliberately broad: this is the script's startup boundary and any
        # failure here (bad JSON, bad credentials, SDK error) is reported
        # the same way.
        print(f"❌ CRITICAL ERROR - Firebase initialization failed: {e}")
        return False

    print("✅ Firebase connection successful.")
    return True

def fetch_data_from_firebase(ref_name):
    """Load one Firebase reference into a time-indexed DataFrame.

    Args:
        ref_name: Name of the database reference to read.

    Returns:
        A DataFrame indexed by a UTC ``timestamp`` and sorted by that index.
        An empty DataFrame is returned when the reference holds no data,
        when neither ``timestamp_entry`` nor ``timestamp_published`` is
        present, or when any exception occurs (the problem is printed
        rather than raised).
    """
    try:
        payload = db.reference(ref_name).get()
        if not payload:
            print(f"⚠️  No data found in '{ref_name}'.")
            return pd.DataFrame()

        frame = pd.DataFrame.from_dict(payload, orient='index')

        # Prefer the entry timestamp; fall back to the publication timestamp.
        stamp_field = next(
            (
                candidate
                for candidate in ('timestamp_entry', 'timestamp_published')
                if candidate in frame.columns
            ),
            None,
        )
        if stamp_field is None:
            print(f"❌ Timestamp column not found in '{ref_name}'.")
            return pd.DataFrame()

        # Drop rows with no raw timestamp, then rows whose value could not
        # be parsed (errors='coerce' turns those into NaT).
        frame.dropna(subset=[stamp_field], inplace=True)
        frame['timestamp'] = pd.to_datetime(frame[stamp_field], utc=True, errors='coerce')
        frame.dropna(subset=['timestamp'], inplace=True)

        frame = frame.set_index('timestamp').sort_index()
        print(f"✅ Fetched and validated {len(frame)} records from '{ref_name}'.")
        return frame
    except Exception as err:
        print(f"❌ Error fetching data from '{ref_name}': {err}")
        return pd.DataFrame()

def run_fusion_analysis():
    """Fetch agent signals and news sentiment, then fuse them by time.

    Each agent signal is paired with the most recent news record at or
    before it, provided that record is no more than two hours older.
    Signals without such a match are discarded. Progress, diagnostics and
    every failure reason are printed; the function returns ``None`` in all
    cases.
    """
    if not initialize_firebase():
        return

    agent_df = fetch_data_from_firebase(AGENT_SIGNALS_REF)
    news_df = fetch_data_from_firebase(NEWS_SENTIMENT_REF)

    if agent_df.empty or news_df.empty:
        print("\nCould not proceed. One or both data sources are empty.")
        return

    # Validate the news columns before selecting them: indexing with a
    # missing label raises KeyError, which would abort the script with a
    # traceback instead of a readable message.
    news_columns = ['sentiment', 'confidence_score', 'headline']
    missing_columns = [col for col in news_columns if col not in news_df.columns]
    if missing_columns:
        print(
            f"\n❌ Could not proceed. '{NEWS_SENTIMENT_REF}' is missing "
            f"required column(s): {', '.join(missing_columns)}"
        )
        return

    # --- DIAGNOSTIC STEP ---
    print("\n--- Time Range Diagnostics ---")
    print(f"Agent Signals Span From: {agent_df.index.min()} to {agent_df.index.max()}")
    print(f"News Headlines Span From: {news_df.index.min()} to {news_df.index.max()}")
    # -----------------------

    print("\n--- Fusing Agent Decisions with News Sentiment ---")

    # --- INCREASED TOLERANCE FOR TESTING ---
    # The tolerance was widened from 30 minutes to 2 hours. If this finds
    # matches, it confirms the events are just not close enough together.
    # NOTE(review): if the agent data also carries a 'sentiment' column,
    # merge_asof will suffix both copies and the dropna below will not find
    # 'sentiment' — confirm the agent schema.
    fused_df = pd.merge_asof(
        left=agent_df,
        right=news_df[news_columns],
        left_index=True,
        right_index=True,
        direction='backward',
        tolerance=pd.Timedelta(hours=2),
    )

    # Keep only the signals that were matched to a news record.
    fused_df.dropna(subset=['sentiment'], inplace=True)

    if fused_df.empty:
        print("\nAnalysis Result: Even with a 2-hour window, no agent signals were found to have occurred after a news headline.")
        print("Recommendation: Let the news sentiment agent run for a longer period to collect more data points.")
        return

    print(f"✅ Fusion complete. Matched {len(fused_df)} agent signals with news headlines within a 2-hour window.")

    # --- THE INTELLIGENCE LABORATORY (No changes below) ---
    print("\n--- Advanced Analysis Results ---")
    # ... (The rest of the analysis code is the same) ...

# Run the analysis only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    run_fusion_analysis()