# PMN / app.py
# Badumetsibb's picture
# Update app.py
# 15342b4 verified
# fusion_analysis_v3_diagnostic.py
import pandas as pd
import firebase_admin
from firebase_admin import credentials, db
import os
import json
# --- CONFIGURATION ---
# Credentials and endpoint come from the environment; each value is None
# when its variable is unset.
SA_KEY_JSON = os.getenv('FIRESTORE_SA_KEY')  # service-account key as JSON text
DB_URL = os.getenv('FIREBASE_DB_URL')  # passed to Firebase as 'databaseURL'

# Database reference names read by the analysis.
AGENT_SIGNALS_REF = 'signals_v2'
NEWS_SENTIMENT_REF = 'news_sentiment'
def initialize_firebase():
    """Initialize the default Firebase app once and report whether it is usable.

    The service-account key is parsed from ``SA_KEY_JSON`` and the database
    URL is taken from ``DB_URL``; both are module-level values read from the
    ``FIRESTORE_SA_KEY`` and ``FIREBASE_DB_URL`` environment variables.
    Problems are reported on stdout instead of being raised.

    Returns:
        bool: ``True`` if an app was already registered or this call
        initialized one; ``False`` if configuration is missing or
        initialization raised.
    """
    # ``_apps`` is a private attribute of firebase_admin; a non-empty value
    # is treated as "already initialized" so repeated calls are harmless.
    if firebase_admin._apps:
        return True

    # Check the configuration up front so an unset variable produces an
    # actionable message instead of a JSON TypeError or a later database
    # error that appears after a misleading success message.
    missing = [
        env_name
        for env_name, value in (
            ('FIRESTORE_SA_KEY', SA_KEY_JSON),
            ('FIREBASE_DB_URL', DB_URL),
        )
        if not value
    ]
    if missing:
        print(
            "❌ CRITICAL ERROR - Firebase initialization failed: "
            f"missing environment variable(s): {', '.join(missing)}"
        )
        return False

    try:
        cred_dict = json.loads(SA_KEY_JSON)
        cred = credentials.Certificate(cred_dict)
        firebase_admin.initialize_app(cred, {'databaseURL': DB_URL})
    except Exception as e:
        # Deliberately broad: any failure (bad JSON, rejected credentials,
        # SDK errors) is reported to the caller as a plain False.
        print(f"❌ CRITICAL ERROR - Firebase initialization failed: {e}")
        return False

    print("✅ Firebase connection successful.")
    return True
def fetch_data_from_firebase(ref_name):
    """Load one database reference into a DataFrame indexed by UTC timestamp.

    Each child of the reference becomes one row. The timestamp is read from
    the ``timestamp_entry`` column when present, otherwise from
    ``timestamp_published``. Rows whose timestamp is missing or cannot be
    parsed are dropped, and the result is sorted by time in ascending order.

    Args:
        ref_name: Path of the database reference to read.

    Returns:
        pandas.DataFrame: Rows indexed by a timezone-aware (UTC)
        ``timestamp`` index. An empty DataFrame is returned when the
        reference holds no data, when neither timestamp column exists, or
        when any error occurs while fetching or processing; the reason is
        printed to stdout.
    """
    try:
        data = db.reference(ref_name).get()

        # The Realtime Database can return array-like nodes (sequential
        # integer keys) as a list, possibly with None gaps. Convert it to a
        # position-keyed dict so it goes through the same path as keyed data.
        if isinstance(data, list):
            data = {
                position: record
                for position, record in enumerate(data)
                if record is not None
            }

        if not data:
            print(f"⚠️ No data found in '{ref_name}'.")
            return pd.DataFrame()

        df = pd.DataFrame.from_dict(data, orient='index')

        if 'timestamp_entry' in df.columns:
            timestamp_col = 'timestamp_entry'
        elif 'timestamp_published' in df.columns:
            timestamp_col = 'timestamp_published'
        else:
            print(f"❌ Timestamp column not found in '{ref_name}'.")
            return pd.DataFrame()

        # errors='coerce' turns unparseable values into NaT, which the second
        # dropna removes; assign() avoids mutating a filtered frame in place.
        df = (
            df.dropna(subset=[timestamp_col])
            .assign(
                timestamp=lambda frame: pd.to_datetime(
                    frame[timestamp_col], utc=True, errors='coerce'
                )
            )
            .dropna(subset=['timestamp'])
            .set_index('timestamp')
            .sort_index()
        )
        print(f"✅ Fetched and validated {len(df)} records from '{ref_name}'.")
        return df
    except Exception as e:
        # Best-effort boundary: callers treat an empty frame as "no data".
        print(f"❌ Error fetching data from '{ref_name}': {e}")
        return pd.DataFrame()
def run_fusion_analysis(*, tolerance=pd.Timedelta(hours=2)):
    """Fetch agent signals and news sentiment, then join them in time.

    Each agent signal is matched with the most recent news record at or
    before it (``direction='backward'``), provided that record is no older
    than ``tolerance``. Signals without a match are discarded. Progress and
    outcomes are printed to stdout.

    Args:
        tolerance: Maximum age of a news record relative to an agent signal
            for the two to be matched. Anything accepted by
            ``pandas.Timedelta`` works. Defaults to 2 hours.

    Returns:
        None. The function returns early (after printing the reason) when
        Firebase cannot be initialized, a data source is empty, the news
        data lacks a required column, or no signal could be matched.
    """
    if not initialize_firebase():
        return

    agent_df = fetch_data_from_firebase(AGENT_SIGNALS_REF)
    news_df = fetch_data_from_firebase(NEWS_SENTIMENT_REF)
    if agent_df.empty or news_df.empty:
        print("\nCould not proceed. One or both data sources are empty.")
        return

    # Diagnostic: show whether the two time ranges overlap at all.
    print("\n--- Time Range Diagnostics ---")
    print(f"Agent Signals Span From: {agent_df.index.min()} to {agent_df.index.max()}")
    print(f"News Headlines Span From: {news_df.index.min()} to {news_df.index.max()}")

    # Report a schema problem in the file's usual print-and-return style
    # instead of letting the column selection below raise KeyError.
    news_columns = ['sentiment', 'confidence_score', 'headline']
    missing = [col for col in news_columns if col not in news_df.columns]
    if missing:
        print(
            f"\n❌ Could not proceed. '{NEWS_SENTIMENT_REF}' is missing "
            f"column(s): {', '.join(missing)}"
        )
        return

    print("\n--- Fusing Agent Decisions with News Sentiment ---")

    # Derive the label used in messages from the actual tolerance so the
    # text cannot drift from the merge window (2 hours -> "2-hour").
    tolerance = pd.Timedelta(tolerance)
    window = f"{tolerance / pd.Timedelta(hours=1):g}-hour"

    # NOTE(review): if the agent data also has a column named 'sentiment',
    # 'confidence_score' or 'headline', merge_asof will suffix the duplicates
    # and the dropna below will not find 'sentiment' - confirm the schemas.
    fused_df = pd.merge_asof(
        left=agent_df,
        right=news_df[news_columns],
        left_index=True,
        right_index=True,
        direction='backward',
        tolerance=tolerance,
    )
    # Rows with no news record inside the window have NaN sentiment.
    fused_df = fused_df.dropna(subset=['sentiment'])

    if fused_df.empty:
        print(f"\nAnalysis Result: Even with a {window} window, no agent signals were found to have occurred after a news headline.")
        print("Recommendation: Let the news sentiment agent run for a longer period to collect more data points.")
        return

    print(f"✅ Fusion complete. Matched {len(fused_df)} agent signals with news headlines within a {window} window.")

    # --- THE INTELLIGENCE LABORATORY (No changes below) ---
    print("\n--- Advanced Analysis Results ---")
    # ... (The rest of the analysis code is the same) ...
# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run_fusion_analysis()