Spaces:

Badumetsibb
/

PMN

Runtime error

App Files Files Community

PMN / app.py

Badumetsibb

Update app.py

15342b4 verified 5 months ago

raw

history blame contribute delete

3.94 kB

	# fusion_analysis_v3_diagnostic.py
	import pandas as pd
	import firebase_admin
	from firebase_admin import credentials, db
	import os
	import json

	# --- CONFIGURATION ---
	# Service-account key as a JSON string, read from the FIRESTORE_SA_KEY
	# environment variable (None when unset). It is parsed with json.loads and
	# handed to credentials.Certificate in initialize_firebase().
	SA_KEY_JSON = os.environ.get('FIRESTORE_SA_KEY')
	# Database URL, read from the FIREBASE_DB_URL environment variable (None when
	# unset). Passed to firebase_admin.initialize_app as the 'databaseURL' option.
	DB_URL = os.environ.get('FIREBASE_DB_URL')
	# Database reference name fetched as the agent-signal data set.
	AGENT_SIGNALS_REF = 'signals_v2'
	# Database reference name fetched as the news-sentiment data set.
	NEWS_SENTIMENT_REF = 'news_sentiment'

	def initialize_firebase():
	    """Initialize the Firebase app unless one is already registered.

	    Uses the module-level ``SA_KEY_JSON`` (service-account key as a JSON
	    string) and ``DB_URL`` values, both read from the environment.

	    Returns:
	        bool: True when an app was already registered or initialization
	        succeeded; False when configuration is missing or initialization
	        raised. The reason for a failure is printed.
	    """
	    # A non-empty app registry means a previous call already initialized.
	    if firebase_admin._apps:
	        return True

	    # Report absent configuration by name instead of letting json.loads(None)
	    # fail with an unrelated-looking TypeError, or a missing URL fail later.
	    missing = [
	        env_name
	        for env_name, value in (
	            ('FIRESTORE_SA_KEY', SA_KEY_JSON),
	            ('FIREBASE_DB_URL', DB_URL),
	        )
	        if not value
	    ]
	    if missing:
	        print(
	            "❌ CRITICAL ERROR - Firebase initialization failed: "
	            f"missing environment variable(s): {', '.join(missing)}"
	        )
	        return False

	    try:
	        cred_dict = json.loads(SA_KEY_JSON)
	        cred = credentials.Certificate(cred_dict)
	        firebase_admin.initialize_app(cred, {'databaseURL': DB_URL})
	    except Exception as e:
	        # Top-level boundary: any failure here (bad JSON, invalid key, SDK
	        # error) is reported and turned into a False result.
	        print(f"❌ CRITICAL ERROR - Firebase initialization failed: {e}")
	        return False

	    print("✅ Firebase connection successful.")
	    return True

	def fetch_data_from_firebase(ref_name):
	    """Fetch a database reference and return it as a time-indexed DataFrame.

	    Rows are built from the children of ``ref_name``. The timestamp is taken
	    from the 'timestamp_entry' column when present, otherwise from
	    'timestamp_published'. Rows whose timestamp is missing or cannot be
	    parsed are dropped, and the result is indexed by a UTC 'timestamp'
	    index sorted in ascending order.

	    Args:
	        ref_name: Name/path of the database reference to read.

	    Returns:
	        pandas.DataFrame: The validated records, or an empty DataFrame when
	        the reference holds no data, has no timestamp column, or any error
	        occurs (the reason is printed).
	    """
	    try:
	        data = db.reference(ref_name).get()
	        if not data:
	            print(f"⚠️ No data found in '{ref_name}'.")
	            return pd.DataFrame()

	        # The Realtime Database can hand back a JSON array (a Python list,
	        # possibly with None gaps) when child keys are sequential integers.
	        # Normalise it to a mapping so from_dict(orient='index') accepts it.
	        if isinstance(data, list):
	            data = {
	                position: record
	                for position, record in enumerate(data)
	                if record is not None
	            }

	        df = pd.DataFrame.from_dict(data, orient='index')

	        # Prefer 'timestamp_entry'; fall back to 'timestamp_published'.
	        timestamp_col = next(
	            (
	                candidate
	                for candidate in ('timestamp_entry', 'timestamp_published')
	                if candidate in df.columns
	            ),
	            None,
	        )
	        if timestamp_col is None:
	            print(f"❌ Timestamp column not found in '{ref_name}'.")
	            return pd.DataFrame()

	        df = df.dropna(subset=[timestamp_col])
	        # errors='coerce' turns unparseable values into NaT so they can be
	        # dropped on the next line instead of aborting the whole fetch.
	        df = df.assign(
	            timestamp=pd.to_datetime(df[timestamp_col], utc=True, errors='coerce')
	        )
	        df = df.dropna(subset=['timestamp'])

	        # Sorted index is required by the later pd.merge_asof on the index.
	        df = df.set_index('timestamp').sort_index()
	        print(f"✅ Fetched and validated {len(df)} records from '{ref_name}'.")
	        return df
	    except Exception as e:
	        # Best-effort contract: every failure yields an empty DataFrame.
	        print(f"❌ Error fetching data from '{ref_name}': {e}")
	        return pd.DataFrame()

	def run_fusion_analysis(tolerance_hours=2):
	    """Fetch agent signals and news sentiment, fuse them, and report matches.

	    Each agent signal is paired with the most recent news row at or before
	    it (``direction='backward'``), provided that row lies within
	    ``tolerance_hours``. Signals without a matching news row are discarded.
	    Progress and diagnostics are printed; nothing is returned.

	    Args:
	        tolerance_hours: Maximum look-back window, in hours, between an
	            agent signal and the news row it is matched with. Defaults to
	            2 (widened from an earlier 30-minute window for diagnostics).
	    """
	    if not initialize_firebase():
	        return

	    agent_df = fetch_data_from_firebase(AGENT_SIGNALS_REF)
	    news_df = fetch_data_from_firebase(NEWS_SENTIMENT_REF)

	    if agent_df.empty or news_df.empty:
	        print("\nCould not proceed. One or both data sources are empty.")
	        return

	    # Selecting absent columns would raise KeyError; report it instead.
	    news_columns = ['sentiment', 'confidence_score', 'headline']
	    missing_columns = [col for col in news_columns if col not in news_df.columns]
	    if missing_columns:
	        print(
	            "\n❌ Could not proceed. News data is missing required column(s): "
	            f"{', '.join(missing_columns)}"
	        )
	        return

	    # --- DIAGNOSTIC STEP ---
	    # Show both time ranges so a lack of overlap is visible at a glance.
	    print("\n--- Time Range Diagnostics ---")
	    print(f"Agent Signals Span From: {agent_df.index.min()} to {agent_df.index.max()}")
	    print(f"News Headlines Span From: {news_df.index.min()} to {news_df.index.max()}")
	    # ----------------------------

	    print("\n--- Fusing Agent Decisions with News Sentiment ---")

	    # A wide tolerance is used for testing: if matches appear only with a
	    # wider window, the events simply are not close enough together.
	    # NOTE(review): if agent_df also carries a 'sentiment' column, merge_asof
	    # will suffix both and the dropna below will not find it -- confirm schema.
	    fused_df = pd.merge_asof(
	        left=agent_df,
	        right=news_df[news_columns],
	        left_index=True,
	        right_index=True,
	        direction='backward',
	        tolerance=pd.Timedelta(hours=tolerance_hours),
	    )

	    # Rows with no news match inside the window have NaN sentiment.
	    fused_df = fused_df.dropna(subset=['sentiment'])

	    if fused_df.empty:
	        print(f"\nAnalysis Result: Even with a {tolerance_hours}-hour window, no agent signals were found to have occurred after a news headline.")
	        print("Recommendation: Let the news sentiment agent run for a longer period to collect more data points.")
	        return

	    print(f"✅ Fusion complete. Matched {len(fused_df)} agent signals with news headlines within a {tolerance_hours}-hour window.")

	    # --- THE INTELLIGENCE LABORATORY (No changes below) ---
	    print("\n--- Advanced Analysis Results ---")
	    # ... (The rest of the analysis code is the same) ...

	# Script entry point: run the analysis only when executed directly,
	# not when this module is imported.
	if __name__ == "__main__":
	    run_fusion_analysis()