RazHadas's picture
Upload 6 files
76317bb verified
# -*- coding: utf-8 -*-
"""utils.py
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1RyRghhbleQJ01USX_0O4uUALsuFM10hJ
"""
# utils.py
# Helper functions for data manipulation and plotting defaults.
import pandas as pd
import plotly.graph_objects as go
import re
import numpy as np
import traceback
def get_nested_value(data_dict, keys, default=None):
    """Walk a path of keys/indices through nested dicts and lists.

    Each entry of ``keys`` is applied to the container reached so far:
    dict levels require the key to be present, list levels require an
    in-range, non-negative integer index. As soon as a step cannot be
    taken, ``default`` is returned; otherwise the value found at the end
    of the path is returned (which may itself be ``None``).
    """
    node = data_dict
    for step in keys:
        if isinstance(node, dict):
            # Missing key: the path cannot be followed any further.
            if step not in node:
                return default
        elif isinstance(node, list):
            # Only non-negative, in-range integer positions are accepted.
            if not isinstance(step, int) or not (0 <= step < len(node)):
                return default
        else:
            # Neither a dict nor a list: nothing to descend into.
            return default
        node = node[step]
    return node
def parse_numeric_string(value_str, default=None):
    """Parse a number from a string, tolerating ``$``, ``%`` and commas.

    Args:
        value_str: The value to parse. Strings are stripped of ``$``, ``%``
            and ``,`` characters plus surrounding whitespace and converted
            with ``float``. Values that are already numeric (builtin
            ``int``/``float`` or NumPy integer/floating scalars) are
            returned unchanged.
        default: Returned when ``value_str`` is neither a string nor a
            number, or when the cleaned string is not a valid float.

    Returns:
        The parsed ``float``, the original numeric value, or ``default``.
    """
    # Already numeric: hand it back untouched. NumPy scalars are included
    # because types such as np.int64 / np.float32 are not subclasses of the
    # builtin int / float and would otherwise be rejected as "non-numeric".
    if isinstance(value_str, (int, float, np.integer, np.floating)):
        return value_str
    # None or any other non-string, non-numeric type.
    if not isinstance(value_str, str):
        return default
    try:
        # Remove currency symbols, percentage signs, and commas.
        cleaned_str = re.sub(r'[$,%]', '', value_str).strip()
        return float(cleaned_str)
    except (ValueError, TypeError):
        # Cleaned text is not a valid float literal (e.g. "" or "abc").
        return default
def create_empty_figure(title="No Data Available"):
    """Build a placeholder Plotly figure that only displays ``title``.

    Both axes are hidden, and the title text is shown twice: as the layout
    title and as a paper-referenced annotation without an arrow.
    """
    hidden_axis = {'visible': False}
    message = {
        'text': title,
        'xref': 'paper',
        'yref': 'paper',
        'showarrow': False,
        'font': {'size': 16},
    }

    placeholder = go.Figure()
    placeholder.update_layout(
        title=title,
        xaxis=dict(hidden_axis),
        yaxis=dict(hidden_axis),
        annotations=[message],
    )
    return placeholder
def _empty_timeseries_frame(value_col_name):
    """Return an empty DataFrame indexed by 'Time' with one value column."""
    return pd.DataFrame(columns=['Time', value_col_name]).set_index('Time')


def process_timeseries_chart(chart_data, value_col_name='Value'):
    """
    Processes QuantConnect timeseries chart data like [[timestamp, value, ...], ...].

    Assumes timestamp is in SECONDS. Extracts the second element as the value.
    Entries that are not a list/tuple of at least two items, or whose
    timestamp or value is None or cannot be converted, are skipped
    individually; a malformed entry never discards the valid ones around it.

    Args:
        chart_data: List of [timestamp, value, ...] entries.
        value_col_name: Name of the value column in the result.

    Returns:
        A DataFrame with a UTC, timezone-aware 'Time' DatetimeIndex and a
        single numeric column named ``value_col_name``, sorted by time.
        An empty DataFrame (index named 'Time', same single column) is
        returned when the input is not a non-empty list, when no entry
        survives cleaning, or when an unexpected error occurs (the error is
        printed together with its traceback).
    """
    # Check if input data is valid list format
    if not chart_data or not isinstance(chart_data, list):
        return _empty_timeseries_frame(value_col_name)
    try:
        # Extract timestamp (index 0) and value (index 1), skipping any
        # entry that is malformed or has a None timestamp/value.
        processed_data = [
            [item[0], item[1]]
            for item in chart_data
            if isinstance(item, (list, tuple))
            and len(item) >= 2
            and item[0] is not None
            and item[1] is not None
        ]
        if not processed_data:
            return _empty_timeseries_frame(value_col_name)

        df = pd.DataFrame(processed_data, columns=['Time_Raw', value_col_name])

        # Convert timestamp (assumed seconds) to numeric, dropping failures
        df['Time_Raw'] = pd.to_numeric(df['Time_Raw'], errors='coerce')
        df.dropna(subset=['Time_Raw'], inplace=True)
        if df.empty:
            return _empty_timeseries_frame(value_col_name)

        # Convert numeric timestamp to datetime, dropping failures
        # (e.g. values outside the representable datetime range)
        df['Time'] = pd.to_datetime(df['Time_Raw'], unit='s', errors='coerce')
        df.dropna(subset=['Time'], inplace=True)
        if df.empty:
            return _empty_timeseries_frame(value_col_name)

        # Convert value column to numeric, dropping failures
        df[value_col_name] = pd.to_numeric(df[value_col_name], errors='coerce')
        df.dropna(subset=[value_col_name], inplace=True)
        if df.empty:
            return _empty_timeseries_frame(value_col_name)

        # Set the datetime 'Time' column as the index
        df = df.set_index('Time')

        # Defensive check: the index should already be a DatetimeIndex
        if not isinstance(df.index, pd.DatetimeIndex):
            print(f"Warning: Index is not DatetimeIndex for {value_col_name} after setting. Attempting conversion.")
            df.index = pd.to_datetime(df.index, errors='coerce')
            # Drop rows whose index label failed to convert (NaT); a plain
            # dropna() only inspects column values, not the index.
            df = df[df.index.notna()]
            if df.empty:
                return _empty_timeseries_frame(value_col_name)

        # Ensure the DatetimeIndex is timezone-aware (UTC). tz_convert is a
        # no-op for an index that is already UTC, so no comparison of the
        # tzinfo object against a string is needed.
        if df.index.tz is None:
            df = df.tz_localize('UTC')
        else:
            df = df.tz_convert('UTC')

        # Return the DataFrame with only the value column, sorted by time
        return df[[value_col_name]].sort_index()
    except Exception as e:
        # Best-effort boundary: report the problem and fall back to an
        # empty frame so callers can keep rendering.
        print(f"Error creating/processing DataFrame for {value_col_name}: {e}")
        traceback.print_exc()
        return _empty_timeseries_frame(value_col_name)