Daveabc12 committed on
Commit
e3aa0b6
·
verified ·
1 Parent(s): 526ac11

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -46
app.py CHANGED
@@ -370,9 +370,15 @@ def load_completed_setups(filename):
370
  return completed_configs
371
 
372
  # --- 1. Data Loading and Cleaning Functions ---
373
- @st.cache_data
374
  def load_all_data(folder_path):
 
 
 
 
375
  all_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
 
 
376
  if not all_files:
377
  st.error("No CSV files found in the 'csv_data' folder.")
378
  return None, None
@@ -383,41 +389,58 @@ def load_all_data(folder_path):
383
  for file_name in all_files:
384
  file_path = os.path.join(folder_path, file_name)
385
  try:
386
- # Using dayfirst=True, parse_dates=True, index_col=0
387
- df = pd.read_csv(file_path, header=0, index_col=0, dayfirst=True, parse_dates=True, encoding='utf-8')
388
- if df.empty or df.index.empty:
389
- error_messages.append(f"Warning: Skipped {file_name}, no data loaded or index missing.")
 
 
 
 
 
 
 
 
 
390
  continue
 
391
  df_list.append(df)
392
  except Exception as e:
393
- error_messages.append(f"Could not read or process {file_name}. Error: {e}")
394
 
395
  if not df_list:
396
- for msg in error_messages: st.error(msg)
397
- return None, "No data could be loaded successfully from the CSV files."
398
-
399
- for msg in error_messages: st.warning(msg)
400
-
401
- # --- REVERTED: Use pd.concat without axis=1 (defaults to axis=0 - stacking rows) ---
402
- master_df = pd.concat(df_list)
403
- # --- END REVERT ---
404
 
405
- master_df.index = pd.to_datetime(master_df.index, errors='coerce')
406
- master_df = master_df[master_df.index.notna()]
 
 
 
 
 
 
407
 
408
- # Handle duplicate index entries (keep last) AFTER concatenating rows
409
- if master_df.index.has_duplicates:
410
- master_df = master_df.loc[~master_df.index.duplicated(keep='last')]
411
 
412
- master_df.sort_index(inplace=True)
 
 
 
413
 
414
- # Convert columns to numeric *after* all concatenation and sorting
415
- for col in master_df.columns:
416
- master_df[col] = pd.to_numeric(master_df[col], errors='coerce')
417
 
418
- return master_df, f"Successfully combined data from {len(df_list)} files."
 
419
 
420
  def clean_data_and_report_outliers(df):
 
 
 
 
421
  outlier_report = []
422
  # Identify price columns (exclude _Volume, _High, _Low if present)
423
  price_columns = [col for col in df.columns if not ('_Volume' in str(col) or '_High' in str(col) or '_Low' in str(col))]
@@ -431,10 +454,13 @@ def clean_data_and_report_outliers(df):
431
  if numeric_prices.isna().all(): continue # Skip fully NaN columns
432
 
433
  daily_pct_change = numeric_prices.pct_change().abs()
 
434
  outlier_days = daily_pct_change[daily_pct_change > 1.0].index
 
435
  if not outlier_days.empty:
436
  outlier_report.append({'Ticker': ticker, 'Outliers Removed': len(outlier_days)})
437
  df.loc[outlier_days, ticker] = np.nan # Set outliers to NaN
 
438
  return df, outlier_report
439
 
440
  # --- 2. Custom Backtesting Engine ---
@@ -835,9 +861,11 @@ def run_backtest(data, params,
835
  short_exits = df[(df['short_position'] == 0) & (df['short_position'].shift(1) == -1)]
836
 
837
  if not df.empty:
838
- end_date = df.index.max(); one_month_ago = end_date - timedelta(days=30)
839
- else: end_date = pd.NaT; one_month_ago = pd.NaT
840
- recently_closed_trades = []
 
 
841
 
842
  long_trade_profits, long_durations, first_long_entry_date, last_long_exit_date = [], [], None, None
843
  short_trade_profits, short_durations, first_short_entry_date, last_short_exit_date = [], [], None, None
@@ -860,8 +888,9 @@ def run_backtest(data, params,
860
  profit = (exit_row['Close'] / row['Close']) - 1 if pd.notna(exit_row['Close']) and pd.notna(row['Close']) and row['Close'] != 0 else np.nan
861
  long_trade_profits.append(profit)
862
 
863
- if exit_date >= one_month_ago:
864
- recently_closed_trades.append({'Side': 'Long', 'Date Open': idx, 'Date Closed': exit_date, 'Start Confidence': row.get('long_confidence_score', np.nan), 'Final % P/L': profit, 'Status': 'Closed', 'Exit Reason': 'TSL' if is_tsl else ('Time' if is_time else 'Profit')})
 
865
  try: long_durations.append(df_indices.loc[exit_row.name] - df_indices.loc[idx])
866
  except KeyError: long_durations.append(np.nan)
867
  avg_long_profit_per_trade = np.nanmean(long_trade_profits) if long_trade_profits else 0.0
@@ -881,8 +910,9 @@ def run_backtest(data, params,
881
  profit = ((exit_row['Close'] / row['Close']) - 1) * -1 if pd.notna(exit_row['Close']) and pd.notna(row['Close']) and row['Close'] != 0 else np.nan
882
  short_trade_profits.append(profit)
883
 
884
- if exit_date >= one_month_ago:
885
- recently_closed_trades.append({'Side': 'Short', 'Date Open': idx, 'Date Closed': exit_date, 'Start Confidence': row.get('short_confidence_score', np.nan), 'Final % P/L': profit, 'Status': 'Closed', 'Exit Reason': 'TSL' if is_tsl else ('Time' if is_time else 'Profit')})
 
886
  try: short_durations.append(df_indices.loc[exit_row.name] - df_indices.loc[idx])
887
  except KeyError: short_durations.append(np.nan)
888
  avg_short_profit_per_trade = np.nanmean(short_trade_profits) if short_trade_profits else 0.0
@@ -906,7 +936,6 @@ def run_backtest(data, params,
906
  entry_price = last_entry['Close']
907
  entry_conf = last_entry.get('long_confidence_score', np.nan)
908
  else:
909
- # FALLBACK: If we can't find the entry row (e.g. entered before start date), use static price
910
  last_entry_time = pd.NaT
911
  entry_price = df['long_entry_price_static'].iloc[-1]
912
  entry_conf = np.nan
@@ -931,7 +960,6 @@ def run_backtest(data, params,
931
  entry_price = last_entry['Close']
932
  entry_conf = last_entry.get('short_confidence_score', np.nan)
933
  else:
934
- # FALLBACK for Short
935
  last_entry_time = pd.NaT
936
  entry_price = df['short_entry_price_static'].iloc[-1]
937
  entry_conf = np.nan
@@ -949,7 +977,8 @@ def run_backtest(data, params,
949
  })
950
  # ---------------------------------
951
 
952
- open_trades.extend(recently_closed_trades)
 
953
  df.sort_index(inplace=True)
954
 
955
  trade_dates = (first_long_entry_date, last_long_exit_date, first_short_entry_date, last_short_exit_date)
@@ -4301,20 +4330,27 @@ def main():
4301
  # 1. Top Cards
4302
  display_summary_analytics(st.session_state.summary_df)
4303
 
4304
- # 2. Histogram (Profit Distribution)
4305
  st.markdown("---")
4306
  try:
4307
  if 'generate_profit_distribution_chart' in globals():
4308
  dist_fig = generate_profit_distribution_chart(st.session_state.summary_df)
4309
- if dist_fig:
4310
- st.plotly_chart(dist_fig, use_container_width=True)
4311
- else:
4312
- st.error("Error: Chart function missing.")
4313
- except Exception as e:
4314
- st.error(f"Could not render histogram: {e}")
 
 
 
 
 
 
 
4315
  st.markdown("---")
4316
 
4317
- # 3. Results Per Ticker Table
4318
  st.subheader("Results per Ticker")
4319
  if st.checkbox("Only show tickers with trades", value=True):
4320
  df_to_display = st.session_state.summary_df[(st.session_state.summary_df['Num Long Trades'] > 0) | (st.session_state.summary_df['Num Short Trades'] > 0)].copy()
@@ -4330,26 +4366,40 @@ def main():
4330
  if not production_mode:
4331
  if st.button("💾 Add these settings to User-Defined List", key="save_setup_from_analysis", on_click=add_setup_to_user_list): pass
4332
 
4333
- # 4. Open Positions Table (With Brown-Haired Icon)
4334
  st.subheader("👨🏻‍💼 Open Positions & Recently Closed",
4335
  help="This table displays all currently ACTIVE trades, plus any trades that closed within the last 30 days.")
4336
 
4337
  if st.session_state.get('open_trades_df') is not None and not st.session_state.open_trades_df.empty:
4338
- display_open_df = st.session_state.open_trades_df.copy()
 
 
 
 
 
 
 
 
 
 
 
 
4339
  display_open_df.sort_values(by=['Status', 'Date Open'], ascending=[True, False], inplace=True)
4340
 
4341
  cols_order_manual = ['Ticker', 'Status', 'Final % P/L', 'Side', 'Date Open', 'Date Closed', 'Start Confidence']
4342
  existing_cols_open = [col for col in cols_order_manual if col in display_open_df.columns]
4343
 
4344
- if existing_cols_open:
4345
  st.dataframe(display_open_df[existing_cols_open].style.format({
4346
  "Final % P/L": lambda x: f"{x:.2%}" if pd.notna(x) else '-',
4347
  "Date Open": lambda x: x.strftime('%Y-%m-%d') if pd.notna(x) else '-',
4348
  "Date Closed": lambda x: x.strftime('%Y-%m-%d') if pd.notna(x) else '-',
4349
  "Start Confidence": lambda x: f"{x:.0f}%" if pd.notna(x) else '-'
4350
  }, na_rep='-'))
 
 
4351
  else:
4352
- st.info("No Open Positions or Recently Closed trades found in this period.")
4353
 
4354
  # 14. Default Message
4355
  else:
@@ -4382,5 +4432,44 @@ def main():
4382
  st.sidebar.success("Best Markov setup loaded!")
4383
  st.rerun()
4384
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4385
  if __name__ == "__main__":
4386
  main()
 
370
  return completed_configs
371
 
372
  # --- 1. Data Loading and Cleaning Functions ---
373
@st.cache_data(ttl=300)  # cache auto-expires every 5 minutes so newly uploaded CSVs are picked up
def load_all_data(folder_path):
    """Load every CSV in *folder_path* and merge them into one date-indexed DataFrame.

    Each file is read with its first column as the index, which is then parsed
    as dates with ``dayfirst=True`` (``errors='coerce'`` turns bad formats into
    NaT so they can be dropped). Files are concatenated row-wise, duplicate
    dates keep the newest file's row, the index is sorted, and all columns are
    coerced to numeric.

    Returns:
        (master_df, status_message) on success, or (None, message) on failure.
    """
    if not os.path.exists(folder_path):
        st.error(f"Folder '{folder_path}' not found.")
        return None, None

    all_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
    # Chronological file order so the keep='last' dedup below favors the newest file.
    all_files.sort()

    if not all_files:
        st.error("No CSV files found in the 'csv_data' folder.")
        return None, None

    # NOTE(review): the diff elides the original initializer lines here;
    # df_list / error_messages are reconstructed from their later uses — confirm.
    df_list = []
    error_messages = []

    for file_name in all_files:
        file_path = os.path.join(folder_path, file_name)
        try:
            # Read without date parsing first (strings are safe), then parse
            # the index explicitly so dayfirst=True is always honored.
            df = pd.read_csv(file_path, header=0, index_col=0, encoding='utf-8')
            df.index = pd.to_datetime(df.index, dayfirst=True, errors='coerce')

            # Drop rows whose dates failed to parse.
            df = df[df.index.notna()]

            if df.empty:
                error_messages.append(f"Warning: Skipped {file_name}, no valid dates found.")
                continue

            df_list.append(df)
        except Exception as e:
            error_messages.append(f"Could not read {file_name}. Error: {e}")

    if not df_list:
        # FIX: surface the per-file errors instead of silently dropping them,
        # so the user can see *why* nothing loaded.
        for msg in error_messages:
            st.error(msg)
        return None, "No data could be loaded successfully."

    try:
        # Consolidate all files (row-wise stacking).
        master_df = pd.concat(df_list)

        # Deduplicate overlapping dates, keeping the newest file's row — this
        # prevents duplicate points when file date ranges overlap.
        if master_df.index.has_duplicates:
            master_df = master_df[~master_df.index.duplicated(keep='last')]

        # Force chronological order so charts never zig-zag.
        master_df.sort_index(inplace=True)

        # Ensure all price columns are numbers, not strings.
        for col in master_df.columns:
            master_df[col] = pd.to_numeric(master_df[col], errors='coerce')

        for msg in error_messages:
            st.warning(msg)

        return master_df, f"Successfully combined data from {len(df_list)} files."
    except Exception as e:
        return None, f"Critical Error merging data: {e}"
438
 
439
  def clean_data_and_report_outliers(df):
440
+ """
441
+ Scans for massive daily percentage moves (>100%) which are likely data errors
442
+ and sets them to NaN. Preserved from original code.
443
+ """
444
  outlier_report = []
445
  # Identify price columns (exclude _Volume, _High, _Low if present)
446
  price_columns = [col for col in df.columns if not ('_Volume' in str(col) or '_High' in str(col) or '_Low' in str(col))]
 
454
  if numeric_prices.isna().all(): continue # Skip fully NaN columns
455
 
456
  daily_pct_change = numeric_prices.pct_change().abs()
457
+ # Outlier Threshold: > 1.0 (100% change in one day)
458
  outlier_days = daily_pct_change[daily_pct_change > 1.0].index
459
+
460
  if not outlier_days.empty:
461
  outlier_report.append({'Ticker': ticker, 'Outliers Removed': len(outlier_days)})
462
  df.loc[outlier_days, ticker] = np.nan # Set outliers to NaN
463
+
464
  return df, outlier_report
465
 
466
  # --- 2. Custom Backtesting Engine ---
 
861
  short_exits = df[(df['short_position'] == 0) & (df['short_position'].shift(1) == -1)]
862
 
863
  if not df.empty:
864
+ end_date = df.index.max();
865
+ else: end_date = pd.NaT;
866
+
867
+ # --- CHANGED: COLLECT ALL HISTORY (Removed "30 day" filter) ---
868
+ all_historical_trades = []
869
 
870
  long_trade_profits, long_durations, first_long_entry_date, last_long_exit_date = [], [], None, None
871
  short_trade_profits, short_durations, first_short_entry_date, last_short_exit_date = [], [], None, None
 
888
  profit = (exit_row['Close'] / row['Close']) - 1 if pd.notna(exit_row['Close']) and pd.notna(row['Close']) and row['Close'] != 0 else np.nan
889
  long_trade_profits.append(profit)
890
 
891
+ # APPEND ALL TRADES (No Date Filter)
892
+ all_historical_trades.append({'Side': 'Long', 'Date Open': idx, 'Date Closed': exit_date, 'Start Confidence': row.get('long_confidence_score', np.nan), 'Final % P/L': profit, 'Status': 'Closed', 'Exit Reason': 'TSL' if is_tsl else ('Time' if is_time else 'Profit')})
893
+
894
  try: long_durations.append(df_indices.loc[exit_row.name] - df_indices.loc[idx])
895
  except KeyError: long_durations.append(np.nan)
896
  avg_long_profit_per_trade = np.nanmean(long_trade_profits) if long_trade_profits else 0.0
 
910
  profit = ((exit_row['Close'] / row['Close']) - 1) * -1 if pd.notna(exit_row['Close']) and pd.notna(row['Close']) and row['Close'] != 0 else np.nan
911
  short_trade_profits.append(profit)
912
 
913
+ # APPEND ALL TRADES (No Date Filter)
914
+ all_historical_trades.append({'Side': 'Short', 'Date Open': idx, 'Date Closed': exit_date, 'Start Confidence': row.get('short_confidence_score', np.nan), 'Final % P/L': profit, 'Status': 'Closed', 'Exit Reason': 'TSL' if is_tsl else ('Time' if is_time else 'Profit')})
915
+
916
  try: short_durations.append(df_indices.loc[exit_row.name] - df_indices.loc[idx])
917
  except KeyError: short_durations.append(np.nan)
918
  avg_short_profit_per_trade = np.nanmean(short_trade_profits) if short_trade_profits else 0.0
 
936
  entry_price = last_entry['Close']
937
  entry_conf = last_entry.get('long_confidence_score', np.nan)
938
  else:
 
939
  last_entry_time = pd.NaT
940
  entry_price = df['long_entry_price_static'].iloc[-1]
941
  entry_conf = np.nan
 
960
  entry_price = last_entry['Close']
961
  entry_conf = last_entry.get('short_confidence_score', np.nan)
962
  else:
 
963
  last_entry_time = pd.NaT
964
  entry_price = df['short_entry_price_static'].iloc[-1]
965
  entry_conf = np.nan
 
977
  })
978
  # ---------------------------------
979
 
980
+ # Combine: [All Historical Closed] + [Current Open]
981
+ open_trades.extend(all_historical_trades)
982
  df.sort_index(inplace=True)
983
 
984
  trade_dates = (first_long_entry_date, last_long_exit_date, first_short_entry_date, last_short_exit_date)
 
4330
  # 1. Top Cards
4331
  display_summary_analytics(st.session_state.summary_df)
4332
 
4333
+ # 2. Histogram 1: Profit Distribution
4334
  st.markdown("---")
4335
  try:
4336
  if 'generate_profit_distribution_chart' in globals():
4337
  dist_fig = generate_profit_distribution_chart(st.session_state.summary_df)
4338
+ if dist_fig: st.plotly_chart(dist_fig, use_container_width=True)
4339
+ except Exception: pass
4340
+
4341
+ # 3. Histogram 2: Trades Over Time (NEW)
4342
+ try:
4343
+ if 'generate_trades_timeline_histogram' in globals() and st.session_state.get('open_trades_df') is not None:
4344
+ timeline_fig = generate_trades_timeline_histogram(
4345
+ st.session_state.open_trades_df,
4346
+ st.session_state.start_date,
4347
+ st.session_state.end_date
4348
+ )
4349
+ if timeline_fig: st.plotly_chart(timeline_fig, use_container_width=True)
4350
+ except Exception as e: st.error(f"Chart Error: {e}")
4351
  st.markdown("---")
4352
 
4353
+ # 4. Results Per Ticker Table
4354
  st.subheader("Results per Ticker")
4355
  if st.checkbox("Only show tickers with trades", value=True):
4356
  df_to_display = st.session_state.summary_df[(st.session_state.summary_df['Num Long Trades'] > 0) | (st.session_state.summary_df['Num Short Trades'] > 0)].copy()
 
4366
  if not production_mode:
4367
  if st.button("💾 Add these settings to User-Defined List", key="save_setup_from_analysis", on_click=add_setup_to_user_list): pass
4368
 
4369
+ # 5. Open Positions Table (With Filter)
4370
  st.subheader("👨🏻‍💼 Open Positions & Recently Closed",
4371
  help="This table displays all currently ACTIVE trades, plus any trades that closed within the last 30 days.")
4372
 
4373
  if st.session_state.get('open_trades_df') is not None and not st.session_state.open_trades_df.empty:
4374
+ full_df = st.session_state.open_trades_df.copy()
4375
+
4376
+ # --- FILTER: Show 'Open' OR 'Closed in last 30 days' ---
4377
+ cutoff = pd.Timestamp.now() - pd.Timedelta(days=30)
4378
+ # Ensure date column is datetime
4379
+ full_df['Date Closed'] = pd.to_datetime(full_df['Date Closed'], errors='coerce')
4380
+
4381
+ mask_open = (full_df['Status'] == 'Open')
4382
+ mask_recent = (full_df['Status'] == 'Closed') & (full_df['Date Closed'] >= cutoff)
4383
+
4384
+ display_open_df = full_df[mask_open | mask_recent].copy()
4385
+ # -------------------------------------------------------
4386
+
4387
  display_open_df.sort_values(by=['Status', 'Date Open'], ascending=[True, False], inplace=True)
4388
 
4389
  cols_order_manual = ['Ticker', 'Status', 'Final % P/L', 'Side', 'Date Open', 'Date Closed', 'Start Confidence']
4390
  existing_cols_open = [col for col in cols_order_manual if col in display_open_df.columns]
4391
 
4392
+ if existing_cols_open and not display_open_df.empty:
4393
  st.dataframe(display_open_df[existing_cols_open].style.format({
4394
  "Final % P/L": lambda x: f"{x:.2%}" if pd.notna(x) else '-',
4395
  "Date Open": lambda x: x.strftime('%Y-%m-%d') if pd.notna(x) else '-',
4396
  "Date Closed": lambda x: x.strftime('%Y-%m-%d') if pd.notna(x) else '-',
4397
  "Start Confidence": lambda x: f"{x:.0f}%" if pd.notna(x) else '-'
4398
  }, na_rep='-'))
4399
+ else:
4400
+ st.info("No Open or Recent trades found (older trades are hidden).")
4401
  else:
4402
+ st.info("No trades found.")
4403
 
4404
  # 14. Default Message
4405
  else:
 
4432
  st.sidebar.success("Best Markov setup loaded!")
4433
  st.rerun()
4434
 
4435
def generate_trades_timeline_histogram(trades_df, start_date, end_date):
    """Build a stacked Plotly histogram of closed trades over time.

    Four series bucketed by 'Date Closed': Long winners (green), Long losers
    (red), Short winners (blue), Short losers (orange).

    Args:
        trades_df: DataFrame with 'Date Closed', 'Status', 'Side',
            'Final % P/L' columns.
        start_date / end_date: inclusive bounds for the 'Date Closed' filter.

    Returns:
        A plotly Figure, or None when there is nothing to plot.
    """
    if trades_df is None or trades_df.empty:
        return None

    df = trades_df.copy()
    # FIX: coerce 'Date Closed' to datetime before comparing — upstream only
    # converts this column on one code path, and comparing strings against
    # Timestamps raises TypeError. NaT (still-open trades) compares False and
    # is filtered out naturally.
    df['Date Closed'] = pd.to_datetime(df['Date Closed'], errors='coerce')

    # Restrict to the requested date window (inclusive).
    in_window = (df['Date Closed'] >= pd.to_datetime(start_date)) & \
                (df['Date Closed'] <= pd.to_datetime(end_date))
    df = df[in_window]

    # Only closed trades have a meaningful close date / final P&L.
    df = df[df['Status'] == 'Closed']
    # FIX: re-check emptiness after the status filter too, so an all-open
    # frame returns None instead of rendering an empty chart.
    if df.empty:
        return None

    long_wins = df[(df['Side'] == 'Long') & (df['Final % P/L'] > 0)]
    long_loss = df[(df['Side'] == 'Long') & (df['Final % P/L'] <= 0)]
    short_wins = df[(df['Side'] == 'Short') & (df['Final % P/L'] > 0)]
    short_loss = df[(df['Side'] == 'Short') & (df['Final % P/L'] <= 0)]

    fig = go.Figure()
    fig.add_trace(go.Histogram(x=long_wins['Date Closed'], name='Long Winners', marker_color='green'))
    fig.add_trace(go.Histogram(x=long_loss['Date Closed'], name='Long Losers', marker_color='red'))
    fig.add_trace(go.Histogram(x=short_wins['Date Closed'], name='Short Winners', marker_color='blue'))
    fig.add_trace(go.Histogram(x=short_loss['Date Closed'], name='Short Losers', marker_color='orange'))

    fig.update_layout(
        barmode='stack',
        title="Trades Over Time (Win/Loss Stacked)",
        xaxis_title="Date",
        yaxis_title="Number of Trades",
        height=400,
        template="plotly_white",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    )
    return fig
4473
+
4474
  if __name__ == "__main__":
4475
  main()