Daveabc12 committed on
Commit
e3aa0b6
·
verified ·
1 Parent(s): 526ac11

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -46
app.py CHANGED
@@ -370,9 +370,15 @@ def load_completed_setups(filename):
370
  return completed_configs
371
 
372
  # --- 1. Data Loading and Cleaning Functions ---
373
- @st.cache_data
374
  def load_all_data(folder_path):
 
 
 
 
375
  all_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
 
 
376
  if not all_files:
377
  st.error("No CSV files found in the 'csv_data' folder.")
378
  return None, None
@@ -383,41 +389,58 @@ def load_all_data(folder_path):
383
  for file_name in all_files:
384
  file_path = os.path.join(folder_path, file_name)
385
  try:
386
- # Using dayfirst=True, parse_dates=True, index_col=0
387
- df = pd.read_csv(file_path, header=0, index_col=0, dayfirst=True, parse_dates=True, encoding='utf-8')
388
- if df.empty or df.index.empty:
389
- error_messages.append(f"Warning: Skipped {file_name}, no data loaded or index missing.")
 
 
 
 
 
 
 
 
 
390
  continue
 
391
  df_list.append(df)
392
  except Exception as e:
393
- error_messages.append(f"Could not read or process {file_name}. Error: {e}")
394
 
395
  if not df_list:
396
- for msg in error_messages: st.error(msg)
397
- return None, "No data could be loaded successfully from the CSV files."
398
-
399
- for msg in error_messages: st.warning(msg)
400
-
401
- # --- REVERTED: Use pd.concat without axis=1 (defaults to axis=0 - stacking rows) ---
402
- master_df = pd.concat(df_list)
403
- # --- END REVERT ---
404
 
405
- master_df.index = pd.to_datetime(master_df.index, errors='coerce')
406
- master_df = master_df[master_df.index.notna()]
 
 
 
 
 
 
407
 
408
- # Handle duplicate index entries (keep last) AFTER concatenating rows
409
- if master_df.index.has_duplicates:
410
- master_df = master_df.loc[~master_df.index.duplicated(keep='last')]
411
 
412
- master_df.sort_index(inplace=True)
 
 
 
413
 
414
- # Convert columns to numeric *after* all concatenation and sorting
415
- for col in master_df.columns:
416
- master_df[col] = pd.to_numeric(master_df[col], errors='coerce')
417
 
418
- return master_df, f"Successfully combined data from {len(df_list)} files."
 
419
 
420
  def clean_data_and_report_outliers(df):
 
 
 
 
421
  outlier_report = []
422
  # Identify price columns (exclude _Volume, _High, _Low if present)
423
  price_columns = [col for col in df.columns if not ('_Volume' in str(col) or '_High' in str(col) or '_Low' in str(col))]
@@ -431,10 +454,13 @@ def clean_data_and_report_outliers(df):
431
  if numeric_prices.isna().all(): continue # Skip fully NaN columns
432
 
433
  daily_pct_change = numeric_prices.pct_change().abs()
 
434
  outlier_days = daily_pct_change[daily_pct_change > 1.0].index
 
435
  if not outlier_days.empty:
436
  outlier_report.append({'Ticker': ticker, 'Outliers Removed': len(outlier_days)})
437
  df.loc[outlier_days, ticker] = np.nan # Set outliers to NaN
 
438
  return df, outlier_report
439
 
440
  # --- 2. Custom Backtesting Engine ---
@@ -835,9 +861,11 @@ def run_backtest(data, params,
835
  short_exits = df[(df['short_position'] == 0) & (df['short_position'].shift(1) == -1)]
836
 
837
  if not df.empty:
838
- end_date = df.index.max(); one_month_ago = end_date - timedelta(days=30)
839
- else: end_date = pd.NaT; one_month_ago = pd.NaT
840
- recently_closed_trades = []
 
 
841
 
842
  long_trade_profits, long_durations, first_long_entry_date, last_long_exit_date = [], [], None, None
843
  short_trade_profits, short_durations, first_short_entry_date, last_short_exit_date = [], [], None, None
@@ -860,8 +888,9 @@ def run_backtest(data, params,
860
  profit = (exit_row['Close'] / row['Close']) - 1 if pd.notna(exit_row['Close']) and pd.notna(row['Close']) and row['Close'] != 0 else np.nan
861
  long_trade_profits.append(profit)
862
 
863
- if exit_date >= one_month_ago:
864
- recently_closed_trades.append({'Side': 'Long', 'Date Open': idx, 'Date Closed': exit_date, 'Start Confidence': row.get('long_confidence_score', np.nan), 'Final % P/L': profit, 'Status': 'Closed', 'Exit Reason': 'TSL' if is_tsl else ('Time' if is_time else 'Profit')})
 
865
  try: long_durations.append(df_indices.loc[exit_row.name] - df_indices.loc[idx])
866
  except KeyError: long_durations.append(np.nan)
867
  avg_long_profit_per_trade = np.nanmean(long_trade_profits) if long_trade_profits else 0.0
@@ -881,8 +910,9 @@ def run_backtest(data, params,
881
  profit = ((exit_row['Close'] / row['Close']) - 1) * -1 if pd.notna(exit_row['Close']) and pd.notna(row['Close']) and row['Close'] != 0 else np.nan
882
  short_trade_profits.append(profit)
883
 
884
- if exit_date >= one_month_ago:
885
- recently_closed_trades.append({'Side': 'Short', 'Date Open': idx, 'Date Closed': exit_date, 'Start Confidence': row.get('short_confidence_score', np.nan), 'Final % P/L': profit, 'Status': 'Closed', 'Exit Reason': 'TSL' if is_tsl else ('Time' if is_time else 'Profit')})
 
886
  try: short_durations.append(df_indices.loc[exit_row.name] - df_indices.loc[idx])
887
  except KeyError: short_durations.append(np.nan)
888
  avg_short_profit_per_trade = np.nanmean(short_trade_profits) if short_trade_profits else 0.0
@@ -906,7 +936,6 @@ def run_backtest(data, params,
906
  entry_price = last_entry['Close']
907
  entry_conf = last_entry.get('long_confidence_score', np.nan)
908
  else:
909
- # FALLBACK: If we can't find the entry row (e.g. entered before start date), use static price
910
  last_entry_time = pd.NaT
911
  entry_price = df['long_entry_price_static'].iloc[-1]
912
  entry_conf = np.nan
@@ -931,7 +960,6 @@ def run_backtest(data, params,
931
  entry_price = last_entry['Close']
932
  entry_conf = last_entry.get('short_confidence_score', np.nan)
933
  else:
934
- # FALLBACK for Short
935
  last_entry_time = pd.NaT
936
  entry_price = df['short_entry_price_static'].iloc[-1]
937
  entry_conf = np.nan
@@ -949,7 +977,8 @@ def run_backtest(data, params,
949
  })
950
  # ---------------------------------
951
 
952
- open_trades.extend(recently_closed_trades)
 
953
  df.sort_index(inplace=True)
954
 
955
  trade_dates = (first_long_entry_date, last_long_exit_date, first_short_entry_date, last_short_exit_date)
@@ -4301,20 +4330,27 @@ def main():
4301
  # 1. Top Cards
4302
  display_summary_analytics(st.session_state.summary_df)
4303
 
4304
- # 2. Histogram (Profit Distribution)
4305
  st.markdown("---")
4306
  try:
4307
  if 'generate_profit_distribution_chart' in globals():
4308
  dist_fig = generate_profit_distribution_chart(st.session_state.summary_df)
4309
- if dist_fig:
4310
- st.plotly_chart(dist_fig, use_container_width=True)
4311
- else:
4312
- st.error("Error: Chart function missing.")
4313
- except Exception as e:
4314
- st.error(f"Could not render histogram: {e}")
 
 
 
 
 
 
 
4315
  st.markdown("---")
4316
 
4317
- # 3. Results Per Ticker Table
4318
  st.subheader("Results per Ticker")
4319
  if st.checkbox("Only show tickers with trades", value=True):
4320
  df_to_display = st.session_state.summary_df[(st.session_state.summary_df['Num Long Trades'] > 0) | (st.session_state.summary_df['Num Short Trades'] > 0)].copy()
@@ -4330,26 +4366,40 @@ def main():
4330
  if not production_mode:
4331
  if st.button("💾 Add these settings to User-Defined List", key="save_setup_from_analysis", on_click=add_setup_to_user_list): pass
4332
 
4333
- # 4. Open Positions Table (With Brown-Haired Icon)
4334
  st.subheader("👨🏻‍💼 Open Positions & Recently Closed",
4335
  help="This table displays all currently ACTIVE trades, plus any trades that closed within the last 30 days.")
4336
 
4337
  if st.session_state.get('open_trades_df') is not None and not st.session_state.open_trades_df.empty:
4338
- display_open_df = st.session_state.open_trades_df.copy()
 
 
 
 
 
 
 
 
 
 
 
 
4339
  display_open_df.sort_values(by=['Status', 'Date Open'], ascending=[True, False], inplace=True)
4340
 
4341
  cols_order_manual = ['Ticker', 'Status', 'Final % P/L', 'Side', 'Date Open', 'Date Closed', 'Start Confidence']
4342
  existing_cols_open = [col for col in cols_order_manual if col in display_open_df.columns]
4343
 
4344
- if existing_cols_open:
4345
  st.dataframe(display_open_df[existing_cols_open].style.format({
4346
  "Final % P/L": lambda x: f"{x:.2%}" if pd.notna(x) else '-',
4347
  "Date Open": lambda x: x.strftime('%Y-%m-%d') if pd.notna(x) else '-',
4348
  "Date Closed": lambda x: x.strftime('%Y-%m-%d') if pd.notna(x) else '-',
4349
  "Start Confidence": lambda x: f"{x:.0f}%" if pd.notna(x) else '-'
4350
  }, na_rep='-'))
 
 
4351
  else:
4352
- st.info("No Open Positions or Recently Closed trades found in this period.")
4353
 
4354
  # 14. Default Message
4355
  else:
@@ -4382,5 +4432,44 @@ def main():
4382
  st.sidebar.success("Best Markov setup loaded!")
4383
  st.rerun()
4384
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4385
  if __name__ == "__main__":
4386
  main()
 
370
  return completed_configs
371
 
372
  # --- 1. Data Loading and Cleaning Functions ---
373
@st.cache_data(ttl=300)  # cache auto-expires every 5 minutes so newly uploaded CSVs are picked up
def load_all_data(folder_path):
    """Load every CSV in *folder_path* and merge them into one date-indexed DataFrame.

    Each file is read with its first column as the index, which is then parsed
    as dates with ``dayfirst=True`` (``errors='coerce'`` turns bad formats into
    NaT so they can be dropped). Files are concatenated row-wise, duplicate
    dates keep the newest file's row, the index is sorted, and all columns are
    coerced to numeric.

    Returns:
        (master_df, status_message) on success, or (None, message) on failure.
    """
    if not os.path.exists(folder_path):
        st.error(f"Folder '{folder_path}' not found.")
        return None, None

    all_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
    # Chronological file order so the keep='last' dedup below favors the newest file.
    all_files.sort()

    if not all_files:
        st.error("No CSV files found in the 'csv_data' folder.")
        return None, None

    # NOTE(review): the diff elides the original initializer lines here;
    # df_list / error_messages are reconstructed from their later uses — confirm.
    df_list = []
    error_messages = []

    for file_name in all_files:
        file_path = os.path.join(folder_path, file_name)
        try:
            # Read without date parsing first (strings are safe), then parse
            # the index explicitly so dayfirst=True is always honored.
            df = pd.read_csv(file_path, header=0, index_col=0, encoding='utf-8')
            df.index = pd.to_datetime(df.index, dayfirst=True, errors='coerce')

            # Drop rows whose dates failed to parse.
            df = df[df.index.notna()]

            if df.empty:
                error_messages.append(f"Warning: Skipped {file_name}, no valid dates found.")
                continue

            df_list.append(df)
        except Exception as e:
            error_messages.append(f"Could not read {file_name}. Error: {e}")

    if not df_list:
        # FIX: surface the per-file errors instead of silently dropping them,
        # so the user can see *why* nothing loaded.
        for msg in error_messages:
            st.error(msg)
        return None, "No data could be loaded successfully."

    try:
        # Consolidate all files (row-wise stacking).
        master_df = pd.concat(df_list)

        # Deduplicate overlapping dates, keeping the newest file's row — this
        # prevents duplicate points when file date ranges overlap.
        if master_df.index.has_duplicates:
            master_df = master_df[~master_df.index.duplicated(keep='last')]

        # Force chronological order so charts never zig-zag.
        master_df.sort_index(inplace=True)

        # Ensure all price columns are numbers, not strings.
        for col in master_df.columns:
            master_df[col] = pd.to_numeric(master_df[col], errors='coerce')

        for msg in error_messages:
            st.warning(msg)

        return master_df, f"Successfully combined data from {len(df_list)} files."
    except Exception as e:
        return None, f"Critical Error merging data: {e}"
438
 
439
  def clean_data_and_report_outliers(df):
440
+ """
441
+ Scans for massive daily percentage moves (>100%) which are likely data errors
442
+ and sets them to NaN. Preserved from original code.
443
+ """
444
  outlier_report = []
445
  # Identify price columns (exclude _Volume, _High, _Low if present)
446
  price_columns = [col for col in df.columns if not ('_Volume' in str(col) or '_High' in str(col) or '_Low' in str(col))]
 
454
  if numeric_prices.isna().all(): continue # Skip fully NaN columns
455
 
456
  daily_pct_change = numeric_prices.pct_change().abs()
457
+ # Outlier Threshold: > 1.0 (100% change in one day)
458
  outlier_days = daily_pct_change[daily_pct_change > 1.0].index
459
+
460
  if not outlier_days.empty:
461
  outlier_report.append({'Ticker': ticker, 'Outliers Removed': len(outlier_days)})
462
  df.loc[outlier_days, ticker] = np.nan # Set outliers to NaN
463
+
464
  return df, outlier_report
465
 
466
  # --- 2. Custom Backtesting Engine ---
 
861
  short_exits = df[(df['short_position'] == 0) & (df['short_position'].shift(1) == -1)]
862
 
863
  if not df.empty:
864
+ end_date = df.index.max();
865
+ else: end_date = pd.NaT;
866
+
867
+ # --- CHANGED: COLLECT ALL HISTORY (Removed "30 day" filter) ---
868
+ all_historical_trades = []
869
 
870
  long_trade_profits, long_durations, first_long_entry_date, last_long_exit_date = [], [], None, None
871
  short_trade_profits, short_durations, first_short_entry_date, last_short_exit_date = [], [], None, None
 
888
  profit = (exit_row['Close'] / row['Close']) - 1 if pd.notna(exit_row['Close']) and pd.notna(row['Close']) and row['Close'] != 0 else np.nan
889
  long_trade_profits.append(profit)
890
 
891
+ # APPEND ALL TRADES (No Date Filter)
892
+ all_historical_trades.append({'Side': 'Long', 'Date Open': idx, 'Date Closed': exit_date, 'Start Confidence': row.get('long_confidence_score', np.nan), 'Final % P/L': profit, 'Status': 'Closed', 'Exit Reason': 'TSL' if is_tsl else ('Time' if is_time else 'Profit')})
893
+
894
  try: long_durations.append(df_indices.loc[exit_row.name] - df_indices.loc[idx])
895
  except KeyError: long_durations.append(np.nan)
896
  avg_long_profit_per_trade = np.nanmean(long_trade_profits) if long_trade_profits else 0.0
 
910
  profit = ((exit_row['Close'] / row['Close']) - 1) * -1 if pd.notna(exit_row['Close']) and pd.notna(row['Close']) and row['Close'] != 0 else np.nan
911
  short_trade_profits.append(profit)
912
 
913
+ # APPEND ALL TRADES (No Date Filter)
914
+ all_historical_trades.append({'Side': 'Short', 'Date Open': idx, 'Date Closed': exit_date, 'Start Confidence': row.get('short_confidence_score', np.nan), 'Final % P/L': profit, 'Status': 'Closed', 'Exit Reason': 'TSL' if is_tsl else ('Time' if is_time else 'Profit')})
915
+
916
  try: short_durations.append(df_indices.loc[exit_row.name] - df_indices.loc[idx])
917
  except KeyError: short_durations.append(np.nan)
918
  avg_short_profit_per_trade = np.nanmean(short_trade_profits) if short_trade_profits else 0.0
 
936
  entry_price = last_entry['Close']
937
  entry_conf = last_entry.get('long_confidence_score', np.nan)
938
  else:
 
939
  last_entry_time = pd.NaT
940
  entry_price = df['long_entry_price_static'].iloc[-1]
941
  entry_conf = np.nan
 
960
  entry_price = last_entry['Close']
961
  entry_conf = last_entry.get('short_confidence_score', np.nan)
962
  else:
 
963
  last_entry_time = pd.NaT
964
  entry_price = df['short_entry_price_static'].iloc[-1]
965
  entry_conf = np.nan
 
977
  })
978
  # ---------------------------------
979
 
980
+ # Combine: [All Historical Closed] + [Current Open]
981
+ open_trades.extend(all_historical_trades)
982
  df.sort_index(inplace=True)
983
 
984
  trade_dates = (first_long_entry_date, last_long_exit_date, first_short_entry_date, last_short_exit_date)
 
4330
  # 1. Top Cards
4331
  display_summary_analytics(st.session_state.summary_df)
4332
 
4333
+ # 2. Histogram 1: Profit Distribution
4334
  st.markdown("---")
4335
  try:
4336
  if 'generate_profit_distribution_chart' in globals():
4337
  dist_fig = generate_profit_distribution_chart(st.session_state.summary_df)
4338
+ if dist_fig: st.plotly_chart(dist_fig, use_container_width=True)
4339
+ except Exception: pass
4340
+
4341
+ # 3. Histogram 2: Trades Over Time (NEW)
4342
+ try:
4343
+ if 'generate_trades_timeline_histogram' in globals() and st.session_state.get('open_trades_df') is not None:
4344
+ timeline_fig = generate_trades_timeline_histogram(
4345
+ st.session_state.open_trades_df,
4346
+ st.session_state.start_date,
4347
+ st.session_state.end_date
4348
+ )
4349
+ if timeline_fig: st.plotly_chart(timeline_fig, use_container_width=True)
4350
+ except Exception as e: st.error(f"Chart Error: {e}")
4351
  st.markdown("---")
4352
 
4353
+ # 4. Results Per Ticker Table
4354
  st.subheader("Results per Ticker")
4355
  if st.checkbox("Only show tickers with trades", value=True):
4356
  df_to_display = st.session_state.summary_df[(st.session_state.summary_df['Num Long Trades'] > 0) | (st.session_state.summary_df['Num Short Trades'] > 0)].copy()
 
4366
  if not production_mode:
4367
  if st.button("💾 Add these settings to User-Defined List", key="save_setup_from_analysis", on_click=add_setup_to_user_list): pass
4368
 
4369
+ # 5. Open Positions Table (With Filter)
4370
  st.subheader("👨🏻‍💼 Open Positions & Recently Closed",
4371
  help="This table displays all currently ACTIVE trades, plus any trades that closed within the last 30 days.")
4372
 
4373
  if st.session_state.get('open_trades_df') is not None and not st.session_state.open_trades_df.empty:
4374
+ full_df = st.session_state.open_trades_df.copy()
4375
+
4376
+ # --- FILTER: Show 'Open' OR 'Closed in last 30 days' ---
4377
+ cutoff = pd.Timestamp.now() - pd.Timedelta(days=30)
4378
+ # Ensure date column is datetime
4379
+ full_df['Date Closed'] = pd.to_datetime(full_df['Date Closed'], errors='coerce')
4380
+
4381
+ mask_open = (full_df['Status'] == 'Open')
4382
+ mask_recent = (full_df['Status'] == 'Closed') & (full_df['Date Closed'] >= cutoff)
4383
+
4384
+ display_open_df = full_df[mask_open | mask_recent].copy()
4385
+ # -------------------------------------------------------
4386
+
4387
  display_open_df.sort_values(by=['Status', 'Date Open'], ascending=[True, False], inplace=True)
4388
 
4389
  cols_order_manual = ['Ticker', 'Status', 'Final % P/L', 'Side', 'Date Open', 'Date Closed', 'Start Confidence']
4390
  existing_cols_open = [col for col in cols_order_manual if col in display_open_df.columns]
4391
 
4392
+ if existing_cols_open and not display_open_df.empty:
4393
  st.dataframe(display_open_df[existing_cols_open].style.format({
4394
  "Final % P/L": lambda x: f"{x:.2%}" if pd.notna(x) else '-',
4395
  "Date Open": lambda x: x.strftime('%Y-%m-%d') if pd.notna(x) else '-',
4396
  "Date Closed": lambda x: x.strftime('%Y-%m-%d') if pd.notna(x) else '-',
4397
  "Start Confidence": lambda x: f"{x:.0f}%" if pd.notna(x) else '-'
4398
  }, na_rep='-'))
4399
+ else:
4400
+ st.info("No Open or Recent trades found (older trades are hidden).")
4401
  else:
4402
+ st.info("No trades found.")
4403
 
4404
  # 14. Default Message
4405
  else:
 
4432
  st.sidebar.success("Best Markov setup loaded!")
4433
  st.rerun()
4434
 
4435
def generate_trades_timeline_histogram(trades_df, start_date, end_date):
    """Build a stacked Plotly histogram of closed trades over time.

    Four series bucketed by 'Date Closed': Long winners (green), Long losers
    (red), Short winners (blue), Short losers (orange).

    Args:
        trades_df: DataFrame with 'Date Closed', 'Status', 'Side',
            'Final % P/L' columns.
        start_date / end_date: inclusive bounds for the 'Date Closed' filter.

    Returns:
        A plotly Figure, or None when there is nothing to plot.
    """
    if trades_df is None or trades_df.empty:
        return None

    df = trades_df.copy()
    # FIX: coerce 'Date Closed' to datetime before comparing — upstream only
    # converts this column on one code path, and comparing strings against
    # Timestamps raises TypeError. NaT (still-open trades) compares False and
    # is filtered out naturally.
    df['Date Closed'] = pd.to_datetime(df['Date Closed'], errors='coerce')

    # Restrict to the requested date window (inclusive).
    in_window = (df['Date Closed'] >= pd.to_datetime(start_date)) & \
                (df['Date Closed'] <= pd.to_datetime(end_date))
    df = df[in_window]

    # Only closed trades have a meaningful close date / final P&L.
    df = df[df['Status'] == 'Closed']
    # FIX: re-check emptiness after the status filter too, so an all-open
    # frame returns None instead of rendering an empty chart.
    if df.empty:
        return None

    long_wins = df[(df['Side'] == 'Long') & (df['Final % P/L'] > 0)]
    long_loss = df[(df['Side'] == 'Long') & (df['Final % P/L'] <= 0)]
    short_wins = df[(df['Side'] == 'Short') & (df['Final % P/L'] > 0)]
    short_loss = df[(df['Side'] == 'Short') & (df['Final % P/L'] <= 0)]

    fig = go.Figure()
    fig.add_trace(go.Histogram(x=long_wins['Date Closed'], name='Long Winners', marker_color='green'))
    fig.add_trace(go.Histogram(x=long_loss['Date Closed'], name='Long Losers', marker_color='red'))
    fig.add_trace(go.Histogram(x=short_wins['Date Closed'], name='Short Winners', marker_color='blue'))
    fig.add_trace(go.Histogram(x=short_loss['Date Closed'], name='Short Losers', marker_color='orange'))

    fig.update_layout(
        barmode='stack',
        title="Trades Over Time (Win/Loss Stacked)",
        xaxis_title="Date",
        yaxis_title="Number of Trades",
        height=400,
        template="plotly_white",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    )
    return fig
4473
+
4474
  if __name__ == "__main__":
4475
  main()