"""Gradio viewer comparing original and synthetic stock time series.

At import time this module loads two parquet datasets from the ``data``
directory located one level above this file's directory, then builds a
Gradio interface that plots one feature of one ticker from each dataset.
"""

import os

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd

# Number of most recent years shown in the plots. The cutoff, the figure
# title and the UI description all derive from this value so they cannot
# drift apart.
LOOKBACK_YEARS = 5

base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
orig_data_path = os.path.join(base_dir, 'data', 'orig_processed.parquet')
combined_data_path = os.path.join(base_dir, 'data', 'final_data.parquet')


def _load_dataset(path):
    """Read a parquet file, parse ``Date`` if present, sort by ticker/date."""
    df = pd.read_parquet(path)
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'])
    return df.sort_values(['Ticker', 'Date']).reset_index(drop=True)


orig_df = _load_dataset(orig_data_path)
combined_df = _load_dataset(combined_data_path)

# Only offer features that actually exist in the original dataset.
FEATURE_COLS = [
    c for c in ['Open', 'High', 'Low', 'Close', 'Volume']
    if c in orig_df.columns
]


def _rows_for_ticker(df, ticker):
    """Return the rows of *df* for *ticker*, ordered by date."""
    rows = df[df['Ticker'] == ticker]
    return rows.sort_values('Date').reset_index(drop=True)


def plot_ticker_data(ticker, feature):
    """Plot the last ``LOOKBACK_YEARS`` years of one feature for one ticker.

    The window ends at the earlier of the two datasets' latest dates for
    the ticker, so both panels cover a comparable period.

    Args:
        ticker: Value matched against the ``Ticker`` column of both datasets.
        feature: Column to plot; must be one of ``FEATURE_COLS``.

    Returns:
        A matplotlib figure with two stacked axes: the original data on top
        and the combined/synthetic data below, with matching x-axis limits.

    Raises:
        gr.Error: If the feature is unknown to either dataset, or if either
            dataset has no rows for the ticker. The output component is a
            ``gr.Plot``, which cannot display a plain string, so problems
            are reported through Gradio's error mechanism instead.
    """
    if feature not in FEATURE_COLS:
        raise gr.Error(f"Feature '{feature}' not found in dataset.")
    if feature not in combined_df.columns:
        # FEATURE_COLS is derived from the original dataset only.
        raise gr.Error(
            f"Feature '{feature}' not found in combined/synthetic dataset."
        )

    orig_data = _rows_for_ticker(orig_df, ticker)
    synth_data = _rows_for_ticker(combined_df, ticker)

    if orig_data.empty and synth_data.empty:
        raise gr.Error(f"No data found for ticker: {ticker}")
    if orig_data.empty:
        raise gr.Error(f"No original data found for {ticker}")
    if synth_data.empty:
        raise gr.Error(f"No combined/synthetic data found for {ticker}")

    latest_date = min(orig_data['Date'].max(), synth_data['Date'].max())
    cutoff_date = latest_date - pd.DateOffset(years=LOOKBACK_YEARS)

    orig_series = orig_data.loc[
        orig_data['Date'] >= cutoff_date, ['Date', feature]
    ].dropna()
    synth_series = synth_data.loc[
        synth_data['Date'] >= cutoff_date, ['Date', feature]
    ].dropna()

    fig, axes = plt.subplots(2, 1, figsize=(12, 8), sharex=False)
    fig.suptitle(
        f"{ticker} — {feature} (Last {LOOKBACK_YEARS} Years)", fontsize=14
    )

    panels = (
        (axes[0], orig_series, "Original Data"),
        (axes[1], synth_series, "Synthetic Data"),
    )
    for ax, series, title in panels:
        ax.plot(series['Date'], series[feature], linewidth=1.0, alpha=0.9)
        ax.set_title(title)
        ax.set_ylabel(feature)
        ax.grid(True)

    # Align both panels on the union of their date ranges. If every row was
    # dropped as NaN there is nothing to align, so the defaults are kept.
    all_dates = pd.concat([orig_series['Date'], synth_series['Date']])
    if not all_dates.empty:
        x_min, x_max = all_dates.min(), all_dates.max()
        for ax in axes:
            ax.set_xlim(x_min, x_max)

    fig.tight_layout(rect=[0, 0, 1, 0.96])

    # pyplot keeps a reference to every figure it creates until it is
    # closed; without this a long-running server leaks one figure per
    # request. The returned Figure object is still used for rendering.
    plt.close(fig)
    return fig


unique_tickers = sorted(orig_df['Ticker'].unique())

demo = gr.Interface(
    fn=plot_ticker_data,
    inputs=[
        gr.Dropdown(unique_tickers, label="Select Stock Ticker"),
        gr.Dropdown(
            FEATURE_COLS,
            label="Select Feature (Open/High/Low/Close/Volume)",
        ),
    ],
    outputs=gr.Plot(label="Time Series Comparison"),
    title="Real vs Synthetic Time Series Viewer",
    description=(
        f"Pick a ticker and feature to view the last {LOOKBACK_YEARS} years "
        "of data from original and synthetic datasets."
    ),
)

if __name__ == "__main__":
    demo.launch()