| import os | |
| import gradio as gr | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
# Project root: the parent of the directory that contains this script.
base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Both parquet inputs live under <project root>/data.
_data_dir = os.path.join(base_dir, 'data')
orig_data_path = os.path.join(_data_dir, 'orig_processed.parquet')
combined_data_path = os.path.join(_data_dir, 'final_data.parquet')

# Loaded once at import time and shared by every plotting request.
orig_df = pd.read_parquet(orig_data_path)
combined_df = pd.read_parquet(combined_data_path)

# Convert the Date column to datetimes in place, for whichever frames have one.
for df in (orig_df, combined_df):
    if 'Date' not in df.columns:
        continue
    df['Date'] = pd.to_datetime(df['Date'])

# Order each frame by ticker, then chronologically, with a fresh 0..n-1 index.
_sort_keys = ['Ticker', 'Date']
orig_df = orig_df.sort_values(by=_sort_keys).reset_index(drop=True)
combined_df = combined_df.sort_values(by=_sort_keys).reset_index(drop=True)

# Plottable features: the candidate columns that exist in the original frame,
# kept in the candidate order.
_candidate_features = ['Open', 'High', 'Low', 'Close', 'Volume']
FEATURE_COLS = [c for c in _candidate_features if c in orig_df.columns]
def plot_ticker_data(ticker, feature):
    """Plot the most recent 5 years of one feature for a single ticker.

    Two stacked line charts are drawn: the original dataset on top and the
    combined/synthetic dataset below. The 5-year window ends at the earlier
    of the two datasets' latest dates for the ticker, and both charts are
    given the same x-axis range.

    Args:
        ticker: Value matched against the ``Ticker`` column of both frames.
        feature: Name of the column to plot; must be in ``FEATURE_COLS``.

    Returns:
        A ``matplotlib.figure.Figure`` containing both subplots.

    Raises:
        gr.Error: If ``feature`` is not in ``FEATURE_COLS``, or if either
            dataset has no rows for ``ticker``. The function feeds a
            ``gr.Plot`` output, which cannot display a plain string, so
            problems are raised instead of returned.
    """
    # Function-scope import: matplotlib is already a dependency of this file.
    # Building the Figure directly (instead of via pyplot) keeps it out of
    # pyplot's global figure registry, so figures do not accumulate across
    # requests in a long-running server.
    from matplotlib.figure import Figure

    # Single source of truth for the window, so the title cannot drift from
    # the cutoff actually applied.
    window_years = 5

    if feature not in FEATURE_COLS:
        raise gr.Error(f"Feature '{feature}' not found in dataset.")

    orig_data = orig_df[orig_df['Ticker'] == ticker].sort_values('Date')
    synth_data = combined_df[combined_df['Ticker'] == ticker].sort_values('Date')

    if orig_data.empty and synth_data.empty:
        raise gr.Error(f"No data found for ticker: {ticker}")
    if orig_data.empty:
        raise gr.Error(f"No original data found for {ticker}")
    if synth_data.empty:
        raise gr.Error(f"No combined/synthetic data found for {ticker}")

    # Anchor the window on the earlier of the two end dates so both datasets
    # are cut at the same point.
    latest_date = min(orig_data['Date'].max(), synth_data['Date'].max())
    cutoff_date = latest_date - pd.DateOffset(years=window_years)

    columns = ['Date', feature]
    orig_series = orig_data.loc[orig_data['Date'] >= cutoff_date, columns].dropna()
    synth_series = synth_data.loc[synth_data['Date'] >= cutoff_date, columns].dropna()

    fig = Figure(figsize=(12, 8))
    axes = fig.subplots(2, 1, sharex=False)
    fig.suptitle(f"{ticker} — {feature} (Last {window_years} Years)", fontsize=14)

    panels = (
        (axes[0], orig_series, "Original Data"),
        (axes[1], synth_series, "Synthetic Data"),
    )
    for ax, series, panel_title in panels:
        ax.plot(series['Date'], series[feature], linewidth=1.0, alpha=0.9)
        ax.set_title(panel_title)
        ax.set_ylabel(feature)
        ax.grid(True)

    # Give both panels the same date range. A series can be empty after
    # dropna(); skip it explicitly rather than swallowing whatever error a
    # NaT limit would produce.
    date_bounds = [
        (series['Date'].min(), series['Date'].max())
        for series in (orig_series, synth_series)
        if not series.empty
    ]
    if date_bounds:
        min_date = min(lower for lower, _ in date_bounds)
        max_date = max(upper for _, upper in date_bounds)
        for ax in axes:
            ax.set_xlim(min_date, max_date)

    # Act on this figure explicitly; plt.tight_layout() would target
    # pyplot's global "current figure" instead.
    fig.tight_layout(rect=[0, 0, 1, 0.96])
    return fig
# Ticker choices are taken from the original dataset, in ascending order.
unique_tickers = list(orig_df['Ticker'].unique())
unique_tickers.sort()

# The two inputs are passed to plot_ticker_data positionally: ticker, feature.
_ticker_input = gr.Dropdown(unique_tickers, label="Select Stock Ticker")
_feature_input = gr.Dropdown(
    FEATURE_COLS,
    label="Select Feature (Open/High/Low/Close/Volume)",
)
_plot_output = gr.Plot(label="Time Series Comparison")

demo = gr.Interface(
    fn=plot_ticker_data,
    inputs=[_ticker_input, _feature_input],
    outputs=_plot_output,
    title="Real vs Synthetic Time Series Viewer",
    description="Pick a ticker and feature to view the last 5 years of data from original and synthetic datasets.",
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()