File size: 3,235 Bytes
bbf5d55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt

# Locate the two parquet inputs under <root>/data, where <root> is the parent
# of the directory containing this file.
base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
orig_data_path = os.path.join(base_dir, 'data', 'orig_processed.parquet')
combined_data_path = os.path.join(base_dir, 'data', 'final_data.parquet')

# Both datasets are loaded once at import time and shared by every request.
orig_df = pd.read_parquet(orig_data_path)
combined_df = pd.read_parquet(combined_data_path)

# Normalize 'Date' to datetime in place so date comparisons and plotting
# behave the same for both frames.
for df in [orig_df, combined_df]:
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'])

# Keep each ticker's rows contiguous and in chronological order.
orig_df = orig_df.sort_values(['Ticker', 'Date']).reset_index(drop=True)
combined_df = combined_df.sort_values(['Ticker', 'Date']).reset_index(drop=True)

# Only offer features that exist in BOTH datasets: the plotting code indexes
# each frame with the selected feature, so a column missing from either one
# would otherwise be selectable and then fail with a KeyError.
FEATURE_COLS = [
    c for c in ['Open', 'High', 'Low', 'Close', 'Volume']
    if c in orig_df.columns and c in combined_df.columns
]

# Length of the plotted window in years. The date cutoff and the figure title
# both derive from this value so they cannot drift apart.
_LOOKBACK_YEARS = 5


def plot_ticker_data(ticker, feature):
    """
    Plot the last 5 years of a feature for one ticker, original vs. synthetic.

    The window ends at the most recent date covered by both datasets for the
    ticker and reaches back ``_LOOKBACK_YEARS`` years from there. The original
    series is drawn in the top panel and the combined/synthetic series in the
    bottom panel, with both panels sharing the same x-axis range.

    Args:
        ticker: Value matched against the 'Ticker' column of both datasets.
        feature: Column to plot; must be one of ``FEATURE_COLS``.

    Returns:
        The matplotlib figure containing the two stacked panels.

    Raises:
        gr.Error: If the feature is unknown, or if either dataset has no rows
            for the ticker. The output component is a ``gr.Plot``, which
            expects a figure rather than a message string, so problems are
            reported through Gradio's error mechanism instead of being
            returned.
    """
    if feature not in FEATURE_COLS:
        raise gr.Error(f"Feature '{feature}' not found in dataset.")

    orig_data = orig_df[orig_df['Ticker'] == ticker].sort_values('Date').reset_index(drop=True)
    synth_data = combined_df[combined_df['Ticker'] == ticker].sort_values('Date').reset_index(drop=True)

    if orig_data.empty and synth_data.empty:
        raise gr.Error(f"No data found for ticker: {ticker}")
    if orig_data.empty:
        raise gr.Error(f"No original data found for {ticker}")
    if synth_data.empty:
        raise gr.Error(f"No combined/synthetic data found for {ticker}")

    # Anchor the window on the latest date that both datasets cover, so
    # neither panel extends past the other's data.
    latest_date = min(orig_data['Date'].max(), synth_data['Date'].max())
    cutoff_date = latest_date - pd.DateOffset(years=_LOOKBACK_YEARS)

    orig_series = orig_data.loc[orig_data['Date'] >= cutoff_date, ['Date', feature]].dropna()
    synth_series = synth_data.loc[synth_data['Date'] >= cutoff_date, ['Date', feature]].dropna()

    fig, axes = plt.subplots(2, 1, figsize=(12, 8), sharex=False)
    fig.suptitle(f"{ticker} - {feature} (Last {_LOOKBACK_YEARS} Years)", fontsize=14)

    panels = (
        (axes[0], orig_series, "Original Data"),
        (axes[1], synth_series, "Synthetic Data"),
    )
    for ax, series, panel_title in panels:
        ax.plot(series['Date'], series[feature], linewidth=1.0, alpha=0.9)
        ax.set_title(panel_title)
        ax.set_ylabel(feature)
        ax.grid(True)

    # Align both panels on one date range. If nothing survived the NaN
    # filtering there is no range to apply, so leave matplotlib's default
    # limits rather than passing NaT to set_xlim.
    plotted_dates = pd.concat([orig_series['Date'], synth_series['Date']])
    if not plotted_dates.empty:
        min_date = plotted_dates.min()
        max_date = plotted_dates.max()
        for ax in axes:
            ax.set_xlim(min_date, max_date)

    # Leave headroom at the top for the suptitle.
    fig.tight_layout(rect=[0, 0, 1, 0.96])

    # pyplot keeps every figure it creates until it is closed. Drop it from
    # pyplot's registry so repeated requests do not accumulate figures; the
    # Figure object itself stays valid for the caller to render.
    plt.close(fig)
    return fig

# Ticker choices come from the original dataset, in sorted order.
unique_tickers = sorted(orig_df['Ticker'].unique())

# Build the components first so the Interface call below reads as plain wiring.
_ticker_input = gr.Dropdown(unique_tickers, label="Select Stock Ticker")
_feature_input = gr.Dropdown(
    FEATURE_COLS,
    label="Select Feature (Open/High/Low/Close/Volume)",
)
_plot_output = gr.Plot(label="Time Series Comparison")

# Two dropdowns in, one plot out; plot_ticker_data receives (ticker, feature).
demo = gr.Interface(
    fn=plot_ticker_data,
    inputs=[_ticker_input, _feature_input],
    outputs=_plot_output,
    title="Real vs Synthetic Time Series Viewer",
    description="Pick a ticker and feature to view the last 5 years of data from original and synthetic datasets.",
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()