# Raheel Abdul Rehman
# Prod Publish
# bbf5d55
import os
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
# Project root is two directory levels above this file; both parquet inputs
# live under <root>/data.
base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
orig_data_path, combined_data_path = (
    os.path.join(base_dir, 'data', parquet_name)
    for parquet_name in ('orig_processed.parquet', 'final_data.parquet')
)

# Load the original frame first, then the combined one.
orig_df, combined_df = pd.read_parquet(orig_data_path), pd.read_parquet(combined_data_path)

# Convert the 'Date' column to datetimes in place, for frames that have one.
for df in (orig_df, combined_df):
    if 'Date' not in df.columns:
        continue
    df['Date'] = pd.to_datetime(df['Date'])

# Order both frames by ticker, then by date, and renumber the rows from zero.
orig_df, combined_df = (
    frame.sort_values(['Ticker', 'Date']).reset_index(drop=True)
    for frame in (orig_df, combined_df)
)

# Plottable columns: the subset of the listed names present in the original data.
FEATURE_COLS = [
    col
    for col in ('Open', 'High', 'Low', 'Close', 'Volume')
    if col in orig_df.columns
]
def plot_ticker_data(ticker, feature):
    """
    Plot the last five years of one feature for a ticker, original vs. synthetic.

    Rows for ``ticker`` are taken from ``orig_df`` and ``combined_df``. The
    cutoff is five years before the earlier of the two datasets' latest dates
    for that ticker; rows dated on or after the cutoff are kept. The original
    series is drawn in the top panel and the synthetic/combined series in the
    bottom panel, both with the same x-axis limits.

    Args:
        ticker: Value matched against the ``Ticker`` column of both frames.
        feature: Column to plot; must be one of ``FEATURE_COLS``.

    Returns:
        A ``matplotlib.figure.Figure`` containing the two stacked panels.

    Raises:
        gr.Error: If ``feature`` is not in ``FEATURE_COLS`` or either dataset
            has no rows for ``ticker``. The output component is a ``gr.Plot``,
            which cannot display a plain message string, so problems are
            reported through Gradio's error mechanism instead of a return
            value.
    """
    # Build the figure without pyplot: pyplot keeps every figure it creates in
    # a process-wide registry until it is explicitly closed, which leaks
    # memory in a long-running app that creates one figure per request.
    from matplotlib.figure import Figure

    # Single source of truth for the window length, used by both the cutoff
    # and the figure title so the two cannot drift apart.
    window_years = 5

    if feature not in FEATURE_COLS:
        raise gr.Error(f"Feature '{feature}' not found in dataset.")

    orig_data = orig_df[orig_df['Ticker'] == ticker].sort_values('Date').reset_index(drop=True)
    synth_data = combined_df[combined_df['Ticker'] == ticker].sort_values('Date').reset_index(drop=True)

    if orig_data.empty and synth_data.empty:
        raise gr.Error(f"No data found for ticker: {ticker}")
    if orig_data.empty:
        raise gr.Error(f"No original data found for {ticker}")
    if synth_data.empty:
        raise gr.Error(f"No combined/synthetic data found for {ticker}")

    # Anchor the window on the earlier of the two end dates so that both
    # datasets have data inside it.
    latest_date = min(orig_data['Date'].max(), synth_data['Date'].max())
    cutoff_date = latest_date - pd.DateOffset(years=window_years)

    orig_data = orig_data[orig_data['Date'] >= cutoff_date]
    synth_data = synth_data[synth_data['Date'] >= cutoff_date]

    # Drop rows where either the date or the selected feature is missing.
    orig_series = orig_data[['Date', feature]].dropna()
    synth_series = synth_data[['Date', feature]].dropna()

    fig = Figure(figsize=(12, 8))
    axes = fig.subplots(2, 1, sharex=False)
    fig.suptitle(f"{ticker} - {feature} (Last {window_years} Years)", fontsize=14)

    panels = (
        (axes[0], orig_series, "Original Data"),
        (axes[1], synth_series, "Synthetic Data"),
    )
    for ax, series, panel_title in panels:
        ax.plot(series['Date'], series[feature], linewidth=1.0, alpha=0.9)
        ax.set_title(panel_title)
        ax.set_ylabel(feature)
        ax.grid(True)

    # Give both panels the same date range. A series can be empty after
    # dropna(); pooling the dates means an empty series simply contributes
    # nothing, and the limits are left on autoscale only when there are no
    # dates at all.
    all_dates = pd.concat([orig_series['Date'], synth_series['Date']])
    if not all_dates.empty:
        min_date, max_date = all_dates.min(), all_dates.max()
        for ax in axes:
            ax.set_xlim(min_date, max_date)

    # Leave the top 4% of the canvas free for the suptitle.
    fig.tight_layout(rect=[0, 0, 1, 0.96])
    return fig
# Distinct tickers from the original dataset, in sorted order, for the dropdown.
unique_tickers = list(orig_df['Ticker'].unique())
unique_tickers.sort()

# Two dropdowns (ticker, feature) feed plot_ticker_data; its result is shown
# in a single plot component.
demo = gr.Interface(
    title="Real vs Synthetic Time Series Viewer",
    description="Pick a ticker and feature to view the last 5 years of data from original and synthetic datasets.",
    fn=plot_ticker_data,
    inputs=[
        gr.Dropdown(choices=unique_tickers, label="Select Stock Ticker"),
        gr.Dropdown(choices=FEATURE_COLS, label="Select Feature (Open/High/Low/Close/Volume)"),
    ],
    outputs=gr.Plot(label="Time Series Comparison"),
)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()