Spaces:

adnlp
/

MulTiCast

Sleeping

App Files Files Community

adnlp committed on Nov 20, 2025

Commit

e9e368b

verified ·

1 Parent(s): 0c63e28

Update app.py

Browse files

Files changed (1) hide show

app.py +492 -458

app.py CHANGED Viewed

@@ -1,459 +1,493 @@
-import os
-import gradio as gr
-import numpy as np
-import pandas as pd
-import matplotlib.pyplot as plt
-import io
-from PIL import Image
-import pickle
-import requests
-import cv2
-hf_token = {
-    "multicastcustom": os.environ["HF_MulTiCastCustom_Token"],
-    "clipqwentimer": os.environ["HF_CLIPQwenTimer_Token"],
-    "clipllamatimer": os.environ["HF_CLIPLLaMATimer_Token"],
-    "blipqwentimer": os.environ["HF_BLIPQwenTimer_Token"],
-    "blipllamatimer": os.environ["HF_BLIPLLaMATimer_Token"],
-    "clipqwenchronos": os.environ["HF_CLIPQwenChronos_Token"],
-    "clipllamachronos": os.environ["HF_CLIPLLaMAChronos_Token"],
-    "blipqwenchronos": os.environ["HF_BLIPQwenChronos_Token"],
-    "blipllamachronos": os.environ["HF_BLIPLLaMAChronos_Token"]
-}
-with open('example/inputs.pkl', 'rb') as f:
-    inputs = pickle.load(f)
-with open('example/targets.pkl', 'rb') as f:
-    targets = pickle.load(f)
-descriptions = {
-    "NN5 Daily": "Daily cash withdrawal volumes from automated teller machines (ATMs) in the United Kingdom, originally used in the NN5 forecasting competition.",
-    "Australian Electricity": "Half-hourly electricity demand data across five Australian states.",
-    "CIF 2016": "Monthly banking time series used in the CIF 2016 forecasting challenge, reflecting customer financial behaviours.",
-    "Tourism Monthly": "Monthly tourism-related time series used in the Kaggle Tourism forecasting competition, covering various regions and visitor types.",
-    "Custom": "Custom Dataset"
-}
-context_length = {
-    "NN5 Daily": 56,
-    "Australian Electricity": 48,
-    "CIF 2016": 12,
-    "Tourism Monthly": 24
-}
-def selected_dataset(dataset):
-    if dataset == "Custom":
-        gallery_items = []
-    else:
-        gallery_items = [(Image.open(f'example/img/{dataset.replace(" ", "_")}/{i}.png').convert('RGB'), str(i+1)) for i in range(3)]
-    gallery_items.append((Image.open('example/img/custom.png').convert('RGB'), 'Custom Input'))
-    return gr.Gallery(gallery_items, interactive=False, height="350px", object_fit="contain", preview=True), gr.Textbox(value=descriptions[dataset], label="Dataset Description", interactive=False)
-def selected_example(gallery, evt: gr.SelectData):
-    if evt.index == len(gallery) -1:
-        return -1
-    else:
-        return evt.index
-def update_guide_markdown(dataset, example_index):
-    if example_index is None:
-        return gr.Markdown(visible=False), gr.File(visible=False)
-    elif dataset == "Custom":
-        return gr.Markdown(visible=False), gr.File(visible=False)
-    elif example_index == -1: # Custom Input
-        return (
-            gr.Markdown(
-                value=f"To use custom input, please use the sample csv file below. Do not change the name of columns. Only the first {context_length[dataset]} values will be used as input time series.",
-                visible=True
-            ),
-            gr.File(value="example/sample.csv", label="Sample CSV File", visible=True)
-        )
-    else:
-        df = inputs[dataset][example_index]
-        min = df.min()
-        max = df.max()
-        min_timestamp = pd.Series(min["Timestamp"]).to_string(index=False)
-        max_timestamp = pd.Series(max["Timestamp"]).to_string(index=False)
-        min_value = min["Value"]
-        max_value = max["Value"]
-        return (
-            gr.Markdown(
-                value=f"This time series contains values from {min_timestamp} to {max_timestamp}, with a minimum value of {min_value:.4f} and a maximum value of {max_value:.4f}.",
-                visible=True
-            ),
-            gr.File(visible=False)
-        )
-def update_time_series_dataframe(dataset, example_index):
-    if example_index is None:
-        return None, None
-    elif example_index == -1: # Custom Input
-        return gr.File(label="Time Series CSV File", file_types=[".csv"], visible=True), gr.Dataframe(value=None, visible=False)
-    elif dataset == "Custom":
-        return None, None
-    else:
-        df = inputs[dataset][example_index]
-        return gr.File(value=None, visible=False), gr.Dataframe(value=df, label="Time Series Input", interactive=False, visible=True)
-def load_csv(example_index, file):
-    if example_index == -1:
-        if file is not None:
-            return gr.Dataframe(value=pd.read_csv(file.name), visible=True)
-        else:
-            return gr.Dataframe(value=None, visible=False)
-    else:
-        return gr.skip()
-def vision_attention_rollout(attentions, start_layer=0, end_layer=12):
-    seq_len = attentions.shape[-1]
-    result = np.eye(seq_len)
-    for attn in attentions[start_layer:end_layer]:
-        attn_heads = attn.mean(axis=0)
-        attn_aug = attn_heads + np.eye(seq_len)
-        attn_aug = attn_aug / attn_aug.sum(axis=-1, keepdims=True)
-        result = attn_aug @ result
-    return result[0, -49:]
-def plot_vision_heatmap(image, rollout_attention, alpha=0.5, cmap='jet'):
-    num_patches = rollout_attention.shape[0]
-    grid_size = int(np.sqrt(num_patches))
-    attn_grid = rollout_attention.reshape(grid_size, grid_size)
-    H, W = image.shape[:2]
-    attn_map = cv2.resize(attn_grid, (W, H), interpolation=cv2.INTER_CUBIC)
-    attn_map = attn_map / attn_map.max()
-    plt.figure(figsize=(6,6))
-    plt.imshow(image)
-    plt.imshow(attn_map, cmap=cmap, alpha=alpha)
-    plt.axis('off')
-    buf = io.BytesIO()
-    plt.savefig(buf, format='png')
-    buf.seek(0)
-    plot_img = Image.open(buf).convert('RGB')
-    plt.clf()
-    return plot_img
-def time_series_attention_sum(attentions, context_length, start_layer=0, end_layer=12):
-    import math
-    seq_len = attentions.shape[-1]
-    result = np.zeros(seq_len)
-    for attn in attentions[start_layer:end_layer]:
-        attn_heads = attn.mean(0).squeeze()
-        result += attn_heads
-    att_len = math.ceil(context_length/16)
-    return result[-att_len:]
-def plot_time_series_heatmap(context, attention, time_steps):
-    plt.figure(figsize=(8, 4))
-    plt.plot(context, color="black", linewidth=2)
-    attention = attention/attention.max()
-    cmap = plt.get_cmap("coolwarm")
-    for i, v in enumerate(attention):
-        start = i * 16
-        end = min((i + 1) * 16, time_steps-1)
-        color = cmap(v)[:-1] + (v,)
-        plt.axvspan(start, end, color=color)
-    buf = io.BytesIO()
-    plt.savefig(buf, format='png')
-    buf.seek(0)
-    plot_img = Image.open(buf).convert('RGB')
-    plt.clf()
-    return plot_img
-def predict(dataset, text, example_index, file, vision_encoder, text_encoder, tsfm, model_id):
-    if tsfm == "Custom" and model_id == "":
-        return (
-            gr.Markdown(
-                value=f"Please enter the hugging face model repo id.",
-                visible=True
-            ),
-            None,
-            None,
-            None,
-            None
-        )
-    if (dataset is None or example_index is None) or (example_index == -1 and file is None):
-        return (
-            gr.Markdown(
-                value=f"Please Select Example or Provide CSV File.",
-                visible=True
-            ),
-            None,
-            None,
-            None,
-            None
-        )
-    elif (vision_encoder is None or text_encoder is None or tsfm is None):
-        return (
-            gr.Markdown(
-                value=f"Please Select Pretrained Model For UniCast.",
-                visible=True
-            ),
-            None,
-            None,
-            None,
-            None
-        )
-    else:
-        pass
-    if example_index == -1:
-        df = pd.read_csv(file.name)
-        df = df.iloc[:context_length[dataset]]
-    else:
-        df = inputs[dataset][example_index]
-    time_series = np.array(df["Value"])
-    mean = np.mean(time_series)
-    std = np.std(time_series)
-    time_series_normalized = (time_series-mean)/std
-    text = None if text == '' else text
-    unicast_model = f"{vision_encoder.lower()}{text_encoder.lower()}{tsfm.lower()}"
-    if tsfm == "Custom":
-        url = f"https://adnlp-multicast-custom.hf.space/predict"
-        headers = {"Authorization": f"Bearer {hf_token['multicastcustom']}"}
-        payload = {
-            "repo_id": model_id,
-            "dataset": dataset,
-            "context": time_series_normalized.tolist(),
-            "text": text
-        }
-    else:
-        url = f"https://adnlp-unicast-{unicast_model}.hf.space/predict"
-        headers = {"Authorization": f"Bearer {hf_token[unicast_model]}"}
-        payload = {
-            "dataset": dataset,
-            "context": time_series_normalized.tolist(),
-            "text": text
-        }
-    res = requests.post(url, headers=headers, json=payload)
-    res_json = res.json()
-    # Forecast Plot
-    prediction = np.array(res_json['prediction'])
-    cl = context_length[dataset]
-    prediction = prediction[:cl]
-    prediction = prediction*std+mean
-    input_dates_series = pd.to_datetime(df["Timestamp"])
-    time_diff = input_dates_series.diff().mode()[0]
-    start_time = input_dates_series.iloc[-1] + time_diff
-    forecast_dates_series = pd.date_range(start=start_time, periods=len(input_dates_series), freq=time_diff)
-    plt.close()
-    with plt.style.context("seaborn-v0_8"):
-        fig, ax = plt.subplots(figsize=(10,4))
-        ax.plot(input_dates_series, time_series, color="black", alpha=0.7, linewidth=3, label='Input')
-        ax.plot(forecast_dates_series, prediction, color='C2', alpha=0.7, linewidth=3, label='Forecast')
-        if example_index == -1: # Custom Input
-            true = df["Ground Truth"]
-        else:
-            true = targets[dataset][example_index].iloc[:, -1]
-        if len(true) == context_length[dataset]:
-            ax.plot(forecast_dates_series, true, color='C0', alpha=0.7, linewidth=3, label='Ground Truth')
-        ax.legend()
-    # Vision Heatmap
-    plt.figure(figsize=(384/100, 384/100), dpi=100)
-    plt.plot(time_series_normalized, color="black", linestyle="-", linewidth=1, marker="*", markersize=1)
-    plt.xticks([])
-    plt.yticks([])
-    plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
-    plt.margins(0,0)
-    buf = io.BytesIO()
-    plt.savefig(buf, format='png')
-    buf.seek(0)
-    context_image = np.array(Image.open(buf).convert('RGB'))
-    vision_attentions = np.array(res_json['vision_attentions'])
-    vision_heatmap_gallery_items = []
-    for i in range(0, 12, 3):
-        vis_attn = vision_attention_rollout(vision_attentions, i, i+3)
-        vision_heatmap = plot_vision_heatmap(context_image, vis_attn)
-        vision_heatmap_gallery_items.append((vision_heatmap, f"Heatmap from Layer{i}:{i+3}"))
-    # Time Series Heatmap
-    if tsfm == "Chronos":
-        time_series_attentions = np.array(res_json['time_series_attentions'])
-        time_series_heatmap_gallery_items = []
-        for i in range(0, 12, 3):
-            ts_attn = time_series_attention_sum(time_series_attentions, cl, i, i+3)
-            time_series_heatmap = plot_time_series_heatmap(time_series, ts_attn, cl)
-            time_series_heatmap_gallery_items.append((time_series_heatmap, f"Heatmap from Layer{i}:{i+3}"))
-    else:
-        time_series_heatmap_gallery_items = None
-    return (
-        gr.Markdown(visible=False),
-        fig,
-        gr.Markdown("# Attention Map", visible=True),
-        gr.Gallery(vision_heatmap_gallery_items, interactive=False, height="350px", object_fit="contain", visible=True),
-        gr.Gallery(time_series_heatmap_gallery_items, interactive=False, height="350px", object_fit="contain", visible=True if time_series_heatmap_gallery_items else False)
-    )
-def add_example_gallery(dataset, gallery, example_index, file):
-    if example_index == -1 and file:
-        df = pd.read_csv(file.name)
-        custom_input = df[["Timestamp", "Value"]]
-        custom_target = df[["Timestamp", "Ground Truth"]]
-        plt.style.use("seaborn-v0_8")
-        ax = custom_input.plot(x="Timestamp", color="black", linewidth=3, legend=False, x_compat=True)
-        ax.set_xlabel("")
-        # ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d %H:%M"))
-        buf = io.BytesIO()
-        plt.savefig(buf, format='png')
-        buf.seek(0)
-        plot_img = Image.open(buf).convert('RGB')
-        plt.clf()
-        gallery.insert(-1, (plot_img, f"Custom {len(gallery)-3}"))
-        inputs[dataset].append(custom_input)
-        targets[dataset].append(custom_target)
-    return gallery
-def on_model_selection(selected):
-    return gr.update(visible=selected=="Custom")
-custom_css = """
-.two-col { display:flex; align-items:flex-end; gap: 16px; }
-.right-col { display:flex; flex-direction:column; } /* optional */
-.push-down { margin-top:auto; }                     /* optional */
-.footer-fixed{
-  position: fixed; left:0; right:0; bottom:0;
-  font-size: 16px;
-  padding: 10px 16px; border-top: 1px solid var(--border-color);
-  background: var(--background-fill-primary); z-index: 1000;
-  display: flex; justify-content: flex-end; align-items: center;  /* right align */
-}
-.blue-btn {
-  background-color: #024397 !important; /* Bootstrap-style blue */
-  color: white !important;
-  border-radius: 8px !important;
-  border: none !important;
-  padding: 8px 16px !important;
-  font-weight: 600;
-}
-.blue-btn:hover {
-  background-color: #0056b3 !important; /* Darker blue on hover */
-}
-.app-description{
-  font-size: 16px;
-}
-"""
-with gr.Blocks(css=custom_css) as demo:
-    gr.HTML("""
-    <style>
-        #logo {
-            display: flex;
-            justify-content: flex-start;
-        }
-        .gallery-container .grid-container {
-            display: flex !important;
-        }
-    </style>
-    """)
-    gr.Image(
-        value="logo.png",
-        show_label=False,
-        show_download_button=False,
-        show_fullscreen_button=False,
-        show_share_button=False,
-        interactive=False,
-        height=128,
-        container=False,
-        elem_id="logo"
-    )
-    with gr.Row(elem_classes=["two-col"]):
-        with gr.Column(scale=2):
-            gr.Markdown("<b>MulTiCast</b>, based on <a href='https://github.com/adlnlp/unicast'><b>UniCast</b></a>, is designed as a web-based system that allows users to perform multimodal time-series forecasting without technical setup. The system integrates a numerical time-series forecasting backbone with vision and text encoders. It exposes these capabilities through a lightweight but fully interactive web interface hosted on Hugging Face Spaces. Its design focuses on lowering the barrier to entry while ensuring interpretability through attention-based visualizations. The project and demo are sponsored by <a href='https://research.google/'>Google Research</a>", elem_classes=["app-description"])
-            with gr.Row():
-                with gr.Column(scale=1):
-                    gr.Markdown("# Choose Dataset")
-                    dataset_choices = ["NN5 Daily", "Australian Electricity", "Custom"]
-                    dataset_dropdown = gr.Dropdown(dataset_choices, value=None, label="Datasets", interactive=True)
-                    dataset_description_textbox = gr.Textbox(label="Dataset Description", interactive=False)
-                with gr.Column(scale=3):
-                    gr.Markdown("# Data Selection")
-                    example_gallery = gr.Gallery(
-                        None,
-                        interactive=False
-                    )
-                    example_index = gr.State(value=None)
-                    example_gallery.select(selected_example, inputs=example_gallery, outputs=example_index)
-                    guide_text_markdown = gr.Markdown(visible=False)
-                    sample_csv_file = gr.File(visible=False)
-            with gr.Row(elem_classes=["two-col"]):
-                with gr.Column(scale=3):
-                    gr.Markdown("# Data Viewer")
-                    time_series_file = gr.File(value=None, visible=False)
-                    time_series_dataframe = gr.Dataframe(visible=False)
-                    dataset_dropdown.change(selected_dataset, inputs=dataset_dropdown, outputs=[example_gallery, dataset_description_textbox])
-                    dataset_dropdown.change(update_guide_markdown, inputs=[dataset_dropdown, example_index], outputs=[guide_text_markdown, sample_csv_file])
-                    dataset_dropdown.change(update_time_series_dataframe, inputs=[dataset_dropdown, example_index], outputs=[time_series_file, time_series_dataframe])
-                    example_index.change(update_guide_markdown, inputs=[dataset_dropdown, example_index], outputs=[guide_text_markdown, sample_csv_file])
-                    example_index.change(update_time_series_dataframe, inputs=[dataset_dropdown, example_index], outputs=[time_series_file, time_series_dataframe])
-                    time_series_file.change(load_csv, inputs=[example_index, time_series_file], outputs=time_series_dataframe)
-                with gr.Column(scale=1):
-                    gr.Markdown("# Model Selection")
-                    model_choices = ["Timer", "Chronos", "Custom"]
-                    tsfm_radio = gr.Radio(model_choices, label="Time Series Foundation Model")
-                    md_choices = gr.State(model_choices)
-                    model_id_box = gr.Textbox(placeholder="Type and Enter…", label="HF Model ID", interactive=True, visible=False)
-                    # model_token_box = gr.Textbox(placeholder="Type and Enter…", label="HF Model Token", interactive=True, visible=False)
-                    vision_encoder_radio = gr.Radio(["CLIP", "BLIP"], label="Vision Encoder")
-                    text_encoder_radio = gr.Radio(["Qwen", "LLaMA"], label="Text Encoder")
-                    warning_markdown = gr.Markdown(visible=False)
-                    btn = gr.Button("Run", elem_classes="blue-btn")
-                    tsfm_radio.change(on_model_selection, [tsfm_radio], model_id_box)
-                    # tsfm_radio.change(on_model_selection, [tsfm_radio], model_token_box)
-    with gr.Row():
-        with gr.Column(scale=2):
-            gr.Markdown("# Prediction")
-            forecast_plot = gr.Plot(label="Forecast", format="png")
-            heatmap_header_html = gr.Markdown("# Attention Map", visible=False)
-            vision_heatmap_gallery = gr.Gallery(visible=False)
-            time_series_heatmap_gallery = gr.Gallery(visible=False)
-        btn.click(predict, inputs=[dataset_dropdown, dataset_description_textbox, example_index, time_series_file, vision_encoder_radio, text_encoder_radio, tsfm_radio, model_id_box], outputs=[warning_markdown, forecast_plot, heatmap_header_html, vision_heatmap_gallery, time_series_heatmap_gallery])
-        btn.click(add_example_gallery, inputs=[dataset_dropdown, example_gallery, example_index, time_series_file], outputs=[example_gallery])
-    gr.HTML("<small>This work is sponsored by Google Research</small>", elem_classes=["footer-fixed"])
-if __name__ == "__main__":
     demo.launch(ssr_mode=False)

+import os
+import gradio as gr
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import io
+from PIL import Image
+import pickle
+import requests
+import cv2
+# Access tokens for the prediction backends.  Each key is the lowercase form of
+# the name embedded in the environment variable; predict() looks tokens up with
+# the lowercase "<vision><text><tsfm>" string (or 'multicastcustom').  A missing
+# variable raises KeyError while the module is being imported.
+hf_token = {
+    backend.lower(): os.environ[f"HF_{backend}_Token"]
+    for backend in (
+        "MulTiCastCustom",
+        "CLIPQwenTimer",
+        "CLIPLLaMATimer",
+        "BLIPQwenTimer",
+        "BLIPLLaMATimer",
+        "CLIPQwenChronos",
+        "CLIPLLaMAChronos",
+        "BLIPQwenChronos",
+        "BLIPLLaMAChronos",
+    )
+}
+# Example inputs/targets, indexed as inputs[dataset][example_index].
+# NOTE(review): pickle executes code on load; presumably these files are bundled
+# with the app and never user-supplied -- confirm.
+with open('example/inputs.pkl', 'rb') as f:
+    inputs = pickle.load(f)
+with open('example/targets.pkl', 'rb') as f:
+    targets = pickle.load(f)
+# Text shown in the "Dataset Description" textbox for each dropdown choice.
+descriptions = {
+    "NN5 Daily": "Daily cash withdrawal volumes from automated teller machines (ATMs) in the United Kingdom, originally used in the NN5 forecasting competition.",
+    "Australian Electricity": "Half-hourly electricity demand data across five Australian states.",
+    "CIF 2016": "Monthly banking time series used in the CIF 2016 forecasting challenge, reflecting customer financial behaviours.",
+    "Tourism Monthly": "Monthly tourism-related time series used in the Kaggle Tourism forecasting competition, covering various regions and visitor types.",
+    "Custom": "Custom Dataset"
+}
+# Number of leading rows kept from a custom CSV and number of forecast steps
+# kept from the backend response, per dataset.  There is no "Custom" entry.
+context_length = {
+    "NN5 Daily": 56,
+    "Australian Electricity": 48,
+    "CIF 2016": 12,
+    "Tourism Monthly": 24
+}
+def selected_dataset(dataset):
+    """Build the example gallery and description textbox for ``dataset``.
+
+    Every dataset except "Custom" contributes three example images
+    (``0.png``..``2.png`` captioned "1".."3"); a final "Custom Input" tile is
+    always appended last.
+    """
+    def _load_rgb(path):
+        return Image.open(path).convert('RGB')
+    gallery_items = []
+    if dataset != "Custom":
+        for i in range(3):
+            example_path = f'example/img/{dataset.replace(" ", "_")}/{i}.png'
+            gallery_items.append((_load_rgb(example_path), str(i+1)))
+    gallery_items.append((_load_rgb('example/img/custom.png'), 'Custom Input'))
+    gallery = gr.Gallery(gallery_items, interactive=False, height="350px", object_fit="contain", preview=True)
+    description = gr.Textbox(value=descriptions[dataset], label="Dataset Description", interactive=False)
+    return gallery, description
+def selected_example(gallery, evt: gr.SelectData):
+    """Translate a gallery selection into an example index.
+
+    The last gallery tile yields the sentinel ``-1`` (checked elsewhere as the
+    "Custom Input" case); any other tile yields its own position.
+    """
+    last_position = len(gallery) - 1
+    return -1 if evt.index == last_position else evt.index
+def update_guide_markdown(dataset, example_index):
+    """Return updates for the guide text and the sample-CSV download.
+
+    Both components are hidden when nothing is selected or the dataset is
+    "Custom".  For the custom-input tile (``-1``) the upload instructions and
+    the sample CSV are shown; for a bundled example a one-line summary of its
+    timestamp and value range is shown.
+    """
+    if example_index is None or dataset == "Custom":
+        return gr.Markdown(visible=False), gr.File(visible=False)
+    if example_index == -1:  # Custom Input
+        guide = gr.Markdown(
+            value=f"To use custom input, please use the sample csv file below. Do not change the name of columns. Only the first {context_length[dataset]} values will be used as input time series.",
+            visible=True
+        )
+        sample = gr.File(value="example/sample.csv", label="Sample CSV File", visible=True)
+        return guide, sample
+    example_df = inputs[dataset][example_index]
+    # Column-wise extremes; named so the min/max builtins are not shadowed.
+    column_minima = example_df.min()
+    column_maxima = example_df.max()
+    first_stamp = pd.Series(column_minima["Timestamp"]).to_string(index=False)
+    last_stamp = pd.Series(column_maxima["Timestamp"]).to_string(index=False)
+    lowest = column_minima["Value"]
+    highest = column_maxima["Value"]
+    summary = gr.Markdown(
+        value=f"This time series contains values from {first_stamp} to {last_stamp}, with a minimum value of {lowest:.4f} and a maximum value of {highest:.4f}.",
+        visible=True
+    )
+    return summary, gr.File(visible=False)
+def update_time_series_dataframe(dataset, example_index):
+    """Return updates for the CSV upload widget and the data table.
+
+    Nothing selected, or a bundled index under the "Custom" dataset, yields
+    ``(None, None)``.  The custom-input tile (``-1``) shows the upload widget
+    and hides the table; a bundled example hides the upload widget and shows
+    the example's dataframe.
+    """
+    if example_index is None:
+        return None, None
+    if example_index == -1:  # Custom Input
+        upload = gr.File(label="Time Series CSV File", file_types=[".csv"], visible=True)
+        hidden_table = gr.Dataframe(value=None, visible=False)
+        return upload, hidden_table
+    if dataset == "Custom":
+        return None, None
+    example_df = inputs[dataset][example_index]
+    hidden_upload = gr.File(value=None, visible=False)
+    table = gr.Dataframe(value=example_df, label="Time Series Input", interactive=False, visible=True)
+    return hidden_upload, table
+def load_csv(example_index, file):
+    """Show an uploaded CSV in the data table while custom input is selected.
+
+    For any selection other than the custom-input tile the table is left
+    untouched (``gr.skip()``); with custom input and no file it is hidden.
+    """
+    if example_index != -1:
+        return gr.skip()
+    if file is None:
+        return gr.Dataframe(value=None, visible=False)
+    uploaded = pd.read_csv(file.name)
+    return gr.Dataframe(value=uploaded, visible=True)
+def vision_attention_rollout(attentions, start_layer=0, end_layer=12):
+    """Multiply head-averaged attention matrices over a range of layers.
+
+    For each entry of ``attentions[start_layer:end_layer]`` the first axis is
+    averaged, the identity is added, rows are normalised to sum to one, and the
+    result is left-multiplied onto the running product.
+    # assumes attentions is (layers, heads, seq, seq) -- TODO confirm
+
+    Returns row 0 of the product restricted to its last 49 columns (all
+    columns when there are fewer than 49).
+    """
+    n_tokens = attentions.shape[-1]
+    identity = np.eye(n_tokens)
+    rollout = identity
+    for layer_attn in attentions[start_layer:end_layer]:
+        fused = layer_attn.mean(axis=0) + identity
+        fused = fused / fused.sum(axis=-1, keepdims=True)
+        rollout = fused @ rollout
+    return rollout[0, -49:]
+def plot_vision_heatmap(image, rollout_attention, alpha=0.5, cmap='jet'):
+    """Overlay a patch-attention heatmap on ``image`` and return it as a PIL image.
+
+    ``rollout_attention`` is a 1-D array whose length is reshaped into a square
+    grid, resized to the image's height and width with cubic interpolation,
+    scaled by its maximum and drawn semi-transparently over ``image``.
+
+    Args:
+        image: array indexed as (height, width, ...).
+        rollout_attention: 1-D attention weights, one per patch.
+        alpha: opacity of the heatmap layer.
+        cmap: matplotlib colormap name for the heatmap.
+
+    Returns:
+        The rendered figure as an RGB ``PIL.Image``.
+    """
+    num_patches = rollout_attention.shape[0]
+    grid_size = int(np.sqrt(num_patches))
+    attn_grid = rollout_attention.reshape(grid_size, grid_size)
+    H, W = image.shape[:2]
+    attn_map = cv2.resize(attn_grid, (W, H), interpolation=cv2.INTER_CUBIC)
+    # Skip the scaling for an all-zero map instead of dividing by zero (NaN).
+    peak = attn_map.max()
+    if peak > 0:
+        attn_map = attn_map / peak
+    fig = plt.figure(figsize=(6,6))
+    try:
+        plt.imshow(image)
+        plt.imshow(attn_map, cmap=cmap, alpha=alpha)
+        plt.axis('off')
+        buf = io.BytesIO()
+        fig.savefig(buf, format='png')
+        buf.seek(0)
+        return Image.open(buf).convert('RGB')
+    finally:
+        # pyplot keeps every figure alive until it is closed; clearing it with
+        # clf() alone leaves one figure behind per call.
+        plt.close(fig)
+def color_token(token, weight):
+    """Wrap ``token`` in a ``<span>`` whose background encodes ``weight``.
+
+    The colour comes from matplotlib's ``coolwarm`` colormap evaluated at
+    ``weight``; its RGB channels are scaled to 0-255 and used with a fixed
+    opacity of 0.6.  The token text is HTML-escaped so characters such as
+    ``<`` or ``&`` in a token cannot break the surrounding markup.
+    """
+    import html
+    cmap = plt.cm.coolwarm
+    color = np.array(cmap(weight))[:3] * 255
+    safe_token = html.escape(str(token))
+    return f'<span style="background-color: rgba({color[0]}, {color[1]}, {color[2]}, 0.6); padding:2px; border-radius:3px;">{safe_token}</span>'
+# Tokenizer control tokens that are dropped from the rendered text heatmap.
+_SPECIAL_TEXT_TOKENS = frozenset(("<|im_end|>", "<|endoftext|>", "<|im_start|>", "<s>", "</s>", "<unk>"))
+def to_html(attention, clean_text_tokens):
+    """Render tokens as colour-coded HTML spans weighted by attention.
+
+    ``attention`` is averaged over its first axis and scaled by its maximum,
+    then paired position-by-position with ``clean_text_tokens``; control tokens
+    are skipped and the remaining spans are joined with single spaces.
+    # assumes attention is (layers, tokens) -- TODO confirm against the backend
+
+    Returns:
+        The HTML string, one ``color_token`` span per kept token.
+    """
+    ca = np.array(attention).mean(axis=0)
+    # An all-zero (or NaN) maximum would turn every weight into NaN; fall back
+    # to zero weights instead.
+    peak = ca.max()
+    weights = ca / peak if peak > 0 else np.zeros(ca.shape)
+    return " ".join(
+        color_token(tok, w) for tok, w in zip(clean_text_tokens, weights)
+        if tok not in _SPECIAL_TEXT_TOKENS
+    )
+def time_series_attention_sum(attentions, context_length, start_layer=0, end_layer=12):
+    """Sum head-averaged attention over a range of layers.
+
+    Each entry of ``attentions[start_layer:end_layer]`` is averaged over its
+    first axis, squeezed and accumulated.  Only the trailing
+    ``ceil(context_length / 16)`` positions of the total are returned.
+    # presumably one position per 16-step patch of the context -- TODO confirm
+    """
+    import math
+    total = np.zeros(attentions.shape[-1])
+    for layer_attn in attentions[start_layer:end_layer]:
+        total += layer_attn.mean(0).squeeze()
+    kept_positions = math.ceil(context_length/16)
+    return total[-kept_positions:]
+def plot_time_series_heatmap(context, attention, time_steps):
+    """Plot ``context`` with attention-shaded spans and return a PIL image.
+
+    ``attention`` is scaled by its maximum; its i-th value shades the x-range
+    from ``16 * i`` to ``min(16 * (i + 1), time_steps - 1)`` using the
+    ``coolwarm`` colormap, with the value itself as the span's opacity.
+
+    Args:
+        context: the series drawn as a black line.
+        attention: 1-D array of weights, one per 16-step span.
+        time_steps: length used to cap the right edge of the last span.
+
+    Returns:
+        The rendered figure as an RGB ``PIL.Image``.
+    """
+    fig = plt.figure(figsize=(8, 4))
+    try:
+        plt.plot(context, color="black", linewidth=2)
+        # Skip the scaling for all-zero attention instead of dividing by zero.
+        peak = attention.max()
+        if peak > 0:
+            attention = attention/peak
+        cmap = plt.get_cmap("coolwarm")
+        for i, v in enumerate(attention):
+            start = i * 16
+            end = min((i + 1) * 16, time_steps-1)
+            # Replace the colormap's alpha channel with the weight itself.
+            color = cmap(v)[:-1] + (v,)
+            plt.axvspan(start, end, color=color)
+        buf = io.BytesIO()
+        fig.savefig(buf, format='png')
+        buf.seek(0)
+        return Image.open(buf).convert('RGB')
+    finally:
+        # pyplot keeps every figure alive until it is closed; clf() alone
+        # leaves one figure behind per call.
+        plt.close(fig)
+def _predict_warning(message):
+    """Return predict()'s output tuple with only the warning message visible."""
+    # One slot per value of predict()'s success return: warning, forecast plot,
+    # heatmap header, vision gallery, text-heatmap HTML, time-series gallery.
+    # NOTE(review): confirm the click handler's outputs list has these six
+    # components; the wiring is outside this view.
+    return (gr.Markdown(value=message, visible=True), None, None, None, None, None)
+def predict(dataset, text, example_index, file, vision_encoder, text_encoder, tsfm, model_id):
+    """Request a forecast from the selected backend and build all result views.
+
+    Args:
+        dataset: dataset name; indexes ``inputs``, ``targets`` and ``context_length``.
+        text: optional text sent with the request; ``''`` is sent as ``None``.
+        example_index: bundled example index, or ``-1`` for an uploaded CSV.
+        file: uploaded CSV (read through ``file.name``) when ``example_index`` is ``-1``.
+        vision_encoder: "CLIP" or "BLIP" as offered by the UI.
+        text_encoder: "Qwen" or "LLaMA" as offered by the UI.
+        tsfm: "Timer", "Chronos" or "Custom".
+        model_id: Hugging Face repo id, required when ``tsfm`` is "Custom".
+
+    Returns:
+        A 6-tuple: warning markdown, forecast figure, heatmap header, vision
+        heatmap gallery, text heatmap HTML, time-series heatmap gallery.  When
+        validation fails only the warning is populated.
+    """
+    if tsfm == "Custom" and model_id == "":
+        return _predict_warning("Please enter the hugging face model repo id.")
+    if (dataset is None or example_index is None) or (example_index == -1 and file is None):
+        return _predict_warning("Please Select Example or Provide CSV File.")
+    if vision_encoder is None or text_encoder is None or tsfm is None:
+        return _predict_warning("Please Select Pretrained Model For UniCast.")
+    # NOTE(review): context_length has no "Custom" entry, so the lookups below
+    # raise KeyError for the "Custom" dataset -- confirm the intended length.
+    if example_index == -1:
+        df = pd.read_csv(file.name)
+        df = df.iloc[:context_length[dataset]]
+    else:
+        df = inputs[dataset][example_index]
+    time_series = np.array(df["Value"])
+    mean = np.mean(time_series)
+    std = np.std(time_series)
+    if std == 0:
+        # A constant series would otherwise normalise to NaN, which cannot be
+        # sent as JSON; with std 1 it normalises to zeros and de-normalises
+        # by adding the mean back.
+        std = 1.0
+    time_series_normalized = (time_series-mean)/std
+    text = None if text == '' else text
+    unicast_model = f"{vision_encoder.lower()}{text_encoder.lower()}{tsfm.lower()}"
+    if tsfm == "Custom":
+        url = "https://adnlp-multicast-custom.hf.space/predict"
+        headers = {"Authorization": f"Bearer {hf_token['multicastcustom']}"}
+        payload = {
+            "repo_id": model_id,
+            "dataset": dataset,
+            "context": time_series_normalized.tolist(),
+            "text": text
+        }
+    else:
+        url = f"https://adnlp-unicast-{unicast_model}.hf.space/predict"
+        headers = {"Authorization": f"Bearer {hf_token[unicast_model]}"}
+        payload = {
+            "dataset": dataset,
+            "context": time_series_normalized.tolist(),
+            "text": text
+        }
+    res = requests.post(url, headers=headers, json=payload)
+    res_json = res.json()
+    # Forecast Plot
+    prediction = np.array(res_json['prediction'])
+    cl = context_length[dataset]
+    prediction = prediction[:cl]
+    # Undo the normalisation applied to the request context.
+    prediction = prediction*std+mean
+    input_dates_series = pd.to_datetime(df["Timestamp"])
+    # The most common gap between timestamps is used as the forecast step.
+    time_diff = input_dates_series.diff().mode()[0]
+    start_time = input_dates_series.iloc[-1] + time_diff
+    forecast_dates_series = pd.date_range(start=start_time, periods=len(input_dates_series), freq=time_diff)
+    plt.close()
+    with plt.style.context("seaborn-v0_8"):
+        fig, ax = plt.subplots(figsize=(10,4))
+        ax.plot(input_dates_series, time_series, color="black", alpha=0.7, linewidth=3, label='Input')
+        ax.plot(forecast_dates_series, prediction, color='C2', alpha=0.7, linewidth=3, label='Forecast')
+        if example_index == -1: # Custom Input
+            true = df["Ground Truth"]
+        else:
+            true = targets[dataset][example_index].iloc[:, -1]
+        # Ground truth is drawn only when it covers exactly the forecast window.
+        if len(true) == context_length[dataset]:
+            ax.plot(forecast_dates_series, true, color='C0', alpha=0.7, linewidth=3, label='Ground Truth')
+        ax.legend()
+    # Vision Heatmap: render the normalised series as a borderless 384x384
+    # image that serves as the background of the attention overlays.
+    context_fig = plt.figure(figsize=(384/100, 384/100), dpi=100)
+    try:
+        plt.plot(time_series_normalized, color="black", linestyle="-", linewidth=1, marker="*", markersize=1)
+        plt.xticks([])
+        plt.yticks([])
+        plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
+        plt.margins(0,0)
+        buf = io.BytesIO()
+        context_fig.savefig(buf, format='png')
+        buf.seek(0)
+        context_image = np.array(Image.open(buf).convert('RGB'))
+    finally:
+        # Only the pixel array is needed from here on; release the figure.
+        plt.close(context_fig)
+    vision_attentions = np.array(res_json['vision_attentions'])
+    vision_heatmap_gallery_items = []
+    for i in range(0, 12, 3):
+        vis_attn = vision_attention_rollout(vision_attentions, i, i+3)
+        vision_heatmap = plot_vision_heatmap(context_image, vis_attn)
+        vision_heatmap_gallery_items.append((vision_heatmap, f"Heatmap from Layer{i}:{i+3}"))
+    # Text Heatmap
+    text_tokens = res_json['text_tokens']
+    text_attentions = res_json['text_attentions']
+    if text_encoder == "Qwen":
+        clean_text_tokens = [t.replace("Ġ", "") for t in text_tokens]
+    elif text_encoder == "LLaMA":
+        clean_text_tokens = [t.replace("▁", "") for t in text_tokens]
+    else:
+        # Unknown encoder: show the tokens unmodified rather than failing on
+        # an unbound name below.
+        clean_text_tokens = list(text_tokens)
+    text_heatmap_parts = ['<div class="gallery-container"><div class="grid-wrap svelte-1atirkn"><div class="grid-container svelte-1atirkn pt-6">']
+    for i in range(0, 12, 3):
+        text_heatmap_parts.append('<button class="thumbnail-item thumbnail-lg svelte-1atirkn"><div class="svelte-1pijsyv">')
+        text_heatmap_parts.append(to_html(text_attentions[i:i+3], clean_text_tokens))
+        text_heatmap_parts.append(f'</div><div class="caption-label svelte-1atirkn">Heatmap from Layer{i}:{i+3}</div></button>')
+    text_heatmap_parts.append('</div></div></div>')
+    text_heatmap_html = "".join(text_heatmap_parts)
+    # Time Series Heatmap (only built for the Chronos backend)
+    if tsfm == "Chronos":
+        time_series_attentions = np.array(res_json['time_series_attentions'])
+        time_series_heatmap_gallery_items = []
+        for i in range(0, 12, 3):
+            ts_attn = time_series_attention_sum(time_series_attentions, cl, i, i+3)
+            time_series_heatmap = plot_time_series_heatmap(time_series, ts_attn, cl)
+            time_series_heatmap_gallery_items.append((time_series_heatmap, f"Heatmap from Layer{i}:{i+3}"))
+    else:
+        time_series_heatmap_gallery_items = None
+    return (
+        gr.Markdown(visible=False),
+        fig,
+        gr.Markdown("# Attention Map", visible=True),
+        gr.Gallery(vision_heatmap_gallery_items, interactive=False, height="350px", object_fit="contain", visible=True),
+        gr.HTML(value=text_heatmap_html, visible=True),
+        gr.Gallery(time_series_heatmap_gallery_items, interactive=False, height="350px", object_fit="contain", visible=True if time_series_heatmap_gallery_items else False)
+    )
+def add_example_gallery(dataset, gallery, example_index, file):
+    """Add an uploaded CSV series to the example gallery and the example stores.
+
+    Nothing happens unless ``example_index`` equals ``-1`` and ``file`` is
+    truthy; in every other case ``gallery`` is returned untouched.
+
+    Args:
+        dataset: Key used to index the module-level ``inputs`` and ``targets``.
+        gallery: List of ``(image, caption)`` items; modified in place.
+        example_index: Currently selected example index; only ``-1`` triggers
+            the upload path.
+        file: Uploaded file object whose ``.name`` is the CSV path. The CSV
+            must contain "Timestamp", "Value" and "Ground Truth" columns.
+
+    Returns:
+        The same ``gallery`` list, with the new thumbnail inserted just before
+        its last item when an upload was processed.
+    """
+    if example_index != -1 or not file:
+        return gallery
+    df = pd.read_csv(file.name)
+    custom_input = df[["Timestamp", "Value"]]
+    custom_target = df[["Timestamp", "Ground Truth"]]
+    plt.style.use("seaborn-v0_8")
+    ax = custom_input.plot(x="Timestamp", color="black", linewidth=3, legend=False, x_compat=True)
+    # Work on the figure pandas just created rather than pyplot's "current"
+    # figure, which may have been switched by another handler in the meantime.
+    figure = ax.figure
+    try:
+        ax.set_xlabel("")
+        with io.BytesIO() as buf:
+            figure.savefig(buf, format='png')
+            buf.seek(0)
+            # convert() loads the pixel data, so the buffer can be closed after.
+            plot_img = Image.open(buf).convert('RGB')
+    finally:
+        # plt.clf() only cleared the figure and left it registered with pyplot;
+        # closing it keeps repeated uploads from accumulating open figures.
+        plt.close(figure)
+    # The caption number is computed from the gallery length before insertion.
+    gallery.insert(-1, (plot_img, f"Custom {len(gallery)-3}"))
+    inputs[dataset].append(custom_input)
+    targets[dataset].append(custom_target)
+    return gallery
+def on_model_selection(selected):
+    """Return a Gradio update whose visibility is on only for the "Custom" choice."""
+    is_custom = selected == "Custom"
+    return gr.update(visible=is_custom)
+# Stylesheet handed to gr.Blocks(css=...) below. Within this file the layout
+# uses the "two-col", "blue-btn", "app-description" and "footer-fixed" classes;
+# "right-col" and "push-down" are marked optional in the CSS itself.
+custom_css = """
+.two-col { display:flex; align-items:flex-end; gap: 16px; }
+.right-col { display:flex; flex-direction:column; } /* optional */
+.push-down { margin-top:auto; }                     /* optional */
+.footer-fixed{
+  position: fixed; left:0; right:0; bottom:0;
+  font-size: 16px;
+  padding: 10px 16px; border-top: 1px solid var(--border-color);
+  background: var(--background-fill-primary); z-index: 1000;
+  display: flex; justify-content: flex-end; align-items: center;  /* right align */
+}
+.blue-btn {
+  background-color: #024397 !important; /* Bootstrap-style blue */
+  color: white !important;
+  border-radius: 8px !important;
+  border: none !important;
+  padding: 8px 16px !important;
+  font-weight: 600;
+}
+.blue-btn:hover {
+  background-color: #0056b3 !important; /* Darker blue on hover */
+}
+.app-description{
+  font-size: 16px;
+}
+"""
+# Page layout and event wiring for the demo. Components are created inside
+# nested Row/Column contexts; the callbacks referenced here (selected_example,
+# selected_dataset, update_guide_markdown, update_time_series_dataframe,
+# load_csv, predict) are defined outside this block.
+with gr.Blocks(css=custom_css) as demo:
+    # Extra <style> rules injected as raw HTML: left-align the logo and force
+    # gallery grid containers to use flex display.
+    gr.HTML("""
+    <style>
+        #logo {
+            display: flex;
+            justify-content: flex-start;
+        }
+        .gallery-container .grid-container {
+            display: flex !important;
+        }
+    </style>
+    """)
+    # Static, non-interactive logo with download/fullscreen/share buttons off.
+    gr.Image(
+        value="logo.png",
+        show_label=False,
+        show_download_button=False,
+        show_fullscreen_button=False,
+        show_share_button=False,
+        interactive=False,
+        height=128,
+        container=False,
+        elem_id="logo"
+    )
+    with gr.Row(elem_classes=["two-col"]):
+        with gr.Column(scale=2):
+            gr.Markdown("<b>MulTiCast</b>, based on <a href='https://github.com/adlnlp/unicast'><b>UniCast</b></a>, is designed as a web-based system that allows users to perform multimodal time-series forecasting without technical setup. The system integrates a numerical time-series forecasting backbone with vision and text encoders. It exposes these capabilities through a lightweight but fully interactive web interface hosted on Hugging Face Spaces. Its design focuses on lowering the barrier to entry while ensuring interpretability through attention-based visualizations. The project and demo are sponsored by <a href='https://research.google/'>Google Research</a>", elem_classes=["app-description"])
+            # Upper row: dataset picker (left) and example gallery (right).
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Markdown("# Choose Dataset")
+                    dataset_choices = ["NN5 Daily", "Australian Electricity", "Custom"]
+                    dataset_dropdown = gr.Dropdown(dataset_choices, value=None, label="Datasets", interactive=True)
+                    dataset_description_textbox = gr.Textbox(label="Dataset Description", interactive=False)
+                with gr.Column(scale=3):
+                    gr.Markdown("# Data Selection")
+                    example_gallery = gr.Gallery(
+                        None,
+                        interactive=False
+                    )
+                    # State that receives whatever selected_example returns
+                    # when a gallery item is selected.
+                    example_index = gr.State(value=None)
+                    example_gallery.select(selected_example, inputs=example_gallery, outputs=example_index)
+                    # Hidden until one of the handlers below updates them.
+                    guide_text_markdown = gr.Markdown(visible=False)
+                    sample_csv_file = gr.File(visible=False)
+            # Lower row: data viewer (left) and model selection (right).
+            with gr.Row(elem_classes=["two-col"]):
+                with gr.Column(scale=3):
+                    gr.Markdown("# Data Viewer")
+                    time_series_file = gr.File(value=None, visible=False)
+                    time_series_dataframe = gr.Dataframe(visible=False)
+                    # A dataset change refreshes the gallery and description,
+                    # the guide text / sample CSV, and the file + dataframe viewer.
+                    dataset_dropdown.change(selected_dataset, inputs=dataset_dropdown, outputs=[example_gallery, dataset_description_textbox])
+                    dataset_dropdown.change(update_guide_markdown, inputs=[dataset_dropdown, example_index], outputs=[guide_text_markdown, sample_csv_file])
+                    dataset_dropdown.change(update_time_series_dataframe, inputs=[dataset_dropdown, example_index], outputs=[time_series_file, time_series_dataframe])
+                    # A change of example_index refreshes the guide and the viewer too.
+                    example_index.change(update_guide_markdown, inputs=[dataset_dropdown, example_index], outputs=[guide_text_markdown, sample_csv_file])
+                    example_index.change(update_time_series_dataframe, inputs=[dataset_dropdown, example_index], outputs=[time_series_file, time_series_dataframe])
+                    # A change of the file component feeds load_csv, whose
+                    # result goes to the dataframe.
+                    time_series_file.change(load_csv, inputs=[example_index, time_series_file], outputs=time_series_dataframe)
+                with gr.Column(scale=1):
+                    gr.Markdown("# Model Selection")
+                    model_choices = ["Timer", "Chronos", "Custom"]
+                    tsfm_radio = gr.Radio(model_choices, label="Time Series Foundation Model")
+                    # State copy of model_choices; not referenced again in this block.
+                    md_choices = gr.State(model_choices)
+                    model_id_box = gr.Textbox(placeholder="Type and Enter…", label="HF Model ID", interactive=True, visible=False)
+                    # model_token_box = gr.Textbox(placeholder="Type and Enter…", label="HF Model Token", interactive=True, visible=False)
+                    vision_encoder_radio = gr.Radio(["CLIP", "BLIP"], label="Vision Encoder")
+                    text_encoder_radio = gr.Radio(["Qwen", "LLaMA"], label="Text Encoder")
+                    warning_markdown = gr.Markdown(visible=False)
+                    btn = gr.Button("Run", elem_classes="blue-btn")
+                    # on_model_selection makes the HF Model ID box visible only
+                    # when the "Custom" model is chosen.
+                    tsfm_radio.change(on_model_selection, [tsfm_radio], model_id_box)
+                    # tsfm_radio.change(on_model_selection, [tsfm_radio], model_token_box)
+    with gr.Row():
+        with gr.Column(scale=2):
+            gr.Markdown("# Prediction")
+            forecast_plot = gr.Plot(label="Forecast", format="png")
+            # Attention-map outputs start hidden; they are listed as outputs
+            # of predict in the click handler below.
+            heatmap_header_html = gr.Markdown("# Attention Map", visible=False)
+            vision_heatmap_gallery = gr.Gallery(visible=False)
+            text_heatmap_html = gr.HTML(visible=False)
+            time_series_heatmap_gallery = gr.Gallery(visible=False)
+        # Two handlers are registered on the same click: predict fills the
+        # outputs above, and add_example_gallery updates the example gallery.
+        # NOTE(review): the relative execution order of the two handlers is
+        # not established by this code -- confirm if it matters.
+        btn.click(predict, inputs=[dataset_dropdown, dataset_description_textbox, example_index, time_series_file, vision_encoder_radio, text_encoder_radio, tsfm_radio, model_id_box], outputs=[warning_markdown, forecast_plot, heatmap_header_html, vision_heatmap_gallery, text_heatmap_html, time_series_heatmap_gallery])
+        btn.click(add_example_gallery, inputs=[dataset_dropdown, example_gallery, example_index, time_series_file], outputs=[example_gallery])
+    # Footer styled by the "footer-fixed" CSS class.
+    gr.HTML("<small>This work is sponsored by Google Research</small>", elem_classes=["footer-fixed"])
+# Launch the app only when this file is run as a script; ssr_mode=False is
+# passed through to Gradio's launch().
+if __name__ == "__main__":
     demo.launch(ssr_mode=False)