"""
QueryChat initialization and filtered DataFrame helpers.
Provides convenience wrappers around the ``querychat`` library for
natural-language filtering of time-series DataFrames inside a Gradio
app. All functions degrade gracefully when the package or an API key
is unavailable.
"""
from __future__ import annotations
import os
from typing import List, Optional
import pandas as pd
# querychat is an optional dependency. Record whether the import worked
# instead of letting an ImportError escape at module import time.
try:
    from querychat.gradio import QueryChat as _QueryChat
except ImportError:  # pragma: no cover
    _QUERYCHAT_AVAILABLE = False
else:
    _QUERYCHAT_AVAILABLE = True
# ---------------------------------------------------------------------------
# Availability check
# ---------------------------------------------------------------------------
def check_querychat_available() -> bool:
    """Report whether the QueryChat feature can be offered.

    Two conditions must hold: the *querychat* package imported
    successfully, and the ``OPENAI_API_KEY`` environment variable holds a
    non-empty value.  Callers can use the result to show or hide UI
    elements.

    Returns
    -------
    bool
        ``True`` only when both conditions are met.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    return bool(_QUERYCHAT_AVAILABLE and api_key)
# ---------------------------------------------------------------------------
# QueryChat factory
# ---------------------------------------------------------------------------
def _sanitize_table_name(name: str) -> str:
    """Turn a free-form dataset name into a plain SQL-style identifier."""
    import re

    # Replace character-by-character (not runs) so names that were already
    # handled correctly, e.g. "my data" -> "my_data", keep the same result.
    cleaned = re.sub(r"[^0-9A-Za-z_]", "_", name)
    if not cleaned:
        return "dataset"
    # NOTE(review): querychat is believed to require table names that start
    # with a letter -- confirm against the installed version.
    if not re.match(r"[A-Za-z]", cleaned):
        cleaned = f"t_{cleaned}"
    return cleaned


def create_querychat(
    df: pd.DataFrame,
    name: str = "dataset",
    date_col: str = "date",
    y_cols: Optional[List[str]] = None,
    freq_label: str = "",
    *,
    client: str = "openai/gpt-5.2-2025-12-11",
):
    """Create and return a QueryChat instance bound to *df*.

    Parameters
    ----------
    df:
        The pandas DataFrame to expose to the chat interface.
    name:
        A human-readable name for the dataset.  It is used verbatim in
        the description and greeting; the table name passed to QueryChat
        is derived from it by replacing every character that is not an
        ASCII letter, digit or underscore with ``_`` (and prefixing
        ``t_`` when the result does not start with a letter).
    date_col:
        Name of the date/time column (only mentioned in the description).
    y_cols:
        Names of the value (numeric) columns.  If ``None`` or empty, the
        description says "none specified" and a generic filter example
        is shown in the greeting.
    freq_label:
        Optional frequency label (e.g. ``"Monthly"``, ``"Daily"``).
    client:
        Keyword-only.  Model identifier forwarded to ``QueryChat`` as its
        ``client`` argument.

    Returns
    -------
    QueryChat instance
        The object returned by ``QueryChat()``.

    Raises
    ------
    RuntimeError
        If querychat is not installed.
    """
    if not _QUERYCHAT_AVAILABLE:
        raise RuntimeError(
            "The 'querychat' package is not installed. "
            "Install it with: pip install 'querychat[gradio]'"
        )

    if y_cols is None:
        y_cols = []

    # Column labels are not guaranteed to be strings; stringify for join().
    value_cols_str = (
        ", ".join(str(col) for col in y_cols) if y_cols else "none specified"
    )
    freq_part = f" Frequency: {freq_label}." if freq_label else ""
    data_description = (
        f"This dataset is named '{name}'. "
        f"It contains {len(df):,} rows. "
        f"The date column is '{date_col}'. "
        f"Value columns: {value_cols_str}."
        f"{freq_part}"
    )

    # Build example bullets that reference actual column names
    if y_cols:
        first_y = y_cols[0]
        filter_example = f'- "Filter where {first_y} > median"'
    else:
        filter_example = '- "Filter where value > 100"'

    greeting = (
        f"Hi! I can help you filter and explore the **{name}** dataset. "
        "Try asking me something like:\n"
        '- "Show only the last 5 years"\n'
        f"{filter_example}\n"
        '- "Show rows from January to March"'
    )

    return _QueryChat(
        data_source=df,
        table_name=_sanitize_table_name(name),
        client=client,
        data_description=data_description,
        greeting=greeting,
    )
# ---------------------------------------------------------------------------
# Filtered DataFrame extraction
# ---------------------------------------------------------------------------
def get_filtered_pandas_df(qc, state_dict=None) -> pd.DataFrame:
    """Extract the currently filtered DataFrame from a QueryChat instance.

    ``qc.df`` is called exactly once.  Its result may be something other
    than a pandas DataFrame (e.g. a *narwhals* frame), so it is converted
    when needed:

    1. a pandas DataFrame is returned unchanged;
    2. otherwise ``to_pandas()`` is tried, then ``to_native()``;
    3. objects with neither method are passed to ``pd.DataFrame``.

    Any failure is logged as a warning and the next option is tried.  If
    nothing works, an empty DataFrame is returned so the app can keep
    running.

    Parameters
    ----------
    qc:
        A QueryChat instance previously created via :func:`create_querychat`.
    state_dict:
        The Gradio state dictionary from ``qc.ui()``.  When not ``None``
        it is passed to ``qc.df``; otherwise ``qc.df`` is called without
        arguments.

    Returns
    -------
    pd.DataFrame
        The filtered data as a pandas DataFrame, or an empty DataFrame
        when the data could not be retrieved or converted.
    """
    import logging

    log = logging.getLogger(__name__)

    try:
        result = qc.df() if state_dict is None else qc.df(state_dict)
    except Exception:  # noqa: BLE001 - deliberate best-effort boundary
        # Retrying the same call cannot help, so report it and bail out.
        log.warning("QueryChat could not provide filtered data", exc_info=True)
        return pd.DataFrame()

    if isinstance(result, pd.DataFrame):
        return result

    # Collect the conversion routes this object supports, in preference order.
    converters = []
    if hasattr(result, "to_pandas"):
        converters.append(result.to_pandas)
    if hasattr(result, "to_native"):
        converters.append(result.to_native)
    if not converters:
        # Unknown type -- hand the object itself to pd.DataFrame.
        converters.append(lambda: result)

    for convert in converters:
        try:
            converted = convert()
            if not isinstance(converted, pd.DataFrame):
                converted = pd.DataFrame(converted)
        except Exception:  # noqa: BLE001 - try the next conversion route
            log.warning(
                "Could not convert QueryChat result to pandas", exc_info=True
            )
        else:
            return converted

    return pd.DataFrame()
|