# pravah/app/adapter.py
from __future__ import annotations
import pandas as pd
from .ai_engine import df_to_records
from .models import ColumnItem, SeriesItem, Visualization
# Hex color palette for chart series; DEFAULT_COLORS[0] is used as the
# single-series default in this module.
DEFAULT_COLORS = [
    "#2563eb",
    "#10b981",
    "#f59e0b",
    "#ef4444",
    "#8b5cf6",
    "#14b8a6",
]
def _safe_float(x) -> float | None:
try:
if x is None:
return None
v = float(x)
if pd.isna(v):
return None
return v
except Exception:
return None
def _format_value(v) -> str:
if v is None:
return "—"
try:
if isinstance(v, (int, float)) and not pd.isna(v):
if float(v).is_integer():
return f"{int(v):,}"
return f"{float(v):,.2f}"
except Exception:
pass
try:
if isinstance(v, pd.Timestamp):
return str(v.to_pydatetime())
except Exception:
pass
return str(v)
def _summarize_small_df(df: pd.DataFrame) -> str | None:
if df is None or df.empty:
return None
# Only summarize small result sets to avoid long answers.
if len(df) > 5 or df.shape[1] > 4:
return None
sample = df.head(5)
# If all rows are identical, reduce to 1.
try:
sample = sample.drop_duplicates().head(5)
except Exception:
pass
# One row -> key=value pairs.
if len(sample) == 1:
row = sample.iloc[0]
parts = []
for c in sample.columns:
parts.append(f"**{c}**: {_format_value(row.get(c))}")
return ", ".join(parts)
# Two columns -> list mapping: A -> B (first few).
if sample.shape[1] == 2:
c1 = str(sample.columns[0])
c2 = str(sample.columns[1])
lines = []
for _, r in sample.iterrows():
lines.append(f"- **{_format_value(r.get(c1))}**: {_format_value(r.get(c2))}")
return "\n".join(lines)
return None
def _build_insights_for_table(df: pd.DataFrame) -> list[str]:
if df is None or df.empty:
return []
insights: list[str] = []
insights.append(f"Rows returned: **{len(df)}**")
# Find a likely numeric metric column
numeric_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
if numeric_cols:
metric = str(numeric_cols[0])
top_idx = df[metric].astype(float, errors='ignore').idxmax() if len(df) else None
if top_idx is not None and top_idx in df.index:
top_row = df.loc[top_idx]
label_col = str(df.columns[0])
label_val = str(top_row.get(label_col, ""))
metric_val = _safe_float(top_row.get(metric))
if metric_val is not None:
insights.append(f"Top by **{metric}**: **{label_val}** ({metric_val:,.2f})")
return insights[:3]
def _build_insights_for_distribution(df: pd.DataFrame, category: str, value: str) -> list[str]:
if df is None or df.empty or category not in df.columns or value not in df.columns:
return []
s = pd.to_numeric(df[value], errors="coerce")
if s.dropna().empty:
return []
insights: list[str] = []
try:
total = float(s.sum(skipna=True))
if total and total > 0:
idx = s.astype(float).idxmax()
top_cat = str(df.loc[idx, category]) if idx in df.index else None
top_val = _safe_float(df.loc[idx, value]) if idx in df.index else None
if top_cat is not None and top_val is not None:
share = (top_val / total) * 100.0
insights.append(f"Top category: **{top_cat}** ({top_val:,.0f}, {share:,.1f}% of total)")
insights.append(f"Total (**{value}**): {total:,.0f}")
except Exception:
pass
try:
# How many categories
insights.append(f"Categories: **{df[category].nunique()}**")
except Exception:
pass
return insights[:3]
def _is_time_like(series: pd.Series) -> bool:
try:
if pd.api.types.is_datetime64_any_dtype(series):
return True
parsed = pd.to_datetime(series, errors="coerce")
return parsed.notna().mean() >= 0.7
except Exception:
return False
def _build_insights_for_series(df: pd.DataFrame, x: str, y: str) -> list[str]:
if df is None or df.empty or y not in df.columns:
return []
s = pd.to_numeric(df[y], errors="coerce").dropna()
if s.empty:
return []
insights: list[str] = []
# Min/Max
try:
min_v = float(s.min())
max_v = float(s.max())
insights.append(f"Range (**{y}**): {min_v:,.2f} to {max_v:,.2f}")
except Exception:
pass
# Only add trend/"latest" insights if the x-axis looks like a time-series.
if x in df.columns and _is_time_like(df[x]):
try:
first_v = float(s.iloc[0])
last_v = float(s.iloc[-1])
delta = last_v - first_v
direction = "increased" if delta > 0 else "decreased" if delta < 0 else "remained flat"
insights.append(f"Overall {direction} by {abs(delta):,.2f} ({first_v:,.2f}{last_v:,.2f})")
except Exception:
pass
try:
last_x = df[x].iloc[len(df) - 1]
last_v = float(s.iloc[-1])
insights.append(f"Latest at **{x}={last_x}**: **{last_v:,.2f}**")
except Exception:
pass
return insights[:3]
def _guess_columns(df: pd.DataFrame, max_cols: int = 6) -> list[ColumnItem]:
cols = list(df.columns)[:max_cols]
return [ColumnItem(key=str(c), label=str(c)) for c in cols]
def _guess_x_key(df: pd.DataFrame) -> str | None:
if df.empty:
return None
keys = list(df.columns)
preferred = ["date", "time", "day", "month", "year", "name", "region", "district", "label"]
lower = {str(k).lower(): str(k) for k in keys}
for p in preferred:
if p in lower:
return lower[p]
return str(keys[0]) if keys else None
def _guess_numeric_key(df: pd.DataFrame, exclude: str | None = None) -> str | None:
if df.empty:
return None
for c in df.columns:
if exclude and str(c) == exclude:
continue
if pd.api.types.is_numeric_dtype(df[c]):
return str(c)
# fallback: try coerce
for c in df.columns:
if exclude and str(c) == exclude:
continue
try:
pd.to_numeric(df[c])
return str(c)
except Exception:
continue
return None
def build_visualization_from_intent(
intent: str,
reply: str,
df: pd.DataFrame | None,
sql: str | None,
chart_config: dict | None,
) -> tuple[str, Visualization | None]:
# answer should be short; keep reply as-is
answer = reply or ""
if intent == "chat":
return answer, None
if df is None or df.empty:
# Still return a visualization shell for clarity
if intent in ["sql", "analytics"]:
vis = Visualization(type="table", title="Query Result", data=[], columns=[])
return answer, vis
return answer, None
if intent == "sql":
summary = _summarize_small_df(df)
if summary:
if answer.strip():
answer = f"{answer.strip()}\n\n{summary}"
else:
answer = summary
vis = Visualization(
type="table",
title="Query Result",
data=df_to_records(df),
columns=_guess_columns(df),
insights=_build_insights_for_table(df),
)
return answer, vis
if intent == "analytics":
cfg = chart_config or {}
ctype = cfg.get("type") or "line"
title = cfg.get("title") or "Analytics Result"
x = cfg.get("x") or _guess_x_key(df)
y = cfg.get("y") or _guess_numeric_key(df, exclude=x)
if ctype not in ["bar", "line", "area", "pie"]:
# Unsupported types fall back to table
vis = Visualization(
type="table",
title=title,
data=df_to_records(df),
columns=_guess_columns(df),
)
return answer, vis
if not x or not y:
vis = Visualization(
type="table",
title=title,
data=df_to_records(df),
columns=_guess_columns(df),
)
return answer, vis
series = [SeriesItem(dataKey=str(y), name=str(y), color=DEFAULT_COLORS[0])]
insights = _build_insights_for_series(df, str(x), str(y))
if ctype == "pie":
insights = _build_insights_for_distribution(df, str(x), str(y))
summary = _summarize_small_df(df)
if summary and answer.strip():
answer = f"{answer.strip()}\n\n{summary}"
elif summary and not answer.strip():
answer = summary
vis = Visualization(
type=ctype,
title=title,
data=df_to_records(df),
xAxisKey=str(x),
series=series,
insights=insights,
)
return answer, vis
return answer, None