# PowerBIguide_AB / app.py
# NOTE: the following page-header text was scraped from the Hugging Face Spaces
# file view and is kept as comments so the module remains importable:
#   "ajayinsac's picture" / "Update app.py" / "da360ae verified"
#!/usr/bin/env python3
"""
AI Agent for Power BI β€” Gradio app for Hugging Face Spaces
- Upload a spreadsheet (.csv/.xlsx) or a Word doc (.docx)
- Profile the data, recommend visuals & report patterns (with drilldown, field params, tooltips, decomposition tree)
- Generate "Power BI build steps" tailored to the dataset
- Offline RAG Q&A over a small allow-listed corpus (seeded at runtime) focused on Microsoft Learn topics
Notes:
- No outbound web calls are required at runtime.
- If sentence-transformers or faiss-cpu is not available, the app falls back to TF-IDF + cosine.
- In Spaces, do NOT use share=True in launch().
"""
import os
import io
import re
import gc
import csv
import json
import math
import time
import glob
import uuid
import shutil
import string
import random
import hashlib
import textwrap
import tempfile
from dataclasses import dataclass
from typing import List, Dict, Any, Tuple
import numpy as np
import pandas as pd
import gradio as gr
# Optional dependencies.
# python-docx is only needed for the optional .docx context upload.
try:
    import docx  # python-docx
    HAS_DOCX = True
except Exception:
    HAS_DOCX = False
# Embeddings stack (robust fallbacks): prefer sentence-transformers + FAISS,
# degrade to TF-IDF + cosine when either is unavailable.
_EMBED_BACKEND = "st"
try:
    from sentence_transformers import SentenceTransformer
    import faiss  # type: ignore
    HAS_ST = True
    HAS_FAISS = True
except Exception:
    # NOTE(review): because SentenceTransformer and faiss share one try block,
    # a faiss import failure also forces HAS_ST = False even when
    # sentence-transformers IS installed — the "ST without FAISS" numpy path
    # in SimpleVectorStore therefore looks unreachable. Confirm intent.
    HAS_ST = False
    try:
        import faiss  # type: ignore
        HAS_FAISS = True
    except Exception:
        HAS_FAISS = False
    _EMBED_BACKEND = "tfidf"
# sklearn is imported unconditionally: it backs the TF-IDF fallback path.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# True when running inside a Hugging Face Space (either env var may be set).
IN_SPACES = bool(os.getenv("SPACE_ID") or os.getenv("HF_SPACE_ID"))
APP_DIR = os.path.dirname(os.path.abspath(__file__))
DOCS_DIR = os.path.join(APP_DIR, "docs")  # local allow-listed offline bundle
CACHE_DIR = os.path.join(APP_DIR, ".cache")
os.makedirs(CACHE_DIR, exist_ok=True)
# ---------------------------
# Utilities
# ---------------------------
def clean_text(s: str) -> str:
    """Normalize raw text: drop NUL escape residue and control chars, collapse whitespace."""
    if not s:
        return ""
    # Remove literal escaped-NUL sequences that sometimes survive text extraction.
    without_nul = s.replace("\\u0000", " ").replace("\\x00", " ")
    # Strip real ASCII control characters, then collapse whitespace runs.
    no_ctrl = re.sub(r"[\u0000-\u001F]", " ", without_nul)
    return re.sub(r"\s+", " ", no_ctrl).strip()
def to_markdown_table(rows: List[Dict[str, Any]]) -> str:
    """Render a list of dicts as a GitHub-flavored Markdown table.

    Headers come from the first row's keys; missing keys render as "".
    Cell text is sanitized so embedded '|' or newlines cannot break the
    row/column structure of the table.
    """
    if not rows:
        return "_No rows_"

    def _cell(value: Any) -> str:
        # Pipes delimit columns and newlines end rows — both must be neutralized.
        return str(value).replace("|", "\\|").replace("\n", " ")

    headers = list(rows[0].keys())
    out = ["| " + " | ".join(_cell(h) for h in headers) + " |",
           "| " + " | ".join(["---"] * len(headers)) + " |"]
    for r in rows:
        out.append("| " + " | ".join(_cell(r.get(h, "")) for h in headers) + " |")
    return "\n".join(out)
def save_temp_textfile(name: str, text: str) -> str:
    """Write *text* to <tempdir>/<name> as UTF-8 and return the full path."""
    dest = os.path.join(tempfile.gettempdir(), name)
    with open(dest, "w", encoding="utf-8") as fh:
        fh.write(text)
    return dest
# ---------------------------
# Seed trusted corpus (offline)
# ---------------------------
# Offline seed corpus: each entry becomes one Markdown file under ./docs on
# first run (see ensure_docs_directory). source_url is citation metadata only;
# no network call is ever made.
SEED_DOCS = [
    {
        "title": "Star Schema and Modeling Guidance",
        "source_url": "https://learn.microsoft.com/power-bi/guidance/star-schema",
        "text": """
Key ideas for Power BI modeling:
- Prefer a star schema: one or more fact tables at the center, surrounded by dimension tables.
- Use one-to-many relationships from dimensions to facts; default filter direction single-direction.
- Avoid many-to-many between fact tables. Use conformed dimensions to integrate subjects.
- Create and mark a proper Date table for time intelligence; avoid relying on auto date/time for complex models.
- Keep columns narrow and typed correctly; hide surrogate keys and technical columns from report view.
""".strip()
    },
    {
        "title": "Drillthrough Pages",
        "source_url": "https://learn.microsoft.com/power-bi/visuals/drillthrough",
        "text": """
Drillthrough pattern:
- Create a dedicated drillthrough page; add the target field to the Drillthrough well.
- Place visuals that explain the selected entity (e.g., Customer, Product).
- Add a Back button to return to the source page.
- Test by right-clicking a category value on a source visual and choosing Drillthrough.
""".strip()
    },
    {
        "title": "Report Page Tooltips",
        "source_url": "https://learn.microsoft.com/power-bi/create-reports/desktop-tooltips",
        "text": """
Report page tooltips:
- Create a small page sized for tooltip (e.g., 320x240 or default tooltip page size).
- Add detailed visuals or KPIs intended to show on hover.
- In the source visual, enable tooltip and assign the tooltip page.
- Keep tooltip visuals compact; avoid heavy interactions on hover.
""".strip()
    },
    {
        "title": "Field Parameters for Self-Service",
        "source_url": "https://learn.microsoft.com/power-bi/create-reports/field-parameters",
        "text": """
Field parameters:
- Create a field parameter to allow viewers to swap dimensions/measures on visuals.
- Add the parameter field to the Axis/Legend/Values well of visuals.
- Provide a slicer bound to the parameter to let users choose fields at runtime.
- Useful for building one flexible page instead of many near-duplicate pages.
""".strip()
    },
    {
        "title": "Incremental Refresh",
        "source_url": "https://learn.microsoft.com/power-bi/enterprise/incremental-refresh-overview",
        "text": """
Incremental refresh:
- Define RangeStart and RangeEnd parameters to partition by date/time.
- Configure policy: how much history to store and how much of the newer period to refresh.
- Only the most recent partitions are refreshed in the Service; historical partitions remain stable.
- Great for large tables; combine with aggregations for best performance.
""".strip()
    },
    {
        "title": "Aggregations",
        "source_url": "https://learn.microsoft.com/power-bi/transform-model/aggregations-advanced",
        "text": """
Aggregations pattern:
- Create an aggregate table with pre-summarized values (e.g., by Month/Region/Product).
- Map it to a detailed DirectQuery or Import table via manage aggregations.
- Power BI routes queries to the smaller aggregated table when possible.
- Especially valuable with DirectQuery/composite models.
""".strip()
    },
    {
        "title": "Row-Level Security (RLS)",
        "source_url": "https://learn.microsoft.com/power-bi/admin/rls",
        "text": """
RLS guidance:
- Define roles with filters on dimension tables (e.g., Region = USERPRINCIPALNAME() mapping table).
- Test roles in Power BI Desktop using 'View as'.
- In the Service, assign users/groups to roles; viewers see only permitted rows.
- Keep RLS logic simple; avoid complex bi-directional relationships.
""".strip()
    },
    {
        "title": "Decomposition Tree Visual",
        "source_url": "https://learn.microsoft.com/power-bi/visuals/power-bi-visualization-decomposition-tree",
        "text": """
Decomposition Tree:
- A visual to perform root-cause analysis by breaking down a measure across dimensions.
- Add dimensions in the 'Explain by' well; choose 'High value' or 'Low value' splits to guide analysis.
- Great for iterative 'why' exploration in operational datasets.
""".strip()
    }
]
def ensure_docs_directory():
    """Create ./docs and seed it with the bundled Markdown corpus when empty."""
    os.makedirs(DOCS_DIR, exist_ok=True)
    if glob.glob(os.path.join(DOCS_DIR, "*.md")):
        return  # corpus already present; never overwrite user-supplied docs
    # Write one numbered, slugified Markdown file per seed document.
    for num, doc in enumerate(SEED_DOCS, start=1):
        slug = re.sub(r'[^a-z0-9]+', '-', doc['title'].lower()).strip('-')
        path = os.path.join(DOCS_DIR, f"{num:02d}-{slug}.md")
        with open(path, "w", encoding="utf-8") as fh:
            fh.write(f"# {doc['title']}\n\n")
            fh.write(f"Source: {doc['source_url']}\n\n")
            fh.write(doc["text"].strip() + "\n")
# ---------------------------
# Simple offline RAG
# ---------------------------
@dataclass
class DocChunk:
    """One retrievable chunk of a corpus document."""
    doc_id: str       # source file identifier (basename of the .md file)
    title: str        # document title (taken from the first heading line)
    source_url: str   # citation URL from the "Source:" line, "" when absent
    text: str         # cleaned chunk text used for embedding / search
class SimpleVectorStore:
    """Tiny retrieval index with interchangeable backends.

    Backend is fixed at construction from module-level feature flags:
      1. "st" + FAISS:    MiniLM embeddings searched via an inner-product index.
      2. "st", no FAISS:  MiniLM embeddings searched via a NumPy dot product.
      3. "tfidf":         scikit-learn TF-IDF + cosine similarity.
    NOTE(review): sentence-transformers and faiss are imported in one try block
    at module top, so a faiss failure also clears HAS_ST — path 2 appears
    unreachable in practice; confirm before relying on it.
    """
    def __init__(self):
        self.backend = _EMBED_BACKEND  # "st" or "tfidf"
        self.model = None              # SentenceTransformer instance when backend == "st"
        self.index = None              # FAISS index, set by build() on path 1
        self.texts: List[str] = []
        self.meta: List[Tuple[str, str, str]] = []  # (doc_id, title, source_url)
        if self.backend == "st" and HAS_ST:
            # Smaller model is fine for HF CPU Spaces
            self.model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    def _embed(self, texts: List[str]) -> np.ndarray:
        """Embed *texts*; on the TF-IDF path this fits the vectorizer as a side effect."""
        if self.backend == "st" and HAS_ST:
            emb = self.model.encode(texts, batch_size=32, convert_to_numpy=True, normalize_embeddings=True)
            return emb.astype("float32")
        else:
            # TF-IDF fallback (cosine will be computed via sklearn)
            self.vectorizer = TfidfVectorizer(min_df=1, max_df=0.95, ngram_range=(1,2))
            X = self.vectorizer.fit_transform(texts)
            self._tfidf_matrix = X
            return np.zeros((X.shape[0], 1), dtype="float32")  # dummy; we won't use FAISS path

    def build(self, chunks: List[DocChunk]) -> None:
        """Index *chunks*. Must be called before query()."""
        self.texts = [c.text for c in chunks]
        self.meta = [(c.doc_id, c.title, c.source_url) for c in chunks]
        if self.backend == "st" and HAS_ST and HAS_FAISS:
            embs = self._embed(self.texts)
            dim = embs.shape[1]
            # Embeddings are L2-normalized, so inner product equals cosine similarity.
            self.index = faiss.IndexFlatIP(dim)
            self.index.add(embs)
        else:
            # TF-IDF fallback doesn't build a FAISS index
            if self.backend == "st" and not HAS_FAISS:
                # Still have ST model but no FAISS -> cosine on numpy
                self.embs = self._embed(self.texts)
            elif self.backend == "tfidf":
                # vectorizer built in _embed
                self._embed(self.texts)

    def query(self, q: str, top_k: int = 4) -> List[Tuple[float, int]]:
        """Return [(score, idx), ...] sorted by score desc."""
        q = clean_text(q)
        if not q.strip() or not self.texts:
            return []
        if self.backend == "st" and HAS_ST and HAS_FAISS:
            q_emb = self.model.encode([q], convert_to_numpy=True, normalize_embeddings=True).astype("float32")
            sims, idxs = self.index.search(q_emb, top_k)
            return [(float(sims[0][i]), int(idxs[0][i])) for i in range(len(idxs[0]))]
        elif self.backend == "st" and HAS_ST and not HAS_FAISS:
            # cosine on numpy; embeddings were normalized at build time
            q_emb = self.model.encode([q], convert_to_numpy=True, normalize_embeddings=True).astype("float32")
            sims = (self.embs @ q_emb.T).ravel()
            idxs = np.argsort(-sims)[:top_k]
            return [(float(sims[i]), int(i)) for i in idxs]
        else:
            # TF-IDF: cosine of the query against the fitted document matrix
            q_vec = self.vectorizer.transform([q])
            sims = cosine_similarity(self._tfidf_matrix, q_vec).ravel()
            idxs = np.argsort(-sims)[:top_k]
            return [(float(sims[i]), int(i)) for i in idxs]
def load_corpus_chunks() -> List[DocChunk]:
    """Load ./docs/*.md and split each file into roughly 800-character chunks.

    The first non-empty line is the title; a second line starting with
    "Source:" (case-insensitive) provides the citation URL. Remaining lines
    form the body.

    Fixes two defects in the previous version:
    - clean_text() collapses all newlines, so splitting the cleaned body on
      blank lines could never produce more than one paragraph; lines are now
      cleaned individually so chunk boundaries actually occur.
    - When the second line was NOT a "Source:" line it was still dropped from
      the body; the body start index now depends on whether a source was found.
    """
    ensure_docs_directory()
    chunks: List[DocChunk] = []
    for path in sorted(glob.glob(os.path.join(DOCS_DIR, "*.md"))):
        with open(path, "r", encoding="utf-8") as f:
            raw = f.read()
        lines = [l.strip() for l in raw.splitlines() if l.strip()]
        if not lines:
            continue  # empty file yields no chunks (same as before)
        title = lines[0].lstrip("# ").strip()
        src = ""
        body_start = 1
        if len(lines) > 1 and lines[1].lower().startswith("source:"):
            src = lines[1].split(":", 1)[1].strip()
            body_start = 2
        doc_id = os.path.basename(path)
        # Accumulate cleaned lines into ~800-char chunks.
        buf = ""
        for para in lines[body_start:]:
            para = clean_text(para)
            if not para:
                continue
            if len(buf) + len(para) < 800:
                buf += para + "\n"
            else:
                if buf.strip():
                    chunks.append(DocChunk(doc_id=doc_id, title=title, source_url=src, text=buf.strip()))
                buf = para + "\n"
        if buf.strip():
            chunks.append(DocChunk(doc_id=doc_id, title=title, source_url=src, text=buf.strip()))
    return chunks
def compose_answer(question: str, hits: List[Tuple[float, int]], store: "SimpleVectorStore") -> Tuple[str, List[Dict[str, str]]]:
    """Build a Markdown answer plus a citation list from retrieval hits.

    Args:
        question: the user's question (currently unused in composition).
        hits: [(score, index), ...] pairs from SimpleVectorStore.query().
        store: the store whose .texts / .meta the indices refer to.
    Returns:
        (markdown_answer, citations) where citations is a de-duplicated,
        order-preserving list of {"title": ..., "source": ...} dicts.
    """
    if not hits:
        return ("I couldn't find this in the trusted corpus. Try rephrasing or add more Microsoft Learn pages to the ./docs folder.", [])
    pieces = []
    cites = []
    seen = set()
    for score, idx in hits:
        pieces.append(f"- {store.texts[idx]}")
        doc_id, title, src = store.meta[idx]
        # Several chunks can come from the same document; cite each doc once.
        if (title, src) not in seen:
            seen.add((title, src))
            cites.append({"title": title, "source": src})
    answer = "### Answer (from trusted sources)\n" + "\n".join(pieces[:3])
    answer += "\n\n**Citations:**\n" + "\n".join([f"- {c['title']} — {c['source']}" for c in cites[:3]])
    return answer, cites
# ---------------------------
# Data ingestion & profiling
# ---------------------------
# Accepted upload extensions: tabular data files and optional context documents.
READABLE_EXTS = [".csv", ".xlsx"]
DOC_EXTS = [".docx"]
def read_table_file(path: str) -> pd.DataFrame:
    """Load a .csv or .xlsx file into a DataFrame; raise ValueError otherwise."""
    extension = os.path.splitext(path)[1].lower()
    readers = {".csv": pd.read_csv, ".xlsx": pd.read_excel}
    if extension not in readers:
        raise ValueError(f"Unsupported table extension: {extension}")
    return readers[extension](path)
def read_docx_text(path: str) -> str:
    """Extract paragraph text from a .docx; returns "" when python-docx is unavailable."""
    if not HAS_DOCX:
        return ""
    document = docx.Document(path)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)
def infer_col_type(series: pd.Series) -> str:
    """Classify a column as datetime / boolean / numeric / categorical / text.

    Non-numeric columns are probed by parsing a small sample as dates; a
    column whose distinct-value ratio is under 20% is treated as categorical.

    Fix: pd.to_datetime on an EMPTY sample succeeds, so an all-NaN or empty
    object column used to be misclassified as "datetime". The probe is now
    skipped when the non-null sample is empty.
    """
    if pd.api.types.is_datetime64_any_dtype(series):
        return "datetime"
    if pd.api.types.is_bool_dtype(series):
        return "boolean"
    if pd.api.types.is_numeric_dtype(series):
        return "numeric"
    # Probe a small non-null sample for parseable dates.
    sample = series.dropna().head(20)
    if len(sample) > 0:
        try:
            pd.to_datetime(sample, errors="raise")  # raises for non-date text
            return "datetime"
        except Exception:
            pass
    # Few unique values relative to rows -> categorical.
    n = len(series)
    distinct = series.nunique(dropna=True)
    if n > 0 and distinct / max(n, 1) < 0.2:
        return "categorical"
    return "text"
def profile_dataframe(df: pd.DataFrame, max_sample: int = 10_000) -> Dict[str, Any]:
    """Profile each column (type, role, cardinality, missingness) plus dataset signals.

    Frames larger than *max_sample* rows are down-sampled (seeded, so
    deterministic) before profiling.
    """
    sample = df.copy()
    if len(sample) > max_sample:
        sample = sample.sample(max_sample, random_state=42).reset_index(drop=True)

    col_profiles = []
    for name in sample.columns:
        series = sample[name]
        kind = infer_col_type(series)
        row_count = len(series)
        col_profiles.append({
            "column": name,
            "pandas_dtype": str(series.dtype),
            "inferred_type": kind,
            "role": "measure" if kind == "numeric" else "dimension",
            "rows": row_count,
            "distinct_count": int(series.nunique(dropna=True)),
            "pct_missing": round(float(series.isna().sum()) / max(row_count, 1) * 100.0, 2),
            "sample_values": list(series.dropna().unique()[:5]),
        })

    # Dataset-level signals that drive the visual-recommendation heuristics.
    has_date = any(c["inferred_type"] == "datetime" for c in col_profiles)
    small_dims = [c for c in col_profiles
                  if c["role"] == "dimension" and c["distinct_count"] <= 20]
    two_small_categories = len(small_dims) >= 2
    return {
        "n_rows": int(len(sample)),
        "n_cols": int(len(sample.columns)),
        "has_date": bool(has_date),
        "measures": int(sum(1 for c in col_profiles if c["role"] == "measure")),
        "dimensions": int(sum(1 for c in col_profiles if c["role"] == "dimension")),
        "two_small_categories": bool(two_small_categories),
        "hierarchies_found": bool(has_date or two_small_categories),
        "columns": col_profiles,
    }
# ---------------------------
# Visual recommendation engine
# ---------------------------
def recommend_visuals(profile: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Score candidate Power BI visuals against the dataset profile.

    Returns a list of {visual, score, rationale, extras} dicts, best first.
    """
    suggestions: List[Dict[str, Any]] = []

    def propose(name: str, score: float, why: str, extras=None) -> None:
        suggestions.append({
            "visual": name,
            "score": score,
            "rationale": why,
            "extras": extras or [],
        })

    has_date = profile.get("has_date", False)
    measures = profile.get("measures", 0)
    two_small = profile.get("two_small_categories", False)

    # Trend over time needs a date axis plus something to plot.
    if has_date and measures >= 1:
        propose("Line chart (trend over time)", 0.96,
                "Date/time column detected + at least one measure → trend visualization with drill down (Y/Q/M).",
                ["Enable drill hierarchy on Date table.", "Consider YoY or rolling average in a small multiple."])

    # Ranked comparison across a small number of categories.
    if measures >= 1 and any(c["role"] == "dimension" and c["distinct_count"] <= 20
                             for c in profile["columns"]):
        propose("Clustered bar/column (ranked categories)", 0.90,
                "Low-cardinality categorical fields with one measure → ranked comparison.",
                ["Sort by measure desc.", "Consider report-page tooltip for extra detail."])

    # Part-to-whole composition.
    if measures == 1 and two_small:
        propose("100% stacked bar/column (part-to-whole)", 0.78,
                "Two small categorical fields + one measure → relative composition across segments.",
                ["Prefer 100% stacked for share emphasis.", "Avoid pies/donuts unless very few categories."])

    # Measure-vs-measure comparison.
    if measures >= 2:
        propose("Scatter (compare two measures)", 0.74,
                "Two or more measures → scatter for correlation; add category legend if helpful.",
                ["Enable data labels, add trendline where suitable."])

    # Distribution of any numeric column.
    if any(c["inferred_type"] == "numeric" for c in profile["columns"]):
        propose("Histogram/Box plot (distribution & outliers)", 0.66,
                "Numeric columns → reveal distribution shape and outliers.",
                ["Use custom bins for stability.", "Use box plot for skew/outlier awareness."])

    # Root-cause exploration when hierarchies exist.
    if profile.get("hierarchies_found", False):
        propose("Decomposition tree (root-cause)", 0.64,
                "Date hierarchy or multiple small categorical fields → iterative 'why' analysis.",
                ["Seed 'Explain by' with key dimensions.", "Use High/Low splits strategically."])

    return sorted(suggestions, key=lambda rec: rec["score"], reverse=True)
# ---------------------------
# Power BI steps generator
# ---------------------------
def generate_powerbi_steps(profile: Dict[str, Any], context_text: str = "") -> str:
    """Produce a Markdown build guide tailored to the profiled dataset."""
    lines: List[str] = []

    # 1) Modeling guidance (always applicable).
    lines.append("Modeling & relationships:")
    lines.append("- Use a star schema: fact table(s) with dimension tables related 1:*; default single-direction filters.")
    lines.append("- Create a dedicated Date table (one row per date) and Mark as date table; use it for all time axes.")
    lines.append("- Hide surrogate keys/technical columns; keep columns typed & narrow.")

    # 2) Top-ranked visual recommendations plus their follow-up tips.
    lines.append("\nVisuals to build (with interactivity):")
    for rec in recommend_visuals(profile)[:4]:
        lines.append(f"- **{rec['visual']}** — {rec['rationale']}")
        lines.extend(f" - {tip}" for tip in rec.get("extras", []))

    # 3) Interactivity patterns.
    lines.append("\nInteractivity patterns:")
    lines.append("- Drillthrough: create a detail page, drag the entity field to the Drillthrough well, add a Back button.")
    lines.append("- Report page tooltips: create a tooltip-sized page, assign as tooltip on dense visuals.")
    lines.append("- Field parameters: add a parameter to let users swap dimensions/measures at runtime.")
    lines.append("- Consider a Decomposition Tree for explorations of 'why' a metric changes.")

    # 4) Scale & governance.
    lines.append("\nScale & governance:")
    lines.append("- For large fact tables, configure Incremental Refresh (RangeStart/RangeEnd).")
    lines.append("- Consider Aggregations for DirectQuery/composite models.")
    lines.append("- Define RLS roles on dimensions; test with 'View as'. Keep logic simple.")

    # 5) Optional requirements context from the uploaded .docx (truncated to 600 chars).
    if context_text.strip():
        lines.append("\nContext captured from the provided .docx:")
        lines.append(f"> {context_text[:600]}{'...' if len(context_text) > 600 else ''}")

    return "# Power BI Build Steps\n\n" + "\n".join(lines)
# ---------------------------
# Gradio UI callbacks
# ---------------------------
def cb_profile(file_path: str, docx_path: str):
    """Gradio callback: profile an uploaded table and generate build steps.

    Returns a 5-tuple matching the outputs wired up in the UI:
    (sample head DataFrame, summary JSON string, status text,
     column-profile DataFrame, list of export file paths).
    """
    df, profile, notes = None, {}, ""
    if file_path:
        try:
            df = read_table_file(file_path)
        except Exception as e:
            # Unreadable table: surface the error in the status box and bail out.
            return gr.update(value=None), json.dumps({}, indent=2), f"Error reading table: {e}", None, None
    # Optional .docx is context only; parse failures are reported inline, never fatal.
    if docx_path and os.path.splitext(docx_path)[1].lower() in DOC_EXTS:
        try:
            notes = read_docx_text(docx_path) if HAS_DOCX else ""
        except Exception as e:
            notes = f"(Could not parse .docx: {e})"
    if df is None:
        return gr.update(value=None), json.dumps({}, indent=2), "Please upload a .csv or .xlsx file.", None, None
    profile = profile_dataframe(df)
    # Build a small dataframe for display
    cols_df = pd.DataFrame(profile["columns"])
    head_df = df.head(10)
    # Save profile CSV for export (random suffix avoids clashes between sessions)
    out_csv = os.path.join(tempfile.gettempdir(), f"profile_{uuid.uuid4().hex[:8]}.csv")
    cols_df.to_csv(out_csv, index=False)
    # Save generated build steps as Markdown for export
    steps_md = generate_powerbi_steps(profile, context_text=notes)
    out_md = os.path.join(tempfile.gettempdir(), f"pbi_steps_{uuid.uuid4().hex[:8]}.md")
    with open(out_md, "w", encoding="utf-8") as f:
        f.write(steps_md)
    # Compact dataset-level summary shown in the JSON widget.
    summary = {
        "rows": profile["n_rows"],
        "columns": profile["n_cols"],
        "has_date": profile["has_date"],
        "measures": profile["measures"],
        "dimensions": profile["dimensions"],
        "hierarchies_found": profile["hierarchies_found"]
    }
    return head_df, json.dumps(summary, indent=2), "Profile computed. See 'Column profile' tab for details.", cols_df, [out_md, out_csv]
# RAG global state: lazily-built SimpleVectorStore singleton.
_VECTOR_STORE = None

def build_vector_store():
    """(Re)build the module-level vector store from the ./docs corpus."""
    global _VECTOR_STORE
    chunks = load_corpus_chunks()
    store = SimpleVectorStore()
    store.build(chunks)
    _VECTOR_STORE = store
def cb_qa(question: str, restrict: str):
    """Gradio callback: answer *question* from the offline trusted corpus.

    *restrict* is currently informational only; a full implementation could
    use it to filter hits by domain/source.
    """
    global _VECTOR_STORE
    if _VECTOR_STORE is None:
        build_vector_store()  # lazy init in case the startup build failed
    top_hits = _VECTOR_STORE.query(question, top_k=4)
    answer, _citations = compose_answer(question, top_hits, _VECTOR_STORE)
    return answer
def cb_reset():
    """Gradio callback: clear all output widgets back to their initial state."""
    # Transient export files live in /tmp, so there is nothing else to clean up.
    cleared_outputs = (
        None,                               # sample-data dataframe
        "{}",                               # profile summary JSON
        "Session cleared.",                 # status message
        None,                               # column-profile dataframe
        None,                               # export file list
        "",                                 # Q&A answer markdown
        "Microsoft Learn (default only)",   # trusted-sources dropdown
    )
    return cleared_outputs
# ---------------------------
# Build the UI
# ---------------------------
# UI layout: three tabs (profile, Q&A, explainability) inside one Blocks app.
with gr.Blocks(title="AI Agent for Power BI") as demo:
    gr.Markdown("# AI Agent for Power BI\nUpload data → get visual/report recommendations, drilldown ideas, and Power BI steps.\nAsk about patterns from trusted docs (offline bundle).")
    with gr.Tabs():
        # Tab 1: upload, profiling, and build-step generation
        with gr.TabItem("Upload & Profile"):
            with gr.Row():
                file_in = gr.File(label="Upload .csv or .xlsx", file_types=[".csv", ".xlsx"], type="filepath")
                docx_in = gr.File(label="Optional: upload a .docx with requirements (context only)", file_types=[".docx"], type="filepath")
            with gr.Row():
                btn_profile = gr.Button("Profile Data & Generate Steps", variant="primary")
                btn_reset = gr.Button("Reset Session")
            with gr.Row():
                df_head = gr.Dataframe(label="Sample data (first 10 rows)", interactive=False)
                profile_summary = gr.Code(label="Profile summary (JSON)")
            status_box = gr.Textbox(label="Status", interactive=False)
            with gr.Row():
                gr.Markdown("### Column profile")
                cols_profile = gr.Dataframe(label="Columns", interactive=False)
            with gr.Row():
                exports = gr.File(label="Download build steps (.md) and column profile (.csv)", file_count="multiple")
        # Tab 2: offline RAG Q&A over the trusted corpus
        with gr.TabItem("Patterns & Best Practices (Q&A)"):
            q = gr.Textbox(label="Ask a Power BI question (modeling, drillthrough, tooltips, field parameters, RLS, incremental refresh, aggregations...)", lines=3, placeholder="e.g., When should I use a star schema vs. a snowflake?")
            restrict = gr.Dropdown(choices=["Microsoft Learn (default only)", "Microsoft Learn + curated community"], value="Microsoft Learn (default only)", label="Trusted sources")
            btn_ask = gr.Button("Answer from trusted corpus", variant="primary")
            answer_md = gr.Markdown()
        # Tab 3: static explanation of the recommendation heuristics
        with gr.TabItem("Explainability & Exports"):
            gr.Markdown("This app uses simple heuristics for visual recommendations and an offline RAG for Q&A.\n- Visuals: date/time → line; low-cardinality categories → bars; 2+ measures → scatter; numeric distribution → histogram/box; hierarchies → decomposition tree.\n- Interactivity: drillthrough pages, report page tooltips, field parameters.\n- Scale: incremental refresh & aggregations; security via RLS.")
    # Bind events: outputs must stay aligned with the callbacks' return tuples.
    btn_profile.click(cb_profile, inputs=[file_in, docx_in], outputs=[df_head, profile_summary, status_box, cols_profile, exports])
    btn_reset.click(cb_reset, inputs=None, outputs=[df_head, profile_summary, status_box, cols_profile, exports, answer_md, restrict])
    btn_ask.click(cb_qa, inputs=[q, restrict], outputs=[answer_md])
# Build vector store once on startup for snappy Q&A
try:
    build_vector_store()
except Exception as e:
    # Non-fatal: cb_qa rebuilds lazily on the first question if this failed.
    print(f"[WARN] Vector store build failed: {e}")

if __name__ == "__main__":
    # Do NOT pass share=True for Hugging Face Spaces
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))