DanielRegaladoCardoso committed on
Commit
7cc0c39
·
verified ·
1 Parent(s): a42423c

Dynamic suggested questions based on actual dataset columns (no more Titanic-only)

Browse files
Files changed (1) hide show
  1. app.py +45 -8
app.py CHANGED
@@ -557,12 +557,42 @@ def _make_titanic_csv() -> Path:
557
  return p
558
 
559
 
560
- SUGGESTED_QUESTIONS = [
561
- "What's the survival rate by passenger class?",
562
- "Average fare by embarkation port",
563
- "Top 5 oldest passengers who survived",
564
- "Count of male vs female survivors",
565
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
566
 
567
 
568
  # =================================================== HTML render helpers
@@ -739,12 +769,19 @@ def _empty_state_html() -> str:
739
 
740
 
741
  def _ready_state_html() -> str:
742
- """Shown when data is loaded but no queries asked yet."""
 
 
 
 
 
 
 
743
  return (
744
  '<div class="empty">'
745
  '<div class="empty-title">Ready</div>'
746
  '<div class="empty-sub">Ask a question above, or try one of these:</div>'
747
- f'{_suggestions_html(SUGGESTED_QUESTIONS[:4])}'
748
  '</div>'
749
  )
750
 
 
557
  return p
558
 
559
 
560
+ def _suggest_questions(table: str, schema: list[dict]) -> list[str]:
561
+ """Generate question suggestions tailored to the loaded dataset's columns."""
562
+ if not schema:
563
+ return []
564
+
565
+ NUMERIC = {"INTEGER", "BIGINT", "DOUBLE", "FLOAT", "DECIMAL", "NUMERIC", "REAL", "INT", "SMALLINT"}
566
+ DATE = {"DATE", "TIMESTAMP", "DATETIME", "TIME"}
567
+ STRING = {"VARCHAR", "STRING", "TEXT", "CHAR"}
568
+
569
+ def kind(t: str) -> str:
570
+ t = (t or "").upper().split("(")[0]
571
+ if any(k in t for k in NUMERIC): return "num"
572
+ if any(k in t for k in DATE): return "date"
573
+ if any(k in t for k in STRING): return "str"
574
+ return "other"
575
+
576
+ cols = [(c["name"], kind(c.get("type", ""))) for c in schema]
577
+ nums = [n for n, k in cols if k == "num"]
578
+ dates = [n for n, k in cols if k == "date"]
579
+ strs = [n for n, k in cols if k == "str"]
580
+
581
+ qs: list[str] = []
582
+ if nums:
583
+ qs.append(f"Top 10 rows by {nums[0]}")
584
+ if strs and nums:
585
+ qs.append(f"{nums[0].capitalize()} grouped by {strs[0]}")
586
+ if strs:
587
+ qs.append(f"Count of rows by {strs[0]}")
588
+ if dates and nums:
589
+ qs.append(f"{nums[0].capitalize()} over time ({dates[0]})")
590
+ if len(nums) >= 2:
591
+ qs.append(f"Compare {nums[0]} vs {nums[1]}")
592
+ if not qs:
593
+ qs.append(f"Show me the first 10 rows of {table}")
594
+
595
+ return qs[:4]
596
 
597
 
598
  # =================================================== HTML render helpers
 
769
 
770
 
771
  def _ready_state_html() -> str:
772
+ """Shown when data is loaded but no queries asked yet.
773
+ Suggestions are derived from the actual loaded table's columns."""
774
+ agent = get_agent()
775
+ tables = agent.list_tables()
776
+ suggestions: list[str] = []
777
+ if tables:
778
+ schema = agent.executor.get_table_schema(tables[0])
779
+ suggestions = _suggest_questions(tables[0], schema)
780
  return (
781
  '<div class="empty">'
782
  '<div class="empty-title">Ready</div>'
783
  '<div class="empty-sub">Ask a question above, or try one of these:</div>'
784
+ f'{_suggestions_html(suggestions)}'
785
  '</div>'
786
  )
787