Spaces:

PD03
/

talk_to_data

Sleeping

App Files Files Community

PD03 committed on Jun 25, 2025

Commit

a5ece8b

verified ·

1 Parent(s): 6a97111

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -48

app.py CHANGED Viewed

@@ -1,67 +1,57 @@
-# 3) load TAPAS
-from transformers import pipeline
 qa = pipeline(
     "table-question-answering",
     model="google/tapas-base-finetuned-sqa",
     tokenizer="google/tapas-base-finetuned-sqa"
 )
-# 4) cast to strings to avoid the regex bug
-df_str = df.astype(str)
-# 5) sanity check
-print( qa(table=df_str, query="What was the ProfitMargin for Product B in EMEA Q2 2024?") )
-# 6) launch Gradio
-import gradio as gr
-import re
 def answer(q: str) -> str:
-    # --- 1. try to parse explicit total/average queries ---
-    m = re.search(r"\b(total|average)\s+(ProfitMargin|Profit|Revenue|Cost)\b", q, re.IGNORECASE)
-    p = re.search(r"\bProduct\s*([A-D])\b", q, re.IGNORECASE)
-    t = re.search(r"\b(Q[1-4])\s*(\d{4})\b", q, re.IGNORECASE)
-    if m and p and t:
-        agg_type = m.group(1).lower()      # "total" or "average"
-        metric   = m.group(2)              # column name
-        product  = f"Product {p.group(1).upper()}"
-        quarter  = t.group(1)
-        year     = int(t.group(2))
-        # filter the *numeric* DataFrame
         subset = df[
-            (df["Product"]       == product) &
             (df["FiscalQuarter"] == quarter) &
-            (df["FiscalYear"]    == year)
         ]
-        if not subset.empty:
-            if agg_type == "total":
-                val = subset[metric].sum()
-                return f"Total {metric} for {product} in {quarter} {year}: {val:,.2f}"
-            else:  # average
-                val = subset[metric].mean()
-                # show 3 decimal places for margins, 2 for currency
-                fmt = "{:,.3f}" if metric=="ProfitMargin" else "{:,.2f}"
-                return f"Average {metric} for {product} in {quarter} {year}: " + fmt.format(val)
-    # --- 2. fallback to TAPAS for everything else ---
     res = qa(table=df_str, query=q)
-    agg = res.get("aggregator","")
-    if agg and agg != "NONE":
-        return f"Answer: {res['answer']}  (agg: {agg})"
-    # last-resort: raw answer
-    return f"Answer: {res['answer']}"
 demo = gr.Interface(
     fn=answer,
-    inputs=gr.Textbox(lines=2, placeholder="e.g. Profit for Product A in Q1 2023?"),
     outputs="text",
-    title="S/4HANA Profitability Chat",
 )
-demo.launch(share=True, debug=True)

+import pandas as pd
+import numpy as np
+import re
+from transformers import pipeline
+import gradio as gr
+# Load numeric data
+df = pd.read_csv("synthetic_profit.csv")
+# String DataFrame for TAPAS
+df_str = df.astype(str)
+# Initialize TAPAS
 qa = pipeline(
     "table-question-answering",
     model="google/tapas-base-finetuned-sqa",
     tokenizer="google/tapas-base-finetuned-sqa"
 )
 def answer(q: str) -> str:
+    # 1. Conditional query: negative profit
+    if re.search(r"products.*negative.*profit", q, re.IGNORECASE):
+        negative_profits = df[df["Profit"] < 0]
+        if negative_profits.empty:
+            return "✅ No products with negative profit found."
+        results = negative_profits[['Product', 'Region', 'FiscalQuarter', 'FiscalYear', 'Profit']]
+        return results.to_string(index=False)
+    # 2. Numeric summaries (total/average)
+    match = re.search(r"\b(total|average)\s+(ProfitMargin|Profit|Revenue|Cost)\b.*\bProduct\s*([A-D])\b.*\b(Q[1-4])\s*(\d{4})", q, re.IGNORECASE)
+    if match:
+        agg, metric, product, quarter, year = match.groups()
         subset = df[
+            (df["Product"] == f"Product {product.upper()}") &
             (df["FiscalQuarter"] == quarter) &
+            (df["FiscalYear"] == int(year))
         ]
+        if subset.empty:
+            return "⚠️ No matching data."
+        value = subset[metric].sum() if agg.lower() == "total" else subset[metric].mean()
+        formatted_val = f"{value:.3f}" if metric == "ProfitMargin" else f"{value:,.2f}"
+        return f"📊 {agg.title()} {metric} for Product {product.upper()} in {quarter} {year}: {formatted_val}"
+    # 3. TAPAS fallback for everything else
     res = qa(table=df_str, query=q)
+    return f"🔍 {res['answer']} (agg: {res.get('aggregator','NONE')})"
+# Launch Gradio
 demo = gr.Interface(
     fn=answer,
+    inputs=gr.Textbox(lines=2, placeholder="e.g. 'total Profit for Product A in Q1 2024?' or 'List products with negative profit.'"),
     outputs="text",
+    title="🟢 SAP S/4HANA Profitability Chat",
+    description="Ask questions on profitability data (synthetic demo). Supports total, average, and conditional queries."
 )
+demo.launch()