Spaces:

PD03
/

talk_to_data

Sleeping

App Files Files Community

PD03 committed on Jun 26, 2025

Commit

b1f2bdd

verified ·

1 Parent(s): 53c503a

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -64

app.py CHANGED Viewed

@@ -1,80 +1,83 @@
-import re
 import gradio as gr
 import pandas as pd
-from transformers import pipeline
-# Load data
 df = pd.read_csv("synthetic_profit.csv")
-# Prepare TAPAS fallback
-tapas = pipeline(
-    "table-question-answering",
-    model="google/tapas-base-finetuned-wtq",
-    tokenizer="google/tapas-base-finetuned-wtq",
-    device=-1
 )
-table = df.astype(str).to_dict(orient="records")
-# Helpers
-OPERATIONS = {"total": "sum", "sum": "sum", "average": "mean", "mean": "mean"}
-COLUMNS    = {"revenue": "Revenue", "cost": "Cost", "profit": "Profit", "margin":"ProfitMargin","profit margin":"ProfitMargin"}
-def parse_and_compute(question: str):
-    q = question.lower()
-    # 1) detect operation
-    op = next((OPERATIONS[k] for k in OPERATIONS if k in q), None)
-    # 2) detect column
-    col = next((COLUMNS[k]   for k in COLUMNS    if k in q), None)
-    # 3) detect product by scanning your actual values
-    prod = next((p for p in df["Product"].unique() if p.lower() in q), None)
-    # 4) region
-    region = next((r for r in df["Region"].unique()  if r.lower() in q), None)
-    # 5) year
-    yr_match = re.search(r"\b(20\d{2})\b", q)
-    year = int(yr_match.group(1)) if yr_match else None
-    # 6) quarter
-    qtr = next((x for x in df["FiscalQuarter"].unique() if x.lower() in q), None)
-    # if any piece missing, we fallback
-    if None in (op, col, prod, region, year, qtr):
-        return None
-    # filter & compute
-    sub = df[
-        (df["Product"] == prod) &
-        (df["Region"]  == region) &
-        (df["FiscalYear"]   == year) &
-        (df["FiscalQuarter"]== qtr)
-    ]
-    try:
-        val = getattr(sub[col], op)()
-    except:
-        return None
-    return f"{op.capitalize()} {col} for {prod} in {region}, {qtr} {year}: {val:.2f}"
-def answer(question: str) -> str:
-    out = parse_and_compute(question)
-    if out is not None:
-        return out
-    # fallback
-    try:
-        res = tapas(table=table, query=question)
-        return res.get("answer", "No answer found.")
-    except Exception as e:
-        return f"❌ Error: {e}"
-# Gradio...
 iface = gr.Interface(
-    fn=answer,
-    inputs=gr.Textbox(lines=2, placeholder="e.g. What is the total revenue for Product A in EMEA in Q1 2024?"),
     outputs=gr.Textbox(lines=3),
-    title="SAP Profitability Q&A",
-    description="Basic total/average queries via Pandas+fallback to TAPAS",
     allow_flagging="never",
 )
-if __name__=="__main__":
     iface.launch(server_name="0.0.0.0", server_port=7860)

+import os
 import gradio as gr
 import pandas as pd
+import tensorflow as tf
+from tapas.scripts import prediction_utils
+from tapas.utils import number_annotation_utils
+from tapas.protos import interaction_pb2
+# 1) Read CSV and build list-of-lists table
+import pandas as pd
 df = pd.read_csv("synthetic_profit.csv")
+# Ensure all values are strings
+df = df.astype(str)
+# Build TAPAS-style table: header row + data rows
+table = [list(df.columns)] + df.values.tolist()
+# 2) Configure TAPAS conversion with aggregation support
+from tapas.utils import example_utils as tf_example_utils
+config = tf_example_utils.ClassifierConversionConfig(
+    vocab_file="tapas_sqa_base/vocab.txt",
+    max_seq_length=512,
+    max_column_id=512,
+    max_row_id=512,
+    strip_column_names=False,             # Keep header names
+    add_aggregation_candidates=True,     # Propose SUM/AVERAGE operations
 )
+converter = tf_example_utils.ToClassifierTensorflowExample(config)
+# 3) Helper: convert one interaction to model input
+def interaction_from_query(question: str):
+    interaction = interaction_pb2.Interaction()
+    # Add question
+    q = interaction.questions.add()
+    q.original_text = question
+    # Add table columns
+    for col in table[0]:
+        interaction.table.columns.add().text = col
+    # Add table rows/cells
+    for row in table[1:]:
+        r = interaction.table.rows.add()
+        for cell in row:
+            r.cells.add().text = cell
+    # Annotate numeric values
+    number_annotation_utils.add_numeric_values(interaction)
+    return interaction
+# 4) Instantiate TAPAS model and tokenizer
+from transformers import TFAutoModelForSequenceClassification, AutoTokenizer
+MODEL = "google/tapas-base-finetuned-wtq"
+tokenizer = AutoTokenizer.from_pretrained(MODEL)
+model     = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
+# 5) Prediction helper
+def predict_answer(question: str):
+    interaction = interaction_from_query(question)
+    # Convert to TensorFlowExample
+    tf_example = converter.convert(interaction)
+    # Run prediction
+    result = model(tf_example.features)
+    # Parse answer coordinates
+    coords = prediction_utils.parse_coordinates(result.logits)
+    # Map coordinates back to table cells
+    answers = []
+    for r, c in coords:
+        answers.append(table[r+1][c])
+    return ", ".join(answers)
+# 6) Gradio interface
 iface = gr.Interface(
+    fn=predict_answer,
+    inputs=gr.Textbox(lines=2, placeholder="Ask a question…"),
     outputs=gr.Textbox(lines=3),
+    title="SAP Profitability Q&A (TAPAS Low-Level)",
+    description=(
+        "Low-level TAPAS: list-of-lists input, numeric annotations, "
+        "aggregation candidates, and coordinate post-processing."
+    ),
     allow_flagging="never",
 )
+if __name__ == "__main__":
     iface.launch(server_name="0.0.0.0", server_port=7860)