Spaces: PD03/talk_to_data

Sleeping

App Files Community

PD03 committed on Jun 26, 2025

Commit

25e4074

verified ·

1 Parent(s): 93045b6

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -71

app.py CHANGED Viewed

@@ -1,89 +1,55 @@
 import gradio as gr
 import pandas as pd
-import tensorflow as tf
-from tapas.protos import interaction_pb2
-from tapas.utils import number_annotation_utils, tf_example_utils, prediction_utils
-from tapas.scripts.run_task_main import get_classifier_model, get_task_config
 # 1) Load & stringify your CSV
 df = pd.read_csv("synthetic_profit.csv")
-df = df.astype(str)
-# 2) Build the “list of lists” table (header + rows)
-table = [list(df.columns)]
-table.extend(df.values.tolist())
-# 3) Prepare the TAPAS converter with aggregation candidates
-config = tf_example_utils.ClassifierConversionConfig(
-    vocab_file="tapas_sqa_base/vocab.txt",
-    max_seq_length=512,
-    max_column_id=512,
-    max_row_id=512,
-    strip_column_names=False,
-    add_aggregation_candidates=True,
 )
-converter = tf_example_utils.ToClassifierTensorflowExample(config)
-# 4) Load pretrained TAPAS checkpoint
-task_config = get_task_config(
-    task="sqa",
-    init_checkpoint="tapas_sqa_base/model.ckpt-0",
-    vocab_file=config.vocab_file,
-    bsz=1,
-    max_seq_length=config.max_seq_length,
-)
-model, tokenizer = get_classifier_model(task_config)
-# 5) Build a TF example from (table, query)
-def make_tf_example(table, query):
-    interaction = interaction_pb2.Interaction()
-    # question
-    q = interaction.questions.add()
-    q.original_text = query
-    # columns
-    for col in table[0]:
-        interaction.table.columns.add().text = col
-    # rows
-    for row_vals in table[1:]:
-        row = interaction.table.rows.add()
-        for cell in row_vals:
-            row.cells.add().text = cell
-    # numeric annotation for SUM/AVG
-    number_annotation_utils.add_numeric_values(interaction)
-    # convert to serialized Example
-    return converter.convert(interaction)
-# 6) Run TAPAS & parse coordinates back to cell values
-def predict_answer(query):
-    example = make_tf_example(table, query)
-    input_fn = tf_example_utils.input_fn_builder(
-        [example],
-        is_training=False,
-        drop_remainder=False,
-        batch_size=1,
-        seq_length=config.max_seq_length,
-    )
-    preds = model.predict(input_fn)
-    coords = prediction_utils.parse_coordinates(preds[0]["answer_coordinates"])
-    answers = [ table[r+1][c] for (r, c) in coords ]  # r+1 because row 0 is header
-    return ", ".join(answers) if answers else "No answer found."
-# 7) Gradio interface
-def answer_fn(question: str) -> str:
     try:
-        return predict_answer(question)
     except Exception as e:
-        return f"❌ Error: {e}"
 iface = gr.Interface(
-    fn=answer_fn,
-    inputs=gr.Textbox(lines=2, label="Your question"),
-    outputs=gr.Textbox(label="Answer"),
-    title="SAP Profitability Q&A (TAPAS Low-Level)",
     description=(
-        "TAPAS with aggregation candidates & numeric annotations—"
-        "robust sums/averages on your SAP data."
     ),
     allow_flagging="never",
 )

 import gradio as gr
 import pandas as pd
+from transformers import pipeline
 # 1) Load & stringify your CSV
 df = pd.read_csv("synthetic_profit.csv")
+table = df.astype(str).to_dict(orient="records")
+# 2) Instantiate the TAPAS pipeline from Transformers
+qa = pipeline(
+    "table-question-answering",
+    model="google/tapas-base-finetuned-wtq",
+    tokenizer="google/tapas-base-finetuned-wtq",
+    device=-1,   # CPU; change to 0 if you have a GPU
 )
+# 3) Few-shot examples teach “filter + sum” vs. “filter + mean”
+EXAMPLES = """
+Example 1:
+Q: What is the total revenue for Product A in EMEA in Q1 2024?
+A: Filter Product=A & Region=EMEA & FiscalYear=2024 & FiscalQuarter=Q1, then sum Revenue → 3075162.49
+Example 2:
+Q: What is the total cost for Product A in EMEA in Q1 2024?
+A: Filter Product=A & Region=EMEA & FiscalYear=2024 & FiscalQuarter=Q1, then sum Cost → 2894321.75
+Example 3:
+Q: What is the total margin for Product A in EMEA in Q1 2024?
+A: Filter Product=A & Region=EMEA & FiscalYear=2024 & FiscalQuarter=Q1, then sum ProfitMargin → 0.18
+Example 4:
+Q: What is the average profit margin for Product A in EMEA in Q1 2024?
+A: Filter Product=A & Region=EMEA & FiscalYear=2024 & FiscalQuarter=Q1, then mean ProfitMargin → 0.18
+"""
+def answer_question(question: str) -> str:
+    prompt = EXAMPLES + f"\nQ: {question}\nA:"
     try:
+        result = qa(table=table, query=prompt)
+        return result.get("answer", "No answer found.")
     except Exception as e:
+        return f"❌ Pipeline error:\n{e}"
+# 4) Gradio UI
 iface = gr.Interface(
+    fn=answer_question,
+    inputs=gr.Textbox(lines=2, placeholder="e.g. What is the total revenue for Product A in Q1 2024?"),
+    outputs=gr.Textbox(lines=3),
+    title="SAP Profitability Q&A",
     description=(
+        "Ask simple sum/mean questions on the synthetic SAP data.  \n"
+        "Powered by google/tapas-base-finetuned-wtq with four few-shot examples."
     ),
     allow_flagging="never",
 )