Spaces:

PD03
/

talk_to_data

Sleeping

App Files Files Community

PD03 committed on Jun 26, 2025

Commit

d162c32

verified ·

1 Parent(s): b1faf3e

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -48

app.py CHANGED Viewed

@@ -2,79 +2,102 @@ import os
 import gradio as gr
 import pandas as pd
 import tensorflow as tf
-from tapas.scripts import prediction_utils
-from tapas.utils import number_annotation_utils
-from tapas.protos import interaction_pb2
-# 1) Read CSV and build list-of-lists table
-import pandas as pd
 df = pd.read_csv("synthetic_profit.csv")
-# Ensure all values are strings
 df = df.astype(str)
-# Build TAPAS-style table: header row + data rows
-table = [list(df.columns)] + df.values.tolist()
-# 2) Configure TAPAS conversion with aggregation support
-from tapas.utils import example_utils as tf_example_utils
 config = tf_example_utils.ClassifierConversionConfig(
     vocab_file="tapas_sqa_base/vocab.txt",
     max_seq_length=512,
     max_column_id=512,
     max_row_id=512,
-    strip_column_names=False,             # Keep header names
-    add_aggregation_candidates=True,     # Propose SUM/AVERAGE operations
 )
 converter = tf_example_utils.ToClassifierTensorflowExample(config)
-# 3) Helper: convert one interaction to model input
-def interaction_from_query(question: str):
     interaction = interaction_pb2.Interaction()
-    # Add question
     q = interaction.questions.add()
-    q.original_text = question
-    # Add table columns
     for col in table[0]:
         interaction.table.columns.add().text = col
-    # Add table rows/cells
-    for row in table[1:]:
-        r = interaction.table.rows.add()
-        for cell in row:
-            r.cells.add().text = cell
-    # Annotate numeric values
     number_annotation_utils.add_numeric_values(interaction)
-    return interaction
-# 4) Instantiate TAPAS model and tokenizer
-from transformers import TFAutoModelForSequenceClassification, AutoTokenizer
-MODEL = "google/tapas-base-finetuned-wtq"
-tokenizer = AutoTokenizer.from_pretrained(MODEL)
-model     = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
-# 5) Prediction helper
-def predict_answer(question: str):
-    interaction = interaction_from_query(question)
-    # Convert to TensorFlowExample
-    tf_example = converter.convert(interaction)
-    # Run prediction
-    result = model(tf_example.features)
-    # Parse answer coordinates
-    coords = prediction_utils.parse_coordinates(result.logits)
-    # Map coordinates back to table cells
     answers = []
-    for r, c in coords:
         answers.append(table[r+1][c])
-    return ", ".join(answers)
-# 6) Gradio interface
 iface = gr.Interface(
-    fn=predict_answer,
-    inputs=gr.Textbox(lines=2, placeholder="Ask a question…"),
-    outputs=gr.Textbox(lines=3),
     title="SAP Profitability Q&A (TAPAS Low-Level)",
     description=(
-        "Low-level TAPAS: list-of-lists input, numeric annotations, "
-        "aggregation candidates, and coordinate post-processing."
     ),
     allow_flagging="never",
 )

 import gradio as gr
 import pandas as pd
 import tensorflow as tf
+# TAPAS imports
+from tapas.protos import interaction_pb2
+from tapas.utils import number_annotation_utils, tf_example_utils, prediction_utils
+from tapas.scripts.run_task_main import get_classifier_model, get_task_config
+# 1) Load the CSV and cast every column to str (cell values are later assigned to .text fields)
 df = pd.read_csv("synthetic_profit.csv")
 df = df.astype(str)
+# 2) Build the “list of lists” table
+#    (table[0] is the header row, table[1:] are the data rows)
+table = [list(df.columns)]
+table.extend(df.values.tolist())
+# 3) Prepare the TAPAS converter (the model itself is loaded in step 4)
+#    – add_aggregation_candidates=True to surface SUM/AVG ops
+#    – strip_column_names=False so your exact headers stay visible
 config = tf_example_utils.ClassifierConversionConfig(
     vocab_file="tapas_sqa_base/vocab.txt",
     max_seq_length=512,
     max_column_id=512,
     max_row_id=512,
+    strip_column_names=False,
+    add_aggregation_candidates=True,
 )
 converter = tf_example_utils.ToClassifierTensorflowExample(config)
+# 4) Load your pretrained checkpoint, reusing the vocab file and sequence length from config
+#    NOTE(review): said to mirror the flags of run_task_main.py --mode=predict; confirm against the installed tapas version
+task_config = get_task_config(
+    task="sqa",
+    init_checkpoint="tapas_sqa_base/model.ckpt-0",
+    vocab_file=config.vocab_file,
+    bsz=1,
+    max_seq_length=config.max_seq_length,
+)
+model, tokenizer = get_classifier_model(task_config)  # NOTE(review): tokenizer is not referenced again in the visible code
+# 5) Build an Interaction proto from one (table, query) pair and return converter.convert() of it; table[0] is the header row, table[1:] the data rows
+def make_tf_example(table, query):  # the table parameter shadows the module-level table of the same name
     interaction = interaction_pb2.Interaction()
+    # a) question: a single question entry whose original_text is the query
     q = interaction.questions.add()
+    q.original_text = query
+    # b) columns: one column entry per header name
     for col in table[0]:
         interaction.table.columns.add().text = col
+    # c) rows: one row entry per data row, one cell entry per value
+    for row_vals in table[1:]:
+        row = interaction.table.rows.add()
+        for cell in row_vals:
+            row.cells.add().text = cell
+    # d) numeric annotation helps SUM/AVG (annotates the interaction in place; the return value is not used)
     number_annotation_utils.add_numeric_values(interaction)
+    # e) convert to example. NOTE(review): the exact signature and return type of convert() are not visible here; confirm
+    serialized = converter.convert(interaction)
+    return serialized
+# 6) Answer one question against the module-level table: returns the selected cell texts joined by ", ", or "No answer found." when there are none
+def predict_answer(query):
+    # build TF example from the module-level table and the query
+    example = make_tf_example(table, query)
+    # run prediction on a single example with batch size 1
+    input_fn = tf_example_utils.input_fn_builder(  # NOTE(review): confirm this helper exists in tf_example_utils with these keyword arguments
+        [example],
+        is_training=False,
+        drop_remainder=False,
+        batch_size=1,
+        seq_length=config.max_seq_length,
+    )
+    preds = model.predict(input_fn)  # NOTE(review): preds is indexed with [0] below; confirm predict() returns an indexable sequence, not a lazy iterator
+    # parse answer coordinates into (row, column) pairs. NOTE(review): the previous revision imported prediction_utils from tapas.scripts, this one from tapas.utils; confirm the module path
+    coords = prediction_utils.parse_coordinates(preds[0]["answer_coordinates"])
+    # map back to table values; only the selected cell texts are collected, no SUM/AVERAGE is computed here
     answers = []
+    for (r, c) in coords:
+        # table[0] is header row, so data starts at index 1
         answers.append(table[r+1][c])
+    return ", ".join(answers) if answers else "No answer found."
+# 7) Gradio interface: answer_fn wraps predict_answer so that failures come back as text in the answer box instead of raising
+def answer_fn(question: str) -> str:
+    try:
+        return predict_answer(question)
+    except Exception as e:  # catch-all: any exception is turned into the returned message
+        return f"❌ Error: {e}"
 iface = gr.Interface(
+    fn=answer_fn,
+    inputs=gr.Textbox(lines=2, label="Your question"),
+    outputs=gr.Textbox(label="Answer"),
     title="SAP Profitability Q&A (TAPAS Low-Level)",
     description=(
+        "Uses TAPAS’s Interaction + Converter APIs with aggregation candidates "
+        "and numeric annotations to reliably answer sum/average queries."
     ),
     allow_flagging="never",
 )  # NOTE(review): no iface.launch() call appears in the visible hunk; confirm the app is started elsewhere