AmandaPanda's picture
Update app.py
22d5c05 verified
# Explore 2019-2020 NHTSA Traffic Fatality data
# A. Harper / DATA 495 / February 2026
# VERY IMPORTANT
# Load into Hugging Face Space (using the Gradio Framework)
# Include requirements.txt file (list: torch==2.1.2, transformers==4.37.2, gradio==4.19.2, pandas)
# Also open the README file and add a new line: python_version: 3.11
# App does not run properly on default Python version.
# END
# To run, navigate to the App tab.
# The app prompts the user to upload a CSV file
# and then enter a question about the data. Click 'Submit' to run.
# Import tools and libraries
import multiprocessing
multiprocessing.set_start_method("spawn", force=True)
import torch
import pandas as pd
import gradio as gr
from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering
# Load TAPAS model
# The tokenizer and model are created once, at module level, and are shared
# by every call to answer_question().
# Checkpoint identifier handed to both from_pretrained() calls below.
model_name = "Meena/table-question-answering-tapas"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# .to("cpu") places the model on the CPU explicitly.
model = AutoModelForTableQuestionAnswering.from_pretrained(model_name).to("cpu")
# Global dataframe storage
# Stays None until dataset_change() stores an uploaded table;
# answer_question() checks for None before using it.
df = None
def dataset_change(file):
    """Load the uploaded CSV into the module-level ``df`` and return it.

    Args:
        file: The value delivered by the file-upload component: an object
            exposing the path as ``.name``, a plain path string, or ``None``
            when no file is selected (e.g. the upload was cleared).

    Returns:
        The stored table (at most 100 rows, every cell converted to ``str``),
        or ``None`` when no file is selected.
    """
    global df
    if file is None:
        # No file selected: drop the stale table instead of failing on
        # ``None.name``, so later questions get the "upload first" message.
        df = None
        return None
    # Accept both wrappers that carry the path in ``.name`` and bare paths.
    csv_path = getattr(file, "name", file)
    # Keep only the first 100 rows so the table stays small for TAPAS (the
    # original author notes a recommended limit of about 200 rows), and cast
    # every cell to str because the TAPAS tokenizer expects text cells.
    df = pd.read_csv(csv_path).head(100).astype(str)
    return df
def answer_question(question):
    """Answer a natural-language question about the uploaded table with TAPAS.

    Args:
        question: The question text entered by the user.

    Returns:
        The text of the selected table cells joined by ", ", or a short
        message when no table is loaded, the question is blank, or the model
        selected no cells.
    """
    # Read the global once so a concurrent upload cannot swap the table
    # between the None check and the cell lookup below.
    table = df
    if table is None:
        return "Please upload a dataset first."
    if not question or not question.strip():
        return "Please enter a question."
    inputs = tokenizer(
        table=table,
        queries=[question],
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    # Inference only: skip autograd bookkeeping during the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)
    # NOTE(review): the predicted aggregation operator is discarded, so only
    # the raw selected cells are reported - confirm this is intended.
    predicted_answer_coordinates, _ = tokenizer.convert_logits_to_predictions(
        inputs,
        outputs.logits.detach(),
        outputs.logits_aggregation.detach(),
    )
    # Exactly one query was submitted, so only the first prediction matters.
    coordinates = predicted_answer_coordinates[0]
    if not coordinates:
        return "No answer found."
    return ", ".join(str(table.iat[row, col]) for row, col in coordinates)
# Gradio UI
# Layout: the upload, question box and submit button on the left, the answer
# on the right, and a preview of the loaded table underneath.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            file_input = gr.File(label="Upload CSV File")
            question_input = gr.Textbox(label="Input Question")
            submit_btn = gr.Button("Submit")
        with gr.Column():
            answer_output = gr.Textbox(label="Answer")
    with gr.Row():
        dataframe_preview = gr.Dataframe()
    # Event wiring: a new upload refreshes the preview; Submit runs the model.
    file_input.change(fn=dataset_change, inputs=file_input, outputs=dataframe_preview)
    submit_btn.click(fn=answer_question, inputs=question_input, outputs=answer_output)

# This file forces the "spawn" multiprocessing start method, under which child
# processes re-import the main module. Guarding the launch keeps such a
# re-import (or a plain ``import`` of this module) from starting a second
# server; running the file as a script behaves as before.
if __name__ == "__main__":
    demo.launch(debug=True, show_error=True)