# Explore 2019-2020 NHTSA Traffic Fatality data
# A. Harper / DATA 495 / February 2026
#
# VERY IMPORTANT
#   - Load into a Hugging Face Space (using the Gradio framework).
#   - Include a requirements.txt file listing:
#       torch==2.1.2, transformers==4.37.2, gradio==4.19.2, pandas
#   - Also open the README file and add a new line:
#       python_version: 3.11
#     The app does not run properly on the default Python version.
# END
#
# To run, navigate to the App tab. The app will prompt the user to upload
# a CSV file, then ask a question about the data. Click 'Submit' to run.

# Import tools and libraries
import multiprocessing

# NOTE(review): the start method is forced before the heavy imports below,
# presumably so torch/transformers never fork the process - confirm before
# reordering these lines.
multiprocessing.set_start_method("spawn", force=True)

from typing import Optional

import torch
import pandas as pd
import gradio as gr
from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering

# Load TAPAS model
model_name = "Meena/table-question-answering-tapas"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTableQuestionAnswering.from_pretrained(model_name).to("cpu")

# Maximum number of rows handed to TAPAS. Changed from 20 to 100 to show all
# rows; the recommended limit for TAPAS is 200, so 100 is well within it.
MAX_ROWS = 100

# Global dataframe storage.
# NOTE(review): this is module-level state, so it is shared by everything
# running in this process; consider gr.State if per-session isolation is
# needed.
df = None


def dataset_change(file) -> Optional[pd.DataFrame]:
    """Load the uploaded CSV into the global table and return it for preview.

    Args:
        file: Value delivered by the ``gr.File`` component. May be ``None``
            (upload cleared), an object exposing the path as ``.name``, or a
            plain path string.

    Returns:
        The first ``MAX_ROWS`` rows with every cell converted to ``str``, or
        ``None`` when the upload was cleared.
    """
    global df

    # Clearing the upload triggers the change event with no file; forget the
    # previous table instead of crashing on ``None.name``.
    if file is None:
        df = None
        return None

    # Accept both file-like objects carrying ``.name`` and bare path strings.
    csv_path = getattr(file, "name", file)
    table = pd.read_csv(csv_path)

    # Limit size for TAPAS and convert everything to string (required for
    # TAPAS).
    df = table.head(MAX_ROWS).astype(str)
    return df


def answer_question(question):
    """Answer a natural-language question about the uploaded table.

    Args:
        question: The question text entered by the user.

    Returns:
        A string: the selected cell values joined by ``", "``,
        ``"No answer found."`` when the model selects no cells, or a short
        prompt when the dataset or the question is missing.
    """
    if df is None:
        return "Please upload a dataset first."
    if df.empty:
        return "The uploaded dataset has no rows."
    if question is None or not question.strip():
        return "Please enter a question."

    inputs = tokenizer(
        table=df,
        queries=[question],
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)

    predicted_answer_coordinates, _predicted_aggregation_indices = (
        tokenizer.convert_logits_to_predictions(
            inputs,
            outputs.logits.detach(),
            outputs.logits_aggregation.detach(),
        )
    )

    # Exactly one query was submitted, so only the first prediction matters.
    coordinates = predicted_answer_coordinates[0]
    if not coordinates:
        return "No answer found."

    cell_values = [df.iat[row, col] for row, col in coordinates]
    return ", ".join(map(str, cell_values))


# Gradio UI
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            file_input = gr.File(label="Upload CSV File")
            question_input = gr.Textbox(label="Input Question")
            submit_btn = gr.Button("Submit")
        with gr.Column():
            answer_output = gr.Textbox(label="Answer")
    with gr.Row():
        dataframe_preview = gr.Dataframe()

    file_input.change(fn=dataset_change, inputs=file_input, outputs=dataframe_preview)
    submit_btn.click(fn=answer_question, inputs=question_input, outputs=answer_output)

# Guarded so that re-importing this module (which "spawn" child processes do)
# does not start a second server.
if __name__ == "__main__":
    demo.launch(debug=True, show_error=True)