# Hugging Face Space: TAPAS table question answering (Streamlit app)
from copy import deepcopy
from io import StringIO

import numpy as np
import pandas as pd
import streamlit as st
from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering


@st.cache_resource
def _load_tapas():
    """Load the TAPAS WTQ tokenizer and model once per server process.

    Streamlit re-executes this script top-to-bottom on every user
    interaction; without caching, the (large) checkpoint would be
    re-loaded on each rerun.
    """
    tok = AutoTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
    mdl = AutoModelForTableQuestionAnswering.from_pretrained(
        "google/tapas-large-finetuned-wtq"
    )
    return tok, mdl


# Module-level handles used by ask_llm_chunk() below.
tokenizer, model = _load_tapas()
def ask_llm_chunk(chunk, questions):
    """Run TAPAS over one dataframe chunk and answer each question.

    Args:
        chunk: pd.DataFrame slice of the full table. Values are coerced to
            str (TAPAS tokenizer requirement) and the index is reset so the
            predicted (row, col) coordinates map positionally.
        questions: list[str] of natural-language questions.

    Returns:
        list[str] with exactly one entry per question. On tokenizer error
        or token-limit overflow, placeholder strings keep the result
        aligned with `questions`.
    """
    # Reset the index defensively: np.array_split preserves the original
    # row labels, and we want iloc coordinates to be unambiguous.
    chunk = chunk.reset_index(drop=True).astype(str)
    try:
        inputs = tokenizer(
            table=chunk,
            queries=questions,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
    except Exception as e:
        st.write(f"An error occurred: {e}")
        return ["Error occurred while tokenizing"] * len(questions)

    # TAPAS has a hard 512-token sequence limit; a truncated table would
    # yield meaningless coordinates, so bail out for the whole chunk.
    if inputs["input_ids"].shape[1] > 512:
        st.warning("Token limit exceeded for chunk")
        return ["Token limit exceeded for chunk"] * len(questions)

    outputs = model(**inputs)
    predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
        inputs,
        outputs.logits.detach(),
        outputs.logits_aggregation.detach(),
    )

    answers = []
    for coordinates in predicted_answer_coordinates:
        # Collect every predicted cell; a single bad coordinate must not
        # lose the whole answer, and — unlike the old single-cell branch —
        # we ALWAYS append something, keeping answers aligned with
        # questions even when a lookup fails.
        cell_values = []
        for row, col in coordinates:
            try:
                cell_values.append(chunk.iloc[row, col])
            except Exception as e:
                st.write(f"An error occurred: {e}")
        # One cell joins to itself, so single- and multi-cell answers are
        # handled uniformly.
        answers.append(", ".join(map(str, cell_values)))
    return answers
# Rows per TAPAS call; keeps each serialized table under the model's
# 512-token limit for typical CSVs.
MAX_ROWS_PER_CHUNK = 200


def summarize_map_reduce(data, questions):
    """Split the CSV into row chunks, query TAPAS on each, merge answers.

    Args:
        data: raw CSV text.
        questions: list[str] of questions to ask of the whole table.

    Returns:
        list[str] of length len(questions); the i-th entry combines the
        i-th question's per-chunk answers, "; "-separated.
    """
    dataframe = pd.read_csv(StringIO(data))
    # Ceiling division. The old `len // N + 1` produced a spurious empty
    # chunk whenever the row count was an exact multiple of N.
    num_chunks = max(1, -(-len(dataframe) // MAX_ROWS_PER_CHUNK))
    dataframe_chunks = [deepcopy(c) for c in np.array_split(dataframe, num_chunks)]

    # Map: ask every question of every chunk.
    per_chunk_answers = [ask_llm_chunk(c, questions) for c in dataframe_chunks]

    # Reduce: regroup by question. The old code concatenated the chunk
    # results flat, so the caller's zip(questions, answers) only ever
    # paired the FIRST chunk's answers with the questions.
    combined = []
    for i in range(len(questions)):
        combined.append(
            "; ".join(str(ans[i]) for ans in per_chunk_answers if i < len(ans))
        )
    return combined
st.title("TAPAS Table Question Answering")

# Upload CSV data
csv_file = st.file_uploader("Upload a CSV file", type=["csv"])
if csv_file is not None:
    data = csv_file.read().decode("utf-8")
    st.write("CSV Data Preview:")
    st.write(pd.read_csv(StringIO(data)).head())

    # Input for questions, one per line. strip() so whitespace-only lines
    # are dropped too (the old `if q` filter let "   " through to TAPAS).
    questions = st.text_area("Enter your questions (one per line)")
    questions = [q.strip() for q in questions.split("\n") if q.strip()]

    if st.button("Submit"):
        if data and questions:
            answers = summarize_map_reduce(data, questions)
            st.write("Answers:")
            for q, a in zip(questions, answers):
                st.write(f"Question: {q}")
                st.write(f"Answer: {a}")
# Add Ctrl+Enter functionality for submitting the questions.
# NOTE(review): st.markdown sanitizes HTML and is widely reported to strip
# <script> tags even with unsafe_allow_html=True, so this handler likely
# never executes — confirm, and consider streamlit.components.v1.html if
# the shortcut is actually required.
st.markdown("""
<script>
document.addEventListener("DOMContentLoaded", function(event) {
    document.addEventListener("keydown", function(event) {
        if (event.ctrlKey && event.key === "Enter") {
            document.querySelector(".stButton button").click();
        }
    });
});
</script>
""", unsafe_allow_html=True)