jskinner215 committed
Commit 5e71278 · 1 Parent(s): 6674859

Still debugging errors


This version includes:

- Exception handling to catch and display errors.
- MAX_ROWS_PER_CHUNK reduced from 200 to 50 to further mitigate the token limit issue (a sketch of the related 512-token guard follows below).
- Additional error messages to help you understand what is going wrong.
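
The guard this version relies on encodes the chunk first and checks the sequence length afterwards: the tokenizer pads every table-plus-questions encoding up to TAPAS's 512-token maximum but does not truncate longer ones, so an oversized chunk shows up in the tensor shape. A minimal standalone sketch of that pattern, assuming the same checkpoint the app loads (the table and question here are invented examples, not from the commit):

    import pandas as pd
    from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering

    tokenizer = AutoTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
    model = AutoModelForTableQuestionAnswering.from_pretrained("google/tapas-large-finetuned-wtq")

    # TAPAS expects a DataFrame whose cells are all strings.
    table = pd.DataFrame({"city": ["Paris", "Lyon"], "population": ["2.1M", "0.5M"]})
    questions = ["Which city has the larger population?"]

    inputs = tokenizer(table=table, queries=questions, padding="max_length", return_tensors="pt")

    # padding="max_length" pads short encodings up to the 512-token maximum
    # but leaves longer ones alone, so overflow is detectable from the shape.
    if inputs["input_ids"].shape[1] > 512:
        answers = ["Token limit exceeded for this chunk"] * len(questions)
    else:
        outputs = model(**inputs)
        coordinates, aggregations = tokenizer.convert_logits_to_predictions(
            inputs, outputs.logits.detach(), outputs.logits_aggregation.detach()
        )

Compared with the heuristic it replaces, which tokenized str(chunk) plus the joined questions, this measures the sequence the model actually receives rather than the DataFrame's printed representation.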

Files changed (1)
  1. app.py +29 -28
app.py CHANGED
@@ -2,7 +2,6 @@ import streamlit as st
 import pandas as pd
 from io import StringIO
 from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering
-import numpy as np
 
 # Initialize TAPAS model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
@@ -10,26 +9,20 @@ model = AutoModelForTableQuestionAnswering.from_pretrained("google/tapas-large-f
 
 def ask_llm_chunk(chunk, questions):
     chunk = chunk.astype(str)
-
-    # Debugging statement to print chunk shape
-    print("Chunk shape:", chunk.shape)
-    print("Sample data:", chunk.head())
-
-    # Count tokens
-    token_count = len(tokenizer.tokenize(str(chunk) + " ".join(questions)))
-    print("Token count:", token_count)
-
-    if token_count > 512:
-        print("Warning: Token count exceeds maximum allowable sequence length.")
-        return ["Token limit exceeded for chunk"] * len(questions)
-
-    inputs = tokenizer(table=chunk, queries=questions, padding="max_length", return_tensors="pt")
-    outputs = model(**inputs)
-    predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
-        inputs,
-        outputs.logits.detach(),
-        outputs.logits_aggregation.detach()
-    )
+    try:
+        inputs = tokenizer(table=chunk, queries=questions, padding="max_length", return_tensors="pt")
+        if inputs["input_ids"].shape[1] > 512:
+            return ["Token limit exceeded for this chunk"] * len(questions)
+        outputs = model(**inputs)
+        predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
+            inputs,
+            outputs.logits.detach(),
+            outputs.logits_aggregation.detach()
+        )
+    except Exception as e:
+        st.write(f"An error occurred: {e}")
+        return ["Error processing this chunk"] * len(questions)
+
     answers = []
     for coordinates in predicted_answer_coordinates:
         if len(coordinates) == 1:
@@ -41,17 +34,22 @@ def ask_llm_chunk(chunk, questions):
             answers.append(", ".join(cell_values))
     return answers
 
-MAX_ROWS_PER_CHUNK = 200
+MAX_ROWS_PER_CHUNK = 50 # Reduced chunk size
 
 def summarize_map_reduce(data, questions):
-    dataframe = pd.read_csv(StringIO(data))
+    try:
+        dataframe = pd.read_csv(StringIO(data))
+    except Exception as e:
+        st.write(f"Error reading the CSV file: {e}")
+        return []
+
     num_chunks = len(dataframe) // MAX_ROWS_PER_CHUNK + 1
     dataframe_chunks = np.array_split(dataframe, num_chunks)
     all_answers = []
     for chunk in dataframe_chunks:
         chunk_answers = ask_llm_chunk(chunk, questions)
         all_answers.extend(chunk_answers)
-    return all_answers # For now, simply returning the answers for each chunk
+    return all_answers
 
 st.title("TAPAS Table Question Answering")
 
@@ -69,8 +67,11 @@ if csv_file is not None:
 
 if st.button("Submit"):
     if data and questions:
-        answers = summarize_map_reduce(data, questions)
-        st.write("Answers:")
-        for q, a in zip(questions, answers):
-            st.write(f"Question: {q}")
-            st.write(f"Answer: {a}")
+        try:
+            answers = summarize_map_reduce(data, questions)
+            st.write("Answers:")
+            for q, a in zip(questions, answers):
+                st.write(f"Question: {q}")
+                st.write(f"Answer: {a}")
+        except Exception as e:
+            st.write(f"An error occurred: {e}")