jskinner215 committed on
Commit
6674859
·
1 Parent(s): 0704eb2

updated to fix indexing error

Browse files

Error:
Token indices sequence length is longer than the specified maximum sequence length for this model (5187 > 512).

This means the tokenized input is longer than the maximum sequence length the model accepts. TAPAS, like many other transformer models, has a fixed maximum input sequence length; in this case, it is 512 tokens.
IndexError: iloc cannot enlarge its target object

This is a pandas error: `iloc` cannot assign a value to a position that does not already exist in the DataFrame.

Attempted Fix:
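I've added a check to ensure that the token count doesn't exceed the model's limit; a chunk that exceeds it is skipped with a "Token limit exceeded for chunk" answer.
I've added debugging print statements (chunk shape, sample data, token count) to help show what's going on.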

Files changed (1) hide show
  1. app.py +14 -4
app.py CHANGED
@@ -10,9 +10,20 @@ model = AutoModelForTableQuestionAnswering.from_pretrained("google/tapas-large-f
10
 
11
  def ask_llm_chunk(chunk, questions):
12
  chunk = chunk.astype(str)
13
- inputs = tokenizer(table=chunk, queries=questions, padding="max_length", return_tensors="pt")
14
- if inputs["input_ids"].shape[1] > 1024:
 
 
 
 
 
 
 
 
 
15
  return ["Token limit exceeded for chunk"] * len(questions)
 
 
16
  outputs = model(**inputs)
17
  predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
18
  inputs,
@@ -40,8 +51,7 @@ def summarize_map_reduce(data, questions):
40
  for chunk in dataframe_chunks:
41
  chunk_answers = ask_llm_chunk(chunk, questions)
42
  all_answers.extend(chunk_answers)
43
- aggregated_answers = all_answers
44
- return aggregated_answers
45
 
46
  st.title("TAPAS Table Question Answering")
47
 
 
10
 
11
  def ask_llm_chunk(chunk, questions):
12
  chunk = chunk.astype(str)
13
+
14
+ # Debugging statement to print chunk shape
15
+ print("Chunk shape:", chunk.shape)
16
+ print("Sample data:", chunk.head())
17
+
18
+ # Count tokens
19
+ token_count = len(tokenizer.tokenize(str(chunk) + " ".join(questions)))
20
+ print("Token count:", token_count)
21
+
22
+ if token_count > 512:
23
+ print("Warning: Token count exceeds maximum allowable sequence length.")
24
  return ["Token limit exceeded for chunk"] * len(questions)
25
+
26
+ inputs = tokenizer(table=chunk, queries=questions, padding="max_length", return_tensors="pt")
27
  outputs = model(**inputs)
28
  predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
29
  inputs,
 
51
  for chunk in dataframe_chunks:
52
  chunk_answers = ask_llm_chunk(chunk, questions)
53
  all_answers.extend(chunk_answers)
54
+ return all_answers # For now, simply returning the answers for each chunk
 
55
 
56
  st.title("TAPAS Table Question Answering")
57