Spaces:

jskinner215
/

TAPAS_WTQ_Chunking

Build error

App Files Files Community

jskinner215 committed on Aug 31, 2023

Commit

66f9f66

1 Parent(s): 072f6c1

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -24

app.py CHANGED Viewed

@@ -16,8 +16,9 @@ def ask_llm_chunk(chunk, questions):
         st.write(f"An error occurred: {e}")
         return ["Error occurred while tokenizing"] * len(questions)
-    # Debugging line
-    st.write(f"Token shape: {inputs['input_ids'].shape[1]}")
     outputs = model(**inputs)
     predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
@@ -28,42 +29,28 @@ def ask_llm_chunk(chunk, questions):
     answers = []
     for coordinates in predicted_answer_coordinates:
-        if len(coordinates) == 1:
-            row, col = coordinates[0]
             try:
-                st.write(f"DataFrame shape: {chunk.shape}")  # Debugging line
-                st.write(f"DataFrame columns: {chunk.columns}")  # Debugging line
                 st.write(f"Trying to access row {row}, col {col}")  # Debugging line
                 value = chunk.iloc[row, col]
-                st.write(f"Value accessed: {value}")  # New debugging line
                 if isinstance(value, pd.Series):
                     answers.append(value.values)
                 else:
                     answers.append(value.item() if hasattr(value, 'item') else value)
             except Exception as e:
-                st.write(f"Exception type: {type(e)}")  # New debugging line
-                st.write(f"Exception args: {e.args}")  # New debugging line
-                st.write(f"An error occurred: {e}")  # Existing debugging line
-        else:
-            cell_values = []
-            for coordinate in coordinates:
-                row, col = coordinate
-                try:
-                    value = chunk.iloc[row, col]
-                    if isinstance(value, pd.Series):
-                        cell_values.append(value.values)
-                    else:
-                        cell_values.append(value.item() if hasattr(value, 'item') else value)
-                except Exception as e:
-                    st.write(f"An error occurred: {e}")
-                    cell_values.append("Error")
-            answers.append(", ".join(map(str, cell_values)))
     return answers
 MAX_ROWS_PER_CHUNK = 200
 def summarize_map_reduce(data, questions):

         st.write(f"An error occurred: {e}")
         return ["Error occurred while tokenizing"] * len(questions)
+    if inputs["input_ids"].shape[1] > 512:
+        st.warning("Token limit exceeded for chunk")
+        return ["Token limit exceeded for chunk"] * len(questions)
     outputs = model(**inputs)
     predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
     answers = []
     for coordinates in predicted_answer_coordinates:
+        for coordinate in coordinates:
+            row, col = coordinate
             try:
                 st.write(f"Trying to access row {row}, col {col}")  # Debugging line
                 value = chunk.iloc[row, col]
+                st.write(f"Value accessed: {value}")  # Debugging line
                 if isinstance(value, pd.Series):
                     answers.append(value.values)
                 else:
                     answers.append(value.item() if hasattr(value, 'item') else value)
             except Exception as e:
+                st.write(f"An error occurred: {e}")
+                st.write(f"Type of error: {type(e)}")
+                st.write(f"Arguments of error: {e.args}")
+        answers.append(", ".join(map(str, [chunk.iloc[coordinate].values for coordinate in coordinates])))
     return answers
 MAX_ROWS_PER_CHUNK = 200
 def summarize_map_reduce(data, questions):