Spaces:

jskinner215
/

TAPAS_WTQ_Chunking

Build error

jskinner215 committed on Sep 10, 2023

Commit

0e62360

1 Parent(s): 45ee012

added embedded weaviate client and ingest_data + query_weaviate functions

from copy import deepcopy
import streamlit as st
import pandas as pd
from io import StringIO
from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering
import numpy as np
import weaviate

# Initialize TAPAS model and tokenizer.
# Streamlit re-executes this script on every widget interaction, so the heavy
# objects are built inside cached factories and reused across reruns instead
# of being reloaded each time.
@st.cache_resource
def _load_tapas(checkpoint="google/tapas-large-finetuned-wtq"):
    """Load the TAPAS tokenizer and table-QA model for *checkpoint*.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForTableQuestionAnswering.from_pretrained(checkpoint),
    )


@st.cache_resource
def _connect_weaviate(url="http://localhost:8080"):
    """Create the Weaviate client for the instance reachable at *url*."""
    return weaviate.Client(url)


tokenizer, model = _load_tapas()

# Initialize Weaviate client.
# NOTE(review): this connects to a server at http://localhost:8080; it does not
# itself start an embedded instance -- confirm one is running at that address.
client = _connect_weaviate()

# Function to ingest data into Weaviate
def ingest_data_to_weaviate(dataframe, class_name="YourClassName"):
    """Store every row of *dataframe* as one Weaviate object.

    Args:
        dataframe: Table whose rows are ingested; column names become the
            object's property names.
        class_name: Weaviate class the objects are created under.

    Returns:
        None. An empty or missing dataframe is a no-op.
    """
    if dataframe is None or dataframe.empty:
        return

    for record in dataframe.to_dict(orient="records"):
        # Missing cells are NaN after read_csv; NaN is not valid JSON, so the
        # property is omitted for that object instead of being sent.
        properties = {
            column: value for column, value in record.items() if pd.notna(value)
        }
        # The client takes the properties and the class name as separate
        # arguments. No uuid is passed: the row index is not a valid UUID, so
        # Weaviate is left to assign one.
        client.data_object.create(data_object=properties, class_name=class_name)

# Function to query data from Weaviate
def query_weaviate(question, class_name="YourClassName", properties=None):
    """Run a nearText search for *question* and return the raw response.

    Args:
        question: Free-text question used as the nearText concept.
        class_name: Weaviate class to search.
        properties: Property names to return for each hit. When omitted, all
            properties listed in the class schema are requested.

    Returns:
        The response of the GraphQL Get query as returned by ``.do()``.
    """
    if properties is None:
        # A Get query has to select at least one property; fall back to every
        # property the class schema currently declares.
        schema = client.schema.get(class_name)
        properties = [prop["name"] for prop in schema.get("properties", [])]

    # nearText expects a dict with a list of concepts, not a bare string.
    near_text = {"concepts": [question]}
    return client.query.get(class_name, properties).with_near_text(near_text).do()

# Existing function to ask TAPAS
# NOTE(review): the body is elided in this listing. A `def` followed only by a
# comment is not valid Python, so the full body has to be restored from the
# previous revision before this file can run.
def ask_llm_chunk(chunk, questions):
# ... [rest of the function remains unchanged]

# Existing function to handle large datasets
# NOTE(review): the body is elided in this listing. A `def` followed only by a
# comment is not valid Python, so the full body has to be restored from the
# previous revision before this file can run.
def summarize_map_reduce(data, questions):
# ... [rest of the function remains unchanged]

st.title("TAPAS Table Question Answering with Weaviate")

# Upload CSV data
csv_file = st.file_uploader("Upload a CSV file", type=["csv"])
if csv_file is not None:
    data = csv_file.read().decode("utf-8")
    dataframe = pd.read_csv(StringIO(data))

    # Ingest data into Weaviate.
    # Streamlit reruns the whole script on every interaction, so each upload is
    # ingested only once per session; otherwise every rerun would insert the
    # same rows again.
    upload_key = (csv_file.name, len(data))
    if st.session_state.get("ingested_upload") != upload_key:
        ingest_data_to_weaviate(dataframe)
        st.session_state["ingested_upload"] = upload_key

    st.write("CSV Data Preview:")
    st.write(dataframe.head())

    # Input for questions: one per line, ignoring blank or whitespace-only lines
    raw_questions = st.text_area("Enter your questions (one per line)")
    questions = [line.strip() for line in raw_questions.splitlines() if line.strip()]

    if st.button("Submit"):
        if data and questions:
            # Query Weaviate to get relevant data.
            # Only the first question drives retrieval; every question is then
            # answered against that same set of rows.
            response = query_weaviate(questions[0])

            if response.get("errors"):
                st.error(f"Weaviate query failed: {response['errors']}")
            else:
                # The rows sit under data -> Get -> <class name>; collect them
                # without hard-coding the class name.
                class_results = (response.get("data") or {}).get("Get") or {}
                hits = [row for rows in class_results.values() for row in rows or []]
                relevant_df = pd.DataFrame(hits)

                if relevant_df.empty:
                    st.warning("No relevant rows were found for the question.")
                else:
                    # Pass the relevant data to TAPAS
                    answers = summarize_map_reduce(relevant_df, questions)

                    st.write("Answers:")
                    for q, a in zip(questions, answers):
                        st.write(f"Question: {q}")
                        st.write(f"Answer: {a}")
        else:
            st.warning("Please enter at least one question.")

# Add Ctrl+Enter functionality for submitting the questions
# The injected script listens for Ctrl+Enter and clicks the first element
# matching ".stButton button".
# NOTE(review): Streamlit is not expected to execute <script> tags inserted
# through st.markdown, even with unsafe_allow_html=True -- verify that this
# handler actually fires in the deployed app.
st.markdown("""
<script>
document.addEventListener("DOMContentLoaded", function(event) {
document.addEventListener("keydown", function(event) {
if (event.ctrlKey && event.key === "Enter") {
document.querySelector(".stButton button").click();
}
});
});
</script>
""", unsafe_allow_html=True)

Files changed (1) hide show

app.py +36 -5

app.py CHANGED Viewed

@@ -1,15 +1,34 @@
 from copy import deepcopy
 import streamlit as st
 import pandas as pd
 from io import StringIO
 from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering
 import numpy as np
 # Initialize TAPAS model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
 model = AutoModelForTableQuestionAnswering.from_pretrained("google/tapas-large-finetuned-wtq")
 def ask_llm_chunk(chunk, questions):
     chunk = chunk.astype(str)
     try:
@@ -67,14 +86,19 @@ def summarize_map_reduce(data, questions):
         all_answers.extend(chunk_answers)
     return all_answers
-st.title("TAPAS Table Question Answering")
 # Upload CSV data
 csv_file = st.file_uploader("Upload a CSV file", type=["csv"])
 if csv_file is not None:
     data = csv_file.read().decode("utf-8")
     st.write("CSV Data Preview:")
-    st.write(pd.read_csv(StringIO(data)).head())
     # Input for questions
     questions = st.text_area("Enter your questions (one per line)")
@@ -83,7 +107,14 @@ if csv_file is not None:
     if st.button("Submit"):
         if data and questions:
-            answers = summarize_map_reduce(data, questions)
             st.write("Answers:")
             for q, a in zip(questions, answers):
                 st.write(f"Question: {q}")
@@ -100,4 +131,4 @@ st.markdown("""
         });
     });
     </script>
-    """, unsafe_allow_html=True)

 from copy import deepcopy
 import streamlit as st
 import pandas as pd
 from io import StringIO
 from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering
 import numpy as np
+import weaviate
 # Initialize TAPAS model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
 model = AutoModelForTableQuestionAnswering.from_pretrained("google/tapas-large-finetuned-wtq")
+# Initialize Weaviate client for the embedded instance
+client = weaviate.Client("http://localhost:8080")
+# Function to ingest data into Weaviate
+def ingest_data_to_weaviate(dataframe):
+    for index, row in dataframe.iterrows():
+        obj = {
+            "class": "YourClassName",
+            "id": str(index),
+            "properties": row.to_dict()
+        }
+        client.data_object.create(obj)
+# Function to query data from Weaviate
+def query_weaviate(question):
+    # This is a basic example; adapt the query based on the question
+    results = client.query.get('YourClassName').with_near_text(question).do()
+    return results
 def ask_llm_chunk(chunk, questions):
     chunk = chunk.astype(str)
     try:
         all_answers.extend(chunk_answers)
     return all_answers
+st.title("TAPAS Table Question Answering with Weaviate")
 # Upload CSV data
 csv_file = st.file_uploader("Upload a CSV file", type=["csv"])
 if csv_file is not None:
     data = csv_file.read().decode("utf-8")
+    dataframe = pd.read_csv(StringIO(data))
+    # Ingest data into Weaviate
+    ingest_data_to_weaviate(dataframe)
     st.write("CSV Data Preview:")
+    st.write(dataframe.head())
     # Input for questions
     questions = st.text_area("Enter your questions (one per line)")
     if st.button("Submit"):
         if data and questions:
+            # Query Weaviate to get relevant data
+            relevant_data = query_weaviate(questions[0])  # Example: using the first question
+            # Convert the relevant data to a DataFrame (you might need to adjust this based on the Weaviate response format)
+            relevant_df = pd.DataFrame(relevant_data)
+            # Pass the relevant data to TAPAS
+            answers = summarize_map_reduce(relevant_df, questions)
             st.write("Answers:")
             for q, a in zip(questions, answers):
                 st.write(f"Question: {q}")
         });
     });
     </script>
+    """, unsafe_allow_html=True)