from copy import deepcopy
import streamlit as st
import pandas as pd
from io import StringIO
from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering
import numpy as np
import weaviate
from weaviate.embedded import EmbeddedOptions
from weaviate import Client, ObjectsBatchRequest

# Streamlit re-executes this script from the top on every widget interaction.
# The loaders below are wrapped in st.cache_resource so the (large) TAPAS
# weights and the Weaviate client are created once per server process and
# reused across reruns instead of being rebuilt each time.
@st.cache_resource
def _load_tapas():
    """Load the TAPAS tokenizer and table-QA model once and return them as a pair."""
    checkpoint = "google/tapas-large-finetuned-wtq"
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForTableQuestionAnswering.from_pretrained(checkpoint),
    )


@st.cache_resource
def _get_weaviate_client():
    """Create the Weaviate client for the embedded instance once and return it."""
    return weaviate.Client(
        embedded_options=EmbeddedOptions()
    )


# Initialize TAPAS model and tokenizer
tokenizer, model = _load_tapas()

# Initialize Weaviate client for the embedded instance
client = _get_weaviate_client()

def _weaviate_data_type(dtype):
    """Map a pandas column dtype to the name of a Weaviate primitive data type."""
    # bool must be tested first so it is never treated as a numeric column.
    if pd.api.types.is_bool_dtype(dtype):
        return "boolean"
    if pd.api.types.is_integer_dtype(dtype):
        return "int"
    if pd.api.types.is_float_dtype(dtype):
        # Weaviate names its floating point type "number"; "float" is rejected.
        return "number"
    return "string"


def ingest_data_to_weaviate(dataframe, class_name, class_description):
    """Create a Weaviate class for *dataframe* and import its rows as objects.

    One property is declared per DataFrame column; the column name is used
    verbatim as both the property name and its description, and the data type
    is derived from the column dtype.

    Args:
        dataframe: Table whose rows are imported, one object per row.
        class_name: Name of the Weaviate class to create and fill.
        class_description: Description stored on the class.

    Uses the module-level ``client``. Errors raised by the Weaviate client
    (for example for an invalid class or property name) propagate to the
    caller.
    """
    properties = [
        {
            "name": column,
            "description": column,
            "dataType": [_weaviate_data_type(dataframe[column].dtype)],
        }
        for column in dataframe.columns
    ]

    schema = {
        "classes": [
            {
                "class": class_name,
                "description": class_description,
                "properties": properties,
            }
        ]
    }

    # Create the schema only when it is not already present, so ingesting the
    # same table a second time does not fail on the existing class.
    if not client.schema.contains(schema):
        client.schema.create(schema)

    # Ingest data. The batch context manager sends any pending objects when
    # the block exits.
    with client.batch as batch:
        # to_dict(orient="records") keeps each column's own type; iterrows()
        # would upcast ints to floats in an all-numeric frame.
        for record in dataframe.to_dict(orient="records"):
            # Missing cells (NaN) are omitted: NaN is not valid JSON.
            data_object = {
                key: value for key, value in record.items() if pd.notna(value)
            }
            batch.add_data_object(data_object=data_object, class_name=class_name)

def query_weaviate(question):
    """Run a nearText search for *question* against the current Weaviate class.

    Reads the module-level ``client`` and ``class_name``. All properties
    declared on the class are requested so the GraphQL selection is not empty.

    Args:
        question: Free-text query used as the single nearText concept.

    Returns:
        The raw response returned by the Weaviate client for the query.
    """
    # This is a basic example; adapt the query based on the question
    class_schema = client.schema.get(class_name)
    property_names = [prop["name"] for prop in class_schema.get("properties", [])]
    # with_near_text expects a dict with a "concepts" list, not a bare string.
    near_text = {"concepts": [question]}
    results = (
        client.query
        .get(class_name, property_names)
        .with_near_text(near_text)
        .do()
    )
    return results

def ask_llm_chunk(chunk, questions):
    """Answer *questions* against one table chunk with the TAPAS model.

    Uses the module-level ``tokenizer`` and ``model`` and reports problems
    through Streamlit.

    Args:
        chunk: DataFrame holding the rows to query; cells are converted to
            strings before tokenization.
        questions: List of natural-language questions.

    Returns:
        A list with exactly one string per question: the predicted cell
        values joined by ", " (an empty string when no cell was predicted or
        could be read), or a fixed error message repeated for every question
        when the chunk could not be processed.
    """
    if not questions:
        return []

    # Local import: torch is only needed here to disable gradient tracking.
    import torch

    # TAPAS needs text cells. The index is reset because a chunk sliced from a
    # larger frame keeps its original row labels, and the TAPAS tokenizer
    # appears to use those labels as row positions.
    chunk = chunk.astype(str).reset_index(drop=True)
    try:
        inputs = tokenizer(table=chunk, queries=questions, padding="max_length", truncation=True, return_tensors="pt")
    except Exception as e:
        st.write(f"An error occurred: {e}")
        return ["Error occurred while tokenizing"] * len(questions)

    if inputs["input_ids"].shape[1] > 512:
        st.warning("Token limit exceeded for chunk")
        return ["Token limit exceeded for chunk"] * len(questions)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        outputs = model(**inputs)
    # NOTE: the predicted aggregation operator is currently ignored; only the
    # selected cells are reported.
    predicted_answer_coordinates, _ = tokenizer.convert_logits_to_predictions(
        inputs,
        outputs.logits.detach(),
        outputs.logits_aggregation.detach()
    )

    answers = []
    for coordinates in predicted_answer_coordinates:
        cell_values = []
        for row, col in coordinates:
            try:
                cell_values.append(chunk.iloc[row, col])
            except IndexError as e:
                st.write(f"An error occurred: {e}")
        # Always append, even when nothing could be read, so answers stay
        # aligned one-to-one with questions.
        answers.append(", ".join(map(str, cell_values)))

    return answers

# Upper bound on the number of table rows handed to the model in one call.
MAX_ROWS_PER_CHUNK = 200

def summarize_map_reduce(data, questions):
    """Answer *questions* over a CSV table by querying it chunk by chunk.

    The table is cut into consecutive slices of at most MAX_ROWS_PER_CHUNK
    rows (map), each slice is passed to ``ask_llm_chunk``, and the non-empty
    per-chunk answers for each question are joined with "; " (reduce).

    Args:
        data: CSV content as a single string.
        questions: List of natural-language questions.

    Returns:
        A list with one combined answer string per question, in the same
        order as *questions*.
    """
    if not questions:
        return []

    dataframe = pd.read_csv(StringIO(data))
    collected = [[] for _ in questions]

    # max(..., 1) keeps a single (empty) chunk for a table without rows.
    for start in range(0, max(len(dataframe), 1), MAX_ROWS_PER_CHUNK):
        # Each chunk is an independent copy with a fresh 0-based index.
        chunk = (
            dataframe.iloc[start:start + MAX_ROWS_PER_CHUNK]
            .reset_index(drop=True)
            .copy()
        )
        chunk_answers = ask_llm_chunk(chunk, questions)
        for parts, answer in zip(collected, chunk_answers):
            if answer:
                parts.append(str(answer))

    return ["; ".join(parts) for parts in collected]

st.title("TAPAS Table Question Answering with Weaviate Integration")

# UI Input for Class and Description.
# `class_name` is also read as a module-level global by query_weaviate().
class_name = st.text_input("Enter the class name for your CSV data:")
class_description = st.text_input("Enter a description for your class:")

# Upload CSV data
csv_file = st.file_uploader("Upload a CSV file", type=["csv"])
if csv_file is not None:
    # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order mark,
    # which would otherwise end up inside the first column name.
    data = csv_file.read().decode("utf-8-sig")
    dataframe = pd.read_csv(StringIO(data))
    st.write("CSV Data Preview:")
    st.write(dataframe.head())

    # Ingest data to Weaviate
    if st.button("Ingest to Weaviate"):
        if not class_name.strip():
            # A class cannot be created without a name; tell the user instead
            # of sending an invalid schema to Weaviate.
            st.error("Please enter a class name before ingesting.")
        else:
            ingest_data_to_weaviate(dataframe, class_name, class_description)
            st.write("Data ingested successfully!")

    # Input for questions: one per line, ignoring blank or whitespace-only lines
    raw_questions = st.text_area("Enter your questions (one per line)")
    questions = [line.strip() for line in raw_questions.splitlines() if line.strip()]

    if st.button("Submit"):
        if not questions:
            st.warning("Please enter at least one question.")
        elif data:
            answers = summarize_map_reduce(data, questions)
            st.write("Answers:")
            for q, a in zip(questions, answers):
                st.write(f"Question: {q}")
                st.write(f"Answer: {a}")

# Add Ctrl+Enter functionality for submitting the questions.
# The markup below registers a keydown listener that clicks a button when
# Ctrl+Enter is pressed.
# NOTE(review): this <script> is injected as raw HTML through st.markdown;
# scripts inserted this way are presumably not executed by the browser, so the
# handler may never run — confirm, and consider streamlit.components.v1.html.
# NOTE(review): querySelector returns the first ".stButton button" match; once
# a CSV is uploaded the first button created above is "Ingest to Weaviate",
# not "Submit" — verify this targets the intended button.
st.markdown("""
    <script>
    document.addEventListener("DOMContentLoaded", function(event) {
        document.addEventListener("keydown", function(event) {
            if (event.ctrlKey && event.key === "Enter") {
                document.querySelector(".stButton button").click();
            }
        });
    });
    </script>
    """, unsafe_allow_html=True)