import streamlit as st
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModel
import torch
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity

# Load the pre-trained model and tokenizer.
# Streamlit re-executes this script on every widget interaction, so the load
# is wrapped in st.cache_resource: the weights are read once per server
# process and the same objects are reused on later reruns.
model_name = "sentence-transformers/all-MiniLM-L6-v2"


@st.cache_resource
def _load_model(name):
    """Return the (tokenizer, model) pair for the given Hugging Face model id."""
    return AutoTokenizer.from_pretrained(name), AutoModel.from_pretrained(name)


tokenizer, model = _load_model(model_name)

# Function to get embedding
def get_embedding(text):
    """Embed text using the module-level ``tokenizer`` and ``model``.

    The token vectors of the model's last hidden state are averaged into one
    vector per input. The average is weighted by the attention mask so that
    padding tokens (present when several texts of different lengths are
    encoded together) do not dilute the result. For a single string there is
    no padding, so this equals a plain mean over tokens.

    Args:
        text: A string, or a list of strings, to embed. Inputs are truncated
            to at most 512 tokens.

    Returns:
        A NumPy array with size-1 dimensions squeezed out: one vector for a
        single text, or one row per text when several texts are given.
    """
    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)

    token_embeddings = outputs.last_hidden_state
    # Expand the per-token mask with a trailing axis so it broadcasts over
    # the hidden dimension of the token embeddings.
    mask = inputs["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # Guard against division by zero for a (theoretical) fully-masked row.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    return (summed / token_counts).squeeze().numpy()

# Function to classify text
def classify_text(embedding, embeddings_df, threshold=0.5):
    """Label an embedding by its most similar row in ``embeddings_df``.

    Args:
        embedding: 1-D vector to classify.
        embeddings_df: DataFrame whose first column is skipped, whose last
            column is skipped, and whose remaining columns are the reference
            vectors; it must also have a 'label' column.
        threshold: Minimum cosine similarity required to accept a match.

    Returns:
        The 'label' value of the most similar row, or "neither" when the
        best cosine similarity is below ``threshold``.
    """
    # Everything between the first and last column is treated as features
    # (the original layout is 'recipe_id', features..., 'label').
    reference_vectors = embeddings_df.iloc[:, 1:-1].values
    scores = cosine_similarity([embedding], reference_vectors)[0]

    best_idx = np.argmax(scores)
    if scores[best_idx] < threshold:
        return "neither"
    return embeddings_df['label'].iloc[best_idx]

# Streamlit app
st.title('Biryani, Pizza, or Neither Classifier')

# Load the embeddings and labels DataFrame
df = pd.read_csv("embeddings_receipes_final.csv")

# Expected CSV layout: recipe_id, 384 embedding values, label.
EMBEDDING_DIM = 384
EXPECTED_COLUMNS = EMBEDDING_DIM + 2


def _plot_tsne(reference_embeddings, labels, input_embedding):
    """Return a figure showing a joint 2D t-SNE of the references and the input.

    t-SNE cannot place a new point into a previously fitted layout, and two
    separate fits produce unrelated coordinate systems. The input is
    therefore stacked under the reference rows and everything is embedded in
    a single fit, so the input marker and the reference points share axes.
    """
    stacked = np.vstack([reference_embeddings, input_embedding])

    # Perplexity must stay below the number of samples; 30 is the
    # scikit-learn default and is only lowered for small reference sets.
    perplexity = min(30, stacked.shape[0] - 1)
    tsne = TSNE(n_components=2, init='pca', perplexity=perplexity, random_state=42)
    projected = tsne.fit_transform(stacked)
    reference_2d, input_2d = projected[:-1], projected[-1]

    fig, ax = plt.subplots(figsize=(10, 6))
    label_values = np.asarray(labels)
    for label in np.unique(label_values):
        members = label_values == label
        ax.scatter(reference_2d[members, 0], reference_2d[members, 1], label=label)

    ax.scatter(input_2d[0], input_2d[1], label='input text', c='red', marker='x', s=100)

    ax.legend()
    ax.set_title('2D t-SNE Visualization of Embeddings')
    ax.set_xlabel('t-SNE Dimension 1')
    ax.set_ylabel('t-SNE Dimension 2')
    return fig


# The feature slice df.iloc[:, 1:-1] only yields EMBEDDING_DIM columns when
# the file has exactly EXPECTED_COLUMNS columns, so anything else is rejected
# up front instead of failing later inside the similarity computation.
if df.shape[1] != EXPECTED_COLUMNS:
    st.error(
        f"Expected DataFrame with {EXPECTED_COLUMNS} columns "
        f"(recipe_id + {EMBEDDING_DIM} embedding values + label), "
        f"but got {df.shape[1]}. Please check your CSV file."
    )
else:
    # Input text
    input_text = st.text_area("Enter text to classify")

    if st.button("Classify"):
        # Treat whitespace-only input the same as no input.
        if input_text.strip():
            # Get the embedding for the input text
            embedding = get_embedding(input_text)

            # Ensure the embedding is of the correct dimension
            if embedding.shape[0] != EMBEDDING_DIM:
                st.error(f"Expected embedding of dimension {EMBEDDING_DIM}, but got {embedding.shape[0]}.")
            else:
                # Classify the input text using existing embeddings DataFrame `df`
                predicted_label = classify_text(embedding, df)

                # Display the result
                st.write(f"The predicted label is: **{predicted_label}**")

                # Visualization using t-SNE
                embeddings = df.iloc[:, 1:-1].values  # Exclude 'recipe_id' and 'label'
                fig = _plot_tsne(embeddings, df['label'], embedding)
                st.pyplot(fig)
                # Release the figure so reruns do not accumulate open figures.
                plt.close(fig)

        else:
            st.write("Please enter text to classify.")