Spaces:
Build error
Build error
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| from transformers import AutoTokenizer, AutoModel | |
| import torch | |
| from sklearn.manifold import TSNE | |
| import matplotlib.pyplot as plt | |
| from sklearn.metrics.pairwise import cosine_similarity | |
# Load a pre-trained model and tokenizer.
model_name = "sentence-transformers/all-MiniLM-L6-v2"


def _load_tokenizer_and_model(name):
    """Return the ``(tokenizer, model)`` pair for the Hugging Face checkpoint ``name``."""
    return AutoTokenizer.from_pretrained(name), AutoModel.from_pretrained(name)


# Streamlit re-executes this script on every widget interaction; without
# caching, the checkpoint would be loaded again on each rerun. Wrap the loader
# in st.cache_resource when this Streamlit version provides it, and fall back
# to a plain (uncached) load otherwise.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is not None:
    _load_tokenizer_and_model = _cache_resource(_load_tokenizer_and_model)

tokenizer, model = _load_tokenizer_and_model(model_name)
def get_embedding(text):
    """Return the mean-pooled embedding of ``text`` as a NumPy array.

    The text is tokenized (truncated to at most 512 tokens), passed through
    the module-level ``model`` without gradient tracking, and the token
    vectors in ``last_hidden_state`` are averaged along the sequence axis.

    Padding positions are excluded from the average by weighting with the
    tokenizer's attention mask. For a single string (no padding is added)
    this equals a plain mean; for a list of texts of different lengths it
    prevents padding vectors from diluting the shorter texts' embeddings.

    Args:
        text: A string, or a list of strings, accepted by ``tokenizer``.

    Returns:
        numpy.ndarray: The pooled embedding(s) with size-1 dimensions
        squeezed out (a 1-D vector for a single input text).
    """
    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
        token_vectors = outputs.last_hidden_state
        # Broadcast the (batch, seq) mask over the hidden dimension.
        mask = inputs["attention_mask"].unsqueeze(-1).to(token_vectors.dtype)
        summed = (token_vectors * mask).sum(dim=1)
        # Clamp guards against division by zero for an all-masked row.
        token_counts = mask.sum(dim=1).clamp(min=1e-9)
        pooled = summed / token_counts
    return pooled.squeeze().numpy()
def classify_text(embedding, embeddings_df, threshold=0.5):
    """Classify ``embedding`` by its nearest stored vector (cosine similarity).

    Args:
        embedding: 1-D vector for the text being classified.
        embeddings_df: DataFrame whose columns between the first and the last
            hold the stored vectors, and which has a ``'label'`` column.
        threshold: Minimum similarity the best match must reach.

    Returns:
        The ``'label'`` value of the most similar row, or ``"neither"`` when
        the best similarity is below ``threshold``.
    """
    # Drop the first and last columns ('recipe_id' and 'label').
    stored_vectors = embeddings_df.iloc[:, 1:-1].values
    scores = cosine_similarity([embedding], stored_vectors)[0]

    best_row = np.argmax(scores)
    if scores[best_row] < threshold:
        return "neither"
    return embeddings_df['label'].iloc[best_row]
# Streamlit app
st.title('Biryani, Pizza, or Neither Classifier')

# Load the embeddings and labels DataFrame
df = pd.read_csv("embeddings_receipes_final.csv")

# Column layout relied on below (and by classify_text, which slices
# iloc[:, 1:-1]): one id column, EMBEDDING_DIM embedding columns, one label.
EMBEDDING_DIM = 384
EXPECTED_COLUMNS = EMBEDDING_DIM + 2

# Any other column count makes the 1:-1 slice disagree with the model's
# embedding size, so require an exact match instead of a lower bound.
if df.shape[1] != EXPECTED_COLUMNS:
    st.error(
        f"Expected DataFrame with {EXPECTED_COLUMNS} columns "
        f"(1 recipe_id + {EMBEDDING_DIM} embeddings + 1 label), "
        f"but got {df.shape[1]}. Please check your CSV file."
    )
else:
    # Input text
    input_text = st.text_area("Enter text to classify")

    if st.button("Classify"):
        # Treat whitespace-only input the same as empty input.
        if input_text and input_text.strip():
            # Get the embedding for the input text
            embedding = get_embedding(input_text)

            # Ensure the embedding is of the correct dimension
            if embedding.shape[0] != EMBEDDING_DIM:
                st.error(f"Expected embedding of dimension {EMBEDDING_DIM}, but got {embedding.shape[0]}.")
            else:
                # Classify the input text using existing embeddings DataFrame `df`
                predicted_label = classify_text(embedding, df)

                # Display the result
                st.write(f"The predicted label is: **{predicted_label}**")

                # Visualization using t-SNE
                embeddings = df.iloc[:, 1:-1].values  # Exclude 'recipe_id' and 'label'
                labels = df['label']

                # t-SNE cannot project new points into an already fitted
                # space, and two separate fits yield unrelated coordinates.
                # Fit once on the dataset plus the input vector so every
                # plotted point shares the same 2-D space.
                combined = np.vstack([embeddings, embedding])
                projected = TSNE(n_components=2, init='pca', random_state=42).fit_transform(combined)
                dataset_points = projected[:-1]
                input_point = projected[-1]

                # Plot on an explicit figure so it can be handed to Streamlit
                # and closed afterwards (avoids accumulating open figures
                # across reruns).
                fig, ax = plt.subplots(figsize=(10, 6))
                for label in np.unique(labels):
                    row_mask = (labels == label).to_numpy()
                    ax.scatter(dataset_points[row_mask, 0], dataset_points[row_mask, 1], label=label)

                ax.scatter(input_point[0], input_point[1], label='input text', c='red', marker='x', s=100)
                ax.legend()
                ax.set_title('2D t-SNE Visualization of Embeddings')
                ax.set_xlabel('t-SNE Dimension 1')
                ax.set_ylabel('t-SNE Dimension 2')
                st.pyplot(fig)
                plt.close(fig)
        else:
            st.write("Please enter text to classify.")