Spaces:

shubham142000
/

recipe_classifier

Build error

App Files Files Community

recipe_classifier / app.py

shubham142000

Update app.py

9721b50 verified over 1 year ago

raw

history blame contribute delete

3.45 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	from transformers import AutoTokenizer, AutoModel
	import torch
	from sklearn.manifold import TSNE
	import matplotlib.pyplot as plt
	from sklearn.metrics.pairwise import cosine_similarity

	# Load a pre-trained model and tokenizer
	model_name = "sentence-transformers/all-MiniLM-L6-v2"


	@st.cache_resource
	def _load_encoder(name):
	    """Load the tokenizer and model for checkpoint *name*, cached across reruns.

	    Streamlit re-executes this script from the top on every widget
	    interaction; caching the loaded objects avoids re-instantiating the
	    tokenizer and model each time.

	    Returns:
	        A ``(tokenizer, model)`` tuple.
	    """
	    return AutoTokenizer.from_pretrained(name), AutoModel.from_pretrained(name)


	tokenizer, model = _load_encoder(model_name)

	# Function to get embedding
	def get_embedding(text):
	    """Return the mean-pooled token embedding of *text* as a NumPy array.

	    The text is tokenized with truncation at 512 tokens, run through the
	    module-level ``model`` without gradient tracking, and the final hidden
	    states are averaged over the token axis.

	    Args:
	        text: Input passed straight to the module-level ``tokenizer``.

	    Returns:
	        The pooled embedding with all size-1 dimensions squeezed away
	        (presumably a 1-D vector of the model's hidden size for a single
	        string -- the caller checks the length).
	    """
	    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True, padding=True)
	    with torch.no_grad():
	        outputs = model(**inputs)

	    token_states = outputs.last_hidden_state
	    # Weight tokens by the attention mask so padding positions (present when
	    # several texts of different lengths are batched) do not dilute the mean.
	    # For a single text the mask is all ones and this equals a plain mean.
	    mask = inputs["attention_mask"].unsqueeze(-1).to(token_states.dtype)
	    summed = (token_states * mask).sum(dim=1)
	    token_counts = mask.sum(dim=1).clamp(min=1e-9)  # guard against division by zero
	    return (summed / token_counts).squeeze().numpy()

	# Function to classify text
	def classify_text(embedding, embeddings_df, threshold=0.5):
	    """Label *embedding* by its most cosine-similar row in *embeddings_df*.

	    Args:
	        embedding: 1-D vector to classify.
	        embeddings_df: Reference frame. The first and last columns are
	            skipped when building the comparison matrix (the original
	            comment names them 'recipe_id' and 'label'); the 'label'
	            column supplies the returned value.
	        threshold: Minimum cosine similarity required to accept the
	            nearest row's label.

	    Returns:
	        The 'label' value of the most similar row, or the string
	        "neither" when the best similarity is below *threshold* or the
	        frame has no rows.
	    """
	    # With no reference rows there is nothing to compare against; report
	    # "neither" instead of failing inside the similarity computation.
	    if embeddings_df.shape[0] == 0:
	        return "neither"

	    reference_vectors = embeddings_df.iloc[:, 1:-1].values  # Exclude 'recipe_id' and 'label'
	    similarities = cosine_similarity([embedding], reference_vectors)[0]

	    # Locate the best match once and reuse the index for score and label.
	    best_index = int(np.argmax(similarities))
	    if similarities[best_index] < threshold:
	        return "neither"
	    return embeddings_df['label'].iloc[best_index]

	# Streamlit app
	EMBEDDING_DIM = 384  # embedding length the app expects from get_embedding
	# Column layout required by the `iloc[:, 1:-1]` slices below:
	# 1 recipe_id + 384 embedding values + 1 label.
	EXPECTED_COLUMNS = EMBEDDING_DIM + 2


	def _render_tsne(reference_vectors, labels, query_vector):
	    """Draw a 2-D t-SNE scatter of the stored embeddings plus the query point.

	    t-SNE cannot project new points into an already fitted space, so the
	    query is stacked with the reference vectors and everything is embedded
	    in a single fit; the last projected row is the query.

	    Args:
	        reference_vectors: 2-D array with one stored embedding per row.
	        labels: Per-row labels aligned with *reference_vectors*.
	        query_vector: 1-D embedding of the user's input text.
	    """
	    stacked = np.vstack([reference_vectors, query_vector])
	    # scikit-learn requires perplexity < n_samples; 30 is its default value.
	    perplexity = min(30, stacked.shape[0] - 1)
	    tsne = TSNE(n_components=2, init='pca', random_state=42, perplexity=perplexity)
	    projected = tsne.fit_transform(stacked)
	    reference_2d, query_2d = projected[:-1], projected[-1]

	    label_values = np.asarray(labels)
	    # Use an explicit figure rather than pyplot's global state.
	    fig, ax = plt.subplots(figsize=(10, 6))
	    for label in np.unique(label_values):
	        mask = label_values == label
	        ax.scatter(reference_2d[mask, 0], reference_2d[mask, 1], label=label)
	    ax.scatter(query_2d[0], query_2d[1], label='input text', c='red', marker='x', s=100)

	    ax.legend()
	    ax.set_title('2D t-SNE Visualization of Embeddings')
	    ax.set_xlabel('t-SNE Dimension 1')
	    ax.set_ylabel('t-SNE Dimension 2')
	    st.pyplot(fig)
	    plt.close(fig)  # release the figure so reruns do not accumulate open figures


	st.title('Biryani, Pizza, or Neither Classifier')

	# Load the embeddings and labels DataFrame
	df = pd.read_csv("embeddings_receipes_final.csv")

	# The slices used for classification and plotting only yield 384-wide
	# vectors when the frame has exactly the expected number of columns.
	if df.shape[1] != EXPECTED_COLUMNS or df.empty:
	    st.error(
	        f"Expected DataFrame with {EXPECTED_COLUMNS} columns and at least one row, "
	        f"but got {df.shape[1]} columns and {df.shape[0]} rows. Please check your CSV file."
	    )
	else:
	    # Input text
	    input_text = st.text_area("Enter text to classify")

	    if st.button("Classify"):
	        if not input_text.strip():
	            # Whitespace-only input carries nothing to classify.
	            st.write("Please enter text to classify.")
	        else:
	            # Get the embedding for the input text
	            embedding = get_embedding(input_text)

	            # Ensure the embedding is of the correct dimension
	            if embedding.shape[0] != EMBEDDING_DIM:
	                st.error(f"Expected embedding of dimension {EMBEDDING_DIM}, but got {embedding.shape[0]}.")
	            else:
	                # Classify the input text using existing embeddings DataFrame `df`
	                predicted_label = classify_text(embedding, df)

	                # Display the result
	                st.write(f"The predicted label is: {predicted_label}")

	                # Visualization using t-SNE (excluding 'recipe_id' and 'label')
	                _render_tsne(df.iloc[:, 1:-1].values, df['label'], embedding)