|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from transformers import AutoTokenizer, AutoModel |
|
|
import torch |
|
|
from sklearn.metrics.pairwise import cosine_similarity |
|
|
from scipy.spatial.distance import cosine |
|
|
import joblib |
|
|
|
|
|
|
|
|
# Sentence-embedding backbone: 384-dim MiniLM encoder from sentence-transformers.
model_name = "sentence-transformers/all-MiniLM-L6-v2"


tokenizer = AutoTokenizer.from_pretrained(model_name)


# NOTE(review): loaded at module scope, so Streamlit re-runs this on every
# interaction unless cached — consider st.cache_resource; behavior unchanged here.
model = AutoModel.from_pretrained(model_name)
|
|
|
|
|
|
|
|
def get_embedding(text):
    """Return a sentence embedding for ``text`` as a 1-D numpy array.

    Tokenizes with the module-level ``tokenizer`` (truncated to 512 tokens),
    runs the module-level ``model`` without gradients, and mean-pools the
    final hidden states into a single vector (384-dim for MiniLM-L6).
    """
    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True, padding=True)

    with torch.no_grad():
        outputs = model(**inputs)

    # Masked mean pooling: weight each position by the attention mask so
    # padding tokens do not dilute the average. For a single unpadded input
    # this equals the plain mean over dim=1, but it stays correct if this
    # helper is ever called with batched/padded inputs.
    mask = inputs["attention_mask"].unsqueeze(-1).to(outputs.last_hidden_state.dtype)
    summed = (outputs.last_hidden_state * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)  # avoid divide-by-zero on all-pad rows
    return (summed / counts).squeeze().numpy()
|
|
|
|
|
|
|
|
def classify_text_cosine(embedding, mean_embeddings, threshold=0.5):
    """Nearest-centroid classification by cosine distance.

    Compares ``embedding`` against each class centroid in ``mean_embeddings``
    (label -> mean embedding) and returns the closest label, or ``"neither"``
    when even the best match is farther than ``threshold``.
    """
    best_label, best_distance = min(
        ((label, cosine(embedding, centroid)) for label, centroid in mean_embeddings.items()),
        key=lambda pair: pair[1],
    )
    return "neither" if best_distance > threshold else best_label
|
|
|
|
|
|
|
|
def classify_text_mlp(embedding, mlp_model):
    """Classify an embedding with the trained MLP and return its text label.

    ``mlp_model.predict`` yields an integer class index; that index is mapped
    back to a label via the module-level ``label_mapping`` (label -> int).
    The inverse lookup is built explicitly instead of indexing
    ``list(label_mapping.keys())``, which silently assumed the mapping's
    values are exactly 0..n-1 in insertion order.
    """
    prediction = mlp_model.predict([embedding])[0]
    index_to_label = {index: label for label, index in label_mapping.items()}
    return index_to_label[prediction]
|
|
|
|
|
|
|
|
st.title('Biryani, Pizza, Milk, Pasta, Potatos, Tomato, or Neither Classifier')


# Pre-computed embeddings of training recipes.
# NOTE(review): schema assumed below is [text col, 384 embedding cols, label,
# label_int] — TODO confirm against the actual CSV.
df = pd.read_csv("embeddings_receipes_final.csv")


# Label -> integer class index; must match the encoding used to train the MLP.
label_mapping = {'pizza': 0, 'biryani': 1, 'milk': 2, 'pasta': 3, 'potatos': 4, 'tomato': 5, 'neither': 6}


df['label_int'] = df['label'].map(label_mapping)


# Slice out the embedding columns: assumes the first column is text and the
# last two are label / label_int — verify if the CSV layout changes.
embeddings = df.iloc[:, 1:-2]


labels = df['label']


# Per-class centroid of the training embeddings. 'neither' has no centroid;
# it is produced by the distance threshold in classify_text_cosine instead.
mean_embeddings = {label: embeddings[labels == label].mean(axis=0) for label in label_mapping.keys() if label != 'neither'}


# Best-effort load of the trained MLP classifier; on failure the UI surfaces
# the error and the MLP option degrades gracefully (see the Classify handler).
try:

    mlp_model = joblib.load("mlp_model2.joblib")

except Exception as e:

    st.error(f"Error loading MLP model: {e}")

    mlp_model = None
|
|
|
|
|
|
|
|
# Guard: the CSV must hold the 384 embedding columns plus text/label columns;
# fewer than 386 columns means the wrong (or truncated) file was loaded.
if df.shape[1] < 386:
    st.error("Expected DataFrame with 386 columns, but got less than that. Please check your CSV file.")
else:

    classification_method = st.selectbox("Select classification method", ["Cosine Similarity", "MLP Model"])

    input_text = st.text_area("Enter text to classify")

    if st.button("Classify"):
        if input_text:
            embedding = get_embedding(input_text)

            # Sanity-check the encoder output before classifying.
            if embedding.shape[0] != 384:
                st.error(f"Expected embedding of dimension 384, but got {embedding.shape[0]}.")
            else:
                predicted_label = None
                if classification_method == "Cosine Similarity":
                    predicted_label = classify_text_cosine(embedding, mean_embeddings)
                elif mlp_model is not None:
                    predicted_label = classify_text_mlp(embedding, mlp_model)
                else:
                    st.error("MLP model is not available.")

                # Only render a result when a classifier actually produced one.
                # (Previously the "MLP unavailable" path set a placeholder label
                # and then crashed with KeyError on the image lookup below.)
                if predicted_label is not None:
                    st.write(f"The predicted label is: **{predicted_label}**")

                    image_mapping = {
                        'pizza': 'pizza.jpg',
                        'biryani': 'biryani.jpg',
                        'milk': 'milk.jpg',
                        'pasta': 'pasta.jpg',
                        'potatos': 'potatos.jpg',
                        'tomato': 'tomato.jpg',
                        'neither': 'other.jpg'
                    }
                    # .get() so an unexpected label degrades gracefully instead
                    # of raising KeyError and killing the Streamlit script.
                    image_file = image_mapping.get(predicted_label)
                    if image_file is not None:
                        st.image(image_file, caption=f"Predicted Label: {predicted_label}", use_column_width=True)
        else:
            st.write("Please enter text to classify.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Fixed attribution footer; raw HTML/CSS requires unsafe_allow_html=True.
st.markdown(
    """
    <style>
    .footer {
        position: fixed;
        left: 0;
        bottom: 0;
        width: 100%;
        background-color: #f1f1f1;
        color: black;
        text-align: center;
        padding: 10px;
    }
    .footer p {
        font-size: 1.2em;
        font-weight: bold;
    }
    </style>
    <div class="footer">
        <p>© Shubham Kale and Prof.Ganesh Bagler, IIIT Delhi</p>
    </div>
    """, unsafe_allow_html=True
)