File size: 5,108 Bytes
e3dc140
 
 
 
 
 
 
b6e3154
e3dc140
 
 
 
 
 
 
 
 
 
 
 
 
b6e3154
 
e3dc140
 
 
 
 
 
 
b6e3154
 
 
 
 
e3dc140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6e3154
3657891
 
 
 
 
b6e3154
e3dc140
 
 
 
b6e3154
 
 
e3dc140
 
 
 
 
 
 
 
 
 
 
 
b6e3154
 
 
3657891
b6e3154
3657891
 
 
e3dc140
 
 
 
bbc4916
 
 
 
 
 
 
 
 
 
 
e3dc140
 
 
c3890d3
260863b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59af5c4
260863b
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import streamlit as st
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModel
import torch
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import cosine
import joblib

# Load a pre-trained model and tokenizer.
# MiniLM sentence-transformer checkpoint; the rest of the app assumes its
# pooled embeddings are 384-dimensional (checked before classification).
# Downloaded from the Hugging Face hub on first run (network required),
# served from the local cache afterwards.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Turn a piece of text into a single fixed-size vector by mean-pooling the
# encoder's token embeddings.
def get_embedding(text):
    """Return the mean-pooled sentence embedding of `text` as a 1-D numpy array."""
    encoded = tokenizer(
        text,
        return_tensors="pt",
        max_length=512,
        truncation=True,
        padding=True,
    )
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
    # Average across the token axis, then drop the batch dimension.
    pooled = hidden_states.mean(dim=1).squeeze()
    return pooled.numpy()

# Function to classify text using cosine similarity
def classify_text_cosine(embedding, mean_embeddings, threshold=0.5):
    """Classify an embedding by nearest class mean under cosine distance.

    Args:
        embedding: 1-D array-like embedding of the input text.
        mean_embeddings: mapping of label -> mean embedding for that class.
        threshold: maximum cosine distance accepted; if even the closest
            class mean is farther than this, the text is labelled "neither".

    Returns:
        The label of the closest class mean, or "neither" when the closest
        distance exceeds `threshold`.
    """
    # Single-pass argmin: the original built the full distance dict, took
    # min() of the values, then scanned it again for the argmin.
    best_label, best_distance = min(
        ((label, cosine(embedding, mean)) for label, mean in mean_embeddings.items()),
        key=lambda pair: pair[1],
    )
    return "neither" if best_distance > threshold else best_label

# Function to classify text using MLP model
def classify_text_mlp(embedding, mlp_model):
    """Classify an embedding with the trained MLP classifier.

    Args:
        embedding: 1-D array-like embedding of the input text.
        mlp_model: fitted scikit-learn estimator exposing `.predict`.

    Returns:
        The string label (key of the module-level `label_mapping`) whose
        integer code the model predicted.
    """
    predicted_int = mlp_model.predict([embedding])[0]
    # Invert label_mapping by value instead of indexing into .keys() by
    # position — the original positional lookup silently returns the wrong
    # label if the mapping's values are ever not 0..n-1 in insertion order.
    return next(label for label, code in label_mapping.items() if code == predicted_int)

# Streamlit app
st.title('Biryani, Pizza, Milk, Pasta, Potatos, Tomato, or Neither Classifier')

# Load the embeddings and labels DataFrame
df = pd.read_csv("embeddings_receipes_final.csv")

# Map labels to integers
label_mapping = {'pizza': 0, 'biryani': 1, 'milk': 2, 'pasta': 3, 'potatos': 4, 'tomato': 5, 'neither': 6}
df['label_int'] = df['label'].map(label_mapping)

# Calculate mean embeddings for each class.
# NOTE(review): iloc[:, 1:-2] assumes column 0 is an id and the last two
# columns are the label columns, with the 384 embedding dims in between —
# confirm against the CSV layout.
embeddings = df.iloc[:, 1:-2]
labels = df['label']
mean_embeddings = {label: embeddings[labels == label].mean(axis=0) for label in label_mapping.keys() if label != 'neither'}

# Load the MLP model; on failure fall back to cosine-similarity-only mode.
try:
    mlp_model = joblib.load("mlp_model2.joblib")
except Exception as e:
    st.error(f"Error loading MLP model: {e}")
    mlp_model = None

# Check if the DataFrame is loaded correctly
if df.shape[1] < 386:  # 384 embeddings + 1 label + 1 recipe_id + 1 label_int
    # (was an f-string with no placeholders)
    st.error("Expected DataFrame with 386 columns, but got less than that. Please check your CSV file.")
else:
    # Select classification method
    classification_method = st.selectbox("Select classification method", ["Cosine Similarity", "MLP Model"])

    # Input text
    input_text = st.text_area("Enter text to classify")

    if st.button("Classify"):
        if input_text:
            # Get the embedding for the input text
            embedding = get_embedding(input_text)

            # Ensure the embedding is of the correct dimension
            if embedding.shape[0] != 384:
                st.error(f"Expected embedding of dimension 384, but got {embedding.shape[0]}.")
            else:
                # Classify the input text using the selected method
                if classification_method == "Cosine Similarity":
                    predicted_label = classify_text_cosine(embedding, mean_embeddings)
                elif mlp_model is not None:
                    predicted_label = classify_text_mlp(embedding, mlp_model)
                else:
                    st.error("MLP model is not available.")
                    predicted_label = "unknown"

                # Display the result
                st.write(f"The predicted label is: **{predicted_label}**")

                # Map predicted label to corresponding image
                image_mapping = {
                    'pizza': 'pizza.jpg',
                    'biryani': 'biryani.jpg',
                    'milk': 'milk.jpg',
                    'pasta': 'pasta.jpg',
                    'potatos': 'potatos.jpg',
                    'tomato': 'tomato.jpg',
                    'neither': 'other.jpg'
                }
                # BUG FIX: direct indexing raised KeyError when the label was
                # "unknown" (MLP selected but unavailable). Show the image
                # only when one is mapped for the predicted label.
                image_path = image_mapping.get(predicted_label)
                if image_path is not None:
                    st.image(image_path, caption=f"Predicted Label: {predicted_label}", use_column_width=True)

        else:
            st.write("Please enter text to classify.")

# # Footer
# st.markdown(
#     """
#     <style>
#     .footer {
#         position: fixed;
#         bottom: 0;
#         width: 100%;
#         text-align: center;
#         padding: 10px;
#         background-color: #f1f1f1;
#         color: black;
#     }
#     </style>
#     <div class="footer">
#         <b>&copy; Shubham Kale and Prof Ganesh Baglar, IIIT Delhi</b>
#     </div>
#     """,
#     unsafe_allow_html=True
# )
    # Add a footer
    # Render a footer pinned to the bottom of the viewport via inline CSS,
    # injected as raw HTML (requires unsafe_allow_html=True). This call sits
    # inside the else-branch of the dataframe-shape check above, so the
    # footer appears only when the CSV loaded with the expected columns.
    st.markdown(
        """
        <style>
        .footer {
            position: fixed;
            left: 0;
            bottom: 0;
            width: 100%;
            background-color: #f1f1f1;
            color: black;
            text-align: center;
            padding: 10px;
        }
        .footer p {
            font-size: 1.2em;
            font-weight: bold;
        }
        </style>
        <div class="footer">
        <p>© Shubham Kale and Prof.Ganesh Bagler, IIIT Delhi</p>
        </div>
        """, unsafe_allow_html=True
    )