Spaces:

shubham142000
/

multi_class_recipe_classifier

Sleeping

App Files Files Community

shubham142000 committed on Jul 27, 2024

Commit

b6e3154

verified ·

1 Parent(s): 0ec1989

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -14

app.py CHANGED Viewed

@@ -6,8 +6,9 @@ import torch
 from sklearn.manifold import TSNE
 import matplotlib.pyplot as plt
 from sklearn.metrics.pairwise import cosine_similarity
-from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
 from scipy.spatial.distance import cosine
 # Load a pre-trained model and tokenizer
 model_name = "sentence-transformers/all-MiniLM-L6-v2"
@@ -21,8 +22,8 @@ def get_embedding(text):
         outputs = model(**inputs)
     return outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
-# Function to classify text
-def classify_text(embedding, mean_embeddings, threshold=0.5):
     distances = {label: cosine(embedding, mean_embedding) for label, mean_embedding in mean_embeddings.items()}
     min_distance = min(distances.values())
     if min_distance > threshold:
@@ -30,6 +31,11 @@ def classify_text(embedding, mean_embeddings, threshold=0.5):
     predicted_label = min(distances, key=distances.get)
     return predicted_label
 # Streamlit app
 st.title('Biryani, Pizza, Milk, Pasta, Potatos, Tomato, or Neither Classifier')
@@ -45,10 +51,16 @@ embeddings = df.iloc[:, 1:-2]
 labels = df['label']
 mean_embeddings = {label: embeddings[labels == label].mean(axis=0) for label in label_mapping.keys() if label != 'neither'}
 # Check if the DataFrame is loaded correctly
 if df.shape[1] < 386:  # 384 embeddings + 1 label + 1 recipe_id + 1 label_int
     st.error(f"Expected DataFrame with 386 columns, but got less than that. Please check your CSV file.")
 else:
     # Input text
     input_text = st.text_area("Enter text to classify")
@@ -61,8 +73,11 @@ else:
             if embedding.shape[0] != 384:
                 st.error(f"Expected embedding of dimension 384, but got {embedding.shape[0]}.")
             else:
-                # Classify the input text using existing embeddings DataFrame `df`
-                predicted_label = classify_text(embedding, mean_embeddings)
                 # Display the result
                 st.write(f"The predicted label is: **{predicted_label}**")
@@ -93,15 +108,18 @@ else:
                 st.pyplot(plt)
                 # Generate the confusion matrix
-                predictions = []
-                for i, embedding_row in embeddings.iterrows():
-                    distances = {label: cosine(embedding_row, mean_embeddings[label]) for label in mean_embeddings}
-                    min_distance = min(distances.values())
-                    if min_distance > 0.5:  # Threshold for "neither"
-                        predictions.append(label_mapping['neither'])
-                    else:
-                        predicted_label = min(distances, key=distances.get)
-                        predictions.append(label_mapping[predicted_label])
                 conf_matrix = confusion_matrix(df['label_int'], predictions)

 from sklearn.manifold import TSNE
 import matplotlib.pyplot as plt
 from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
 from scipy.spatial.distance import cosine
+import joblib
 # Load a pre-trained model and tokenizer
 model_name = "sentence-transformers/all-MiniLM-L6-v2"
         outputs = model(**inputs)
     return outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
+# Function to classify text using cosine similarity
+def classify_text_cosine(embedding, mean_embeddings, threshold=0.5):
     distances = {label: cosine(embedding, mean_embedding) for label, mean_embedding in mean_embeddings.items()}
     min_distance = min(distances.values())
     if min_distance > threshold:
     predicted_label = min(distances, key=distances.get)
     return predicted_label
+# Function to classify text using MLP model
+def classify_text_mlp(embedding, mlp_model):
+    prediction = mlp_model.predict([embedding])[0]
+    return list(label_mapping.keys())[prediction]
 # Streamlit app
 st.title('Biryani, Pizza, Milk, Pasta, Potatos, Tomato, or Neither Classifier')
 labels = df['label']
 mean_embeddings = {label: embeddings[labels == label].mean(axis=0) for label in label_mapping.keys() if label != 'neither'}
+# Load the MLP model
+mlp_model = joblib.load("mlp_model2.joblib")
 # Check if the DataFrame is loaded correctly
 if df.shape[1] < 386:  # 384 embeddings + 1 label + 1 recipe_id + 1 label_int
     st.error(f"Expected DataFrame with 386 columns, but got less than that. Please check your CSV file.")
 else:
+    # Select classification method
+    classification_method = st.selectbox("Select classification method", ["Cosine Similarity", "MLP Model"])
     # Input text
     input_text = st.text_area("Enter text to classify")
             if embedding.shape[0] != 384:
                 st.error(f"Expected embedding of dimension 384, but got {embedding.shape[0]}.")
             else:
+                # Classify the input text using the selected method
+                if classification_method == "Cosine Similarity":
+                    predicted_label = classify_text_cosine(embedding, mean_embeddings)
+                else:
+                    predicted_label = classify_text_mlp(embedding, mlp_model)
                 # Display the result
                 st.write(f"The predicted label is: **{predicted_label}**")
                 st.pyplot(plt)
                 # Generate the confusion matrix
+                if classification_method == "Cosine Similarity":
+                    predictions = []
+                    for i, embedding_row in embeddings.iterrows():
+                        distances = {label: cosine(embedding_row, mean_embeddings[label]) for label in mean_embeddings}
+                        min_distance = min(distances.values())
+                        if min_distance > 0.5:  # Threshold for "neither"
+                            predictions.append(label_mapping['neither'])
+                        else:
+                            predicted_label = min(distances, key=distances.get)
+                            predictions.append(label_mapping[predicted_label])
+                else:
+                    predictions = mlp_model.predict(embeddings)
                 conf_matrix = confusion_matrix(df['label_int'], predictions)