shubham142000 committed on
Commit
b6e3154
·
verified ·
1 Parent(s): 0ec1989

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -14
app.py CHANGED
@@ -6,8 +6,9 @@ import torch
6
  from sklearn.manifold import TSNE
7
  import matplotlib.pyplot as plt
8
  from sklearn.metrics.pairwise import cosine_similarity
9
- from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
10
  from scipy.spatial.distance import cosine
 
11
 
12
  # Load a pre-trained model and tokenizer
13
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
@@ -21,8 +22,8 @@ def get_embedding(text):
21
  outputs = model(**inputs)
22
  return outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
23
 
24
- # Function to classify text
25
- def classify_text(embedding, mean_embeddings, threshold=0.5):
26
  distances = {label: cosine(embedding, mean_embedding) for label, mean_embedding in mean_embeddings.items()}
27
  min_distance = min(distances.values())
28
  if min_distance > threshold:
@@ -30,6 +31,11 @@ def classify_text(embedding, mean_embeddings, threshold=0.5):
30
  predicted_label = min(distances, key=distances.get)
31
  return predicted_label
32
 
 
 
 
 
 
33
  # Streamlit app
34
  st.title('Biryani, Pizza, Milk, Pasta, Potatos, Tomato, or Neither Classifier')
35
 
@@ -45,10 +51,16 @@ embeddings = df.iloc[:, 1:-2]
45
  labels = df['label']
46
  mean_embeddings = {label: embeddings[labels == label].mean(axis=0) for label in label_mapping.keys() if label != 'neither'}
47
 
 
 
 
48
  # Check if the DataFrame is loaded correctly
49
  if df.shape[1] < 386: # 384 embeddings + 1 label + 1 recipe_id + 1 label_int
50
  st.error(f"Expected DataFrame with 386 columns, but got less than that. Please check your CSV file.")
51
  else:
 
 
 
52
  # Input text
53
  input_text = st.text_area("Enter text to classify")
54
 
@@ -61,8 +73,11 @@ else:
61
  if embedding.shape[0] != 384:
62
  st.error(f"Expected embedding of dimension 384, but got {embedding.shape[0]}.")
63
  else:
64
- # Classify the input text using existing embeddings DataFrame `df`
65
- predicted_label = classify_text(embedding, mean_embeddings)
 
 
 
66
 
67
  # Display the result
68
  st.write(f"The predicted label is: **{predicted_label}**")
@@ -93,15 +108,18 @@ else:
93
  st.pyplot(plt)
94
 
95
  # Generate the confusion matrix
96
- predictions = []
97
- for i, embedding_row in embeddings.iterrows():
98
- distances = {label: cosine(embedding_row, mean_embeddings[label]) for label in mean_embeddings}
99
- min_distance = min(distances.values())
100
- if min_distance > 0.5: # Threshold for "neither"
101
- predictions.append(label_mapping['neither'])
102
- else:
103
- predicted_label = min(distances, key=distances.get)
104
- predictions.append(label_mapping[predicted_label])
 
 
 
105
 
106
  conf_matrix = confusion_matrix(df['label_int'], predictions)
107
 
 
6
  from sklearn.manifold import TSNE
7
  import matplotlib.pyplot as plt
8
  from sklearn.metrics.pairwise import cosine_similarity
9
+ from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
10
  from scipy.spatial.distance import cosine
11
+ import joblib
12
 
13
  # Load a pre-trained model and tokenizer
14
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
 
22
  outputs = model(**inputs)
23
  return outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
24
 
25
+ # Function to classify text using cosine similarity
26
+ def classify_text_cosine(embedding, mean_embeddings, threshold=0.5):
27
  distances = {label: cosine(embedding, mean_embedding) for label, mean_embedding in mean_embeddings.items()}
28
  min_distance = min(distances.values())
29
  if min_distance > threshold:
 
31
  predicted_label = min(distances, key=distances.get)
32
  return predicted_label
33
 
34
# Function to classify text using MLP model
def classify_text_mlp(embedding, mlp_model):
    """Classify one embedding with the MLP model and return its label name.

    Args:
        embedding: Feature vector for a single text.
        mlp_model: Fitted estimator exposing ``predict``. It is called with a
            one-row batch and is assumed to return the integer label codes
            stored in ``label_mapping`` (TODO confirm against how the model
            was trained).

    Returns:
        The label name whose code in the module-level ``label_mapping``
        equals the model's prediction.

    Raises:
        ValueError: If the predicted code has no entry in ``label_mapping``.
    """
    prediction = mlp_model.predict([embedding])[0]
    # label_mapping maps label name -> integer code, so decode by inverting it.
    # Indexing list(label_mapping.keys()) is only correct when the codes happen
    # to be 0..n-1 in insertion order, and silently returns the wrong label
    # otherwise (including for negative codes).
    code_to_label = {code: label for label, code in label_mapping.items()}
    try:
        return code_to_label[prediction]
    except KeyError:
        raise ValueError(
            f"MLP model predicted unknown label code {prediction!r}"
        ) from None
38
+
39
  # Streamlit app
40
  st.title('Biryani, Pizza, Milk, Pasta, Potatos, Tomato, or Neither Classifier')
41
 
 
51
  labels = df['label']
52
  mean_embeddings = {label: embeddings[labels == label].mean(axis=0) for label in label_mapping.keys() if label != 'neither'}
53
 
54
+ # Load the MLP model
55
+ mlp_model = joblib.load("mlp_model2.joblib")
56
+
57
  # Check if the DataFrame is loaded correctly
58
  if df.shape[1] < 386: # 384 embeddings + 1 label + 1 recipe_id + 1 label_int
59
  st.error(f"Expected DataFrame with 386 columns, but got less than that. Please check your CSV file.")
60
  else:
61
+ # Select classification method
62
+ classification_method = st.selectbox("Select classification method", ["Cosine Similarity", "MLP Model"])
63
+
64
  # Input text
65
  input_text = st.text_area("Enter text to classify")
66
 
 
73
  if embedding.shape[0] != 384:
74
  st.error(f"Expected embedding of dimension 384, but got {embedding.shape[0]}.")
75
  else:
76
+ # Classify the input text using the selected method
77
+ if classification_method == "Cosine Similarity":
78
+ predicted_label = classify_text_cosine(embedding, mean_embeddings)
79
+ else:
80
+ predicted_label = classify_text_mlp(embedding, mlp_model)
81
 
82
  # Display the result
83
  st.write(f"The predicted label is: **{predicted_label}**")
 
108
  st.pyplot(plt)
109
 
110
  # Generate the confusion matrix
111
+ if classification_method == "Cosine Similarity":
112
+ predictions = []
113
+ for i, embedding_row in embeddings.iterrows():
114
+ distances = {label: cosine(embedding_row, mean_embeddings[label]) for label in mean_embeddings}
115
+ min_distance = min(distances.values())
116
+ if min_distance > 0.5: # Threshold for "neither"
117
+ predictions.append(label_mapping['neither'])
118
+ else:
119
+ predicted_label = min(distances, key=distances.get)
120
+ predictions.append(label_mapping[predicted_label])
121
+ else:
122
+ predictions = mlp_model.predict(embeddings)
123
 
124
  conf_matrix = confusion_matrix(df['label_int'], predictions)
125