sree4411 committed on
Commit
a5fd2e9
·
verified ·
1 Parent(s): a0fead1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -19
app.py CHANGED
@@ -2,49 +2,53 @@ import pickle
2
  import streamlit as st
3
  import numpy as np
4
 
5
- # Load saved vectorizer, model, and binarizer
6
- with open("vectorizer (3).pkl", "rb") as f:
7
  vectorizer = pickle.load(f)
8
 
9
- with open("model (6).pkl", "rb") as f:
10
  model = pickle.load(f)
11
 
12
- with open("binarizer (3).pkl", "rb") as f:
13
  mlb = pickle.load(f)
14
 
15
  st.title("🔖 Stack Overflow Tags Predictor")
16
- st.markdown("Enter a question title and description. Tags will be predicted automatically based on model confidence.")
17
 
 
18
  title = st.text_input("📌 Enter Question Title")
19
  description = st.text_area("📝 Enter Question Description", height=150)
20
 
21
- # 🔧 Adjust this to control how many tags are returned
22
- threshold = 0.2 # Lower threshold means more tags predicted
23
-
24
- def predict_tags_auto(title, description, threshold=0.2):
25
  input_text = title + " " + description
26
  input_vector = vectorizer.transform([input_text])
27
 
28
- # Get probabilities for each tag
29
  probas = model.predict_proba(input_vector)
30
 
31
- # Get the probability for class=1 (relevant tag) for each classifier
32
- probas_array = np.array([p[:, 1][0] for p in probas]) # (n_classes,)
 
 
 
33
 
34
- # Apply threshold
35
- predicted_binary = (probas_array >= threshold).astype(int).reshape(1, -1)
 
 
36
 
37
- # Convert binary vector to tags
38
  tags = mlb.inverse_transform(predicted_binary)
39
  return tags[0] if tags else []
40
 
41
-
42
  if st.button("Predict Tags"):
43
  if not title.strip() or not description.strip():
44
  st.warning("⚠️ Please enter both title and description.")
45
  else:
46
- tags = predict_tags_auto(title, description, threshold)
47
- if tags:
48
- st.success("✅ Predicted Tags: " + ", ".join(tags))
49
  else:
50
  st.info("ℹ️ No tags predicted. Try refining your question.")
 
2
  import streamlit as st
3
  import numpy as np
4
 
5
+ # Load saved model, vectorizer, and binarizer
6
+ with open("vectorizer.pkl", "rb") as f:
7
  vectorizer = pickle.load(f)
8
 
9
+ with open("model.pkl", "rb") as f:
10
  model = pickle.load(f)
11
 
12
+ with open("binarizer.pkl", "rb") as f:
13
  mlb = pickle.load(f)
14
 
15
  st.title("🔖 Stack Overflow Tags Predictor")
16
+ st.markdown("Enter a question title and description. The top 3 most relevant tags will be predicted automatically.")
17
 
18
+ # Input fields
19
  title = st.text_input("📌 Enter Question Title")
20
  description = st.text_area("📝 Enter Question Description", height=150)
21
 
22
+ # Function to predict top N tags (e.g., top 3)
23
+ def predict_tags_top_n(title, description, top_n=3):
 
 
24
  input_text = title + " " + description
25
  input_vector = vectorizer.transform([input_text])
26
 
27
+ # Get probability estimates from each classifier
28
  probas = model.predict_proba(input_vector)
29
 
30
+ # Extract positive class probabilities for each tag
31
+ probas_array = np.array([p[0][1] for p in probas]) # shape: (n_classes,)
32
+
33
+ # Get indices of top N tags
34
+ top_indices = probas_array.argsort()[-top_n:][::-1]
35
 
36
+ # Build binary array for top tags
37
+ predicted_binary = np.zeros_like(probas_array, dtype=int)
38
+ predicted_binary[top_indices] = 1
39
+ predicted_binary = predicted_binary.reshape(1, -1)
40
 
41
+ # Convert binary to tag names
42
  tags = mlb.inverse_transform(predicted_binary)
43
  return tags[0] if tags else []
44
 
45
+ # Predict and display
46
  if st.button("Predict Tags"):
47
  if not title.strip() or not description.strip():
48
  st.warning("⚠️ Please enter both title and description.")
49
  else:
50
+ predicted_tags = predict_tags_top_n(title, description, top_n=3)
51
+ if predicted_tags:
52
+ st.success("✅ Predicted Tags: " + ", ".join(predicted_tags))
53
  else:
54
  st.info("ℹ️ No tags predicted. Try refining your question.")