shwetashweta05 committed on
Commit
2bee89e
·
verified ·
1 Parent(s): 30415de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -34
app.py CHANGED
@@ -1,22 +1,17 @@
1
-
2
  import streamlit as st
3
  import pickle
4
  import numpy as np
5
  import pandas as pd
6
  import nltk
7
  import re
8
- import emoji
9
- import string
10
- import contractions
11
  from nltk.corpus import stopwords
12
- from nltk.tokenize import word_tokenize
13
- from nltk.stem import PorterStemmer,LancasterStemmer, SnowballStemmer, WordNetLemmatizer
14
 
15
  nltk.download("stopwords")
16
  nltk.download("punkt")
17
- nltk.download("punkt_tab")
18
  nltk.download("wordnet")
19
 
 
20
  with open("final_model.pkl", "rb") as f:
21
  model = pickle.load(f)
22
 
@@ -26,6 +21,9 @@ with open("tfidf_vectorizer.pkl", "rb") as f:
26
  with open("count_vectorizer.pkl", "rb") as f:
27
  count_vectorizer = pickle.load(f)
28
 
 
 
 
29
  st.set_page_config(page_title="Stack Overflow Tag Predictor")
30
 
31
  st.markdown(
@@ -33,42 +31,48 @@ st.markdown(
33
  <style>
34
  .stApp {
35
  background-color: midnightblue;
 
36
  }
37
  </style>
38
  """,
39
  unsafe_allow_html=True
40
  )
41
 
42
- # Main title
43
  st.title("🧠 Stack Overflow Tag Predictor")
 
44
 
45
- st.markdown("<br>",unsafe_allow_html = True)
 
 
 
 
 
 
 
 
 
 
46
 
 
47
  def predict_tags(text):
48
- cleaned_text = re.sub(r'<.*?>', '', text)
49
- cleaned_text = re.sub(r'[^a-z\s]', '', cleaned_text)
50
- cleaned_text = cleaned_text.lower()
51
- cleaned_text = cleaned_text.split()
52
- cleaned_text = [word for word in cleaned_text if word not in stop_words and len(word) > 2]
53
- cleaned_text = ' '.join(cleaned_text)
54
- question = tfidf_vect.transform([text])
55
- print(question)
56
- pred= model.predict(question)
57
- pred_array= pd.DataFrame(pred.toarray(), columns = count_vect.get_feature_names_out())
58
- tags = []
59
- for i, col in zip(pred_array.iloc[0, :].values, count_vect.get_feature_names_out()):
60
- if i == 1:
61
- tags.append(col)
62
- return tags
63
-
64
-
65
- question = st.text_input("Enter the question title")
66
 
67
- # Display tags
68
- st.subheader(" Predicted Tags")
69
- if predicted_tags:
70
- for tag in predicted_tags:
71
- st.markdown(f"#{tag}")
72
- else:
73
- st.info("No tags predicted. Try refining your question and description.")
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pickle
3
  import numpy as np
4
  import pandas as pd
5
  import nltk
6
  import re
 
 
 
7
  from nltk.corpus import stopwords
8
+ from bs4 import BeautifulSoup
 
9
 
10
  nltk.download("stopwords")
11
  nltk.download("punkt")
 
12
  nltk.download("wordnet")
13
 
14
+ # Load required models and vectorizers
15
  with open("final_model.pkl", "rb") as f:
16
  model = pickle.load(f)
17
 
 
21
  with open("count_vectorizer.pkl", "rb") as f:
22
  count_vectorizer = pickle.load(f)
23
 
24
+ stop_words = set(stopwords.words("english"))
25
+
26
+ # Streamlit setup
27
  st.set_page_config(page_title="Stack Overflow Tag Predictor")
28
 
29
  st.markdown(
 
31
  <style>
32
  .stApp {
33
  background-color: midnightblue;
34
+ color: white;
35
  }
36
  </style>
37
  """,
38
  unsafe_allow_html=True
39
  )
40
 
 
41
  st.title("🧠 Stack Overflow Tag Predictor")
42
+ st.markdown("<br>", unsafe_allow_html=True)
43
 
44
+ # Preprocessing function
45
+ def clean_text(text):
46
+ if not isinstance(text, str):
47
+ return ""
48
+ text = BeautifulSoup(text, "html.parser").get_text()
49
+ text = re.sub(r"<.*?>", "", text)
50
+ text = re.sub(r"[^a-zA-Z\s]", "", text)
51
+ text = text.lower()
52
+ words = text.split()
53
+ words = [w for w in words if w not in stop_words and len(w) > 2]
54
+ return " ".join(words)
55
 
56
+ # Prediction function
57
  def predict_tags(text):
58
+ cleaned = clean_text(text)
59
+ question_vec = tfidf_vectorizer.transform([cleaned])
60
+ prediction = model.predict(question_vec)
61
+ prediction_df = pd.DataFrame(prediction.toarray(), columns=count_vectorizer.get_feature_names_out())
62
+ tags = [col for col, val in zip(prediction_df.columns, prediction_df.iloc[0].values) if val == 1]
63
+ return tags
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
+ # User input
66
+ question = st.text_area("Enter your Stack Overflow question title and/or description", height=200)
 
 
 
 
 
67
 
68
+ if st.button("Predict Tags"):
69
+ if not question.strip():
70
+ st.warning("Please enter a question to predict tags.")
71
+ else:
72
+ predicted_tags = predict_tags(question)
73
+ st.subheader("✅ Predicted Tags:")
74
+ if predicted_tags:
75
+ for tag in predicted_tags:
76
+ st.success(f"#{tag}")
77
+ else:
78
+ st.info("No tags predicted. Try refining your question.")