Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,22 +1,17 @@
|
|
| 1 |
-
|
| 2 |
import streamlit as st
|
| 3 |
import pickle
|
| 4 |
import numpy as np
|
| 5 |
import pandas as pd
|
| 6 |
import nltk
|
| 7 |
import re
|
| 8 |
-
import emoji
|
| 9 |
-
import string
|
| 10 |
-
import contractions
|
| 11 |
from nltk.corpus import stopwords
|
| 12 |
-
from
|
| 13 |
-
from nltk.stem import PorterStemmer,LancasterStemmer, SnowballStemmer, WordNetLemmatizer
|
| 14 |
|
| 15 |
nltk.download("stopwords")
|
| 16 |
nltk.download("punkt")
|
| 17 |
-
nltk.download("punkt_tab")
|
| 18 |
nltk.download("wordnet")
|
| 19 |
|
|
|
|
| 20 |
with open("final_model.pkl", "rb") as f:
|
| 21 |
model = pickle.load(f)
|
| 22 |
|
|
@@ -26,6 +21,9 @@ with open("tfidf_vectorizer.pkl", "rb") as f:
|
|
| 26 |
with open("count_vectorizer.pkl", "rb") as f:
|
| 27 |
count_vectorizer = pickle.load(f)
|
| 28 |
|
|
|
|
|
|
|
|
|
|
| 29 |
st.set_page_config(page_title="Stack Overflow Tag Predictor")
|
| 30 |
|
| 31 |
st.markdown(
|
|
@@ -33,42 +31,48 @@ st.markdown(
|
|
| 33 |
<style>
|
| 34 |
.stApp {
|
| 35 |
background-color: midnightblue;
|
|
|
|
| 36 |
}
|
| 37 |
</style>
|
| 38 |
""",
|
| 39 |
unsafe_allow_html=True
|
| 40 |
)
|
| 41 |
|
| 42 |
-
# Main title
|
| 43 |
st.title("🧠 Stack Overflow Tag Predictor")
|
|
|
|
| 44 |
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
|
|
|
| 47 |
def predict_tags(text):
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
question = tfidf_vect.transform([text])
|
| 55 |
-
print(question)
|
| 56 |
-
pred= model.predict(question)
|
| 57 |
-
pred_array= pd.DataFrame(pred.toarray(), columns = count_vect.get_feature_names_out())
|
| 58 |
-
tags = []
|
| 59 |
-
for i, col in zip(pred_array.iloc[0, :].values, count_vect.get_feature_names_out()):
|
| 60 |
-
if i == 1:
|
| 61 |
-
tags.append(col)
|
| 62 |
-
return tags
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
question = st.text_input("Enter the question title")
|
| 66 |
|
| 67 |
-
|
| 68 |
-
st.
|
| 69 |
-
if predicted_tags:
|
| 70 |
-
for tag in predicted_tags:
|
| 71 |
-
st.markdown(f"#{tag}")
|
| 72 |
-
else:
|
| 73 |
-
st.info("No tags predicted. Try refining your question and description.")
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pickle
|
| 3 |
import numpy as np
|
| 4 |
import pandas as pd
|
| 5 |
import nltk
|
| 6 |
import re
|
|
|
|
|
|
|
|
|
|
| 7 |
from nltk.corpus import stopwords
|
| 8 |
+
from bs4 import BeautifulSoup
|
|
|
|
| 9 |
|
| 10 |
nltk.download("stopwords")
|
| 11 |
nltk.download("punkt")
|
|
|
|
| 12 |
nltk.download("wordnet")
|
| 13 |
|
| 14 |
+
# Load required models and vectorizers
|
| 15 |
with open("final_model.pkl", "rb") as f:
|
| 16 |
model = pickle.load(f)
|
| 17 |
|
|
|
|
| 21 |
with open("count_vectorizer.pkl", "rb") as f:
|
| 22 |
count_vectorizer = pickle.load(f)
|
| 23 |
|
| 24 |
+
stop_words = set(stopwords.words("english"))
|
| 25 |
+
|
| 26 |
+
# Streamlit setup
|
| 27 |
st.set_page_config(page_title="Stack Overflow Tag Predictor")
|
| 28 |
|
| 29 |
st.markdown(
|
|
|
|
| 31 |
<style>
|
| 32 |
.stApp {
|
| 33 |
background-color: midnightblue;
|
| 34 |
+
color: white;
|
| 35 |
}
|
| 36 |
</style>
|
| 37 |
""",
|
| 38 |
unsafe_allow_html=True
|
| 39 |
)
|
| 40 |
|
|
|
|
| 41 |
st.title("🧠 Stack Overflow Tag Predictor")
|
| 42 |
+
st.markdown("<br>", unsafe_allow_html=True)
|
| 43 |
|
| 44 |
+
# Preprocessing function
|
| 45 |
+
def clean_text(text):
    """Normalize raw question text into a space-separated string of tokens.

    Args:
        text: The raw question text. It may contain HTML markup.

    Returns:
        The cleaned text, or an empty string when *text* is not a ``str``.
    """
    # NOTE(review): presumably these steps must mirror the preprocessing used
    # when the vectorizer/model were fitted -- confirm before changing any.
    if not isinstance(text, str):
        return ""

    # Drop HTML markup first, then remove any "<...>" spans still left over.
    plain = BeautifulSoup(text, "html.parser").get_text()
    plain = re.sub(r"<.*?>", "", plain)

    # Keep only ASCII letters and whitespace; digits and symbols are removed.
    letters_only = re.sub(r"[^a-zA-Z\s]", "", plain).lower()

    # Discard stop words and tokens of one or two characters.
    kept = []
    for token in letters_only.split():
        if len(token) > 2 and token not in stop_words:
            kept.append(token)
    return " ".join(kept)
|
| 55 |
|
| 56 |
+
# Prediction function
|
| 57 |
def predict_tags(text):
    """Return the tag names the model predicts for *text*.

    The text is cleaned with ``clean_text``, vectorized with
    ``tfidf_vectorizer`` and passed to ``model.predict``. The prediction is
    converted with ``.toarray()`` and labelled with the feature names of
    ``count_vectorizer``; every column whose value in the first row equals 1
    is reported as a tag.

    Args:
        text: The raw question text entered by the user.

    Returns:
        A list of the selected column names, possibly empty.
    """
    features = tfidf_vectorizer.transform([clean_text(text)])
    predicted = model.predict(features)

    # presumably a sparse multi-label indicator matrix -- it must expose
    # .toarray(); verify against the pickled model.
    dense = predicted.toarray()
    tag_names = count_vectorizer.get_feature_names_out()
    frame = pd.DataFrame(dense, columns=tag_names)

    # Only one document was vectorized, so only the first row is read.
    first_row = frame.iloc[0].values
    tags = []
    for name, flag in zip(frame.columns, first_row):
        if flag == 1:
            tags.append(name)
    return tags
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
+
# User input
|
| 66 |
+
# Streamlit reruns this script on each interaction; the prediction itself is
# only computed on the run in which the button was clicked.
question = st.text_area("Enter your Stack Overflow question title and/or description", height=200)

if st.button("Predict Tags"):
    if question.strip():
        predicted_tags = predict_tags(question)
        st.subheader("✅ Predicted Tags:")
        if not predicted_tags:
            st.info("No tags predicted. Try refining your question.")
        else:
            # One success box per predicted tag.
            for tag in predicted_tags:
                st.success(f"#{tag}")
    else:
        # Empty or whitespace-only input: prompt the user instead of predicting.
        st.warning("Please enter a question to predict tags.")
|