|
|
import streamlit as st |
|
|
import joblib |
|
|
import re |
|
|
import nltk |
|
|
from nltk.corpus import stopwords |
|
|
from nltk.tokenize import word_tokenize |
|
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
|
from sklearn.preprocessing import MultiLabelBinarizer |
|
|
|
|
|
# Make sure the NLTK data used by word_tokenize() and stopwords.words() is
# available. Streamlit re-executes this script on every user interaction, so
# look for each resource locally first and only contact the NLTK download
# server when it is actually missing.
# "punkt_tab" is included because recent NLTK releases load it (instead of the
# pickled "punkt" models) inside word_tokenize(); fetching it on an older
# release is harmless.
for _resource_path, _package_id in (
    ("tokenizers/punkt", "punkt"),
    ("tokenizers/punkt_tab", "punkt_tab"),
    ("corpora/stopwords", "stopwords"),
):
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_package_id)
|
|
|
|
|
|
|
|
# show_spinner=False keeps the cached call from emitting any UI element, since
# it runs before st.set_page_config() further down the script.
# st.cache_resource requires Streamlit >= 1.18.
@st.cache_resource(show_spinner=False)
def _load_artifacts():
    """Load the three persisted objects once and reuse them across reruns.

    Streamlit re-executes the whole script on every widget interaction;
    caching avoids re-reading the pickles from disk each time.

    Returns:
        A ``(model, vectorizer, mlb)`` tuple with the objects stored in
        ``model.pkl``, ``vectorizer.pkl`` and ``mlb.pkl`` respectively.
    """
    # NOTE: joblib.load unpickles arbitrary Python objects, so these paths
    # must only ever point at trusted files.
    return (
        joblib.load(r"E:\Stack-Overflow\models\model.pkl"),
        joblib.load(r"E:\Stack-Overflow\models\vectorizer.pkl"),
        joblib.load(r"E:\Stack-Overflow\models\mlb.pkl"),
    )


model, vectorizer, mlb = _load_artifacts()
|
|
|
|
|
|
|
|
|
|
|
def preprocess(text):
    """Turn raw question text into a cleaned, space-separated token string.

    The text is lower-cased, every character that is not ``a``-``z`` or
    whitespace is deleted, the remainder is split with NLTK's
    ``word_tokenize``, and tokens found in NLTK's English stop-word list
    are dropped.

    Args:
        text: The raw input string.

    Returns:
        The surviving tokens joined by single spaces (an empty string when
        no token survives).
    """
    letters_only = re.sub(r'[^a-z\s]', '', text.lower())
    words = word_tokenize(letters_only)
    # A set gives O(1) membership tests while filtering.
    english_stops = frozenset(stopwords.words('english'))
    return " ".join(word for word in words if word not in english_stops)
|
|
|
|
|
|
|
|
# --- Page chrome -----------------------------------------------------------
st.set_page_config(
    layout="centered",
    page_title="Stack Overflow Tag Predictor",
)
st.title("💬 Stack Overflow Tag Predictor")
st.markdown("Enter a question (title + body) and get predicted tags.")

# --- Inputs ----------------------------------------------------------------
# Free-text question; read by the prediction step below as `user_input`.
user_input = st.text_area(
    "✍️ Question Title + Body",
    height=200,
)

# How many of the highest-scoring tags to display (1-10, default 5).
top_k = st.slider(
    "Number of tags to show",
    value=5,
    min_value=1,
    max_value=10,
)
|
|
|
|
|
# Run a prediction when the button is clicked and render the top_k
# highest-scoring tags with their scores.
if st.button("Predict Tags"):
    cleaned = preprocess(user_input)

    if not cleaned:
        # Nothing usable to vectorize: the box was blank, or the text held
        # only stop words / non-letter characters. Tell the user instead of
        # doing nothing or scoring an empty document.
        st.warning(
            "Please enter a question containing at least one meaningful word."
        )
    else:
        X_vec = vectorizer.transform([cleaned])

        # One document was submitted, so row 0 holds its per-tag scores.
        # NOTE(review): assumes predict_proba returns a 2-D
        # (n_samples, n_tags) array aligned with mlb.classes_ - confirm
        # against the trained model type.
        scores = model.predict_proba(X_vec)[0]

        # argsort is ascending: take the last top_k indices and reverse them
        # so the highest score comes first.
        top_indices = scores.argsort()[-top_k:][::-1]
        ranked = [(mlb.classes_[i], scores[i]) for i in top_indices]

        st.markdown("### 🏷️ Predicted Tags:")
        for tag, conf in ranked:
            st.markdown(f"- **{tag}** (confidence: {conf:.2f})")
|
|
|