| import streamlit as st |
| import joblib |
| import re |
| import nltk |
| from nltk.corpus import stopwords |
| from nltk.tokenize import word_tokenize |
| from sklearn.feature_extraction.text import TfidfVectorizer |
| from sklearn.preprocessing import MultiLabelBinarizer |
|
|
| nltk.download('punkt') |
| nltk.download('stopwords') |
|
|
| |
| model=joblib.load(r"E:\Stack-Overflow\models\model.pkl") |
| vectorizer = joblib.load(r"E:\Stack-Overflow\models\vectorizer.pkl") |
| mlb = joblib.load(r"E:\Stack-Overflow\models\mlb.pkl") |
| |
|
|
| |
| def preprocess(text): |
| text = text.lower() |
| text = re.sub(r'[^a-z\s]', '', text) |
| tokens = word_tokenize(text) |
| stop_words = set(stopwords.words('english')) |
| tokens = [t for t in tokens if t not in stop_words] |
| return " ".join(tokens) |
|
|
| |
| st.set_page_config(page_title="Stack Overflow Tag Predictor", layout="centered") |
|
|
| st.title("💬 Stack Overflow Tag Predictor") |
| st.markdown("Enter a question (title + body) and get predicted tags.") |
|
|
| user_input = st.text_area("✍️ Question Title + Body", height=200) |
|
|
| top_k = st.slider("Number of tags to show", min_value=1, max_value=10, value=5) |
|
|
| if st.button("Predict Tags") and user_input.strip(): |
| cleaned = preprocess(user_input) |
| X_vec = vectorizer.transform([cleaned]) |
| y_pred_proba = model.predict_proba(X_vec) |
|
|
| |
| top_indices = y_pred_proba[0].argsort()[-top_k:][::-1] |
| predicted_tags = [mlb.classes_[i] for i in top_indices] |
| confidence = [y_pred_proba[0][i] for i in top_indices] |
|
|
| st.markdown("### 🏷️ Predicted Tags:") |
| for tag, conf in zip(predicted_tags, confidence): |
| st.markdown(f"- **{tag}** (confidence: {conf:.2f})") |
|
|