File size: 5,383 Bytes
ed07733
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import streamlit as st
import joblib
import pandas as pd
import re
from unidecode import unidecode
import emoji
import string
import contractions
from nltk.stem import PorterStemmer
import numpy as np
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS

# Page-wide stylesheet for the app. It is kept in a named constant so the
# rendering call below stays short; unsafe_allow_html is required for the
# <style> element to be passed through as HTML.
_PAGE_STYLES = """
<style>
    .stApp {
        background-color: #f9fbfc;
        font-family: 'Segoe UI', sans-serif;
    }
    .custom-header {
        background-color: #1e293b;
        color: white;
        padding: 2rem;
        border-radius: 0.5rem;
        text-align: center;
        margin-bottom: 2rem;
    }
    .custom-header h1 {
        font-size: 2rem;
        margin-bottom: 0.5rem;
    }
    .custom-header p {
        font-size: 1rem;
        color: #cbd5e1;
    }
    .input-box, .output-box {
        background-color: white;
        padding: 1.5rem;
        border-radius: 0.5rem;
        box-shadow: 0 0 10px rgba(0, 0, 0, 0.04);
        margin-bottom: 2rem;
    }
    .tag-pill {
        display: inline-block;
        background-color: #e0f2fe;
        color: #0369a1;
        padding: 0.4em 0.8em;
        margin: 0.25em;
        border-radius: 999px;
        font-weight: 600;
        font-size: 0.9rem;
    }
    .footer {
        text-align: center;
        font-size: 0.85rem;
        color: #64748b;
        margin-top: 2rem;
    }
</style>
"""
st.markdown(_PAGE_STYLES, unsafe_allow_html=True)

# Header banner: app title plus a one-line usage hint, styled by the
# .custom-header CSS rules. The title emoji is written as the real character
# (U+1F4CC, pushpin) rather than its mis-decoded byte sequence.
st.markdown("""
<div class="custom-header">
    <h1>📌 StackOverflow Tag Predictor</h1>
    <p>Enter a programming question to see predicted tags</p>
</div>
""", unsafe_allow_html=True)

# Shared text-normalisation resources, created once at import time and read
# by preprocess_text().

# Chat/slang abbreviations mapped to their spelled-out form.
chat_words = {
    "brb": "be right back",
    "btw": "by the way",
    "lol": "laugh out loud",
    "afaik": "as far as i know",
    "imo": "in my opinion",
    "tbh": "to be honest",
    "idk": "i don't know",
    "asap": "as soon as possible",
    "np": "no problem",
    "thx": "thanks",
    "pls": "please",
    "fyi": "for your information",
}

# Mutable copy of scikit-learn's English stop-word list, used for membership tests.
stop_words = set(ENGLISH_STOP_WORDS)

# Porter stemmer applied to every token that survives stop-word filtering.
stemmer = PorterStemmer()

def preprocess_text(text):
    """Normalise raw question text into a space-joined string of stemmed tokens.

    The steps run in a fixed order: remove ``<...>`` tag-like spans and URLs,
    replace emoji with their names, run ``unidecode``, expand contractions,
    lowercase, expand known chat abbreviations, delete punctuation, tokenise
    on word boundaries, drop stop words, and stem what remains.

    Args:
        text: The raw user-supplied question.

    Returns:
        The processed text, or ``""`` when ``text`` is not a string, is blank,
        or any step raises (in which case the error is shown via ``st.error``).
    """
    if not (isinstance(text, str) and text.strip()):
        return ""

    try:
        cleaned = re.sub(r'<[^>]+>', '', text)
        cleaned = re.sub(r'https?://\S+|www\.\S+', '', cleaned)
        # Space delimiters keep the emoji name separate from adjacent words.
        cleaned = emoji.demojize(cleaned, delimiters=(" ", " "))
        cleaned = contractions.fix(unidecode(cleaned)).lower()

        # Abbreviations are matched on whitespace-separated words, before
        # punctuation is removed.
        expanded = " ".join(
            chat_words.get(piece.lower(), piece) for piece in cleaned.split()
        )

        punctuation_free = expanded.translate(
            str.maketrans('', '', string.punctuation)
        )
        stems = [
            stemmer.stem(token)
            for token in re.findall(r'\b\w+\b', punctuation_free)
            if token not in stop_words
        ]
        return " ".join(stems)
    except Exception as exc:
        st.error(f"Preprocessing error: {exc}")
        return ""

@st.cache_resource
def load_models():
    """Load the two serialized artifacts the app needs for prediction.

    Reads ``tag_model.joblib`` first and then ``tag_binarizer.joblib`` from
    the current working directory.

    Returns:
        A ``(model, mlb)`` tuple on success. If either load raises, the error
        is reported with ``st.error`` and ``(None, None)`` is returned.
    """
    # NOTE(review): joblib files are pickle-based, so only load artifacts from
    # a trusted source.
    try:
        artifacts = (
            joblib.load("tag_model.joblib"),
            joblib.load("tag_binarizer.joblib"),
        )
    except Exception as exc:
        st.error(f"Error loading model: {exc}")
        return None, None
    return artifacts

# Fetch the model and binarizer; both are None when loading failed.
model, mlb = load_models()

# Input
# NOTE(review): the opening and closing <div> are emitted by separate
# st.markdown calls, so the text area is probably not rendered inside the
# .input-box container -- confirm in the browser.
st.markdown('<div class="input-box">', unsafe_allow_html=True)
user_input = st.text_area(
    "✍️ Paste your programming question below:",
    height=200,
    placeholder="e.g., How to reverse a list in Python?",
)
st.markdown('</div>', unsafe_allow_html=True)

# Prediction: st.button returns True only on the script run triggered by a
# click, so everything below executes once per click.
if st.button("🚀 Predict Tags"):
    if not user_input.strip():
        st.warning("Please enter your question to get predictions.")
    elif model is None or mlb is None:
        st.error("Model loading failed.")
    else:
        with st.spinner("Processing..."):
            processed = preprocess_text(user_input)
            if not processed:
                # Preprocessing can legitimately yield nothing (for example a
                # question made only of stop words or punctuation). Tell the
                # user instead of silently rendering nothing.
                st.warning(
                    "No usable words were found in your question. "
                    "Please add more detail and try again."
                )
            else:
                try:
                    # The model is fed a one-row frame with the processed text
                    # in the 'processed_excerpt' column.
                    input_df = pd.DataFrame({'processed_excerpt': [processed]})
                    top_k = 5  # maximum number of tags shown for scored models

                    if hasattr(model, "predict_proba"):
                        probs = model.predict_proba(input_df)[0]
                        # argsort is ascending: take the last top_k, then reverse
                        # so the highest-scoring tag comes first.
                        top_idx = np.argsort(probs)[-top_k:][::-1]
                        tags = [mlb.classes_[i] for i in top_idx]
                        confs = [int(probs[i] * 100) for i in top_idx]
                    elif hasattr(model, "decision_function"):
                        scores = model.decision_function(input_df)[0]
                        top_idx = np.argsort(scores)[-top_k:][::-1]
                        tags = [mlb.classes_[i] for i in top_idx]
                        # Raw decision scores are not percentages, so no
                        # confidence is displayed; size the list to the tags
                        # actually selected rather than assuming five.
                        confs = [None] * len(tags)
                    else:
                        preds = model.predict(input_df)
                        tags = list(mlb.inverse_transform(preds)[0])
                        confs = [None] * len(tags)

                    # Output
                    if not tags:
                        st.info("No tags were predicted for this question.")
                    else:
                        pills = []
                        for tag, conf in zip(tags, confs):
                            confidence = f" ({conf}%)" if conf is not None else ""
                            pills.append(
                                f'<span class="tag-pill">{tag}{confidence}</span>'
                            )
                        # Emit the box and its pills in ONE markdown call: HTML
                        # opened in one st.markdown call does not wrap elements
                        # produced by later calls, so splitting this up would
                        # leave the .output-box container empty.
                        st.markdown(
                            '<div class="output-box"><h4>🏷️ Predicted Tags:</h4>'
                            + "".join(pills)
                            + '</div>',
                            unsafe_allow_html=True,
                        )
                except Exception as e:
                    st.error(f"Prediction error: {e}")

# Footer: short description of the tool, styled by the .footer CSS rules.
# The emoji is written as the real character (U+1F50E, magnifying glass)
# rather than its mis-decoded byte sequence.
st.markdown("""
<div class="footer">
    🔎 This ML tool predicts tags based on programming question content.
</div>
""", unsafe_allow_html=True)