File size: 2,213 Bytes
eda0eb2
930a19b
 
 
 
eda0eb2
930a19b
2bee89e
930a19b
 
 
 
 
2bee89e
930a19b
 
eda0eb2
930a19b
 
eda0eb2
930a19b
 
eda0eb2
2bee89e
 
 
930a19b
 
 
 
 
 
 
2bee89e
930a19b
 
 
 
 
 
a73225b
2bee89e
eda0eb2
2bee89e
 
 
 
 
 
 
 
 
 
 
930a19b
2bee89e
930a19b
2bee89e
 
 
 
 
 
55f74cf
2bee89e
 
eda0eb2
2bee89e
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import streamlit as st
import pickle
import numpy as np
import pandas as pd
import nltk
import re
from nltk.corpus import stopwords
from bs4 import BeautifulSoup

# Streamlit setup. set_page_config is issued before any other work so that it
# remains the first Streamlit command executed in a script run.
st.set_page_config(page_title="Stack Overflow Tag Predictor")


@st.cache_resource(show_spinner=False)
def _download_nltk_data():
    """Download the NLTK packages this app requests, once per server process.

    Streamlit re-executes the script on every widget interaction; caching
    this call avoids repeating the download check on each rerun.

    Returns:
        A tuple holding the result of each ``nltk.download`` call, in the
        order "stopwords", "punkt", "wordnet".
    """
    # Only "stopwords" is read directly in this file; "punkt" and "wordnet"
    # are still fetched because the pickled artifacts may rely on them
    # (TODO confirm before removing).
    packages = ("stopwords", "punkt", "wordnet")
    return tuple(nltk.download(name) for name in packages)


@st.cache_resource(show_spinner=False)
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    The result is cached per ``path`` so the file is read and deserialised
    only once per server process rather than on every script rerun.

    Args:
        path: Location of the pickle file to open in binary mode.

    Returns:
        The object produced by ``pickle.load``.

    Raises:
        OSError: If the file cannot be opened.
    """
    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file; these artifacts must come from a trusted source.
    with open(path, "rb") as f:
        return pickle.load(f)


_download_nltk_data()

# Load required models and vectorizers
model = _load_pickle("final_model.pkl")
tfidf_vectorizer = _load_pickle("tfidf_vectorizer.pkl")
count_vectorizer = _load_pickle("count_vectorizer.pkl")

# Must run after the "stopwords" corpus has been downloaded above.
stop_words = set(stopwords.words("english"))

# Page-wide colours, injected as raw CSS.
st.markdown(
    """
    <style>
    .stApp {
        background-color: midnightblue;
        color: white;
    }
    </style>
    """,
    unsafe_allow_html=True
)

st.title("🧠 Stack Overflow Tag Predictor")
st.markdown("<br>", unsafe_allow_html=True)

# Preprocessing function
def clean_text(text):
    """Normalise raw question text into a space-joined string of tokens.

    Non-string input yields an empty string. Otherwise the text is passed
    through BeautifulSoup's ``get_text``, any remaining ``<...>`` spans are
    removed, every character that is not an ASCII letter or whitespace is
    dropped, and the result is lower-cased. Tokens that appear in
    ``stop_words`` or have fewer than three characters are discarded.
    """
    if isinstance(text, str):
        plain = BeautifulSoup(text, "html.parser").get_text()
        without_tags = re.sub(r"<.*?>", "", plain)
        letters_only = re.sub(r"[^a-zA-Z\s]", "", without_tags)
        kept_tokens = (
            token
            for token in letters_only.lower().split()
            if token not in stop_words and len(token) > 2
        )
        return " ".join(kept_tokens)
    return ""

# Prediction function
def predict_tags(text):
    """Return the list of tag names predicted for a raw question string.

    The text is cleaned with ``clean_text``, vectorised with
    ``tfidf_vectorizer`` and passed to ``model.predict``. The prediction is
    converted with ``.toarray()`` and labelled with the feature names of
    ``count_vectorizer``; the names whose entry in the first row equals 1 are
    returned.
    """
    features = tfidf_vectorizer.transform([clean_text(text)])
    indicator = model.predict(features).toarray()
    labelled = pd.DataFrame(
        indicator,
        columns=count_vectorizer.get_feature_names_out(),
    )
    # Boolean mask over the first (only) row: True where the tag is predicted.
    is_predicted = labelled.iloc[0].values == 1
    return list(labelled.columns[is_predicted])

# User input
question = st.text_area("Enter your Stack Overflow question title and/or description", height=200)

# The button is True only on the script run triggered by the click.
predict_clicked = st.button("Predict Tags")

if predict_clicked and not question.strip():
    # Blank or whitespace-only input: ask for text instead of predicting.
    st.warning("Please enter a question to predict tags.")
elif predict_clicked:
    predicted_tags = predict_tags(question)
    st.subheader("✅ Predicted Tags:")
    if not predicted_tags:
        st.info("No tags predicted. Try refining your question.")
    # One success box per tag; the loop is a no-op when nothing was predicted.
    for tag in predicted_tags:
        st.success(f"#{tag}")