Spaces:
Sleeping
Sleeping
File size: 2,213 Bytes
eda0eb2 930a19b eda0eb2 930a19b 2bee89e 930a19b 2bee89e 930a19b eda0eb2 930a19b eda0eb2 930a19b eda0eb2 2bee89e 930a19b 2bee89e 930a19b a73225b 2bee89e eda0eb2 2bee89e 930a19b 2bee89e 930a19b 2bee89e 55f74cf 2bee89e eda0eb2 2bee89e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import streamlit as st
import pickle
import numpy as np
import pandas as pd
import nltk
import re
from nltk.corpus import stopwords
from bs4 import BeautifulSoup
# Load required models and vectorizers.
#
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so the NLTK downloads and the three unpickles are wrapped in a
# cached loader: they run once per server process instead of on every click.
# show_spinner=False keeps the loader from drawing a spinner element, because
# st.set_page_config() further down is expected to be the first Streamlit
# command that renders anything.
@st.cache_resource(show_spinner=False)
def _load_resources():
    """Fetch NLTK data and load the pickled model and vectorizers.

    Returns:
        tuple: ``(model, tfidf_vectorizer, count_vectorizer, stop_words)``
        where ``stop_words`` is the set of NLTK English stop words.

    Raises:
        FileNotFoundError: if one of the ``.pkl`` files is missing from the
            working directory.
    """
    for resource in ("stopwords", "punkt", "wordnet"):
        nltk.download(resource)

    # NOTE: pickle.load can execute arbitrary code. These files must be
    # trusted artifacts shipped with the app, never user-supplied uploads.
    with open("final_model.pkl", "rb") as f:
        loaded_model = pickle.load(f)
    with open("tfidf_vectorizer.pkl", "rb") as f:
        loaded_tfidf = pickle.load(f)
    with open("count_vectorizer.pkl", "rb") as f:
        loaded_count = pickle.load(f)

    english_stop_words = set(stopwords.words("english"))
    return loaded_model, loaded_tfidf, loaded_count, english_stop_words


# Same module-level names as before, so the functions below are unaffected.
model, tfidf_vectorizer, count_vectorizer, stop_words = _load_resources()
# Streamlit setup
st.set_page_config(page_title="Stack Overflow Tag Predictor")

# App-wide colours injected as raw CSS: midnight-blue background, white text
# on the .stApp container.
_APP_STYLE = """
<style>
.stApp {
background-color: midnightblue;
color: white;
}
</style>
"""
st.markdown(_APP_STYLE, unsafe_allow_html=True)

st.title("🧠 Stack Overflow Tag Predictor")
# A raw <br> adds a line of vertical spacing under the title.
st.markdown("<br>", unsafe_allow_html=True)
# Preprocessing function
def clean_text(text):
    """Normalise raw question text into a space-separated string of tokens.

    Non-string input yields an empty string. Otherwise the text is parsed
    with BeautifulSoup to extract its plain text, any remaining ``<...>``
    spans are removed, and every character that is not an ASCII letter or
    whitespace is deleted (not replaced by a space, so the pieces on either
    side are joined). The result is lower-cased and split on whitespace;
    tokens found in ``stop_words`` or shorter than three characters are
    discarded.

    Args:
        text: The raw question title and/or body.

    Returns:
        str: The surviving tokens joined by single spaces.
    """
    if not isinstance(text, str):
        return ""

    plain = BeautifulSoup(text, "html.parser").get_text()
    # Second pass: drop anything that still looks like <...> in the
    # extracted text.
    without_tags = re.sub(r"<.*?>", "", plain)
    letters_only = re.sub(r"[^a-zA-Z\s]", "", without_tags).lower()

    kept = []
    for token in letters_only.split():
        if len(token) <= 2 or token in stop_words:
            continue
        kept.append(token)
    return " ".join(kept)
# Prediction function
def predict_tags(text):
    """Return the tag names the model predicts for *text*.

    The text is cleaned with ``clean_text``, vectorised with
    ``tfidf_vectorizer`` and passed to ``model.predict``. The prediction is
    converted to a dense array with ``.toarray()`` and its columns are
    labelled with ``count_vectorizer.get_feature_names_out()``.

    Args:
        text: The raw question title and/or body.

    Returns:
        list: The column labels whose value in the first prediction row
        equals 1, in column order. Empty when no label is set.
    """
    features = tfidf_vectorizer.transform([clean_text(text)])
    dense_labels = model.predict(features).toarray()
    # NOTE(review): presumably count_vectorizer was fitted on the tag
    # strings, so its feature names are the label set - confirm.
    tag_names = count_vectorizer.get_feature_names_out()
    label_frame = pd.DataFrame(dense_labels, columns=tag_names)

    # Only one document was vectorised, so only the first row is read.
    first_row = label_frame.iloc[0].values
    return [name for name, flag in zip(label_frame.columns, first_row) if flag == 1]
# User input
question = st.text_area("Enter your Stack Overflow question title and/or description", height=200)
predict_clicked = st.button("Predict Tags")

if predict_clicked and not question.strip():
    # Button pressed with an empty or whitespace-only text box.
    st.warning("Please enter a question to predict tags.")
elif predict_clicked:
    predicted_tags = predict_tags(question)
    st.subheader("✅ Predicted Tags:")
    if not predicted_tags:
        st.info("No tags predicted. Try refining your question.")
    # One success box per tag; an empty list renders nothing here.
    for tag in predicted_tags:
        st.success(f"#{tag}")
|