# Stack_Overflow / app.py
# Author: shwetashweta05
# Commit: "Update app.py" (2bee89e, verified)
import streamlit as st
import pickle
import numpy as np
import pandas as pd
import nltk
import re
from nltk.corpus import stopwords
from bs4 import BeautifulSoup
# Fetch the NLTK resources this script requests, one call per resource and in
# the same order. "stopwords" backs the stop-word set built further down.
# NOTE(review): no use of "punkt" or "wordnet" is visible in this file —
# confirm whether they are still needed.
for _resource in ("stopwords", "punkt", "wordnet"):
    nltk.download(_resource)
# Load required models and vectorizers
def _load_pickle(path):
    """Return the object unpickled from the binary file at *path*.

    NOTE(review): unpickling can execute arbitrary code, so these files must
    only ever come from a trusted source.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


model = _load_pickle("final_model.pkl")
tfidf_vectorizer = _load_pickle("tfidf_vectorizer.pkl")
count_vectorizer = _load_pickle("count_vectorizer.pkl")

# English stop words held in a set for membership tests.
stop_words = set(stopwords.words("english"))
# Streamlit setup
st.set_page_config(page_title="Stack Overflow Tag Predictor")

# CSS injected into the page: midnight-blue background with white text on the
# .stApp container.
_PAGE_CSS = """
<style>
.stApp {
background-color: midnightblue;
color: white;
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

st.title("🧠 Stack Overflow Tag Predictor")
# A raw <br> inserts a line break below the title.
st.markdown("<br>", unsafe_allow_html=True)
# Preprocessing function
def clean_text(text):
    """Normalise raw question text into a space-separated token string.

    Non-string input yields an empty string. Otherwise the text is passed
    through BeautifulSoup's "html.parser" to extract plain text, any remaining
    ``<...>`` spans are removed, every character that is not an ASCII letter
    or whitespace is deleted, and the result is lower-cased. Tokens that are
    in ``stop_words`` or have fewer than three characters are dropped.

    Args:
        text: Raw question title and/or body.

    Returns:
        str: The surviving tokens joined by single spaces.
    """
    if not isinstance(text, str):
        return ""

    plain = BeautifulSoup(text, "html.parser").get_text()
    # Same two substitutions, in the same order: leftover tags first, then
    # everything that is neither a letter nor whitespace.
    for pattern in (r"<.*?>", r"[^a-zA-Z\s]"):
        plain = re.sub(pattern, "", plain)

    kept = []
    for token in plain.lower().split():
        if len(token) <= 2 or token in stop_words:
            continue
        kept.append(token)
    return " ".join(kept)
# Prediction function
def predict_tags(text):
    """Predict tags for a question.

    The text is cleaned with ``clean_text``, vectorised with
    ``tfidf_vectorizer`` and passed to ``model.predict``. The first (and only)
    row of the prediction is read as one indicator per feature name of
    ``count_vectorizer``; names whose indicator equals 1 are returned.

    Args:
        text: Raw question title and/or body as entered by the user.

    Returns:
        list: Names of the predicted tags, in the order given by
        ``count_vectorizer.get_feature_names_out()``. Empty when nothing is
        predicted.

    Raises:
        ValueError: If the prediction is not a 2-D array with one column per
            tag name.
    """
    cleaned = clean_text(text)
    question_vec = tfidf_vectorizer.transform([cleaned])
    prediction = model.predict(question_vec)

    # Accept both sparse output (which exposes .toarray()) and a plain dense
    # array, instead of assuming sparse and failing with AttributeError.
    if hasattr(prediction, "toarray"):
        prediction = prediction.toarray()
    indicators = np.asarray(prediction)

    tag_names = count_vectorizer.get_feature_names_out()
    # Fail loudly on a shape mismatch rather than letting zip() truncate and
    # silently return wrong tags.
    if indicators.ndim != 2 or indicators.shape[1] != len(tag_names):
        raise ValueError(
            f"Prediction shape {indicators.shape} does not match "
            f"{len(tag_names)} tag names"
        )

    return [name for name, flag in zip(tag_names, indicators[0]) if flag == 1]
# User input
question = st.text_area("Enter your Stack Overflow question title and/or description", height=200)
predict_clicked = st.button("Predict Tags")

if predict_clicked and not question.strip():
    # Blank or whitespace-only input: ask for a question instead of predicting.
    st.warning("Please enter a question to predict tags.")
elif predict_clicked:
    predicted_tags = predict_tags(question)
    st.subheader("✅ Predicted Tags:")
    if not predicted_tags:
        st.info("No tags predicted. Try refining your question.")
    else:
        # One success box per predicted tag, shown with a leading '#'.
        for tag in predicted_tags:
            st.success(f"#{tag}")