"""Streamlit app that predicts Stack Overflow tags for a free-text question.

The script loads a pickled classifier plus two pickled vectorizers, cleans the
text typed by the user, and shows every tag the classifier switches on.
"""

import pickle
import re

import nltk
import numpy as np
import pandas as pd
import streamlit as st
from bs4 import BeautifulSoup
from nltk.corpus import stopwords

# Streamlit setup. Kept ahead of the artifact loading below so that it remains
# the first Streamlit command executed by the script.
st.set_page_config(page_title="Stack Overflow Tag Predictor")


def _cache_resource(func):
    """Return *func* cached across Streamlit reruns when the API is available.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the pickles would be re-read and the NLTK downloader
    re-invoked on each button click. Streamlit releases that do not provide
    ``st.cache_resource`` fall back to the plain, uncached function.
    """
    decorator = getattr(st, "cache_resource", None)
    if decorator is None:
        return func
    # show_spinner=False: do not emit any UI element while loading.
    return decorator(show_spinner=False)(func)


@_cache_resource
def _ensure_nltk_data():
    """Download the NLTK corpora requested by the original script."""
    for package in ("stopwords", "punkt", "wordnet"):
        nltk.download(package)


@_cache_resource
def _load_pickle(path):
    """Load and return the object pickled at *path*.

    When caching is active the same object is reused on later reruns, so a
    file replaced on disk is only picked up after the cache is cleared or the
    app restarts.
    """
    # NOTE: pickle.load can execute arbitrary code embedded in the file; only
    # point this at artifacts produced by a trusted training run.
    with open(path, "rb") as f:
        return pickle.load(f)


# Load required models and vectorizers
_ensure_nltk_data()
model = _load_pickle("final_model.pkl")
tfidf_vectorizer = _load_pickle("tfidf_vectorizer.pkl")
count_vectorizer = _load_pickle("count_vectorizer.pkl")

stop_words = set(stopwords.words("english"))

# Compiled once at import time; the patterns are unchanged from the original.
_HTML_TAG_RE = re.compile(r"<.*?>")
_NON_LETTER_RE = re.compile(r"[^a-zA-Z\s]")

# NOTE(review): both HTML payloads passed to st.markdown below look like they
# were stripped before this file was reviewed (one is a single space, the other
# a bare newline). They are reproduced unchanged; restore the intended markup
# if it is known.
st.markdown(
    """ """,
    unsafe_allow_html=True
)

st.title("🧠 Stack Overflow Tag Predictor")
st.markdown("\n", unsafe_allow_html=True)


# Preprocessing function
def clean_text(text):
    """Normalise raw question text into a space-separated string of tokens.

    Steps, in order: extract the text from any HTML, remove leftover
    ``<...>`` spans, drop every character that is not an ASCII letter or
    whitespace, lowercase, split on whitespace, and discard English stop words
    and tokens of two characters or fewer.

    Args:
        text: The raw question text. Any non-``str`` value yields ``""``.

    Returns:
        The cleaned tokens joined by single spaces (possibly empty).
    """
    if not isinstance(text, str):
        return ""

    # Presumably mirrors the preprocessing applied when the TF-IDF vectorizer
    # was fitted; keep the two in sync -- TODO confirm against training code.
    text = BeautifulSoup(text, "html.parser").get_text()
    text = _HTML_TAG_RE.sub("", text)
    text = _NON_LETTER_RE.sub("", text)
    text = text.lower()

    words = [w for w in text.split() if w not in stop_words and len(w) > 2]
    return " ".join(words)


# Prediction function
def predict_tags(text):
    """Return the tags the model predicts for *text*.

    Args:
        text: The raw question text entered by the user.

    Returns:
        A list of tag names whose indicator in the model output equals 1, in
        the order given by ``count_vectorizer.get_feature_names_out()``.

    Raises:
        ValueError: If the number of predicted columns does not match the
            number of feature names (raised by pandas).
    """
    cleaned = clean_text(text)
    question_vec = tfidf_vectorizer.transform([cleaned])
    prediction = model.predict(question_vec)

    # Accept both a sparse matrix and a dense array from the estimator; the
    # original called .toarray() unconditionally, which fails on dense output.
    if hasattr(prediction, "toarray"):
        prediction = prediction.toarray()
    indicator_row = np.atleast_2d(prediction)[0]

    # Assumes the model's output columns line up with the count vectorizer's
    # vocabulary -- TODO confirm against the training code.
    flags = pd.Series(
        indicator_row,
        index=count_vectorizer.get_feature_names_out(),
    )
    return flags[flags == 1].index.tolist()


# User input
question = st.text_area(
    "Enter your Stack Overflow question title and/or description",
    height=200,
)

if st.button("Predict Tags"):
    if not question.strip():
        st.warning("Please enter a question to predict tags.")
    else:
        predicted_tags = predict_tags(question)
        st.subheader("✅ Predicted Tags:")
        if predicted_tags:
            for tag in predicted_tags:
                st.success(f"#{tag}")
        else:
            st.info("No tags predicted. Try refining your question.")