"""Streamlit app that suggests Stack Overflow tags for a question.

The script loads three pickled artifacts (a model, a text vectorizer and a
label binarizer), collects a question title and description from the user,
and displays the tags produced by the model for the cleaned text.
"""

import pickle
import re

import numpy as np
import streamlit as st

# Artifact files are opened relative to the current working directory.
MODEL_PATH = "model (1).pkl"
VECTORIZER_PATH = "tfidf (1).pkl"
MLB_PATH = "mlb (1).pkl"

# Streamlit page configuration
st.set_page_config(page_title="Stack Overflow Tags Predictor", layout="centered")


def clean_text(text: str) -> str:
    """Return *text* with markup and punctuation removed, lowercased.

    Steps, in order:
      1. replace ``<...>`` spans with a space (``.`` does not match newlines,
         so a tag that spans several lines is not removed by this step),
      2. replace every non-word character with a space,
      3. lowercase, collapse whitespace runs to one space, and strip the ends.

    NOTE(review): presumably this must stay identical to the preprocessing
    used when the vectorizer was fitted -- confirm before changing it.
    """
    text = re.sub(r"<.*?>", " ", text)  # Remove HTML tags
    text = re.sub(r"\W", " ", text)  # Remove special characters
    text = re.sub(r"\s+", " ", text.lower()).strip()  # Normalize whitespace
    return text


@st.cache_resource
def load_artifacts():
    """Load and return ``(model, vectorizer, mlb)`` from their pickle files.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    objects instead of unpickling them on every script rerun.

    Raises:
        OSError: if an artifact file is missing or unreadable.
        pickle.UnpicklingError: if an artifact file is not a valid pickle.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only ship artifacts produced by a trusted training pipeline.
    with open(MODEL_PATH, "rb") as f:
        model = pickle.load(f)
    with open(VECTORIZER_PATH, "rb") as f:
        vectorizer = pickle.load(f)
    with open(MLB_PATH, "rb") as f:
        mlb = pickle.load(f)
    return model, vectorizer, mlb


# Load artifacts; without them the app cannot do anything useful, so report
# the problem in the page and halt this script run instead of crashing.
try:
    model, vectorizer, mlb = load_artifacts()
except (OSError, pickle.UnpicklingError) as exc:
    st.error(f"❌ Could not load model artifacts: {exc}")
    st.stop()

# UI
st.title("🔖 Stack Overflow Tags Predictor")
st.markdown("Enter a question's *title* and *description*, and this app will suggest relevant tags.")

# User inputs
title = st.text_input("📝 Question Title")
body = st.text_area("📄 Question Description", height=200)

# Prediction button
if st.button("🔍 Predict Tags"):
    if not title.strip() or not body.strip():
        st.warning("⚠ Please enter both a title and a description.")
    else:
        input_text = clean_text(title + " " + body)
        try:
            # Vectorisation is guarded together with prediction so that a
            # failure in either step is shown to the user as an error.
            X_input = vectorizer.transform([input_text])
            y_pred = model.predict(X_input)
        except Exception as e:  # UI boundary: surface any failure to the user
            st.error(f"❌ Prediction failed: {e}")
        else:
            predicted_tags = mlb.inverse_transform(y_pred)
            # inverse_transform yields one entry per input row; only one row
            # was submitted, so the first entry holds this question's tags.
            if predicted_tags and predicted_tags[0]:
                st.success("✅ Predicted Tags:")
                st.write(", ".join(predicted_tags[0]))
            else:
                st.info("🤔 No tags predicted. Try refining your question.")