import streamlit as st
import pickle
import re
import numpy as np

# Page-level settings: browser tab title and a centered content column.
# NOTE(review): Streamlit has historically required this to run before any
# other st.* command — keep it at the top of the script.
st.set_page_config(
    layout="centered",
    page_title="Stack Overflow Tags Predictor",
)

# ✅ Text preprocessing
def clean_text(text):
    """Return *text* reduced to lowercase word tokens separated by single spaces.

    The cleanup runs in three stages, in this order:

    1. Every ``<...>`` span on a single line is replaced with a space
       (``.`` does not match newlines, so a tag split across lines is not
       treated as a tag).
    2. Every remaining non-word character (anything outside ``\\w``) is
       replaced with a space; letters, digits and underscores survive.
    3. The result is lowercased, runs of whitespace collapse to one space,
       and leading/trailing whitespace is stripped.
    """
    without_markup = re.sub(r"<.*?>", " ", text)
    word_chars_only = re.sub(r"\W", " ", without_markup)
    # Lowercase only after the \W pass so the stage order matches exactly.
    return re.sub(r"\s+", " ", word_chars_only.lower()).strip()

# ✅ Load pickled model, vectorizer, and label binarizer
@st.cache_resource
def load_artifacts():
    """Unpickle the model, vectorizer and label binarizer from disk.

    The three files are opened relative to the current working directory,
    in the order model -> vectorizer -> label binarizer. The function is
    wrapped in ``st.cache_resource``, so Streamlit is expected to reuse the
    loaded objects across reruns instead of unpickling them again.

    Returns:
        A ``(model, vectorizer, mlb)`` tuple of the unpickled objects.

    Raises:
        Whatever ``open`` or ``pickle.load`` raises, e.g. ``FileNotFoundError``
        when an artifact is missing.
    """
    # NOTE: pickle executes arbitrary code on load; these files must come
    # from a trusted source.
    artifact_paths = ("model (1).pkl", "tfidf (1).pkl", "mlb (1).pkl")
    unpickled = []
    for artifact_path in artifact_paths:
        with open(artifact_path, "rb") as handle:
            unpickled.append(pickle.load(handle))
    model, vectorizer, mlb = unpickled
    return model, vectorizer, mlb

# Fetch the model, text vectorizer and label binarizer used below.
model, vectorizer, mlb = load_artifacts()

# ---- Page header ----
st.title("🔖 Stack Overflow Tags Predictor")
st.markdown("Enter a question's *title* and *description*, and this app will suggest relevant tags.")

# ---- Question inputs ----
title = st.text_input("📝 Question Title")
body = st.text_area("📄 Question Description", height=200)

# ---- Prediction, run only when the button is clicked ----
if st.button("🔍 Predict Tags"):
    if title.strip() and body.strip():
        # Title and description are cleaned and vectorized as one document.
        input_text = clean_text(f"{title} {body}")
        X_input = vectorizer.transform([input_text])

        # y_pred stays None when prediction fails, which skips the
        # tag-decoding step below.
        y_pred = None
        try:
            y_pred = model.predict(X_input)
        except Exception as e:
            st.error(f"❌ Prediction failed: {e}")

        if y_pred is not None:
            # Presumably yields one collection of tag names per input row
            # (a single row here) — confirm against the pickled binarizer.
            predicted_tags = mlb.inverse_transform(y_pred)

            if not predicted_tags or not predicted_tags[0]:
                st.info("🤔 No tags predicted. Try refining your question.")
            else:
                st.success("✅ Predicted Tags:")
                st.write(", ".join(predicted_tags[0]))
    else:
        # Either field is empty or whitespace-only.
        st.warning("⚠ Please enter both a title and a description.")