import streamlit as st
import pickle
import numpy as np
import pandas as pd
import nltk
import re
from nltk.corpus import stopwords
from bs4 import BeautifulSoup

# Fetch the NLTK data packages, in the same order as before.
for _corpus in ("stopwords", "punkt", "wordnet"):
    nltk.download(_corpus)


def _unpickle(path):
    """Open the file at *path* in binary mode and return the unpickled object."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Load required models and vectorizers
# NOTE: unpickling can execute arbitrary code, so these files must come from
# a trusted source.
# NOTE(review): everything here runs at module top level on each execution of
# the script; consider caching if load time becomes a problem — confirm
# against the Streamlit version in use.
model = _unpickle("final_model.pkl")
tfidf_vectorizer = _unpickle("tfidf_vectorizer.pkl")
count_vectorizer = _unpickle("count_vectorizer.pkl")

# English stop words as a set, used for membership tests in clean_text().
stop_words = set(stopwords.words("english"))

# Streamlit setup
st.set_page_config(page_title="Stack Overflow Tag Predictor")

# CSS applied to the `.stApp` selector: midnight-blue background, white text.
_PAGE_STYLE = """
    <style>
    .stApp {
        background-color: midnightblue;
        color: white;
    }
    </style>
    """
st.markdown(_PAGE_STYLE, unsafe_allow_html=True)

st.title("🧠 Stack Overflow Tag Predictor")
# Raw <br> emitted right after the title.
st.markdown("<br>", unsafe_allow_html=True)

# Preprocessing function
def clean_text(text):
    """Normalize raw question text into a space-separated string of tokens.

    Non-string input yields an empty string. Otherwise the text is run
    through BeautifulSoup's ``html.parser`` to extract its text, any
    remaining ``<...>`` spans are removed, every character other than ASCII
    letters and whitespace is deleted, and the result is lower-cased. Tokens
    that appear in the module-level ``stop_words`` set, or that are shorter
    than three characters, are dropped.

    Args:
        text: The raw question title and/or body.

    Returns:
        The surviving tokens joined by single spaces (possibly empty).
    """
    if not isinstance(text, str):
        return ""

    plain = BeautifulSoup(text, "html.parser").get_text()
    # Second pass: remove anything still shaped like <...> after parsing.
    plain = re.sub(r"<.*?>", "", plain)
    # Characters are deleted, not replaced by spaces, so "a,b" becomes "ab".
    letters_only = re.sub(r"[^a-zA-Z\s]", "", plain).lower()

    tokens = letters_only.split()
    return " ".join(
        token for token in tokens if len(token) > 2 and token not in stop_words
    )

# Prediction function
def predict_tags(text):
    """Return the list of tag names the model predicts for *text*.

    The text is cleaned with ``clean_text``, vectorized with
    ``tfidf_vectorizer`` and passed to ``model.predict``. The prediction is
    converted with ``.toarray()`` and its columns are labelled with
    ``count_vectorizer.get_feature_names_out()``. Every column whose value in
    the first row equals 1 is reported as a predicted tag.

    Args:
        text: The raw question title and/or body.

    Returns:
        A list of the column labels flagged 1 for this question (may be empty).
    """
    # Assumes model.predict returns an object exposing .toarray()
    # (e.g. a SciPy sparse matrix) — TODO confirm against the pickled model.
    features = tfidf_vectorizer.transform([clean_text(text)])
    dense_prediction = model.predict(features).toarray()
    labelled = pd.DataFrame(
        dense_prediction,
        columns=count_vectorizer.get_feature_names_out(),
    )
    first_row = labelled.iloc[0].values
    return [name for name, flag in zip(labelled.columns, first_row) if flag == 1]

# User input
question = st.text_area("Enter your Stack Overflow question title and/or description", height=200)
predict_clicked = st.button("Predict Tags")

if predict_clicked and not question.strip():
    # Button pressed with an empty or whitespace-only question.
    st.warning("Please enter a question to predict tags.")
elif predict_clicked:
    predicted_tags = predict_tags(question)
    st.subheader("✅ Predicted Tags:")
    if not predicted_tags:
        st.info("No tags predicted. Try refining your question.")
    else:
        # One success box per predicted tag, rendered hashtag-style.
        for tag in predicted_tags:
            st.success(f"#{tag}")