Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pickle | |
| import numpy as np | |
| import pandas as pd | |
| import nltk | |
| import re | |
| from nltk.corpus import stopwords | |
| from bs4 import BeautifulSoup | |
# Make sure the NLTK data packages used by this app are installed.
# nltk.download() consults the remote package index each time it is called,
# so look for a local copy first and only download what is actually missing.
_NLTK_RESOURCES = {
    "stopwords": "corpora/stopwords",
    "punkt": "tokenizers/punkt",
    "wordnet": "corpora/wordnet",
}
for _package, _resource_path in _NLTK_RESOURCES.items():
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        # Resource is not present in any NLTK data directory: fetch it.
        nltk.download(_package)
# Load required models and vectorizers
def _read_pickle(path):
    """Return the object deserialized from the pickle file at *path*."""
    # NOTE: unpickling can execute arbitrary code; only trusted .pkl files
    # should ever be placed next to this script.
    with open(path, "rb") as handle:
        return pickle.load(handle)


model = _read_pickle("final_model.pkl")
tfidf_vectorizer = _read_pickle("tfidf_vectorizer.pkl")
count_vectorizer = _read_pickle("count_vectorizer.pkl")

# English stop words, stored as a set.
stop_words = set(stopwords.words("english"))
# Streamlit setup
st.set_page_config(page_title="Stack Overflow Tag Predictor")

# Inline CSS for the app container: midnight-blue background, white text.
_PAGE_STYLE = """
<style>
.stApp {
background-color: midnightblue;
color: white;
}
</style>
"""
st.markdown(_PAGE_STYLE, unsafe_allow_html=True)

# Page heading followed by a line break.
st.title("🧠 Stack Overflow Tag Predictor")
st.markdown("<br>", unsafe_allow_html=True)
# Preprocessing function
def clean_text(text):
    """Normalize raw question text into a space-separated string of tokens.

    Non-string input yields an empty string. Otherwise the text is stripped
    of HTML markup, reduced to ASCII letters and whitespace, lower-cased, and
    filtered so that stop words and tokens of two characters or fewer are
    dropped.
    """
    if not isinstance(text, str):
        return ""

    # Parse as HTML and keep only the text content, then remove any
    # remaining "<...>" sequences.
    plain = BeautifulSoup(text, "html.parser").get_text()
    plain = re.sub(r"<.*?>", "", plain)

    # Everything except ASCII letters and whitespace is deleted (not replaced
    # by a space), so "foo.bar" becomes "foobar".
    letters_only = re.sub(r"[^a-zA-Z\s]", "", plain).lower()

    kept = []
    for token in letters_only.split():
        if len(token) <= 2 or token in stop_words:
            continue
        kept.append(token)
    return " ".join(kept)
# Prediction function
def predict_tags(text):
    """Predict tags for a question.

    The text is cleaned with ``clean_text``, vectorized with the module-level
    ``tfidf_vectorizer`` and passed to ``model.predict``. Output column *i* is
    labelled with entry *i* of ``count_vectorizer.get_feature_names_out()``.

    Args:
        text: Raw question text (title and/or description).

    Returns:
        list: Names of the tags whose predicted indicator equals 1, in
        column order; empty when no tag is predicted.

    Raises:
        ValueError: If the number of predicted columns differs from the
            number of tag names.
    """
    cleaned = clean_text(text)
    question_vec = tfidf_vectorizer.transform([cleaned])
    prediction = model.predict(question_vec)

    # predict() may return a sparse matrix or a dense array depending on how
    # the estimator was fitted; accept both rather than assuming sparse.
    if hasattr(prediction, "toarray"):
        prediction = prediction.toarray()
    # Only one sample is submitted, so the first row holds its indicators.
    indicator_row = np.atleast_2d(np.asarray(prediction))[0]

    tag_names = count_vectorizer.get_feature_names_out()
    if len(tag_names) != len(indicator_row):
        # zip() would silently truncate; fail loudly on a model/vectorizer
        # mismatch instead of returning wrong tags.
        raise ValueError(
            f"Model produced {len(indicator_row)} outputs but the tag "
            f"vocabulary has {len(tag_names)} entries."
        )

    return [tag for tag, flag in zip(tag_names, indicator_row) if flag == 1]
# User input
question = st.text_area(
    "Enter your Stack Overflow question title and/or description",
    height=200,
)

# Predict only after the button is clicked.
if st.button("Predict Tags"):
    if question.strip():
        predicted_tags = predict_tags(question)
        st.subheader("✅ Predicted Tags:")
        if not predicted_tags:
            st.info("No tags predicted. Try refining your question.")
        else:
            # One success box per predicted tag, shown hashtag-style.
            for tag in predicted_tags:
                st.success(f"#{tag}")
    else:
        # Blank or whitespace-only input: ask for a question instead.
        st.warning("Please enter a question to predict tags.")