Spaces:

shwetashweta05
/

Stack_Overflow

Sleeping

App Files Files Community

shwetashweta05 committed on Jun 12, 2025

Commit

930a19b

verified ·

1 Parent(s): 78e21a3

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -31

app.py CHANGED Viewed

@@ -1,42 +1,113 @@
 import streamlit as st
-import joblib
 import re
-from bs4 import BeautifulSoup
-# Load saved models
-model = joblib.load("count_vectorizer.pkl")
-vectorizer = joblib.load("final_model.pkl")
-mlb = joblib.load("tfidf_vectorizer.pkl")
-# Clean user input
-def clean_text(text):
-    soup = BeautifulSoup(text, "html.parser").get_text()
-    text = re.sub(r'[^a-zA-Z\s]', '', text)
-    text = text.lower()
-    return text
-# Streamlit UI
 st.title("🧠 Stack Overflow Tag Predictor")
-st.write("Enter the title and body of your Stack Overflow question:")
-title = st.text_input("question")
-if st.button("Predict Tags"):
-    if not title or not body:
-        st.warning("Please enter both title and body.")
-    else:
-        # Preprocess
-        combined_text = clean_text(title + " " + body)
-        transformed = vectorizer.transform([combined_text])
-        # Predict
-        pred = model.predict(transformed)
-        tags = mlb.inverse_transform(pred)
-        # Display
-        if tags and tags[0]:
-            st.success("Predicted Tags:")
-            st.write(", ".join(tags[0]))
-        else:
-            st.info("No tags could be predicted with the current model.")

+Hugging Face's logo
+Hugging Face
+Models
+Datasets
+Spaces
+Docs
+Pricing
+Spaces:
+Chait333
+/
+Stack_Overflow_Tag_Prediction
+like
+0
+App
+Files
+Community
+Stack_Overflow_Tag_Prediction
+/
+Home.py
+Chait333's picture
+Chait333
+Update Home.py
+5f267c3
+verified
+13 days ago
+raw
+Copy download link
+history
+blame
+contribute
+delete
+5.75 kB
 import streamlit as st
+import pickle
+import numpy as np
+import pandas as pd
+import nltk
 import re
+import emoji
+import string
+import contractions
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+from nltk.stem import PorterStemmer,LancasterStemmer, SnowballStemmer, WordNetLemmatizer
+# Fetch the NLTK resources used by the imports above (stop-word list, punkt
+# tokenizer data, WordNet). These are top-level calls, so they run each time
+# this script is executed.
+nltk.download("stopwords")
+nltk.download("punkt")
+nltk.download("punkt_tab")
+nltk.download("wordnet")
+# Load the three pickled artifacts from the current working directory.
+# NOTE(review): pickle.load can execute arbitrary code while unpickling;
+# presumably these files ship with the app -- confirm they are trusted.
+# NOTE(review): judging by the file names, final_model.pkl is the classifier
+# and the other two are fitted vectorizers -- confirm against training code.
+with open("final_model.pkl", "rb") as f:
+    model = pickle.load(f)
+with open("tfidf_vectorizer.pkl", "rb") as f:
+    tfidf_vectorizer = pickle.load(f)
+with open("count_vectorizer.pkl", "rb") as f:
+    count_vectorizer = pickle.load(f)
+# Set the page title. Streamlit expects set_page_config to be called before
+# other Streamlit commands, so keep it above the st.markdown / st.title calls.
+st.set_page_config(page_title="Stack Overflow Tag Predictor")
+# Inject a CSS rule that sets the app container's background colour.
+st.markdown(
+    """
+    <style>
+    .stApp {
+        background-color: midnightblue;
+    }
+    </style>
+    """,
+    unsafe_allow_html=True
+)
+# Main title
+st.title("🧠 Stack Overflow Tag Predictor")
+# Emit an HTML line break as vertical spacing below the title.
+st.markdown("<br>",unsafe_allow_html = True)
+def predict_tags(text):
+  cleaned_text = re.sub(r'<.*?>', '', text)
+  cleaned_text = re.sub(r'[^a-z\s]', '', cleaned_text)
+  cleaned_text = cleaned_text.lower()
+  cleaned_text = cleaned_text.split()
+  cleaned_text = [word for word in cleaned_text if word not in stop_words and len(word) > 2]
+  cleaned_text = ' '.join(cleaned_text)
+  question = tfidf_vect.transform([text])
+  print(question)
+  pred= model.predict(question)
+  pred_array= pd.DataFrame(pred.toarray(), columns = count_vect.get_feature_names_out())
+  tags = []
+  for i, col in zip(pred_array.iloc[0, :].values, count_vect.get_feature_names_out()):
+    if i == 1:
+      tags.append(col)
+  return tags
+question = st.text_input("Enter the question title")
+        # Display tags
+st.subheader("✅ Predicted Tags")
+if predicted_tags:
+    for tag in predicted_tags:
+        st.markdown(f"#{tag}")
+else:
+    st.info("No tags predicted. Try refining your question and description.")