Spaces:

shwetashweta05
/

Stack_Overflow

Sleeping

App Files Files Community

shwetashweta05 committed on Jun 12, 2025

Commit

2bee89e

verified ·

1 Parent(s): 30415de

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -34

app.py CHANGED Viewed

@@ -1,22 +1,17 @@
 import streamlit as st
 import pickle
 import numpy as np
 import pandas as pd
 import nltk
 import re
-import emoji
-import string
-import contractions
 from nltk.corpus import stopwords
-from nltk.tokenize import word_tokenize
-from nltk.stem import PorterStemmer,LancasterStemmer, SnowballStemmer, WordNetLemmatizer
 nltk.download("stopwords")
 nltk.download("punkt")
-nltk.download("punkt_tab")
 nltk.download("wordnet")
 with open("final_model.pkl", "rb") as f:
     model = pickle.load(f)
@@ -26,6 +21,9 @@ with open("tfidf_vectorizer.pkl", "rb") as f:
 with open("count_vectorizer.pkl", "rb") as f:
     count_vectorizer = pickle.load(f)
 st.set_page_config(page_title="Stack Overflow Tag Predictor")
 st.markdown(
@@ -33,42 +31,48 @@ st.markdown(
     <style>
     .stApp {
         background-color: midnightblue;
     }
     </style>
     """,
     unsafe_allow_html=True
 )
-# Main title
 st.title("🧠 Stack Overflow Tag Predictor")
-st.markdown("<br>",unsafe_allow_html = True)
 def predict_tags(text):
-  cleaned_text = re.sub(r'<.*?>', '', text)
-  cleaned_text = re.sub(r'[^a-z\s]', '', cleaned_text)
-  cleaned_text = cleaned_text.lower()
-  cleaned_text = cleaned_text.split()
-  cleaned_text = [word for word in cleaned_text if word not in stop_words and len(word) > 2]
-  cleaned_text = ' '.join(cleaned_text)
-  question = tfidf_vect.transform([text])
-  print(question)
-  pred= model.predict(question)
-  pred_array= pd.DataFrame(pred.toarray(), columns = count_vect.get_feature_names_out())
-  tags = []
-  for i, col in zip(pred_array.iloc[0, :].values, count_vect.get_feature_names_out()):
-    if i == 1:
-      tags.append(col)
-  return tags
-question = st.text_input("Enter the question title")
-        # Display tags
-st.subheader("✅ Predicted Tags")
-if predicted_tags:
-    for tag in predicted_tags:
-        st.markdown(f"#{tag}")
-else:
-    st.info("No tags predicted. Try refining your question and description.")

 import streamlit as st
 import pickle
 import numpy as np
 import pandas as pd
 import nltk
 import re
 from nltk.corpus import stopwords
+from bs4 import BeautifulSoup
 nltk.download("stopwords")
 nltk.download("punkt")
 nltk.download("wordnet")
+# Load required models and vectorizers
 with open("final_model.pkl", "rb") as f:
     model = pickle.load(f)
 with open("count_vectorizer.pkl", "rb") as f:
     count_vectorizer = pickle.load(f)
+stop_words = set(stopwords.words("english"))
+# Streamlit setup
 st.set_page_config(page_title="Stack Overflow Tag Predictor")
 st.markdown(
     <style>
     .stApp {
         background-color: midnightblue;
+        color: white;
     }
     </style>
     """,
     unsafe_allow_html=True
 )
 st.title("🧠 Stack Overflow Tag Predictor")
+st.markdown("<br>", unsafe_allow_html=True)
+# Preprocessing function
+def clean_text(text):
+    if not isinstance(text, str):
+        return ""
+    text = BeautifulSoup(text, "html.parser").get_text()
+    text = re.sub(r"<.*?>", "", text)
+    text = re.sub(r"[^a-zA-Z\s]", "", text)
+    text = text.lower()
+    words = text.split()
+    words = [w for w in words if w not in stop_words and len(w) > 2]
+    return " ".join(words)
+# Prediction function
 def predict_tags(text):
+    cleaned = clean_text(text)
+    question_vec = tfidf_vectorizer.transform([cleaned])
+    prediction = model.predict(question_vec)
+    prediction_df = pd.DataFrame(prediction.toarray(), columns=count_vectorizer.get_feature_names_out())
+    tags = [col for col, val in zip(prediction_df.columns, prediction_df.iloc[0].values) if val == 1]
+    return tags
+# User input
+question = st.text_area("Enter your Stack Overflow question title and/or description", height=200)
+if st.button("Predict Tags"):
+    if not question.strip():
+        st.warning("Please enter a question to predict tags.")
+    else:
+        predicted_tags = predict_tags(question)
+        st.subheader("✅ Predicted Tags:")
+        if predicted_tags:
+            for tag in predicted_tags:
+                st.success(f"#{tag}")
+        else:
+            st.info("No tags predicted. Try refining your question.")