shwetashweta05 committed on
Commit
930a19b
·
verified ·
1 Parent(s): 78e21a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -31
app.py CHANGED
@@ -1,42 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
- import joblib
 
 
 
3
  import re
4
- from bs4 import BeautifulSoup
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- # Load saved models
7
- model = joblib.load("count_vectorizer.pkl")
8
- vectorizer = joblib.load("final_model.pkl")
9
- mlb = joblib.load("tfidf_vectorizer.pkl")
10
 
11
- # Clean user input
12
- def clean_text(text):
13
- soup = BeautifulSoup(text, "html.parser").get_text()
14
- text = re.sub(r'[^a-zA-Z\s]', '', text)
15
- text = text.lower()
16
- return text
17
 
18
- # Streamlit UI
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  st.title("🧠 Stack Overflow Tag Predictor")
20
- st.write("Enter the title and body of your Stack Overflow question:")
21
 
22
- title = st.text_input("question")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
 
25
- if st.button("Predict Tags"):
26
- if not title or not body:
27
- st.warning("Please enter both title and body.")
28
- else:
29
- # Preprocess
30
- combined_text = clean_text(title + " " + body)
31
- transformed = vectorizer.transform([combined_text])
32
 
33
- # Predict
34
- pred = model.predict(transformed)
35
- tags = mlb.inverse_transform(pred)
 
 
 
 
36
 
37
- # Display
38
- if tags and tags[0]:
39
- st.success("Predicted Tags:")
40
- st.write(", ".join(tags[0]))
41
- else:
42
- st.info("No tags could be predicted with the current model.")
 
1
# NOTE: this file was copied from the Hugging Face Space
# Chait333/Stack_Overflow_Tag_Prediction (Home.py, commit 5f267c3, 5.75 kB).
# The web-page navigation text that was pasted in together with the code
# (site menu, like counter, file toolbar) is reduced to this comment because
# it is not valid Python and would stop the app from starting.
41
  import streamlit as st
42
+ import pickle
43
+ import numpy as np
44
+ import pandas as pd
45
+ import nltk
46
  import re
47
+ import emoji
48
+ import string
49
+ import contractions
50
+ from nltk.corpus import stopwords
51
+ from nltk.tokenize import word_tokenize
52
+ from nltk.stem import PorterStemmer,LancasterStemmer, SnowballStemmer, WordNetLemmatizer
53
+
54
# Fetch the NLTK corpora/tokenizer data the app relies on.
for _resource in ("stopwords", "punkt", "punkt_tab", "wordnet"):
    nltk.download(_resource)


def _read_pickle(path):
    """Return the object unpickled from the file at *path*."""
    # NOTE: pickle executes arbitrary code on load; only ship trusted artifacts.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Trained artifacts shipped next to this script.
model = _read_pickle("final_model.pkl")
tfidf_vectorizer = _read_pickle("tfidf_vectorizer.pkl")
count_vectorizer = _read_pickle("count_vectorizer.pkl")
 
 
 
 
67
 
68
st.set_page_config(page_title="Stack Overflow Tag Predictor")

# CSS injected into the page to give the app container a midnight-blue background.
_PAGE_CSS = """
<style>
.stApp {
background-color: midnightblue;
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Main title
st.title("🧠 Stack Overflow Tag Predictor")

# Line break used as a vertical spacer under the title.
st.markdown("<br>", unsafe_allow_html=True)
85
+
86
def predict_tags(text):
    """Predict tags for a question.

    The text is normalised (HTML tags stripped, lower-cased, everything except
    letters and whitespace removed, English stop words and words of one or two
    characters dropped), vectorised with the module-level ``tfidf_vectorizer``
    and passed to ``model``. Every prediction column equal to 1 is mapped to
    the name at the same position in
    ``count_vectorizer.get_feature_names_out()``.

    Args:
        text: Raw question text entered by the user.

    Returns:
        A list of predicted tag names; empty when ``text`` is blank or the
        model predicts no tag.
    """
    if not text or not text.strip():
        return []

    stop_words = set(stopwords.words("english"))

    cleaned_text = re.sub(r'<.*?>', '', text)
    # Lower-case BEFORE filtering on [^a-z\s]; in the opposite order every
    # uppercase letter is deleted instead of being normalised.
    cleaned_text = cleaned_text.lower()
    cleaned_text = re.sub(r'[^a-z\s]', '', cleaned_text)
    words = [
        word for word in cleaned_text.split()
        if word not in stop_words and len(word) > 2
    ]
    cleaned_text = ' '.join(words)

    # Vectorise the cleaned text (the raw text used to be passed here, which
    # made all of the cleaning above dead code).
    # NOTE(review): assumes the vectorizer was fitted on text cleaned the same
    # way - confirm against the training code.
    features = tfidf_vectorizer.transform([cleaned_text])
    pred = model.predict(features)

    # The prediction may be a sparse matrix or a dense array depending on the
    # estimator; reduce either to the single row for this question.
    pred_row = (pred.toarray() if hasattr(pred, "toarray") else np.asarray(pred))[0]
    tag_names = count_vectorizer.get_feature_names_out()
    return [tag for flag, tag in zip(pred_row, tag_names) if flag == 1]
102
 
103
 
104
question = st.text_input("Enter the question title")

# Run the model only when there is something to classify; an empty box yields
# no tags. (predicted_tags was previously read without ever being assigned,
# which raised NameError on every page load.)
predicted_tags = predict_tags(question) if question.strip() else []

# Display tags
st.subheader("✅ Predicted Tags")
if predicted_tags:
    for tag in predicted_tags:
        st.markdown(f"#{tag}")
else:
    st.info("No tags predicted. Try refining your question and description.")
113