Adityaganesh committed on
Commit
1df2517
·
verified ·
1 Parent(s): a6592dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -24
app.py CHANGED
@@ -12,27 +12,24 @@ from nltk.corpus import stopwords
12
  # Ensure necessary downloads
13
  nltk.download("punkt")
14
  nltk.download("wordnet")
15
- nltk.download("stopwords")
16
- nltk.download("omw-1.4") # Optional but useful for lemmatization
17
-
18
 
19
  lemmatizer = WordNetLemmatizer()
20
  stop_words = set(stopwords.words('english'))
21
 
22
- def pre_process(x):
23
- x = x.lower()
24
- x = re.sub("<.*?>", "", x)
25
- x = re.sub("http[s]?://.+?\\S+", "", x)
26
- x = re.sub("[@#].+?\\S", "", x)
27
- x = re.sub(r"\\_+", " ", x)
28
- x = re.sub("^[A-Za-z.].*\\s-\\s", "", x)
29
- x = emoji.demojize(x)
30
- x = re.sub(":.*?:", "", x)
31
- x = re.sub("[^a-zA-Z0-9\\s_]", "", x)
32
- words = word_tokenize(x)
33
- words = [word for word in words if word not in stop_words]
34
- x = " ".join([lemmatizer.lemmatize(word) for word in words])
35
- return x
36
 
37
  # Load the label encoder
38
  with open("label_encoder.pkl", "rb") as f:
@@ -45,24 +42,21 @@ text_vectorizer = tf.keras.models.load_model("news_tv_model.keras")
45
  news_model = tf.keras.models.load_model("news_model.keras")
46
 
47
  def predict_category(text):
48
- # Preprocess the input text
49
- processed_text = [pre_process(text[0])]
50
  vectorized_text = text_vectorizer(processed_text)
51
- # Predict category
52
  prediction = news_model.predict(vectorized_text)
53
  predicted_label_index = np.argmax(prediction, axis=1)[0]
54
- predicted_label = label_encoder.inverse_transform([predicted_label_index])[0]
55
- return predicted_label
56
 
57
  # Streamlit UI
58
  st.title("News Classification App")
59
 
60
  # User input
61
- user_text = st.text_area("Enter news text:")
62
 
63
  if st.button("Predict Category"):
64
  if user_text.strip():
65
- category = predict_category([user_text])
66
  st.success(f"Predicted Category: {category}")
67
  else:
68
  st.warning("Please enter some text to classify.")
 
12
  # Ensure necessary downloads
13
  nltk.download("punkt")
14
  nltk.download("wordnet")
15
+ nltk.download('stopwords')
 
 
16
 
17
  lemmatizer = WordNetLemmatizer()
18
  stop_words = set(stopwords.words('english'))
19
 
20
def pre_process(text):
    """Normalise a raw news string into a space-joined sequence of lemmas.

    Steps, in order:
      1. lowercase the input;
      2. strip ``<...>`` tag-like spans, ``http://``/``https://`` URLs and
         tokens starting with ``@`` or ``#``;
      3. replace backslash-plus-underscore runs with a space;
      4. drop a leading prefix that ends in " - " (anchored at the very
         start of the string);
      5. convert emoji to ``:name:`` form and delete every ``:...:`` span;
      6. delete every character that is not a letter, digit, whitespace
         or underscore;
      7. tokenize, discard stop words, lemmatize what is left.

    Args:
        text: The raw text to clean (a ``str``).

    Returns:
        The cleaned tokens joined by single spaces.
    """
    cleaned = text.lower()

    # (pattern, replacement) pairs applied sequentially; order matters
    # because later patterns operate on the output of earlier ones.
    substitutions = (
        ("<.*?>", ""),
        ("http[s]?://\\S+", ""),
        ("[@#]\\S+", ""),
        # NOTE(review): as written in this raw string the pattern requires
        # a literal backslash before the underscores -- confirm intended.
        (r"\\_+", " "),
        ("^[A-Za-z.].*\\s-\\s", ""),
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    # Emoji become ":name:" placeholders first so that the colon-delimited
    # removal below deletes them (it also deletes any other text that
    # happens to sit between two colons).
    cleaned = emoji.demojize(cleaned)
    cleaned = re.sub(":.*?:", "", cleaned)
    cleaned = re.sub("[^a-zA-Z0-9\\s_]", "", cleaned)

    lemmas = []
    for token in word_tokenize(cleaned):
        if token in stop_words:
            continue
        lemmas.append(lemmatizer.lemmatize(token))
    return " ".join(lemmas)
 
33
 
34
  # Load the label encoder
35
  with open("label_encoder.pkl", "rb") as f:
 
42
  news_model = tf.keras.models.load_model("news_model.keras")
43
 
44
  def predict_category(text):
45
+ processed_text = [pre_process(text)]
 
46
  vectorized_text = text_vectorizer(processed_text)
 
47
  prediction = news_model.predict(vectorized_text)
48
  predicted_label_index = np.argmax(prediction, axis=1)[0]
49
+ return label_encoder.inverse_transform([predicted_label_index])[0]
 
50
 
51
  # Streamlit UI
52
  st.title("News Classification App")
53
 
54
  # User input
55
+ user_text = st.text_area("Enter your news content for classification.")
56
 
57
  if st.button("Predict Category"):
58
  if user_text.strip():
59
+ category = predict_category(user_text)
60
  st.success(f"Predicted Category: {category}")
61
  else:
62
  st.warning("Please enter some text to classify.")