Spaces:

Adityaganesh
/

News_Classification_prediction

Sleeping

App Files Files Community

Adityaganesh committed on Apr 2

Commit

1df2517

verified ·

1 Parent(s): a6592dd

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -24

app.py CHANGED Viewed

@@ -12,27 +12,24 @@ from nltk.corpus import stopwords
 # Ensure necessary downloads
 nltk.download("punkt")
 nltk.download("wordnet")
-nltk.download("stopwords")
-nltk.download("omw-1.4")  # Optional but useful for lemmatization
 lemmatizer = WordNetLemmatizer()
 stop_words = set(stopwords.words('english'))
-def pre_process(x):
-    x = x.lower()
-    x = re.sub("<.*?>", "", x)
-    x = re.sub("http[s]?://.+?\\S+", "", x)
-    x = re.sub("[@#].+?\\S", "", x)
-    x = re.sub(r"\\_+", " ", x)
-    x = re.sub("^[A-Za-z.].*\\s-\\s", "", x)
-    x = emoji.demojize(x)
-    x = re.sub(":.*?:", "", x)
-    x = re.sub("[^a-zA-Z0-9\\s_]", "", x)
-    words = word_tokenize(x)
-    words = [word for word in words if word not in stop_words]
-    x = " ".join([lemmatizer.lemmatize(word) for word in words])
-    return x
 # Load the label encoder
 with open("label_encoder.pkl", "rb") as f:
@@ -45,24 +42,21 @@ text_vectorizer = tf.keras.models.load_model("news_tv_model.keras")
 news_model = tf.keras.models.load_model("news_model.keras")
 def predict_category(text):
-    # Preprocess the input text
-    processed_text = [pre_process(text[0])]
     vectorized_text = text_vectorizer(processed_text)
-    # Predict category
     prediction = news_model.predict(vectorized_text)
     predicted_label_index = np.argmax(prediction, axis=1)[0]
-    predicted_label = label_encoder.inverse_transform([predicted_label_index])[0]
-    return predicted_label
 # Streamlit UI
 st.title("News Classification App")
 # User input
-user_text = st.text_area("Enter news text:")
 if st.button("Predict Category"):
     if user_text.strip():
-        category = predict_category([user_text])
         st.success(f"Predicted Category: {category}")
     else:
         st.warning("Please enter some text to classify.")

 # Ensure necessary downloads
 nltk.download("punkt")
 nltk.download("wordnet")
+nltk.download('stopwords')
 lemmatizer = WordNetLemmatizer()
 stop_words = set(stopwords.words('english'))
+def pre_process(text):
+    text = text.lower()
+    text = re.sub("<.*?>", "", text)
+    text = re.sub("http[s]?://\\S+", "", text)
+    text = re.sub("[@#]\\S+", "", text)
+    text = re.sub(r"\\_+", " ", text)
+    text = re.sub("^[A-Za-z.].*\\s-\\s", "", text)
+    text = emoji.demojize(text)
+    text = re.sub(":.*?:", "", text)
+    text = re.sub("[^a-zA-Z0-9\\s_]", "", text)
+    words = word_tokenize(text)
+    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
+    return " ".join(words)
 # Load the label encoder
 with open("label_encoder.pkl", "rb") as f:
 news_model = tf.keras.models.load_model("news_model.keras")
 def predict_category(text):
+    processed_text = [pre_process(text)]
     vectorized_text = text_vectorizer(processed_text)
     prediction = news_model.predict(vectorized_text)
     predicted_label_index = np.argmax(prediction, axis=1)[0]
+    return label_encoder.inverse_transform([predicted_label_index])[0]
 # Streamlit UI
 st.title("News Classification App")
 # User input
+user_text = st.text_area("Enter your news content for classification.")
 if st.button("Predict Category"):
     if user_text.strip():
+        category = predict_category(user_text)
         st.success(f"Predicted Category: {category}")
     else:
         st.warning("Please enter some text to classify.")