Spaces:

Ginidu2003
/

Distilbert_Base_News_Classifier

Sleeping

App Files Files Community

Ginidu2003 committed on Apr 4

Commit

191b0d0

verified ·

1 Parent(s): ab4f49e

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +31 -14

src/streamlit_app.py CHANGED Viewed

@@ -11,53 +11,70 @@ import string
 st.set_page_config(page_title="Daily Mirror News Classifier", page_icon="📰")
 # ====================== PREPROCESSING ======================
-# ====================== LOAD MODEL (with better error handling) ======================
 @st.cache_resource(show_spinner=False)
 def load_model():
-    model_name = "Ginidu2003/Distilbert-Base-News-classifier"   # ← Make sure this is exact
     try:
         pipe = pipeline(
             "text-classification",
             model=model_name,
             device=0 if torch.cuda.is_available() else -1
         )
-        st.success(f"✅ Model loaded successfully: {model_name}")
         return pipe
     except Exception as e:
-        st.error(f"❌ Failed to load model: {model_name}")
-        st.error(f"Error: {str(e)}")
-        st.info("Make sure the model is Public and the name is correct.")
         return None
 classifier = load_model()
 # ====================== APP ======================
 st.title("📰 Daily Mirror News Classifier")
 st.subheader("Classify news into Business, Opinion, Political Gossip, Sports, or World News")
-if classifier is None:
-    st.stop()
 st.markdown("**Upload a CSV file** with a column named `content`")
 uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
 if uploaded_file is not None:
     df = pd.read_csv(uploaded_file)
     st.write("### Preview of uploaded data")
     st.dataframe(df.head())
     if 'content' not in df.columns:
         st.error("Your CSV must have a column named 'content'")
     else:
-        with st.spinner("Classifying news..."):
-            #df['clean_content'] = df['content'].apply(preprocess_text)
             predictions = []
-            for text in df['content']:
                 if not text.strip():
                     predictions.append("Unknown")
                 else:
@@ -65,7 +82,7 @@ if uploaded_file is not None:
                     predictions.append(result['label'])
             df['class'] = predictions
-            #df = df.drop(columns=['clean_content'], errors='ignore')
             st.success("✅ Classification completed!")
             st.dataframe(df.head())

 st.set_page_config(page_title="Daily Mirror News Classifier", page_icon="📰")
 # ====================== PREPROCESSING ======================
+nltk.download('stopwords', quiet=True)
+nltk.download('wordnet', quiet=True)
+nltk.download('punkt', quiet=True)
+stop_words = set(stopwords.words('english'))
+lemmatizer = WordNetLemmatizer()
+def preprocess_text(text):
+    if not isinstance(text, str):
+        return ""
+    text = text.lower()
+    text = re.sub(f'[{string.punctuation}]', ' ', text)
+    text = re.sub(r'[^a-z\s]', ' ', text)
+    tokens = nltk.word_tokenize(text)
+    tokens = [word for word in tokens if word not in stop_words]
+    tokens = [lemmatizer.lemmatize(word) for word in tokens]
+    return ' '.join(tokens)
+# ====================== LOAD MODEL ======================
 @st.cache_resource(show_spinner=False)
 def load_model():
+    model_name = "Ginidu2003/Distilbert-Base-News-classifier"
+    hf_token = st.secrets.get("HF_TOKEN")   # Reads the secret you added
     try:
         pipe = pipeline(
             "text-classification",
             model=model_name,
+            token=hf_token,                    # ← This fixes most 403 errors
             device=0 if torch.cuda.is_available() else -1
         )
+        st.success("✅ Model loaded successfully!")
         return pipe
     except Exception as e:
+        st.error("❌ Failed to load model")
+        st.error(str(e))
         return None
 classifier = load_model()
+if classifier is None:
+    st.stop()
 # ====================== APP ======================
 st.title("📰 Daily Mirror News Classifier")
 st.subheader("Classify news into Business, Opinion, Political Gossip, Sports, or World News")
 st.markdown("**Upload a CSV file** with a column named `content`")
 uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
 if uploaded_file is not None:
     df = pd.read_csv(uploaded_file)
     st.write("### Preview of uploaded data")
     st.dataframe(df.head())
     if 'content' not in df.columns:
         st.error("Your CSV must have a column named 'content'")
     else:
+        with st.spinner("Preprocessing and classifying..."):
+            df['clean_content'] = df['content'].apply(preprocess_text)
             predictions = []
+            for text in df['clean_content']:
                 if not text.strip():
                     predictions.append("Unknown")
                 else:
                     predictions.append(result['label'])
             df['class'] = predictions
+            df = df.drop(columns=['clean_content'], errors='ignore')
             st.success("✅ Classification completed!")
             st.dataframe(df.head())