Ginidu2003 committed on
Commit
191b0d0
·
verified ·
1 Parent(s): ab4f49e

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +31 -14
src/streamlit_app.py CHANGED
@@ -11,53 +11,70 @@ import string
11
  st.set_page_config(page_title="Daily Mirror News Classifier", page_icon="πŸ“°")
12
 
13
  # ====================== PREPROCESSING ======================
 
 
 
14
 
 
 
15
 
16
- # ====================== LOAD MODEL (with better error handling) ======================
 
 
 
 
 
 
 
 
 
 
 
17
  @st.cache_resource(show_spinner=False)
18
  def load_model():
19
- model_name = "Ginidu2003/Distilbert-Base-News-classifier" # ← Make sure this is exact
 
 
20
  try:
21
  pipe = pipeline(
22
  "text-classification",
23
  model=model_name,
 
24
  device=0 if torch.cuda.is_available() else -1
25
  )
26
- st.success(f"βœ… Model loaded successfully: {model_name}")
27
  return pipe
28
  except Exception as e:
29
- st.error(f"❌ Failed to load model: {model_name}")
30
- st.error(f"Error: {str(e)}")
31
- st.info("Make sure the model is Public and the name is correct.")
32
  return None
33
 
34
  classifier = load_model()
35
 
 
 
 
36
  # ====================== APP ======================
37
  st.title("πŸ“° Daily Mirror News Classifier")
38
  st.subheader("Classify news into Business, Opinion, Political Gossip, Sports, or World News")
39
 
40
- if classifier is None:
41
- st.stop()
42
-
43
  st.markdown("**Upload a CSV file** with a column named `content`")
44
 
45
  uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
46
 
47
  if uploaded_file is not None:
48
  df = pd.read_csv(uploaded_file)
49
-
50
  st.write("### Preview of uploaded data")
51
  st.dataframe(df.head())
52
 
53
  if 'content' not in df.columns:
54
  st.error("Your CSV must have a column named 'content'")
55
  else:
56
- with st.spinner("Classifying news..."):
57
- #df['clean_content'] = df['content'].apply(preprocess_text)
58
 
59
  predictions = []
60
- for text in df['content']:
61
  if not text.strip():
62
  predictions.append("Unknown")
63
  else:
@@ -65,7 +82,7 @@ if uploaded_file is not None:
65
  predictions.append(result['label'])
66
 
67
  df['class'] = predictions
68
- #df = df.drop(columns=['clean_content'], errors='ignore')
69
 
70
  st.success("βœ… Classification completed!")
71
  st.dataframe(df.head())
 
11
  st.set_page_config(page_title="Daily Mirror News Classifier", page_icon="πŸ“°")
12
 
13
  # ====================== PREPROCESSING ======================
14
# Fetch the NLTK data used by preprocess_text. quiet=True suppresses the
# downloader's console output on every Streamlit rerun.
nltk.download('stopwords', quiet=True)
nltk.download('wordnet', quiet=True)
nltk.download('punkt', quiet=True)
# Recent NLTK releases load the tokenizer models from 'punkt_tab' instead of
# 'punkt'; without it nltk.word_tokenize raises LookupError. Requesting both
# keeps old and new NLTK versions working.
nltk.download('punkt_tab', quiet=True)

# Built once at import time so every preprocess_text call reuses them.
stop_words = set(stopwords.words('english'))  # set -> O(1) membership tests
lemmatizer = WordNetLemmatizer()
20
 
21
# Anything that is not a lowercase ASCII letter or whitespace. Compiled once
# because preprocess_text is applied to every row of the uploaded CSV.
_NON_LETTER_RE = re.compile(r'[^a-z\s]')


def preprocess_text(text):
    """Normalize a news article into a space-joined string of lemmas.

    Steps: lowercase, replace every character other than a-z/whitespace with
    a space, tokenize with NLTK, drop English stop words, lemmatize.

    Args:
        text: The raw article text. Any non-string value (for example the
            NaN pandas produces for an empty cell) is treated as missing.

    Returns:
        The cleaned text, or "" when ``text`` is not a string.
    """
    if not isinstance(text, str):
        return ""

    # A single substitution is sufficient: once lowercased, every punctuation
    # mark, digit and non-ASCII character is outside [a-z\s]. This replaces
    # the earlier extra pass that interpolated unescaped string.punctuation
    # into a character class.
    text = _NON_LETTER_RE.sub(' ', text.lower())

    tokens = nltk.word_tokenize(text)
    return ' '.join(
        lemmatizer.lemmatize(word)
        for word in tokens
        if word not in stop_words
    )
31
+
32
+ # ====================== LOAD MODEL ======================
33
  @st.cache_resource(show_spinner=False)
34
  def load_model():
35
+ model_name = "Ginidu2003/Distilbert-Base-News-classifier"
36
+ hf_token = st.secrets.get("HF_TOKEN") # Reads the secret you added
37
+
38
  try:
39
  pipe = pipeline(
40
  "text-classification",
41
  model=model_name,
42
+ token=hf_token, # ← This fixes most 403 errors
43
  device=0 if torch.cuda.is_available() else -1
44
  )
45
+ st.success("βœ… Model loaded successfully!")
46
  return pipe
47
  except Exception as e:
48
+ st.error("❌ Failed to load model")
49
+ st.error(str(e))
 
50
  return None
51
 
52
  classifier = load_model()
53
 
54
+ if classifier is None:
55
+ st.stop()
56
+
57
  # ====================== APP ======================
58
  st.title("πŸ“° Daily Mirror News Classifier")
59
  st.subheader("Classify news into Business, Opinion, Political Gossip, Sports, or World News")
60
 
 
 
 
61
  st.markdown("**Upload a CSV file** with a column named `content`")
62
 
63
  uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
64
 
65
  if uploaded_file is not None:
66
  df = pd.read_csv(uploaded_file)
 
67
  st.write("### Preview of uploaded data")
68
  st.dataframe(df.head())
69
 
70
  if 'content' not in df.columns:
71
  st.error("Your CSV must have a column named 'content'")
72
  else:
73
+ with st.spinner("Preprocessing and classifying..."):
74
+ df['clean_content'] = df['content'].apply(preprocess_text)
75
 
76
  predictions = []
77
+ for text in df['clean_content']:
78
  if not text.strip():
79
  predictions.append("Unknown")
80
  else:
 
82
  predictions.append(result['label'])
83
 
84
  df['class'] = predictions
85
+ df = df.drop(columns=['clean_content'], errors='ignore')
86
 
87
  st.success("βœ… Classification completed!")
88
  st.dataframe(df.head())