Ginidu2003 committed on
Commit
05ad179
·
verified ·
1 Parent(s): c368330

Create streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +73 -0
streamlit_app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import torch
4
+ from transformers import pipeline
5
+ import nltk
6
+ from nltk.corpus import stopwords
7
+ from nltk.stem import WordNetLemmatizer
8
+ import re
9
+ import string
10
+
11
+ # ====================== PREPROCESSING (Same as Task 2) ======================
12
+
13
+ # ====================== LOAD FINE-TUNED MODEL ======================
14
@st.cache_resource
def load_model():
    """Build the text-classification pipeline for the fine-tuned news model.

    Decorated with ``st.cache_resource`` so Streamlit reuses the returned
    pipeline across script reruns instead of rebuilding it each time.

    Returns:
        The object produced by ``transformers.pipeline`` for the
        ``"text-classification"`` task.
    """
    # Hugging Face Hub id of the fine-tuned checkpoint (must match exactly).
    hub_model_id = "Ginidu2003/Distilbert-Base-News-classifier"

    # Device index passed to the pipeline: 0 when CUDA is available, else -1.
    if torch.cuda.is_available():
        target_device = 0
    else:
        target_device = -1

    return pipeline("text-classification", model=hub_model_id, device=target_device)
22
+
23
def _classify_texts(texts, classify):
    """Return one predicted label per entry of *texts*.

    Args:
        texts: Iterable of strings to classify.
        classify: Callable following the text-classification pipeline
            convention: called with a single string, it returns a list whose
            first item is a dict containing a ``'label'`` key.

    Returns:
        A list of labels, in input order. Blank or whitespace-only entries
        are labelled ``"Unknown"`` without calling the model.
    """
    labels = []
    for text in texts:
        if not text.strip():
            labels.append("Unknown")
            continue
        # Truncate long articles: the model name indicates a DistilBERT-base
        # checkpoint, whose inputs are limited to 512 tokens; without
        # truncation longer texts fail inside the model.
        result = classify(text, truncation=True, max_length=512)[0]
        labels.append(result['label'])
    return labels


classifier = load_model()

# ====================== STREAMLIT APP ======================
st.title("📰 Daily Mirror News Classifier")
st.subheader("Classify news into Business, Opinion, Political Gossip, Sports, or World News")

st.markdown("**Upload a CSV file** with a column named `content`")

uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])

if uploaded_file is not None:
    # An empty or malformed upload should produce a readable message, not a
    # raw traceback in the app.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        df = None
        st.error(f"Could not read the uploaded CSV: {exc}")

    if df is not None:
        st.write("### Preview of uploaded data")
        st.dataframe(df.head())

        if 'content' not in df.columns:
            st.error("Your CSV must have a column named 'content'")
        else:
            with st.spinner("Preprocessing and classifying..."):
                # Classify the raw `content` column. The previous code read a
                # `clean_content` column that was never created (its
                # preprocessing step was disabled), which raised KeyError.
                # NOTE(review): if the model was fine-tuned on preprocessed
                # text, restore the same preprocessing here.
                # Missing cells become "" so they are labelled "Unknown"
                # instead of crashing on `.strip()`.
                contents = df['content'].fillna("").astype(str)
                df['class'] = _classify_texts(contents, classifier)

            st.success("✅ Classification completed!")
            st.write("### Preview of classified data")
            st.dataframe(df.head())

            # Download button
            csv = df.to_csv(index=False).encode('utf-8')
            st.download_button(
                label="📥 Download output.csv",
                data=csv,
                file_name="output.csv",
                mime="text/csv"
            )

st.caption("Built for Text Analytics Assignment - Section 02")