dinusha11 commited on
Commit
cd4fcbb
·
verified ·
1 Parent(s): ac32e60

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +182 -0
  2. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import torch
4
+ from transformers import pipeline, AutoTokenizer
5
+ import matplotlib.pyplot as plt
6
+ from wordcloud import WordCloud
7
+
8
# Fine-tuned DistilBERT news-classification model hosted on the Hugging Face Hub.
MODEL_NAME = "dinusha11/finetuned-distilbert-news"

# Maps the pipeline's generic output labels to human-readable category names.
# NOTE(review): assumes LABEL_0..LABEL_4 follow this exact order in the
# fine-tuned model's config (id2label) — confirm against the model card.
label_mapping = {
    "LABEL_0": "Business",
    "LABEL_1": "Opinion",
    "LABEL_2": "Sports",
    "LABEL_3": "Political_gossip",
    "LABEL_4": "World_news"
}
19
+
20
# Build the news-classification pipeline once per session (cached by Streamlit).
@st.cache_resource
def load_model():
    """Return a text-classification pipeline backed by the fine-tuned model."""
    news_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    gpu_available = torch.cuda.is_available()
    return pipeline(
        "text-classification",
        model=MODEL_NAME,
        tokenizer=news_tokenizer,
        device=0 if gpu_available else -1,
    )

classifier = load_model()
29
+
30
# Build the extractive question-answering pipeline once per session.
@st.cache_resource
def load_qa_pipeline():
    """Return a question-answering pipeline.

    The model is pinned explicitly (this is the transformers library default
    for the task) so behavior stays identical even if the library changes its
    default, and the device is chosen consistently with the classifier
    pipeline instead of always running on CPU.
    """
    return pipeline(
        "question-answering",
        model="distilbert-base-cased-distilled-squad",
        device=0 if torch.cuda.is_available() else -1,
    )

qa_pipeline = load_qa_pipeline()
36
+
37
# Build the sentiment-analysis pipeline once per session.
@st.cache_resource
def load_sentiment_pipeline():
    """Return a sentiment-analysis pipeline.

    The model is pinned explicitly (this is the transformers library default
    for the task) for reproducibility, and the device is chosen consistently
    with the classifier pipeline instead of always running on CPU.
    """
    return pipeline(
        "sentiment-analysis",
        model="distilbert-base-uncased-finetuned-sst-2-english",
        device=0 if torch.cuda.is_available() else -1,
    )

sentiment_pipeline = load_sentiment_pipeline()
43
+
44
# Function to preprocess text
def preprocess_text(text):
    """Normalize one article for model input.

    Coerces non-string values to text and strips surrounding whitespace.
    Missing cells (None, or NaN coming from pandas CSV parsing) become "",
    so they no longer raise AttributeError on .strip().
    """
    # NaN is the only value unequal to itself — detects it without pandas.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    return str(text).strip()
47
+
48
# Function for Q&A
def get_answer(question, context):
    """Run extractive QA over *context* and return only the answer span."""
    qa_result = qa_pipeline(question=question, context=context)
    return qa_result['answer']
51
+
52
# Function to generate word cloud
def generate_wordcloud(text):
    """Build an 800x400 white-background word cloud image from *text*."""
    cloud = WordCloud(
        width=800,
        height=400,
        background_color='white',
    )
    # .generate() returns the WordCloud object itself, ready for imshow().
    return cloud.generate(text)
56
+
57
# Function to analyze sentiment
def analyze_sentiment(text):
    """Return the sentiment label for *text* (truncated to 512 characters)."""
    # NOTE(review): this trims 512 *characters*, not tokens — very long words
    # could still exceed the model's token limit; confirm pipeline truncation.
    truncated = text[:512]
    prediction = sentiment_pipeline(truncated)
    return prediction[0]['label']
60
+
61
# Custom CSS Styling
# Injected once at app start; unsafe_allow_html is required for a raw <style> tag.
# NOTE(review): .css-1aumxhk targets an auto-generated Streamlit class name,
# which is fragile across Streamlit versions — verify it still matches.
st.markdown("""
<style>
body {
    font-family: Arial, sans-serif;
    background-color: #f8f9fa;
}
.css-1aumxhk {
    display: none;
}
.main-title {
    text-align: center;
    font-size: 36px;
    color: #2b2d42;
}
.stButton>button {
    width: 100%;
    border-radius: 10px;
}
</style>
""", unsafe_allow_html=True)
82
+
83
# Sidebar Navigation
# The selected entry drives the top-level if/elif page dispatch below.
_PAGES = ["Home", "News Classification", "Q&A", "Word Cloud", "Sentiment Analysis"]
st.sidebar.title("Navigation")
page = st.sidebar.radio("Go to:", _PAGES)
86
+
87
# Home Page
if page == "Home":
    st.title("📰 News Classification & Analysis App")
    st.write("Welcome to the AI-powered news classification and analysis platform.")
    # Feature overview rendered as a markdown bullet list.
    feature_overview = """
    - 📌 **Upload a CSV** containing news articles.
    - 🔍 **Get Classification** into Business, Opinion, Political Gossip, Sports, or World News.
    - 🧠 **Ask AI Questions** on news content.
    - ☁ **Visualize Data** with a Word Cloud.
    - 📊 **Analyze Sentiment** of news articles.
    """
    st.write(feature_overview)
    st.success("Get started by navigating to 'News Classification' from the sidebar!")
99
+
100
+ # News Classification Page
101
+ elif page == "News Classification":
102
+ st.title("πŸ“ Classify News Articles")
103
+ uploaded_file = st.file_uploader("πŸ“‚ Upload a CSV file", type=["csv"], key="file_uploader")
104
+
105
+ if uploaded_file:
106
+ df = pd.read_csv(uploaded_file)
107
+ if 'content' not in df.columns:
108
+ st.error("The CSV file must contain a 'content' column.")
109
+ else:
110
+ df['processed_content'] = df['content'].apply(preprocess_text)
111
+ df['class'] = df['processed_content'].apply(lambda x: label_mapping[classifier(x[:512])[0]['label']])
112
+ st.success("βœ… Classification completed!")
113
+
114
+ with st.expander("πŸ“‹ View Classified News"):
115
+ st.dataframe(df[['content', 'class']])
116
+
117
+ # Download button
118
+ output_csv = df[['content', 'class']].to_csv(index=False).encode('utf-8')
119
+ st.download_button("⬇ Download Classified Data", data=output_csv, file_name="output.csv", mime="text/csv")
120
+
121
+ # Q&A Section
122
+ elif page == "Q&A":
123
+ st.title("🧠 Ask Questions About News Content")
124
+ uploaded_file_qa = st.file_uploader("πŸ“‚ Upload CSV for Q&A", type=["csv"], key="qa_file_uploader")
125
+
126
+ if uploaded_file_qa:
127
+ df_qa = pd.read_csv(uploaded_file_qa)
128
+ if 'content' not in df_qa.columns:
129
+ st.error("The CSV file must contain a 'content' column.")
130
+ else:
131
+ st.write("πŸ“° **Available News Articles:**")
132
+ selected_article = st.selectbox("Select an article", df_qa['content'])
133
+
134
+ question = st.text_input("πŸ” Ask a question about this article:")
135
+
136
+ if question and selected_article.strip():
137
+ try:
138
+ answer = get_answer(question, selected_article)
139
+ st.success(f"**Answer:** {answer}")
140
+ except Exception as e:
141
+ st.error(f"Error processing question: {str(e)}")
142
+
143
+ # Word Cloud Section
144
+ elif page == "Word Cloud":
145
+ st.title("☁ Word Cloud Visualization")
146
+ uploaded_file_wc = st.file_uploader("πŸ“‚ Upload CSV for Word Cloud", type=["csv"], key="wc_file_uploader")
147
+
148
+ if uploaded_file_wc:
149
+ df_wc = pd.read_csv(uploaded_file_wc)
150
+ if 'content' not in df_wc.columns:
151
+ st.error("The CSV file must contain a 'content' column.")
152
+ else:
153
+ all_text = " ".join(df_wc['content'].dropna().astype(str))
154
+ if all_text:
155
+ wordcloud = generate_wordcloud(all_text)
156
+ fig, ax = plt.subplots(figsize=(10, 5))
157
+ ax.imshow(wordcloud, interpolation="bilinear")
158
+ ax.axis("off")
159
+ st.pyplot(fig)
160
+ else:
161
+ st.error("The 'content' column is empty or contains invalid data.")
162
+
163
+ # Sentiment Analysis Section
164
+ elif page == "Sentiment Analysis":
165
+ st.title("πŸ“Š Sentiment Analysis")
166
+ uploaded_file_sentiment = st.file_uploader("πŸ“‚ Upload CSV for Sentiment Analysis", type=["csv"], key="sentiment_file_uploader")
167
+
168
+ if uploaded_file_sentiment:
169
+ df_sentiment = pd.read_csv(uploaded_file_sentiment)
170
+ if 'content' not in df_sentiment.columns:
171
+ st.error("The CSV file must contain a 'content' column.")
172
+ else:
173
+ df_sentiment['sentiment'] = df_sentiment['content'].apply(lambda x: analyze_sentiment(x[:512]))
174
+ st.success("βœ… Sentiment Analysis Completed!")
175
+
176
+ with st.expander("πŸ“‹ View Sentiment Results"):
177
+ st.dataframe(df_sentiment[['content', 'sentiment']])
178
+
179
+ # Download button
180
+ output_csv_sentiment = df_sentiment[['content', 'sentiment']].to_csv(index=False).encode('utf-8')
181
+ st.download_button("⬇ Download Sentiment Data", data=output_csv_sentiment, file_name="sentiment_output.csv", mime="text/csv")
182
+
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# transformers is pinned for reproducibility; the remaining dependencies
# track their latest compatible releases.
transformers==4.38.2
torch
pandas
numpy
scikit-learn
streamlit
wordcloud
matplotlib
9
+