dinusha11 commited on
Commit
2d580de
·
verified ·
1 Parent(s): 4b61c3a

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +198 -0
  2. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import torch
4
+ from transformers import pipeline, AutoTokenizer
5
+ import matplotlib.pyplot as plt
6
+ from wordcloud import WordCloud
7
+
8
+ # Load the fine-tuned DistilBERT model from Hugging Face
9
+ MODEL_NAME = "dinusha11/finetuned-distilbert-news"
10
+
11
+
12
# Build the news-classification pipeline once and reuse it across reruns.
@st.cache_resource
def load_model():
    """Return a cached text-classification pipeline for the fine-tuned model."""
    tok = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Prefer the first CUDA device when available; -1 selects CPU.
    dev = 0 if torch.cuda.is_available() else -1
    return pipeline("text-classification", model=MODEL_NAME, tokenizer=tok, device=dev)


classifier = load_model()
22
+
23
+
24
# Extractive question-answering pipeline (default model), cached by Streamlit.
@st.cache_resource
def load_qa_pipeline():
    """Return a cached question-answering pipeline."""
    qa = pipeline("question-answering")
    return qa


qa_pipeline = load_qa_pipeline()
31
+
32
+
33
# Sentiment-analysis pipeline (default model), cached by Streamlit.
@st.cache_resource
def load_sentiment_pipeline():
    """Return a cached sentiment-analysis pipeline."""
    sa = pipeline("sentiment-analysis")
    return sa


sentiment_pipeline = load_sentiment_pipeline()
40
+
41
+
42
# Function to preprocess text
def preprocess_text(text):
    """Return *text* with surrounding whitespace removed.

    Robustness fix: the CSV 'content' column may contain missing values,
    which pandas represents as float NaN; calling .strip() on a non-string
    raised AttributeError. Non-string input now maps to the empty string.
    """
    if not isinstance(text, str):
        return ""
    return text.strip()
45
+
46
+
47
# Run extractive QA and keep only the answer span.
def get_answer(question, context):
    """Return the answer string extracted from *context* for *question*."""
    result = qa_pipeline(question=question, context=context)
    return result["answer"]
50
+
51
+
52
# Render a word cloud image object from raw text.
def generate_wordcloud(text):
    """Return an 800x400 white-background WordCloud built from *text*."""
    cloud = WordCloud(width=800, height=400, background_color="white")
    return cloud.generate(text)
56
+
57
+
58
# Function to analyze sentiment
def analyze_sentiment(text):
    """Return the sentiment label (e.g. POSITIVE/NEGATIVE) for *text*.

    Fix: the original relied solely on a 512-*character* slice, which does
    not guarantee the model's 512-*token* limit (dense scripts/emoji can
    tokenize to more tokens than characters). The slice is kept as a cheap
    cost guard, and truncation=True enforces the token limit in the
    tokenizer itself.
    """
    return sentiment_pipeline(text[:512], truncation=True)[0]['label']
61
+
62
+
63
# Custom CSS Styling
# Injected as raw HTML: unsafe_allow_html=True is required for the <style>
# tag to be rendered instead of escaped.
# NOTE(review): .css-1aumxhk is an auto-generated Streamlit class name that
# changes between Streamlit versions — confirm it still matches the element
# this rule intends to hide.
st.markdown("""
    <style>
    body {
        font-family: Arial, sans-serif;
        background-color: #f8f9fa;
    }
    .css-1aumxhk {
        display: none;
    }
    .main-title {
        text-align: center;
        font-size: 36px;
        color: #2b2d42;
    }
    .stButton>button {
        width: 100%;
        border-radius: 10px;
    }
    </style>
""", unsafe_allow_html=True)
84
+
85
# Sidebar Navigation — `page` drives the if/elif page dispatch below.
st.sidebar.title("Navigation")
_PAGES = ["Home", "News Classification", "Q&A", "Word Cloud", "Sentiment Analysis"]
page = st.sidebar.radio("Go to:", _PAGES)
89
+
90
# Home Page — static landing content describing the app's features.
if page == "Home":
    st.title("📰 News Classification & Analysis App")
    st.write("Welcome to the AI-powered news classification and analysis platform.")

    # Feature overview rendered as a markdown bullet list.
    st.write("""
    - 📌 **Upload a CSV** containing news articles.
    - 🔍 **Get Classification** into Business, Opinion, Political Gossip, Sports, or World News.
    - 🧠 **Ask AI Questions** on news content.
    - ☁ **Visualize Data** with a Word Cloud.
    - 📊 **Analyze Sentiment** of news articles.
    """)
    st.success("Get started by navigating to 'News Classification' from the sidebar!")
103
+
104
+ # News Classification Page
105
+ elif page == "News Classification":
106
+ st.title("πŸ“ Classify News Articles")
107
+ uploaded_file = st.file_uploader("πŸ“‚ Upload a CSV file", type=["csv"], key="file_uploader")
108
+
109
+ if uploaded_file:
110
+ df = pd.read_csv(uploaded_file)
111
+ if 'content' not in df.columns:
112
+ st.error("The CSV file must contain a 'content' column.")
113
+ else:
114
+ df['processed_content'] = df['content'].apply(preprocess_text)
115
+ df['class'] = df['processed_content'].apply(lambda x: classifier(x[:512])[0]['label'])
116
+ st.success("βœ… Classification completed!")
117
+
118
+ with st.expander("πŸ“‹ View Classified News"):
119
+ st.dataframe(df[['content', 'class']])
120
+
121
+ # Download button
122
+ output_csv = df[['content', 'class']].to_csv(index=False).encode('utf-8')
123
+ st.download_button("⬇ Download Classified Data", data=output_csv, file_name="output.csv", mime="text/csv")
124
+
125
# Q&A Section — pick one uploaded article and ask free-form questions
# answered by the extractive QA pipeline.
elif page == "Q&A":
    st.title("🧠 Ask Questions About News Content")
    uploaded_file_qa = st.file_uploader("📂 Upload CSV for Q&A", type=["csv"], key="qa_file_uploader")

    if uploaded_file_qa:
        df_qa = pd.read_csv(uploaded_file_qa)
        if 'content' not in df_qa.columns:
            st.error("The CSV file must contain a 'content' column.")
        else:
            st.write("📰 **Available News Articles:**")
            # The full article text doubles as the selectbox option label.
            selected_article = st.selectbox("Select an article", df_qa['content'])

            question = st.text_input("🔍 Ask a question about this article:")

            # Only query the model once both a question and a non-blank
            # article are present.
            # NOTE(review): assumes 'content' cells are strings — a NaN cell
            # would make .strip() raise; confirm upstream data is clean.
            if question and selected_article.strip():
                try:
                    answer = get_answer(question, selected_article)
                    st.success(f"**Answer:** {answer}")
                except Exception as e:
                    # Surface pipeline failures to the user instead of a traceback.
                    st.error(f"Error processing question: {str(e)}")
146
+
147
# Word Cloud Section — concatenate all article texts and render one cloud.
elif page == "Word Cloud":
    st.title("☁ Word Cloud Visualization")

    uploaded_file_wc = st.file_uploader("📂 Upload CSV for Word Cloud", type=["csv"], key="wc_file_uploader")

    if uploaded_file_wc:
        df_wc = pd.read_csv(uploaded_file_wc)
        if 'content' not in df_wc.columns:
            st.error("The CSV file must contain a 'content' column.")
        else:
            # dropna + astype(str) guards against NaN and non-string cells
            # before joining everything into a single text blob.
            all_text = " ".join(df_wc['content'].dropna().astype(str))  # Ensure no NaN values
            if all_text:
                wordcloud = generate_wordcloud(all_text)
                # Draw the cloud as an image on a matplotlib axis with the
                # axis frame hidden.
                fig, ax = plt.subplots(figsize=(10, 5))
                ax.imshow(wordcloud, interpolation="bilinear")
                ax.axis("off")
                st.pyplot(fig)
            else:
                st.error("The 'content' column is empty or contains invalid data.")
167
+
168
+ # Sentiment Analysis Section
169
+ elif page == "Sentiment Analysis":
170
+ st.title("πŸ“Š Sentiment Analysis")
171
+
172
+ uploaded_file_sentiment = st.file_uploader("πŸ“‚ Upload CSV for Sentiment Analysis", type=["csv"], key="sentiment_file_uploader")
173
+
174
+ if uploaded_file_sentiment:
175
+ df_sentiment = pd.read_csv(uploaded_file_sentiment)
176
+ if 'content' not in df_sentiment.columns:
177
+ st.error("The CSV file must contain a 'content' column.")
178
+ else:
179
+ df_sentiment['sentiment'] = df_sentiment['content'].apply(lambda x: analyze_sentiment(x[:512]))
180
+ st.success("βœ… Sentiment Analysis Completed!")
181
+
182
+ with st.expander("πŸ“‹ View Sentiment Results"):
183
+ st.dataframe(df_sentiment[['content', 'sentiment']])
184
+
185
+ # Sentiment distribution visualization
186
+ sentiment_counts = df_sentiment['sentiment'].value_counts()
187
+ fig, ax = plt.subplots()
188
+ sentiment_counts.plot(kind='bar', color=['green', 'red', 'gray'], ax=ax)
189
+ ax.set_title("Sentiment Distribution")
190
+ ax.set_xlabel("Sentiment")
191
+ ax.set_ylabel("Count")
192
+ st.pyplot(fig)
193
+
194
+ # Download button
195
+ output_csv_sentiment = df_sentiment[['content', 'sentiment']].to_csv(index=False).encode('utf-8')
196
+ st.download_button("⬇ Download Sentiment Data", data=output_csv_sentiment, file_name="sentiment_output.csv", mime="text/csv")
197
+
198
+
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ transformers
3
+ pandas
4
+ wordcloud
5
+ matplotlib
6
+ torch
7
+ sentencepiece
8
+ huggingface_hub
9
+