dinusha11 committed on
Commit
9e0643c
·
verified ·
1 Parent(s): 8ddf433

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +146 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import torch
4
+ from transformers import pipeline, AutoTokenizer
5
+ import matplotlib.pyplot as plt
6
+ from wordcloud import WordCloud
7
+
8
# Hugging Face Hub id of the fine-tuned DistilBERT news classifier.
MODEL_NAME = "dinusha11/finetuned-distilbert-news"


@st.cache_resource
def load_model():
    """Create the news text-classification pipeline.

    The tokenizer and model are both loaded from ``MODEL_NAME``. The pipeline
    is placed on CUDA device 0 when ``torch.cuda.is_available()`` is true and
    on the CPU (``device=-1``) otherwise. Decorated with
    ``st.cache_resource``.

    Returns:
        The ``transformers`` text-classification pipeline.
    """
    target_device = 0 if torch.cuda.is_available() else -1
    news_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    return pipeline(
        "text-classification",
        model=MODEL_NAME,
        tokenizer=news_tokenizer,
        device=target_device,
    )


classifier = load_model()
22
+
23
+
24
@st.cache_resource
def load_qa_pipeline():
    """Create the extractive question-answering pipeline.

    The checkpoint is named explicitly instead of relying on the library's
    implicit default, so the model cannot change silently when
    ``transformers`` is upgraded. The id below is believed to be the
    checkpoint ``transformers`` selects by default for the
    "question-answering" task (confirm against the installed version).

    The device is chosen the same way as in ``load_model`` so both pipelines
    use the GPU when one is available.

    Returns:
        The ``transformers`` question-answering pipeline.
    """
    qa_model_name = "distilbert/distilbert-base-cased-distilled-squad"
    target_device = 0 if torch.cuda.is_available() else -1
    return pipeline(
        "question-answering",
        model=qa_model_name,
        device=target_device,
    )


qa_pipeline = load_qa_pipeline()
31
+
32
+
33
def preprocess_text(text):
    """Normalise one article's text before it is sent to a model.

    Args:
        text: The raw cell value. Usually a ``str``, but cells read from a CSV
            can also be ``None``, a float NaN (empty field) or a number.

    Returns:
        The text with leading/trailing whitespace removed. Missing values
        (``None`` / NaN) become an empty string; any other non-string value
        is converted with ``str()`` first.
    """
    if isinstance(text, str):
        return text.strip()
    # NaN is the only float that is not equal to itself; pandas uses it for
    # empty CSV cells, and calling .strip() on it would raise AttributeError.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    return str(text).strip()
36
+
37
+
38
def get_answer(question, context):
    """Answer ``question`` using ``context`` as the source passage.

    Calls the module-level ``qa_pipeline`` with both values and returns the
    ``'answer'`` entry of its result.
    """
    qa_result = qa_pipeline(question=question, context=context)
    return qa_result['answer']
41
+
42
+
43
def generate_wordcloud(text):
    """Build a word cloud from ``text``.

    The cloud is configured as 800x400 with a white background.

    Returns:
        Whatever ``WordCloud.generate`` returns for ``text``.
    """
    cloud = WordCloud(width=800, height=400, background_color='white')
    return cloud.generate(text)
47
+
48
+
49
# App-wide CSS overrides, injected as raw HTML.
# NOTE(review): `.css-1aumxhk` looks like an auto-generated Streamlit class
# name; confirm it still matches an element in the installed Streamlit version.
_CUSTOM_CSS = """
<style>
body {
font-family: Arial, sans-serif;
background-color: #f8f9fa;
}
.css-1aumxhk {
display: none;
}
.main-title {
text-align: center;
font-size: 36px;
color: #2b2d42;
}
.stButton>button {
width: 100%;
border-radius: 10px;
}
</style>
"""
# unsafe_allow_html is required so the <style> tag is emitted as HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
70
+
71
# Sidebar navigation: the label picked here selects which page is rendered by
# the `if page == ...` chain that follows.
_PAGE_NAMES = ["Home", "News Classification", "Q&A", "Word Cloud"]

st.sidebar.title("Navigation")
page = st.sidebar.radio("Go to:", _PAGE_NAMES)
75
+
76
def _read_news_csv(uploaded_file):
    """Parse an uploaded CSV and return it only if it has a 'content' column.

    Shows a Streamlit error and returns ``None`` when the file cannot be
    parsed or the required column is missing, so each page only has to check
    for ``None``.
    """
    try:
        frame = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the CSV file: {exc}")
        return None
    if 'content' not in frame.columns:
        st.error("The CSV file must contain a 'content' column.")
        return None
    return frame


# Home Page
if page == "Home":
    st.title("πŸ“° News Classification & Analysis App")
    st.write("Welcome to the AI-powered news classification and analysis platform.")

    st.write("""
- πŸ“Œ **Upload a CSV** containing news articles.
- πŸ” **Get Classification** into Business, Opinion, Political Gossip, Sports, or World News.
- 🧠 **Ask AI Questions** on news content.
- ☁ **Visualize Data** with a Word Cloud.
""")
    st.success("Get started by navigating to 'News Classification' from the sidebar!")

# News Classification Page
elif page == "News Classification":
    st.title("πŸ“ Classify News Articles")
    uploaded_file = st.file_uploader("πŸ“‚ Upload a CSV file", type=["csv"], key="file_uploader")

    if uploaded_file:
        df = _read_news_csv(uploaded_file)
        if df is not None:
            # Empty CSV cells arrive as NaN; turn them into "" before
            # preprocessing so a single blank row cannot crash the run.
            df['processed_content'] = (
                df['content'].fillna("").astype(str).apply(preprocess_text)
            )
            texts = df['processed_content'].tolist()
            # Truncate by tokens (the model limit) rather than slicing the
            # first 512 characters, and classify all rows in a single call.
            predictions = (
                classifier(texts, truncation=True, max_length=512) if texts else []
            )
            df['class'] = [prediction['label'] for prediction in predictions]
            st.success("βœ… Classification completed!")

            with st.expander("πŸ“‹ View Classified News"):
                st.dataframe(df[['content', 'class']])

            # Download button
            output_csv = df[['content', 'class']].to_csv(index=False).encode('utf-8')
            st.download_button("⬇ Download Classified Data", data=output_csv, file_name="output.csv", mime="text/csv")

# Q&A Section
elif page == "Q&A":
    st.title("🧠 Ask Questions About News Content")
    uploaded_file_qa = st.file_uploader("πŸ“‚ Upload CSV for Q&A", type=["csv"], key="qa_file_uploader")

    if uploaded_file_qa:
        df_qa = _read_news_csv(uploaded_file_qa)
        if df_qa is not None:
            # Only offer rows that actually contain text; a NaN or blank
            # context would make the QA pipeline fail.
            articles = df_qa['content'].dropna().astype(str)
            articles = articles[articles.str.strip() != ""]
            if articles.empty:
                st.warning("The 'content' column does not contain any text.")
            else:
                st.write("πŸ“° **Available News Articles:**")
                selected_article = st.selectbox("Select an article", articles)

                question = st.text_input("πŸ” Ask a question about this article:")
                # Ignore whitespace-only input instead of querying the model.
                if question and question.strip():
                    answer = get_answer(question, selected_article)
                    st.success(f"**Answer:** {answer}")

# Word Cloud Section
elif page == "Word Cloud":
    st.title("☁ Word Cloud Visualization")

    uploaded_file_wc = st.file_uploader("πŸ“‚ Upload CSV for Word Cloud", type=["csv"], key="wc_file_uploader")

    if uploaded_file_wc:
        df_wc = _read_news_csv(uploaded_file_wc)
        if df_wc is not None:
            # Drop missing cells first; astype(str) alone would add the
            # literal word "nan" to the cloud for every empty row.
            all_text = " ".join(df_wc['content'].dropna().astype(str))
            wordcloud = None
            if all_text.strip():
                try:
                    wordcloud = generate_wordcloud(all_text)
                except ValueError:
                    # WordCloud raises ValueError when no usable words remain.
                    wordcloud = None

            if wordcloud is None:
                st.warning("Not enough text in the 'content' column to build a word cloud.")
            else:
                fig, ax = plt.subplots(figsize=(10, 5))
                ax.imshow(wordcloud, interpolation="bilinear")
                ax.axis("off")
                st.pyplot(fig)
                # pyplot keeps every figure alive until it is closed; without
                # this each rerun of the script would leak one figure.
                plt.close(fig)
146
+
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# Runtime dependencies imported by app.py
streamlit
transformers
torch
pandas
matplotlib
wordcloud