Ashendilantha committed on
Commit
e964457
Β·
verified Β·
1 Parent(s): 826eadb

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +162 -0
app.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# --- Imports (deduplicated: the original file repeated the whole import
# block and loaded the classification pipeline twice) ------------------------
import re
import string

import nltk
import pandas as pd
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from PIL import Image
from transformers import pipeline

# Download the NLTK resources required by clean_text: the stopword list,
# the punkt tokenizer models, and WordNet for lemmatization.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

# Load Models (each pipeline is loaded exactly once at startup).
news_classifier = pipeline("text-classification", model="Oneli/News_Classification")
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
31
+
32
# Mapping from the classifier's raw output labels to human-readable
# news categories.
label_mapping = {
    "LABEL_0": "Business",
    "LABEL_1": "Opinion",
    "LABEL_2": "Political Gossip",
    "LABEL_3": "Sports",
    "LABEL_4": "World News",
}

# Mutable store shared between the classification handlers and the chatbot:
#   "context"      - cleaned text of the last single article classified
#   "bulk_context" - concatenated cleaned text of the last uploaded CSV
#   "num_articles" - row count of the last uploaded CSV
context_storage = {"context": "", "bulk_context": "", "num_articles": 0}
43
+
44
+
45
# Text Cleaning Functions
def clean_text(text):
    """Normalize raw news text for classification.

    Lowercases, strips punctuation and remaining special characters,
    tokenizes, drops English stopwords, and lemmatizes the surviving tokens.

    Args:
        text: Raw article text.

    Returns:
        A single space-joined string of cleaned, lemmatized tokens.
    """
    text = text.lower()
    text = re.sub(f"[{string.punctuation}]", "", text)  # Remove punctuation
    text = re.sub(r"[^a-zA-Z0-9\s]", "", text)  # Remove special characters
    tokens = word_tokenize(text)
    # Build the stopword set once: set membership is O(1), whereas the
    # original rebuilt the stopword LIST and scanned it for every token.
    stop_words = set(stopwords.words("english"))
    tokens = [word for word in tokens if word not in stop_words]
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]  # Lemmatize tokens
    return " ".join(tokens)
55
+
56
+
57
# Define the functions
def classify_text(text):
    """Classify a single news article and remember it for follow-up QA.

    Args:
        text: Raw article text entered by the user.

    Returns:
        Tuple of (category name, formatted confidence string).
    """
    cleaned = clean_text(text)
    prediction = news_classifier(cleaned)[0]
    category = label_mapping.get(prediction['label'], "Unknown")
    confidence_pct = round(prediction['score'] * 100, 2)

    # Keep the cleaned article around as the QA context for the chatbot.
    context_storage["context"] = cleaned

    return category, f"Confidence: {confidence_pct}%"
68
+
69
+
70
def classify_csv(file):
    """Classify every article in an uploaded CSV.

    The first column is assumed to hold the article text. Each row is
    cleaned and classified once; the raw label, decoded category, and
    confidence are appended as new columns. The concatenated cleaned text is
    stored as the bulk QA context for the chatbot.

    Args:
        file: A file-like object accepted by pandas.read_csv.

    Returns:
        (DataFrame, output file path) on success, or
        (None, error message string) on failure.
    """
    try:
        df = pd.read_csv(file, encoding="utf-8")
        text_column = df.columns[0]  # Assume first column is the text column

        df[text_column] = df[text_column].astype(str).apply(clean_text)  # Clean text column
        # Run the classifier ONCE per row and reuse the result — the original
        # invoked the pipeline twice per row (label, then score), doubling
        # inference cost.
        predictions = df[text_column].apply(lambda x: news_classifier(x)[0])
        df["Encoded Prediction"] = predictions.apply(lambda p: p['label'])
        df["Decoded Prediction"] = df["Encoded Prediction"].map(label_mapping)
        df["Confidence"] = predictions.apply(lambda p: round(p['score'] * 100, 2))

        # Store all text as a single context for QA
        context_storage["bulk_context"] = " ".join(df[text_column].dropna().astype(str).tolist())
        context_storage["num_articles"] = len(df)

        output_file = "output.csv"
        df.to_csv(output_file, index=False)
        return df, output_file
    except Exception as e:
        # Surface the failure as a string so the UI can display it.
        return None, f"Error: {str(e)}"
89
+
90
+
91
def chatbot_response(history, user_input, source):
    """Answer a chat message using the stored article context.

    Args:
        history: Running list of [question, answer] pairs (mutated in place).
        user_input: The user's message.
        source: "Single Article" or "Bulk Classification" — selects which
            stored context to answer from.

    Returns:
        Tuple of (updated history, "") — the empty string clears the input.
    """
    user_input = user_input.lower()
    if source == "Single Article":
        context = context_storage["context"]
    else:
        context = context_storage["bulk_context"]
    num_articles = context_storage["num_articles"]

    # Direct question about the size of the uploaded CSV.
    if "number of articles" in user_input or "how many articles" in user_input:
        history.append([user_input, f"There are {num_articles} articles in the uploaded CSV."])
        return history, ""

    # If any article text is stored, answer from it with the QA model.
    if context:
        qa_result = qa_pipeline(question=user_input, context=context)
        history.append([user_input, qa_result["answer"]])
        return history, ""

    # No context yet: fall back to canned small-talk replies.
    responses = {
        "hello": "πŸ‘‹ Hello! How can I assist you with news today?",
        "hi": "😊 Hi there! What do you want to know about news?",
        "how are you": "πŸ€– I'm just a bot, but I'm here to help!",
        "thank you": "πŸ™ You're welcome! Let me know if you need anything else.",
        "news": "πŸ“° I can classify news into Business, Sports, Politics, and more!",
    }
    fallback = "πŸ€” I'm here to help with news classification and general info. Ask me about news topics!"
    history.append([user_input, responses.get(user_input, fallback)])
    return history, ""
118
+
119
+
120
# Streamlit App Layout
st.set_page_config(page_title="News Classifier", page_icon="πŸ“°")
cover_image = Image.open("cover.png")  # Ensure this image exists
st.image(cover_image, caption="News Classifier πŸ“’", use_column_width=True)

# Section for Single Article Classification
st.subheader("πŸ“° Single Article Classification")
text_input = st.text_area("Enter News Text", placeholder="Type or paste news content here...")
if st.button("πŸ” Classify"):
    if text_input:
        category, confidence = classify_text(text_input)
        st.write(f"*Predicted Category:* {category}")
        st.write(f"*Confidence Level:* {confidence}")
    else:
        st.warning("Please enter some text to classify.")

# Section for Bulk CSV Classification
st.subheader("πŸ“‚ Bulk Classification (CSV)")
file_input = st.file_uploader("Upload CSV File", type="csv")
if file_input:
    df, output_file = classify_csv(file_input)
    if df is not None:
        st.dataframe(df)
        # Read the processed CSV with a context manager so the handle is
        # closed (the original leaked it via open(...).read()).
        with open(output_file, 'rb') as f:
            csv_bytes = f.read()
        st.download_button(
            label="Download Processed CSV",
            data=csv_bytes,
            file_name=output_file,
            mime="text/csv"
        )
    else:
        st.error(f"Error processing file: {output_file}")

# Section for Chatbot Interaction
st.subheader("πŸ’¬ AI Chat Assistant")
# Persist the chat history across Streamlit reruns. The original assigned
# `history = []` at module level, which wiped the conversation on every
# widget interaction (Streamlit re-executes the whole script each time).
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []
user_input = st.text_input("Ask about news classification or topics", placeholder="Type a message...")
source_toggle = st.radio("Select Context Source", ["Single Article", "Bulk Classification"])
if st.button("βœ‰ Send"):
    # chatbot_response returns (history, ""); the cleared-input string is
    # unused here.
    st.session_state["chat_history"], _ = chatbot_response(
        st.session_state["chat_history"], user_input, source_toggle
    )
    st.write("*Chatbot Response:*")
    for q, a in st.session_state["chat_history"]:
        st.write(f"*Q:* {q}")
        st.write(f"*A:* {a}")