Ashendilantha committed on
Commit
a7c1903
·
verified ·
1 Parent(s): 77d6801

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -54
app.py CHANGED
@@ -1,21 +1,8 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import string
4
- import re
5
- import nltk
6
- from nltk.corpus import stopwords
7
- from nltk.tokenize import word_tokenize
8
- from nltk.stem import WordNetLemmatizer
9
- from nltk.corpus import wordnet
10
  from transformers import pipeline
11
  from PIL import Image
12
 
13
- # Download necessary NLTK data
14
- nltk.download("stopwords")
15
- nltk.download("punkt")
16
- nltk.download("wordnet")
17
- nltk.download("averaged_perceptron_tagger")
18
-
19
  # Load Models
20
  news_classifier = pipeline("text-classification", model="Oneli/News_Classification")
21
  qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
@@ -32,63 +19,79 @@ label_mapping = {
32
  # Store classified article for QA
33
  context_storage = {"context": "", "bulk_context": "", "num_articles": 0}
34
 
35
- # Preprocessing functions
36
- def remove_punctuation(text):
37
- return text.translate(str.maketrans('', '', string.punctuation))
38
-
39
- def remove_special_characters(text):
40
- return re.sub(r'[^A-Za-z\s]', '', text)
41
-
42
- def remove_stopwords(text):
43
- stop_words = set(stopwords.words('english'))
44
- return " ".join([word for word in text.split() if word not in stop_words])
45
-
46
- def tokenize_text(text):
47
- return word_tokenize(text)
48
-
49
- def lemmatize_tokens(tokens):
50
- lemmatizer = WordNetLemmatizer()
51
- wordnet_map = {"N": wordnet.NOUN, 'V': wordnet.VERB, 'J': wordnet.ADJ, 'R': wordnet.ADV}
52
- return [lemmatizer.lemmatize(token, wordnet_map.get(nltk.pos_tag([token])[0][1][0].upper(), wordnet.NOUN)) for token in tokens]
53
-
54
- def preprocess_text(text):
55
- text = text.lower()
56
- text = remove_punctuation(text)
57
- text = remove_special_characters(text)
58
- text = remove_stopwords(text)
59
- tokens = tokenize_text(text)
60
- tokens = lemmatize_tokens(tokens)
61
- return " ".join(tokens)
62
-
63
- # Classification functions
64
  def classify_text(text):
65
- cleaned_text = preprocess_text(text)
66
- result = news_classifier(cleaned_text)[0]
67
  category = label_mapping.get(result['label'], "Unknown")
68
  confidence = round(result['score'] * 100, 2)
69
- context_storage["context"] = cleaned_text
 
 
 
70
  return category, f"Confidence: {confidence}%"
71
 
72
- def classify_csv(file):
73
  try:
74
- df = pd.read_csv(file, encoding="utf-8")
75
- text_column = df.columns[0]
76
- df["Cleaned_Text"] = df[text_column].astype(str).apply(preprocess_text)
77
- df["Encoded Prediction"] = df["Cleaned_Text"].apply(lambda x: news_classifier(x)[0]['label'])
 
 
78
  df["Decoded Prediction"] = df["Encoded Prediction"].map(label_mapping)
79
- df["Confidence"] = df["Cleaned_Text"].apply(lambda x: round(news_classifier(x)[0]['score'] * 100, 2))
80
- context_storage["bulk_context"] = " ".join(df["Cleaned_Text"].dropna().tolist())
 
 
81
  context_storage["num_articles"] = len(df)
 
82
  output_file = "output.csv"
83
  df.to_csv(output_file, index=False)
 
84
  return df, output_file
85
  except Exception as e:
86
  return None, f"Error: {str(e)}"
87
 
88
- # Streamlit App
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  st.set_page_config(page_title="News Classifier", page_icon="πŸ“°")
90
- st.image("cover.png", caption="News Classifier πŸ“’", use_column_width=True)
91
 
 
 
 
 
 
92
  st.subheader("πŸ“° Single Article Classification")
93
  text_input = st.text_area("Enter News Text", placeholder="Type or paste news content here...")
94
  if st.button("πŸ” Classify"):
@@ -99,6 +102,7 @@ if st.button("πŸ” Classify"):
99
  else:
100
  st.warning("Please enter some text to classify.")
101
 
 
102
  st.subheader("πŸ“‚ Bulk Classification (CSV)")
103
  file_input = st.file_uploader("Upload CSV File", type="csv")
104
  if file_input:
@@ -113,3 +117,15 @@ if file_input:
113
  )
114
  else:
115
  st.error(f"Error processing file: {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
 
 
 
 
 
 
 
3
  from transformers import pipeline
4
  from PIL import Image
5
 
 
 
 
 
 
 
6
  # Load Models
7
  news_classifier = pipeline("text-classification", model="Oneli/News_Classification")
8
  qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
 
19
  # Store classified article for QA
20
  context_storage = {"context": "", "bulk_context": "", "num_articles": 0}
21
 
22
+ # Define the functions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def classify_text(text):
24
+ result = news_classifier(text)[0]
 
25
  category = label_mapping.get(result['label'], "Unknown")
26
  confidence = round(result['score'] * 100, 2)
27
+
28
+ # Store context for QA
29
+ context_storage["context"] = text
30
+
31
  return category, f"Confidence: {confidence}%"
32
 
33
+ def classify_csv(file_path):
34
  try:
35
+ df = pd.read_csv(file_path, encoding="utf-8")
36
+
37
+ # Automatically detect the column containing text
38
+ text_column = df.columns[0] # Assume first column is the text column
39
+
40
+ df["Encoded Prediction"] = df[text_column].apply(lambda x: news_classifier(str(x))[0]['label'])
41
  df["Decoded Prediction"] = df["Encoded Prediction"].map(label_mapping)
42
+ df["Confidence"] = df[text_column].apply(lambda x: round(news_classifier(str(x))[0]['score'] * 100, 2))
43
+
44
+ # Store all text as a single context for QA
45
+ context_storage["bulk_context"] = " ".join(df[text_column].dropna().astype(str).tolist())
46
  context_storage["num_articles"] = len(df)
47
+
48
  output_file = "output.csv"
49
  df.to_csv(output_file, index=False)
50
+
51
  return df, output_file
52
  except Exception as e:
53
  return None, f"Error: {str(e)}"
54
 
55
+ def chatbot_response(history, user_input, source):
56
+ user_input = user_input.lower()
57
+
58
+ # Select context based on source toggle
59
+ context = context_storage["context"] if source == "Single Article" else context_storage["bulk_context"]
60
+ num_articles = context_storage["num_articles"]
61
+
62
+ if "number of articles" in user_input or "how many articles" in user_input:
63
+ answer = f"There are {num_articles} articles in the uploaded CSV."
64
+ history.append([user_input, answer])
65
+ return history, ""
66
+
67
+ if context:
68
+ result = qa_pipeline(question=user_input, context=context)
69
+ answer = result["answer"]
70
+ history.append([user_input, answer])
71
+ return history, ""
72
+
73
+ # Default responses if no context is available
74
+ responses = {
75
+ "hello": "πŸ‘‹ Hello! How can I assist you with news today?",
76
+ "hi": "😊 Hi there! What do you want to know about news?",
77
+ "how are you": "πŸ€– I'm just a bot, but I'm here to help!",
78
+ "thank you": "πŸ™ You're welcome! Let me know if you need anything else.",
79
+ "news": "πŸ“° I can classify news into Business, Sports, Politics, and more!",
80
+ }
81
+
82
+ response = responses.get(user_input,
83
+ "πŸ€” I'm here to help with news classification and general info. Ask me about news topics!")
84
+ history.append([user_input, response])
85
+ return history, ""
86
+
87
+ # Streamlit App Layout
88
  st.set_page_config(page_title="News Classifier", page_icon="πŸ“°")
 
89
 
90
+ # Load Cover Image
91
+ cover_image = Image.open("cover.png") # Ensure this image exists
92
+ st.image(cover_image, caption="News Classifier πŸ“’", use_column_width=True)
93
+
94
+ # Section for Single Article Classification
95
  st.subheader("πŸ“° Single Article Classification")
96
  text_input = st.text_area("Enter News Text", placeholder="Type or paste news content here...")
97
  if st.button("πŸ” Classify"):
 
102
  else:
103
  st.warning("Please enter some text to classify.")
104
 
105
+ # Section for Bulk CSV Classification
106
  st.subheader("πŸ“‚ Bulk Classification (CSV)")
107
  file_input = st.file_uploader("Upload CSV File", type="csv")
108
  if file_input:
 
117
  )
118
  else:
119
  st.error(f"Error processing file: {output_file}")
120
+
121
+ # Section for Chatbot Interaction
122
+ st.subheader("πŸ’¬ AI Chat Assistant")
123
+ history = []
124
+ user_input = st.text_input("Ask about news classification or topics", placeholder="Type a message...")
125
+ source_toggle = st.radio("Select Context Source", ["Single Article", "Bulk Classification"])
126
+ if st.button("βœ‰ Send"):
127
+ history, bot_response = chatbot_response(history, user_input, source_toggle)
128
+ st.write("*Chatbot Response:*")
129
+ for q, a in history:
130
+ st.write(f"*Q:* {q}")
131
+ st.write(f"*A:*Β {a}")