Ashendilantha committed on
Commit
999a117
Β·
verified Β·
1 Parent(s): a7c1903

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -22
app.py CHANGED
@@ -1,8 +1,19 @@
1
  import streamlit as st
2
  import pandas as pd
 
 
 
 
 
 
3
  from transformers import pipeline
4
  from PIL import Image
5
 
 
 
 
 
 
6
  # Load Models
7
  news_classifier = pipeline("text-classification", model="Oneli/News_Classification")
8
  qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
@@ -19,27 +30,38 @@ label_mapping = {
19
  # Store classified article for QA
20
  context_storage = {"context": "", "bulk_context": "", "num_articles": 0}
21
 
 
 
 
 
 
 
 
 
 
 
 
22
  # Define the functions
23
  def classify_text(text):
24
- result = news_classifier(text)[0]
 
25
  category = label_mapping.get(result['label'], "Unknown")
26
  confidence = round(result['score'] * 100, 2)
27
 
28
  # Store context for QA
29
- context_storage["context"] = text
30
 
31
  return category, f"Confidence: {confidence}%"
32
 
33
- def classify_csv(file_path):
34
  try:
35
- df = pd.read_csv(file_path, encoding="utf-8")
36
-
37
- # Automatically detect the column containing text
38
  text_column = df.columns[0] # Assume first column is the text column
39
-
40
- df["Encoded Prediction"] = df[text_column].apply(lambda x: news_classifier(str(x))[0]['label'])
 
41
  df["Decoded Prediction"] = df["Encoded Prediction"].map(label_mapping)
42
- df["Confidence"] = df[text_column].apply(lambda x: round(news_classifier(str(x))[0]['score'] * 100, 2))
43
 
44
  # Store all text as a single context for QA
45
  context_storage["bulk_context"] = " ".join(df[text_column].dropna().astype(str).tolist())
@@ -47,30 +69,26 @@ def classify_csv(file_path):
47
 
48
  output_file = "output.csv"
49
  df.to_csv(output_file, index=False)
50
-
51
  return df, output_file
52
  except Exception as e:
53
  return None, f"Error: {str(e)}"
54
 
55
  def chatbot_response(history, user_input, source):
56
  user_input = user_input.lower()
57
-
58
- # Select context based on source toggle
59
  context = context_storage["context"] if source == "Single Article" else context_storage["bulk_context"]
60
  num_articles = context_storage["num_articles"]
61
-
62
  if "number of articles" in user_input or "how many articles" in user_input:
63
  answer = f"There are {num_articles} articles in the uploaded CSV."
64
  history.append([user_input, answer])
65
  return history, ""
66
-
67
  if context:
68
  result = qa_pipeline(question=user_input, context=context)
69
  answer = result["answer"]
70
  history.append([user_input, answer])
71
  return history, ""
72
-
73
- # Default responses if no context is available
74
  responses = {
75
  "hello": "πŸ‘‹ Hello! How can I assist you with news today?",
76
  "hi": "😊 Hi there! What do you want to know about news?",
@@ -78,16 +96,12 @@ def chatbot_response(history, user_input, source):
78
  "thank you": "πŸ™ You're welcome! Let me know if you need anything else.",
79
  "news": "πŸ“° I can classify news into Business, Sports, Politics, and more!",
80
  }
81
-
82
- response = responses.get(user_input,
83
- "πŸ€” I'm here to help with news classification and general info. Ask me about news topics!")
84
  history.append([user_input, response])
85
  return history, ""
86
 
87
  # Streamlit App Layout
88
  st.set_page_config(page_title="News Classifier", page_icon="πŸ“°")
89
-
90
- # Load Cover Image
91
  cover_image = Image.open("cover.png") # Ensure this image exists
92
  st.image(cover_image, caption="News Classifier πŸ“’", use_column_width=True)
93
 
@@ -128,4 +142,4 @@ if st.button("βœ‰ Send"):
128
  st.write("*Chatbot Response:*")
129
  for q, a in history:
130
  st.write(f"*Q:* {q}")
131
- st.write(f"*A:*Β {a}")
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import re
4
+ import string
5
+ import nltk
6
+ from nltk.corpus import stopwords
7
+ from nltk.tokenize import word_tokenize
8
+ from nltk.stem import WordNetLemmatizer
9
  from transformers import pipeline
10
  from PIL import Image
11
 
12
# Download the NLTK corpora used by clean_text (idempotent: no-op when
# the data is already cached locally).
nltk.download('stopwords')
nltk.download('punkt')    # tokenizer models backing word_tokenize
nltk.download('wordnet')  # lexical database backing WordNetLemmatizer

# Load Models — fetched from the Hugging Face hub on first run, which can
# take a while; both pipelines are module-level singletons reused below.
news_classifier = pipeline("text-classification", model="Oneli/News_Classification")
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
 
30
# Store classified article for QA.
# Module-level mutable state shared across interactions:
#   "context"      – cleaned text of the last single article classified
#   "bulk_context" – concatenated text of all rows from the last CSV upload
#   "num_articles" – article count read by chatbot_response; presumably set
#                    during CSV processing — the assignment is not visible
#                    in this view, confirm against the full file.
context_storage = {"context": "", "bulk_context": "", "num_articles": 0}
32
 
33
# Text Cleaning Functions
def clean_text(text):
    """Normalize raw article text before classification.

    Lower-cases the text, strips punctuation and special characters,
    removes English stopwords, and lemmatizes the surviving tokens.

    Args:
        text: Raw article text.

    Returns:
        The cleaned text as a single space-joined string of tokens.
    """
    text = text.lower()
    # re.escape prevents regex metacharacters inside string.punctuation
    # (], \, ^, -) from corrupting the character class.
    text = re.sub(f"[{re.escape(string.punctuation)}]", "", text)
    text = re.sub(r"[^a-zA-Z0-9\s]", "", text)  # Remove special characters
    tokens = word_tokenize(text)
    # Build the stopword set once: membership tests become O(1), and the
    # original called stopwords.words("english") afresh for every token,
    # re-materializing and linearly scanning the corpus list each time.
    stop_words = set(stopwords.words("english"))
    tokens = [word for word in tokens if word not in stop_words]  # Remove stopwords
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]  # Lemmatize tokens
    return " ".join(tokens)
43
+
44
# Define the functions
def classify_text(text):
    """Classify one news article and remember it as the QA context.

    Args:
        text: Raw article text.

    Returns:
        A (category, confidence-string) pair, e.g.
        ("Sports", "Confidence: 97.12%"). Unmapped model labels yield
        the category "Unknown".
    """
    cleaned = clean_text(text)
    prediction = news_classifier(cleaned)[0]

    # Remember the cleaned article so the chatbot can answer follow-ups.
    context_storage["context"] = cleaned

    score_pct = round(prediction['score'] * 100, 2)
    category = label_mapping.get(prediction['label'], "Unknown")
    return category, f"Confidence: {score_pct}%"
55
 
56
def classify_csv(file):
    """Classify every article in an uploaded CSV.

    Assumes the first column holds the article text. Cleans that column in
    place, appends encoded/decoded prediction and confidence columns,
    stores the concatenated text as the bulk QA context, and writes the
    result to "output.csv".

    Args:
        file: Path or file-like object accepted by pandas.read_csv.

    Returns:
        (DataFrame, output_path) on success, or (None, "Error: ...") on
        any failure.
    """
    try:
        df = pd.read_csv(file, encoding="utf-8")

        text_column = df.columns[0]  # Assume first column is the text column

        df[text_column] = df[text_column].astype(str).apply(clean_text)  # Clean text column

        # Run the classifier exactly once per row. The original called
        # news_classifier twice per article (once for the label, once for
        # the score), doubling model inference time.
        predictions = [news_classifier(text)[0] for text in df[text_column]]
        df["Encoded Prediction"] = [p['label'] for p in predictions]
        df["Decoded Prediction"] = df["Encoded Prediction"].map(label_mapping)
        df["Confidence"] = [round(p['score'] * 100, 2) for p in predictions]

        # Store all text as a single context for QA
        context_storage["bulk_context"] = " ".join(df[text_column].dropna().astype(str).tolist())
        # NOTE(review): restored from an elided context line in this view —
        # chatbot_response reads num_articles; confirm against the full file.
        context_storage["num_articles"] = len(df)

        output_file = "output.csv"
        df.to_csv(output_file, index=False)
        return df, output_file
    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        return None, f"Error: {str(e)}"
75
 
76
def chatbot_response(history, user_input, source):
    """Answer a chat message using the stored article context.

    Args:
        history: List of [question, answer] pairs; appended to in place.
        user_input: The user's message (lower-cased before matching).
        source: "Single Article" selects the single-article context;
            any other value selects the bulk CSV context.

    Returns:
        (history, "") — the updated history plus an empty string used by
        the UI to clear the input box.
    """
    user_input = user_input.lower()

    # Pick the QA context matching the source toggle.
    context = context_storage["context"] if source == "Single Article" else context_storage["bulk_context"]
    num_articles = context_storage["num_articles"]

    # Hard-coded intent: article-count questions bypass the QA model.
    if "number of articles" in user_input or "how many articles" in user_input:
        answer = f"There are {num_articles} articles in the uploaded CSV."
        history.append([user_input, answer])
        return history, ""

    # When any context is available, defer to the extractive QA pipeline.
    if context:
        result = qa_pipeline(question=user_input, context=context)
        answer = result["answer"]
        history.append([user_input, answer])
        return history, ""

    # Canned small-talk replies used when no article has been classified.
    # NOTE(review): one dict entry between "hi" and "thank you" appears
    # elided from this view (file line ~95) — confirm against the full file.
    responses = {
        "hello": "πŸ‘‹ Hello! How can I assist you with news today?",
        "hi": "😊 Hi there! What do you want to know about news?",
        "thank you": "πŸ™ You're welcome! Let me know if you need anything else.",
        "news": "πŸ“° I can classify news into Business, Sports, Politics, and more!",
    }
    response = responses.get(user_input, "πŸ€” I'm here to help with news classification and general info. Ask me about news topics!")
    history.append([user_input, response])
    return history, ""
102
 
103
# Streamlit App Layout
st.set_page_config(page_title="News Classifier", page_icon="πŸ“°")

# Cover banner; Image.open raises FileNotFoundError if cover.png is missing.
# NOTE(review): use_column_width is deprecated in recent Streamlit releases
# (replaced by use_container_width) — confirm the targeted version.
cover_image = Image.open("cover.png")  # Ensure this image exists
st.image(cover_image, caption="News Classifier πŸ“’", use_column_width=True)
107
 
 
142
  st.write("*Chatbot Response:*")
143
  for q, a in history:
144
  st.write(f"*Q:* {q}")
145
+ st.write(f"*A:* {a}")