Ashendilantha committed on
Commit
f6359d6
·
verified ·
1 Parent(s): 14e74ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -50
app.py CHANGED
@@ -1,11 +1,48 @@
1
  import streamlit as st
2
  import pandas as pd
 
 
 
 
3
  from transformers import pipeline
4
  from PIL import Image
5
 
6
  # Load Models
7
  news_classifier = pipeline("text-classification", model="Oneli/News_Classification")
8
- qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  # Label Mapping
11
  label_mapping = {
@@ -19,17 +56,19 @@ label_mapping = {
19
  # Store classified article for QA
20
  context_storage = {"context": "", "bulk_context": "", "num_articles": 0}
21
 
22
- # Define the functions
23
  def classify_text(text):
 
24
  result = news_classifier(text)[0]
25
  category = label_mapping.get(result['label'], "Unknown")
26
  confidence = round(result['score'] * 100, 2)
27
-
28
  # Store context for QA
29
  context_storage["context"] = text
30
 
31
  return category, f"Confidence: {confidence}%"
32
 
 
33
  def classify_csv(file_path):
34
  try:
35
  df = pd.read_csv(file_path, encoding="utf-8")
@@ -37,9 +76,9 @@ def classify_csv(file_path):
37
  # Automatically detect the column containing text
38
  text_column = df.columns[0] # Assume first column is the text column
39
 
40
- df["Encoded Prediction"] = df[text_column].apply(lambda x: news_classifier(str(x))[0]['label'])
41
  df["Decoded Prediction"] = df["Encoded Prediction"].map(label_mapping)
42
- df["Confidence"] = df[text_column].apply(lambda x: round(news_classifier(str(x))[0]['score'] * 100, 2))
43
 
44
  # Store all text as a single context for QA
45
  context_storage["bulk_context"] = " ".join(df[text_column].dropna().astype(str).tolist())
@@ -52,44 +91,15 @@ def classify_csv(file_path):
52
  except Exception as e:
53
  return None, f"Error: {str(e)}"
54
 
55
- def chatbot_response(history, user_input, source):
56
- user_input = user_input.lower()
57
-
58
- # Select context based on source toggle
59
- context = context_storage["context"] if source == "Single Article" else context_storage["bulk_context"]
60
- num_articles = context_storage["num_articles"]
61
-
62
- if "number of articles" in user_input or "how many articles" in user_input:
63
- answer = f"There are {num_articles} articles in the uploaded CSV."
64
- history.append([user_input, answer])
65
- return history, ""
66
-
67
- if context:
68
- result = qa_pipeline(question=user_input, context=context)
69
- answer = result["answer"]
70
- history.append([user_input, answer])
71
- return history, ""
72
-
73
- # Default responses if no context is available
74
- responses = {
75
- "hello": "πŸ‘‹ Hello! How can I assist you with news today?",
76
- "hi": "😊 Hi there! What do you want to know about news?",
77
- "how are you": "πŸ€– I'm just a bot, but I'm here to help!",
78
- "thank you": "πŸ™ You're welcome! Let me know if you need anything else.",
79
- "news": "πŸ“° I can classify news into Business, Sports, Politics, and more!",
80
- }
81
-
82
- response = responses.get(user_input,
83
- "πŸ€” I'm here to help with news classification and general info. Ask me about news topics!")
84
- history.append([user_input, response])
85
- return history, ""
86
 
87
  # Streamlit App Layout
88
  st.set_page_config(page_title="News Classifier", page_icon="πŸ“°")
89
 
90
- # Load Cover Image
91
- cover_image = Image.open("cover.png") # Ensure this image exists
92
- st.image(cover_image, caption="News Classifier πŸ“’", use_column_width=True)
93
 
94
  # Section for Single Article Classification
95
  st.subheader("πŸ“° Single Article Classification")
@@ -118,14 +128,23 @@ if file_input:
118
  else:
119
  st.error(f"Error processing file: {output_file}")
120
 
121
- # Section for Chatbot Interaction
122
- st.subheader("πŸ’¬ AI Chat Assistant")
123
- history = []
124
- user_input = st.text_input("Ask about news classification or topics", placeholder="Type a message...")
125
- source_toggle = st.radio("Select Context Source", ["Single Article", "Bulk Classification"])
126
- if st.button("βœ‰ Send"):
127
- history, bot_response = chatbot_response(history, user_input, source_toggle)
128
- st.write("**Chatbot Response:**")
129
- for q, a in history:
130
- st.write(f"**Q:** {q}")
131
- st.write(f"**A:** {a}")
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import re
4
+ from nltk.tokenize import word_tokenize
5
+ from nltk.corpus import stopwords
6
+ from nltk.stem import WordNetLemmatizer
7
  from transformers import pipeline
8
  from PIL import Image
9
 
10
  # Load Models
11
  news_classifier = pipeline("text-classification", model="Oneli/News_Classification")
12
+
13
# Preprocessing setup — built once at module level, not on every call.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Normalize raw article text before it is fed to the classifier.

    Steps: lowercase, strip URLs, strip HTML tags, drop everything but
    letters/whitespace, tokenize, remove English stopwords, lemmatize,
    and re-join the tokens with single spaces.

    Args:
        text: The raw article text. May be NaN/None (e.g. a missing CSV
            cell) or a non-string value such as a number.

    Returns:
        The cleaned text as a string; "" for missing (NaN/None) input.
    """
    if pd.isna(text):
        return ""

    # Coerce to str first: numeric CSV cells pass the pd.isna check
    # above but have no .lower(), which previously raised AttributeError.
    text = str(text).lower()

    # Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text)

    # Remove HTML tags
    text = re.sub(r'<.*?>', '', text)

    # Remove special characters and numbers (keep letters and whitespace)
    text = re.sub(r'[^a-zA-Z\s]', '', text)

    # Tokenize
    tokens = word_tokenize(text)

    # Remove stopwords and lemmatize
    cleaned_tokens = [lemmatizer.lemmatize(token) for token in tokens if token not in stop_words]

    # Join tokens back into text
    cleaned_text = ' '.join(cleaned_tokens)

    return cleaned_text
43
+
44
+ # Load Cover Image
45
+ cover_image = Image.open("cover.png") # Ensure this image exists
46
 
47
  # Label Mapping
48
  label_mapping = {
 
56
  # Store classified article for QA
57
  context_storage = {"context": "", "bulk_context": "", "num_articles": 0}
58
 
59
# Single-article classification: predict a category and cache the text for Q&A.
def classify_text(text):
    """Classify one news article.

    Returns a (category, confidence-string) pair and stores the
    preprocessed article in context_storage for the Q&A section.
    """
    cleaned = preprocess_text(text)  # Preprocess text

    prediction = news_classifier(cleaned)[0]
    label = prediction['label']
    score_pct = round(prediction['score'] * 100, 2)

    # Remember the (preprocessed) article so the Q&A model has context later.
    context_storage["context"] = cleaned

    return label_mapping.get(label, "Unknown"), f"Confidence: {score_pct}%"
70
 
71
+ # Function for Bulk Classification
72
  def classify_csv(file_path):
73
  try:
74
  df = pd.read_csv(file_path, encoding="utf-8")
 
76
  # Automatically detect the column containing text
77
  text_column = df.columns[0] # Assume first column is the text column
78
 
79
+ df["Encoded Prediction"] = df[text_column].apply(lambda x: news_classifier(preprocess_text(str(x)))[0]['label'])
80
  df["Decoded Prediction"] = df["Encoded Prediction"].map(label_mapping)
81
+ df["Confidence"] = df[text_column].apply(lambda x: round(news_classifier(preprocess_text(str(x)))[0]['score'] * 100, 2))
82
 
83
  # Store all text as a single context for QA
84
  context_storage["bulk_context"] = " ".join(df[text_column].dropna().astype(str).tolist())
 
91
  except Exception as e:
92
  return None, f"Error: {str(e)}"
93
 
94
# Builds the extractive question-answering pipeline on demand.
def load_qa_pipeline():
    """Return a fresh question-answering pipeline (RoBERTa fine-tuned on SQuAD2)."""
    qa_model_id = "deepset/roberta-base-squad2"
    return pipeline("question-answering", model=qa_model_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  # Streamlit App Layout
99
  st.set_page_config(page_title="News Classifier", page_icon="πŸ“°")
100
 
101
+ # Load and display the cover image
102
+ st.image(cover_image, caption="News Classifier πŸ“’", use_container_width=True)
 
103
 
104
  # Section for Single Article Classification
105
  st.subheader("πŸ“° Single Article Classification")
 
128
  else:
129
  st.error(f"Error processing file: {output_file}")
130
 
131
# Section for Q&A over the most recently classified single article.
st.subheader("πŸ’¬ Q&A Model")
question = st.text_input("Ask a question about the news article:", placeholder="Ask anything related to the news...")
if question and st.button("Get Answer"):
    if not context_storage["context"]:
        # The QA pipeline raises on an empty context, so refuse gracefully
        # until an article has been classified.
        st.warning("Please classify an article first so the Q&A model has context.")
    else:
        # Load the (heavy) QA model only after an answer is actually
        # requested — previously it was reloaded on every rerun as soon
        # as a question was typed, even if the button was never pressed.
        with st.spinner("Loading Q&A model..."):
            qa_pipeline = load_qa_pipeline()

        with st.spinner("Finding answer..."):
            result = qa_pipeline(question=question, context=context_storage["context"])

        # Display results
        st.subheader("Answer")
        st.write(result["answer"])

        st.subheader("Confidence")
        st.progress(float(result["score"]))
        st.write(f"Confidence Score: {result['score']:.4f}")
+