Ashendilantha committed on
Commit
f6359d6
·
verified ·
1 Parent(s): 14e74ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -50
app.py CHANGED
@@ -1,11 +1,48 @@
1
  import streamlit as st
2
  import pandas as pd
 
 
 
 
3
  from transformers import pipeline
4
  from PIL import Image
5
 
6
  # Load Models
7
  news_classifier = pipeline("text-classification", model="Oneli/News_Classification")
8
- qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  # Label Mapping
11
  label_mapping = {
@@ -19,17 +56,19 @@ label_mapping = {
19
  # Store classified article for QA
20
  context_storage = {"context": "", "bulk_context": "", "num_articles": 0}
21
 
22
- # Define the functions
23
  def classify_text(text):
 
24
  result = news_classifier(text)[0]
25
  category = label_mapping.get(result['label'], "Unknown")
26
  confidence = round(result['score'] * 100, 2)
27
-
28
  # Store context for QA
29
  context_storage["context"] = text
30
 
31
  return category, f"Confidence: {confidence}%"
32
 
 
33
  def classify_csv(file_path):
34
  try:
35
  df = pd.read_csv(file_path, encoding="utf-8")
@@ -37,9 +76,9 @@ def classify_csv(file_path):
37
  # Automatically detect the column containing text
38
  text_column = df.columns[0] # Assume first column is the text column
39
 
40
- df["Encoded Prediction"] = df[text_column].apply(lambda x: news_classifier(str(x))[0]['label'])
41
  df["Decoded Prediction"] = df["Encoded Prediction"].map(label_mapping)
42
- df["Confidence"] = df[text_column].apply(lambda x: round(news_classifier(str(x))[0]['score'] * 100, 2))
43
 
44
  # Store all text as a single context for QA
45
  context_storage["bulk_context"] = " ".join(df[text_column].dropna().astype(str).tolist())
@@ -52,44 +91,15 @@ def classify_csv(file_path):
52
  except Exception as e:
53
  return None, f"Error: {str(e)}"
54
 
55
- def chatbot_response(history, user_input, source):
56
- user_input = user_input.lower()
57
-
58
- # Select context based on source toggle
59
- context = context_storage["context"] if source == "Single Article" else context_storage["bulk_context"]
60
- num_articles = context_storage["num_articles"]
61
-
62
- if "number of articles" in user_input or "how many articles" in user_input:
63
- answer = f"There are {num_articles} articles in the uploaded CSV."
64
- history.append([user_input, answer])
65
- return history, ""
66
-
67
- if context:
68
- result = qa_pipeline(question=user_input, context=context)
69
- answer = result["answer"]
70
- history.append([user_input, answer])
71
- return history, ""
72
-
73
- # Default responses if no context is available
74
- responses = {
75
- "hello": "πŸ‘‹ Hello! How can I assist you with news today?",
76
- "hi": "😊 Hi there! What do you want to know about news?",
77
- "how are you": "πŸ€– I'm just a bot, but I'm here to help!",
78
- "thank you": "πŸ™ You're welcome! Let me know if you need anything else.",
79
- "news": "πŸ“° I can classify news into Business, Sports, Politics, and more!",
80
- }
81
-
82
- response = responses.get(user_input,
83
- "πŸ€” I'm here to help with news classification and general info. Ask me about news topics!")
84
- history.append([user_input, response])
85
- return history, ""
86
 
87
  # Streamlit App Layout
88
  st.set_page_config(page_title="News Classifier", page_icon="πŸ“°")
89
 
90
- # Load Cover Image
91
- cover_image = Image.open("cover.png") # Ensure this image exists
92
- st.image(cover_image, caption="News Classifier πŸ“’", use_column_width=True)
93
 
94
  # Section for Single Article Classification
95
  st.subheader("πŸ“° Single Article Classification")
@@ -118,14 +128,23 @@ if file_input:
118
  else:
119
  st.error(f"Error processing file: {output_file}")
120
 
121
- # Section for Chatbot Interaction
122
- st.subheader("πŸ’¬ AI Chat Assistant")
123
- history = []
124
- user_input = st.text_input("Ask about news classification or topics", placeholder="Type a message...")
125
- source_toggle = st.radio("Select Context Source", ["Single Article", "Bulk Classification"])
126
- if st.button("βœ‰ Send"):
127
- history, bot_response = chatbot_response(history, user_input, source_toggle)
128
- st.write("**Chatbot Response:**")
129
- for q, a in history:
130
- st.write(f"**Q:** {q}")
131
- st.write(f"**A:** {a}")
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import re
4
+ from nltk.tokenize import word_tokenize
5
+ from nltk.corpus import stopwords
6
+ from nltk.stem import WordNetLemmatizer
7
  from transformers import pipeline
8
  from PIL import Image
9
 
10
  # Load Models
11
  news_classifier = pipeline("text-classification", model="Oneli/News_Classification")
12
+
13
# Preprocessing setup — built once at module level, not on every call.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Normalize raw article text before it is fed to the classifier.

    Steps: lowercase, strip URLs, strip HTML tags, drop everything but
    letters/whitespace, tokenize, remove English stopwords, lemmatize,
    and re-join the tokens with single spaces.

    Args:
        text: The raw article text. May be NaN/None (e.g. a missing CSV
            cell) or a non-string value such as a number.

    Returns:
        The cleaned text as a string; "" for missing (NaN/None) input.
    """
    if pd.isna(text):
        return ""

    # Coerce to str first: numeric CSV cells pass the pd.isna check
    # above but have no .lower(), which previously raised AttributeError.
    text = str(text).lower()

    # Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text)

    # Remove HTML tags
    text = re.sub(r'<.*?>', '', text)

    # Remove special characters and numbers (keep letters and whitespace)
    text = re.sub(r'[^a-zA-Z\s]', '', text)

    # Tokenize
    tokens = word_tokenize(text)

    # Remove stopwords and lemmatize
    cleaned_tokens = [lemmatizer.lemmatize(token) for token in tokens if token not in stop_words]

    # Join tokens back into text
    cleaned_text = ' '.join(cleaned_tokens)

    return cleaned_text
43
+
44
+ # Load Cover Image
45
+ cover_image = Image.open("cover.png") # Ensure this image exists
46
 
47
  # Label Mapping
48
  label_mapping = {
 
56
  # Store classified article for QA
57
  context_storage = {"context": "", "bulk_context": "", "num_articles": 0}
58
 
59
# Single-article classification: predict a category and cache the text for Q&A.
def classify_text(text):
    """Classify one news article.

    Returns a (category, confidence-string) pair and stores the
    preprocessed article in context_storage for the Q&A section.
    """
    cleaned = preprocess_text(text)  # Preprocess text

    prediction = news_classifier(cleaned)[0]
    label = prediction['label']
    score_pct = round(prediction['score'] * 100, 2)

    # Remember the (preprocessed) article so the Q&A model has context later.
    context_storage["context"] = cleaned

    return label_mapping.get(label, "Unknown"), f"Confidence: {score_pct}%"
70
 
71
+ # Function for Bulk Classification
72
  def classify_csv(file_path):
73
  try:
74
  df = pd.read_csv(file_path, encoding="utf-8")
 
76
  # Automatically detect the column containing text
77
  text_column = df.columns[0] # Assume first column is the text column
78
 
79
+ df["Encoded Prediction"] = df[text_column].apply(lambda x: news_classifier(preprocess_text(str(x)))[0]['label'])
80
  df["Decoded Prediction"] = df["Encoded Prediction"].map(label_mapping)
81
+ df["Confidence"] = df[text_column].apply(lambda x: round(news_classifier(preprocess_text(str(x)))[0]['score'] * 100, 2))
82
 
83
  # Store all text as a single context for QA
84
  context_storage["bulk_context"] = " ".join(df[text_column].dropna().astype(str).tolist())
 
91
  except Exception as e:
92
  return None, f"Error: {str(e)}"
93
 
94
# Builds the extractive question-answering pipeline on demand.
def load_qa_pipeline():
    """Return a fresh question-answering pipeline (RoBERTa fine-tuned on SQuAD2)."""
    qa_model_id = "deepset/roberta-base-squad2"
    return pipeline("question-answering", model=qa_model_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  # Streamlit App Layout
99
  st.set_page_config(page_title="News Classifier", page_icon="πŸ“°")
100
 
101
+ # Load and display the cover image
102
+ st.image(cover_image, caption="News Classifier πŸ“’", use_container_width=True)
 
103
 
104
  # Section for Single Article Classification
105
  st.subheader("πŸ“° Single Article Classification")
 
128
  else:
129
  st.error(f"Error processing file: {output_file}")
130
 
131
# Section for Q&A over the most recently classified single article.
st.subheader("πŸ’¬ Q&A Model")
question = st.text_input("Ask a question about the news article:", placeholder="Ask anything related to the news...")
if question and st.button("Get Answer"):
    if not context_storage["context"]:
        # The QA pipeline raises on an empty context, so refuse gracefully
        # until an article has been classified.
        st.warning("Please classify an article first so the Q&A model has context.")
    else:
        # Load the (heavy) QA model only after an answer is actually
        # requested — previously it was reloaded on every rerun as soon
        # as a question was typed, even if the button was never pressed.
        with st.spinner("Loading Q&A model..."):
            qa_pipeline = load_qa_pipeline()

        with st.spinner("Finding answer..."):
            result = qa_pipeline(question=question, context=context_storage["context"])

        # Display results
        st.subheader("Answer")
        st.write(result["answer"])

        st.subheader("Confidence")
        st.progress(float(result["score"]))
        st.write(f"Confidence Score: {result['score']:.4f}")
+