Spaces:

Ashendilantha
/

News_Classification

Sleeping

App Files Files Community

Ashendilantha committed on Mar 30, 2025

Commit

bada648

verified ·

1 Parent(s): a44885d

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -38

app.py CHANGED Viewed

@@ -4,19 +4,20 @@ import re
 import string
 import nltk
 from nltk.corpus import stopwords
-from nltk.tokenize import word_tokenize
 from nltk.stem import WordNetLemmatizer
 from transformers import pipeline
 from PIL import Image
 # Download required NLTK data
 nltk.download('stopwords')
-nltk.download('punkt')
 nltk.download('wordnet')
 # Load Models
 news_classifier = pipeline("text-classification", model="Oneli/News_Classification")
-qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
 # Label Mapping
 label_mapping = {
@@ -35,11 +36,11 @@ def clean_text(text):
     text = text.lower()
     text = re.sub(f"[{string.punctuation}]", "", text)  # Remove punctuation
     text = re.sub(r"[^a-zA-Z0-9\s]", "", text)  # Remove special characters
-    tokens = word_tokenize(text)
-    tokens = [word for word in tokens if word not in stopwords.words("english")]  # Remove stopwords
     lemmatizer = WordNetLemmatizer()
-    tokens = [lemmatizer.lemmatize(word) for word in tokens]  # Lemmatize tokens
-    return " ".join(tokens)
 # Define the functions
 def classify_text(text):
@@ -59,8 +60,7 @@ def classify_csv(file):
         text_column = df.columns[0]  # Assume first column is the text column
         df[text_column] = df[text_column].astype(str).apply(clean_text)  # Clean text column
-        df["Encoded Prediction"] = df[text_column].apply(lambda x: news_classifier(x)[0]['label'])
-        df["Decoded Prediction"] = df["Encoded Prediction"].map(label_mapping)
         df["Confidence"] = df[text_column].apply(lambda x: round(news_classifier(x)[0]['score'] * 100, 2))
         # Store all text as a single context for QA
@@ -73,37 +73,46 @@ def classify_csv(file):
     except Exception as e:
         return None, f"Error: {str(e)}"
-def chatbot_response(history, user_input, source):
     user_input = user_input.lower()
-    context = context_storage["context"] if source == "Single Article" else context_storage["bulk_context"]
-    num_articles = context_storage["num_articles"]
-    if "number of articles" in user_input or "how many articles" in user_input:
-        answer = f"There are {num_articles} articles in the uploaded CSV."
-        history.append([user_input, answer])
-        return history, ""
     if context:
-        result = qa_pipeline(question=user_input, context=context)
-        answer = result["answer"]
-        history.append([user_input, answer])
-        return history, ""
-    responses = {
-        "hello": "👋 Hello! How can I assist you with news today?",
-        "hi": "😊 Hi there! What do you want to know about news?",
-        "how are you": "🤖 I'm just a bot, but I'm here to help!",
-        "thank you": "🙏 You're welcome! Let me know if you need anything else.",
-        "news": "📰 I can classify news into Business, Sports, Politics, and more!",
-    }
-    response = responses.get(user_input, "🤔 I'm here to help with news classification and general info. Ask me about news topics!")
-    history.append([user_input, response])
-    return history, ""
 # Streamlit App Layout
 st.set_page_config(page_title="News Classifier", page_icon="📰")
 cover_image = Image.open("cover.png")  # Ensure this image exists
-st.image(cover_image, caption="News Classifier 📢", use_column_width=True)
 # Section for Single Article Classification
 st.subheader("📰 Single Article Classification")
@@ -111,8 +120,12 @@ text_input = st.text_area("Enter News Text", placeholder="Type or paste news con
 if st.button("🔍 Classify"):
     if text_input:
         category, confidence = classify_text(text_input)
-        st.write(f"*Predicted Category:* {category}")
-        st.write(f"*Confidence Level:* {confidence}")
     else:
         st.warning("Please enter some text to classify.")
@@ -129,6 +142,13 @@ if file_input:
             file_name=output_file,
             mime="text/csv"
         )
     else:
         st.error(f"Error processing file: {output_file}")
@@ -137,9 +157,18 @@ st.subheader("💬 AI Chat Assistant")
 history = []
 user_input = st.text_input("Ask about news classification or topics", placeholder="Type a message...")
 source_toggle = st.radio("Select Context Source", ["Single Article", "Bulk Classification"])
 if st.button("✉ Send"):
-    history, bot_response = chatbot_response(history, user_input, source_toggle)
-    st.write("*Chatbot Response:*")
-    for q, a in history:
-        st.write(f"*Q:* {q}")
-        st.write(f"*A:* {a}")

 import string
 import nltk
 from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
 from transformers import pipeline
 from PIL import Image
+import matplotlib.pyplot as plt
+from wordcloud import WordCloud
 # Download required NLTK data
 nltk.download('stopwords')
 nltk.download('wordnet')
+nltk.download('omw-1.4')
 # Load Models
 news_classifier = pipeline("text-classification", model="Oneli/News_Classification")
+qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
 # Label Mapping
 label_mapping = {
     text = text.lower()
     text = re.sub(f"[{string.punctuation}]", "", text)  # Remove punctuation
     text = re.sub(r"[^a-zA-Z0-9\s]", "", text)  # Remove special characters
+    words = text.split()  # Tokenization without Punkt
+    words = [word for word in words if word not in stopwords.words("english")]  # Remove stopwords
     lemmatizer = WordNetLemmatizer()
+    words = [lemmatizer.lemmatize(word) for word in words]  # Lemmatize tokens
+    return " ".join(words)
 # Define the functions
 def classify_text(text):
         text_column = df.columns[0]  # Assume first column is the text column
         df[text_column] = df[text_column].astype(str).apply(clean_text)  # Clean text column
+        df["Decoded Prediction"] = df[text_column].apply(lambda x: label_mapping.get(news_classifier(x)[0]['label'], "Unknown"))
         df["Confidence"] = df[text_column].apply(lambda x: round(news_classifier(x)[0]['score'] * 100, 2))
         # Store all text as a single context for QA
     except Exception as e:
         return None, f"Error: {str(e)}"
+def chatbot_response(history, user_input, text_input=None, file_input=None):
+    """Answer a question against the article text and/or the uploaded CSV.
+
+    Args:
+        history: List of [question, answer] pairs; the new pair is appended
+            to it in place.
+        user_input: The question typed by the user.
+        text_input: Optional article text to use as QA context.
+        file_input: Optional uploaded CSV; it is passed to classify_csv and
+            the stored bulk context is then used as QA context.
+
+    Returns:
+        A (history, answer) tuple. When the question is blank or no context
+        is available, answer is a prompt asking for the missing input
+        instead of a model prediction.
+    """
     user_input = user_input.lower()
+    context_parts = []
+    if text_input:
+        context_parts.append(text_input)
+    if file_input:
+        # classify_csv returns None as its first value on failure; only read
+        # the bulk context when processing succeeded.
+        # NOTE(review): presumably classify_csv refreshes
+        # context_storage["bulk_context"] - confirm against its body.
+        df, _ = classify_csv(file_input)
+        if df is not None:
+            context_parts.append(context_storage["bulk_context"])
+    context = " ".join(context_parts)
+    if user_input.strip() and context:
+        with st.spinner("Finding answer..."):
+            result = qa_pipeline(question=user_input, context=context)
+        answer = result["answer"]
+    else:
+        # Always bind answer: previously it was only assigned inside the
+        # context branch, so the return statement raised UnboundLocalError
+        # whenever no context was supplied.
+        answer = "Please enter a question and provide article text or a CSV file to search."
+    history.append([user_input, answer])
+    return history, answer
+# Function to generate word cloud from the text column (from output CSV)
+def generate_word_cloud_from_output(df):
+    """Build a word cloud from the text column of *df*.
+
+    The "content" column is used when it exists; otherwise the first column
+    is used, matching the column classify_csv treats as the text column.
+
+    Args:
+        df: DataFrame holding the article text.
+
+    Returns:
+        The generated WordCloud object (800x400, white background).
+    """
+    # A hard-coded df["content"] raised KeyError for any CSV whose text
+    # column carries a different header.
+    text_column = "content" if "content" in df.columns else df.columns[0]
+    content_text = " ".join(df[text_column].dropna().astype(str).tolist())
+    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(content_text)
+    return wordcloud
+# Function to generate bar graph for decoded predictions
+def generate_bar_graph(df):
+    """Render a bar chart of how often each decoded prediction occurs.
+
+    Args:
+        df: DataFrame with a "Decoded Prediction" column.
+
+    Returns:
+        None. The chart is drawn into the Streamlit page via st.pyplot.
+    """
+    prediction_counts = df["Decoded Prediction"].value_counts()
+    fig, ax = plt.subplots(figsize=(10, 6))
+    try:
+        prediction_counts.plot(kind='bar', ax=ax, color='skyblue')
+        ax.set_title('Frequency of Decoded Predictions', fontsize=16)
+        ax.set_xlabel('Category', fontsize=12)
+        ax.set_ylabel('Frequency', fontsize=12)
+        st.pyplot(fig)
+    finally:
+        # pyplot keeps every figure it creates alive until it is closed;
+        # release it so figures do not pile up each time the script reruns.
+        plt.close(fig)
 # Streamlit App Layout
 st.set_page_config(page_title="News Classifier", page_icon="📰")
 cover_image = Image.open("cover.png")  # Ensure this image exists
+st.image(cover_image, caption="News Classifier 📢", use_container_width=True)
 # Section for Single Article Classification
 st.subheader("📰 Single Article Classification")
 if st.button("🔍 Classify"):
     if text_input:
         category, confidence = classify_text(text_input)
+        st.write(f"Predicted Category: {category}")
+        st.write(f"Confidence Level: {confidence}")
+        # Generate word cloud from the text input exactly as entered (it is passed on without cleaning)
+        wordcloud = generate_word_cloud_from_output(pd.DataFrame({"content": [text_input]}))  # Create a DataFrame for single input
+        st.image(wordcloud.to_array(), caption="Word Cloud for Text Input", use_container_width=True)
     else:
         st.warning("Please enter some text to classify.")
             file_name=output_file,
             mime="text/csv"
         )
+        # Generate word cloud for the 'content' column of the processed CSV data
+        wordcloud = generate_word_cloud_from_output(df)
+        st.image(wordcloud.to_array(), caption="Word Cloud for CSV Content", use_container_width=True)
+        # Generate bar graph for decoded predictions frequency
+        generate_bar_graph(df)
     else:
         st.error(f"Error processing file: {output_file}")
 history = []
 user_input = st.text_input("Ask about news classification or topics", placeholder="Type a message...")
 source_toggle = st.radio("Select Context Source", ["Single Article", "Bulk Classification"])
 if st.button("✉ Send"):
+    # Only the source picked in the radio toggle is used as QA context.
+    selected_text = text_input if source_toggle == "Single Article" else None
+    selected_file = file_input if source_toggle == "Bulk Classification" else None
+    if not user_input or not user_input.strip():
+        # The question box itself is empty; there is nothing to ask the model.
+        st.warning("Please type a question first.")
+    elif not selected_text and not selected_file:
+        # Validate the selected context source rather than the question box,
+        # so chatbot_response is never called without any context.
+        st.warning("Please upload your file or provide text input for QA.")
+    else:
+        history, bot_response = chatbot_response(
+            history,
+            user_input,
+            text_input=selected_text,
+            file_input=selected_file
+        )
+        st.write("Chatbot Response:")
+        for q, a in history:
+            st.write(f"Q: {q}")
+            st.write(f"A: {a}")