# Daily Mirror News Analyzer — Streamlit app
# (Recovered from a Hugging Face Spaces page scrape; table markup stripped.)
# Standard library
import io
import os
import re
import tempfile

# Third-party
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import speech_recognition as sr
import streamlit as st
from gtts import gTTS
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from wordcloud import WordCloud
# Fetch the NLTK corpora that the preprocessing step depends on.
for _resource in ("punkt", "stopwords", "wordnet"):
    nltk.download(_resource)

# Shared lemmatizer instance used by preprocess_text().
lemmatizer = WordNetLemmatizer()
# Load models (cache them to avoid reloading on every interaction)
@st.cache_resource
def load_classification_model():
    """Return a text-classification pipeline for the fine-tuned news model.

    The @st.cache_resource decorator makes the promise in the comment above
    actually true: without it, the model and tokenizer were re-downloaded and
    re-instantiated on every Streamlit rerun.

    Returns:
        A transformers "text-classification" pipeline.
    """
    model_name = "Prageeth-1/News_classification.2"  # Replace with your model path
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)
@st.cache_resource
def load_qa_model():
    """Return a cached extractive question-answering pipeline.

    Cached with @st.cache_resource so the RoBERTa model is loaded once per
    session instead of on every button click.
    """
    return pipeline("question-answering", model="deepset/roberta-base-squad2")
def recognize_speech():
    """Capture one utterance from the microphone and transcribe it.

    Listens for up to 5 seconds, transcribes via Google's free speech API,
    and echoes the result into the Streamlit UI.

    Returns:
        The recognized text, or None if nothing usable was captured.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        st.info("Listening... Speak now.")
        try:
            audio = recognizer.listen(source, timeout=5)  # Listen for 5 seconds
            question_text = recognizer.recognize_google(audio)  # Convert speech to text
            st.success(f"You said: {question_text}")  # Show recognized text
            return question_text
        except sr.WaitTimeoutError:
            # listen(timeout=5) raises WaitTimeoutError when no speech starts
            # in time; the original let this propagate and crash the app.
            st.error("No speech detected, please try again.")
        except sr.UnknownValueError:
            st.error("Sorry, could not understand the audio.")
        except sr.RequestError:
            st.error("Could not request results, check your internet connection.")
    # Single failure exit: every error branch falls through to here.
    return None
# Preprocessing function (same as in Section 01)
def preprocess_text(text):
    """Normalise a raw news excerpt for classification.

    Lowercases, strips URLs and non-letter characters, tokenizes, removes
    English stopwords, lemmatizes, and re-joins the surviving tokens into a
    single space-separated string.

    Args:
        text: Raw article/excerpt text.

    Returns:
        The cleaned, space-joined token string.
    """
    # Lowercase
    text = text.lower()
    # Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    # Remove special characters and numbers
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    # Tokenize. BUG FIX: word_tokenize was called without ever being imported
    # (NameError at runtime); the import block now supplies
    # `from nltk.tokenize import word_tokenize`.
    tokens = word_tokenize(text)
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    # Lemmatization
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    # Join tokens back to string
    return ' '.join(tokens)
# Function to generate word cloud
def generate_wordcloud(text, title=None):
    """Render a word cloud for *text* inside the Streamlit app.

    Uses an explicit Figure instead of the pyplot global state and closes it
    after rendering: pyplot keeps references to open figures, so the original
    leaked one figure per call across Streamlit reruns. Also passes the
    figure to st.pyplot explicitly (passing the pyplot module is deprecated).

    Args:
        text: Text to visualise.
        title: Optional title drawn above the cloud.
    """
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis("off")
    if title is not None:
        ax.set_title(title, fontsize=20)
    st.pyplot(fig)
    plt.close(fig)  # release the figure so memory doesn't grow per rerun
# ---------------------------------------------------------------------------
# Page-level configuration and chrome
# NOTE(review): several emoji strings in this file (e.g. "π°") look
# mojibake-garbled by an encoding round-trip — confirm the intended
# characters; they are reproduced here unchanged.
# ---------------------------------------------------------------------------
st.set_page_config(
    page_title="News Analysis Dashboard",
    page_icon="π°",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS for buttons and text inputs, injected once at startup.
_CUSTOM_CSS = """
<style>
.main {
    background-color: #f5f5f5;
}
.stButton>button {
    background-color: #4CAF50;
    color: white;
}
.stDownloadButton>button {
    background-color: #2196F3;
    color: white;
}
.stTextInput>div>div>input {
    background-color: #ffffff;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# App title and description
st.title("π° Daily Mirror News Analyzer")
st.markdown(
    """
Analyze news excerpts with our powerful AI tools:
- Classify news articles into categories
- Get answers to your questions about the news content
- Visualize key themes
"""
)

# One tab per feature group.
tab1, tab2, tab3 = st.tabs(["π News Classification", "β Q&A Pipeline", "β¨ Advanced Features"])
with tab1:
    st.header("News Classification Pipeline")
    st.write("Upload a CSV file containing news excerpts to classify them into categories.")
    # File uploader — the CSV is expected to have a "content" column.
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    # Check the file
    if uploaded_file is None:
        st.warning("Please upload a CSV file.")
    else:
        df = pd.read_csv(uploaded_file)
        # FIX: use the shared (cached) loader. The original rebuilt the
        # pipeline inline with pipeline("text-classification", ...), ignoring
        # load_classification_model() entirely and reloading the model on
        # every rerun.
        classifier = load_classification_model()
        # Classify each article and store the predicted label per row.
        df["predicted_category"] = df["content"].apply(lambda text: classifier(text)[0]["label"])
        # Show results
        st.subheader("Classification Results")
        st.write(df)
        # Show distribution of predicted classes
        st.subheader("Class Distribution")
        class_dist = df['predicted_category'].value_counts()
        st.bar_chart(class_dist)
        # Download button for the annotated CSV
        st.subheader("Download Results")
        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Download output.csv",
            data=csv,
            file_name='output.csv',
            mime='text/csv'
        )
with tab2:
    st.header("Question Answering Pipeline")
    st.write("Ask questions about news content and get answers from our AI model.")
    # FIX: initialise `context` unconditionally. The original only assigned
    # it when a file was uploaded, yet read it in the "Get Answer" condition
    # below — a NameError on first visit without an upload.
    context = None
    if uploaded_file is not None:
        context = ' '.join(df['content'].tolist())  # concatenate all excerpts as QA context
        st.write(f"Loaded {len(df)} news excerpts")
    else:
        st.warning("Please upload a CSV file.")
    question = st.text_input("Enter your question:")
    use_voice = False
    if st.button("π€ Speak"):
        question = recognize_speech()
        use_voice = True
    if st.button("Get Answer") and context and question:
        with st.spinner("Searching for answers..."):
            qa_pipeline = load_qa_model()
            result = qa_pipeline(question=question, context=context)
            st.subheader("Answer")
            st.success(result['answer'])
            st.subheader("Details")
            st.write(f"Confidence: {result['score']:.2f}")
            if use_voice:
                # Speak the answer back when the question came in by voice.
                tts = gTTS(result['answer'])  # Convert text answer to speech
                # `tempfile` was used without being imported; the import
                # block now provides it.
                temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
                temp_audio.close()  # close the handle so save/remove work on all platforms
                tts.save(temp_audio.name)
                # Play the answer
                st.audio(temp_audio.name, format="audio/mp3")
                # Cleanup temp file
                os.remove(temp_audio.name)
with tab3:
    st.header("Advanced Features")
    st.write("Explore additional functionalities to enhance your news analysis.")

    # Sentiment Analysis
    st.subheader("π Sentiment Analysis")
    sentiment_text = st.text_area("Enter text for sentiment analysis:", height=100)
    if st.button("Analyze Sentiment"):
        # Guard: don't invoke the model on empty input (the original did).
        if not sentiment_text.strip():
            st.warning("Please enter some text to analyze.")
        else:
            with st.spinner("Analyzing sentiment..."):
                sentiment_pipeline = pipeline("sentiment-analysis")
                result = sentiment_pipeline(sentiment_text)[0]
                st.write(f"Label: {result['label']}")
                st.write(f"Confidence: {result['score']:.2f}")
                if result['label'] == 'POSITIVE':
                    st.success("This text appears positive!")
                else:
                    st.warning("This text appears negative.")

    # Named Entity Recognition
    st.subheader("π·οΈ Named Entity Recognition")
    ner_text = st.text_area("Enter text for entity recognition:", height=100)
    if st.button("Extract Entities"):
        if not ner_text.strip():
            st.warning("Please enter some text to analyze.")
        else:
            with st.spinner("Identifying entities..."):
                ner_pipeline = pipeline("ner", grouped_entities=True)
                results = ner_pipeline(ner_text)
                entities = [
                    {
                        "Entity": entity['entity_group'],
                        "Word": entity['word'],
                        "Score": entity['score'],
                    }
                    for entity in results
                ]
                st.table(pd.DataFrame(entities))

    # Text Summarization
    st.subheader("βοΈ Text Summarization")
    summary_text = st.text_area("Enter text to summarize:", height=150)
    if st.button("Generate Summary"):
        if not summary_text.strip():
            st.warning("Please enter some text to summarize.")
        else:
            with st.spinner("Generating summary..."):
                summarizer = pipeline("summarization")
                summary = summarizer(summary_text, max_length=130, min_length=30)
                st.write(summary[0]['summary_text'])
# Sidebar with additional info
with st.sidebar:
    st.image("https://via.placeholder.com/150x50?text=Daily+Mirror", width=150)
    st.title("About")
    st.write("""
    This app helps analyze news content using AI-powered tools:
    - Classify news into categories
    - Answer questions about news content
    - Perform advanced text analysis
    """)
    st.title("Instructions")
    # FIX: the instructions said 'excerpt', but the classifier tab reads the
    # 'content' column of the uploaded CSV — keep the user-facing docs in
    # sync with the code.
    st.write("""
    1. Upload a CSV file with 'content' column
    2. Click classify to categorize news
    3. Download results as CSV
    4. Use Q&A tab to ask questions
    """)
    st.title("Model Information")
    st.write("""
    - Classification: Fine-tuned DistilBERT
    - Q&A: RoBERTa-base
    - Sentiment: DistilBERT-base
    """)
    st.markdown("[View model on Hugging Face](https://huggingface.co/your-username/daily-mirror-news-classifier)")

# Footer
st.markdown("---")
st.markdown("Β© 2023 Daily Mirror News Analyzer | Powered by Hugging Face Transformers")