# Daily Mirror News Analyzer — Streamlit app
# (Recovered from a Hugging Face Spaces page scrape; table markup stripped.)
# Standard library
import io
import os
import re
import tempfile

# Third-party
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import speech_recognition as sr
import streamlit as st
from gtts import gTTS
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from wordcloud import WordCloud
# Fetch the NLTK corpora that the preprocessing step depends on.
for _resource in ("punkt", "stopwords", "wordnet"):
    nltk.download(_resource)

# Shared lemmatizer instance used by preprocess_text().
lemmatizer = WordNetLemmatizer()
# Load models (cache them to avoid reloading on every interaction)
@st.cache_resource
def load_classification_model():
    """Return a text-classification pipeline for the fine-tuned news model.

    The @st.cache_resource decorator makes the promise in the comment above
    actually true: without it, the model and tokenizer were re-downloaded and
    re-instantiated on every Streamlit rerun.

    Returns:
        A transformers "text-classification" pipeline.
    """
    model_name = "Prageeth-1/News_classification.2"  # Replace with your model path
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)
@st.cache_resource
def load_qa_model():
    """Return a cached extractive question-answering pipeline.

    Cached with @st.cache_resource so the RoBERTa model is loaded once per
    session instead of on every button click.
    """
    return pipeline("question-answering", model="deepset/roberta-base-squad2")
def recognize_speech():
    """Capture one utterance from the microphone and transcribe it.

    Listens for up to 5 seconds, transcribes via Google's free speech API,
    and echoes the result into the Streamlit UI.

    Returns:
        The recognized text, or None if nothing usable was captured.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        st.info("Listening... Speak now.")
        try:
            audio = recognizer.listen(source, timeout=5)  # Listen for 5 seconds
            question_text = recognizer.recognize_google(audio)  # Convert speech to text
            st.success(f"You said: {question_text}")  # Show recognized text
            return question_text
        except sr.WaitTimeoutError:
            # listen(timeout=5) raises WaitTimeoutError when no speech starts
            # in time; the original let this propagate and crash the app.
            st.error("No speech detected, please try again.")
        except sr.UnknownValueError:
            st.error("Sorry, could not understand the audio.")
        except sr.RequestError:
            st.error("Could not request results, check your internet connection.")
    # Single failure exit: every error branch falls through to here.
    return None
# Preprocessing function (same as in Section 01)
def preprocess_text(text):
    """Normalise a raw news excerpt for classification.

    Lowercases, strips URLs and non-letter characters, tokenizes, removes
    English stopwords, lemmatizes, and re-joins the surviving tokens into a
    single space-separated string.

    Args:
        text: Raw article/excerpt text.

    Returns:
        The cleaned, space-joined token string.
    """
    # Lowercase
    text = text.lower()
    # Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    # Remove special characters and numbers
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    # Tokenize. BUG FIX: word_tokenize was called without ever being imported
    # (NameError at runtime); the import block now supplies
    # `from nltk.tokenize import word_tokenize`.
    tokens = word_tokenize(text)
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    # Lemmatization
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    # Join tokens back to string
    return ' '.join(tokens)
# Function to generate word cloud
def generate_wordcloud(text, title=None):
    """Render a word cloud for *text* inside the Streamlit app.

    Uses an explicit Figure instead of the pyplot global state and closes it
    after rendering: pyplot keeps references to open figures, so the original
    leaked one figure per call across Streamlit reruns. Also passes the
    figure to st.pyplot explicitly (passing the pyplot module is deprecated).

    Args:
        text: Text to visualise.
        title: Optional title drawn above the cloud.
    """
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis("off")
    if title is not None:
        ax.set_title(title, fontsize=20)
    st.pyplot(fig)
    plt.close(fig)  # release the figure so memory doesn't grow per rerun
# ---------------------------------------------------------------------------
# Page-level configuration and chrome
# NOTE(review): several emoji strings in this file (e.g. "π°") look
# mojibake-garbled by an encoding round-trip — confirm the intended
# characters; they are reproduced here unchanged.
# ---------------------------------------------------------------------------
st.set_page_config(
    page_title="News Analysis Dashboard",
    page_icon="π°",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS for buttons and text inputs, injected once at startup.
_CUSTOM_CSS = """
<style>
.main {
    background-color: #f5f5f5;
}
.stButton>button {
    background-color: #4CAF50;
    color: white;
}
.stDownloadButton>button {
    background-color: #2196F3;
    color: white;
}
.stTextInput>div>div>input {
    background-color: #ffffff;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# App title and description
st.title("π° Daily Mirror News Analyzer")
st.markdown(
    """
Analyze news excerpts with our powerful AI tools:
- Classify news articles into categories
- Get answers to your questions about the news content
- Visualize key themes
"""
)

# One tab per feature group.
tab1, tab2, tab3 = st.tabs(["π News Classification", "β Q&A Pipeline", "β¨ Advanced Features"])
with tab1:
    st.header("News Classification Pipeline")
    st.write("Upload a CSV file containing news excerpts to classify them into categories.")
    # File uploader — the CSV is expected to have a "content" column.
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    # Check the file
    if uploaded_file is None:
        st.warning("Please upload a CSV file.")
    else:
        df = pd.read_csv(uploaded_file)
        # FIX: use the shared (cached) loader. The original rebuilt the
        # pipeline inline with pipeline("text-classification", ...), ignoring
        # load_classification_model() entirely and reloading the model on
        # every rerun.
        classifier = load_classification_model()
        # Classify each article and store the predicted label per row.
        df["predicted_category"] = df["content"].apply(lambda text: classifier(text)[0]["label"])
        # Show results
        st.subheader("Classification Results")
        st.write(df)
        # Show distribution of predicted classes
        st.subheader("Class Distribution")
        class_dist = df['predicted_category'].value_counts()
        st.bar_chart(class_dist)
        # Download button for the annotated CSV
        st.subheader("Download Results")
        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Download output.csv",
            data=csv,
            file_name='output.csv',
            mime='text/csv'
        )
with tab2:
    st.header("Question Answering Pipeline")
    st.write("Ask questions about news content and get answers from our AI model.")
    # FIX: initialise `context` unconditionally. The original only assigned
    # it when a file was uploaded, yet read it in the "Get Answer" condition
    # below — a NameError on first visit without an upload.
    context = None
    if uploaded_file is not None:
        context = ' '.join(df['content'].tolist())  # concatenate all excerpts as QA context
        st.write(f"Loaded {len(df)} news excerpts")
    else:
        st.warning("Please upload a CSV file.")
    question = st.text_input("Enter your question:")
    use_voice = False
    if st.button("π€ Speak"):
        question = recognize_speech()
        use_voice = True
    if st.button("Get Answer") and context and question:
        with st.spinner("Searching for answers..."):
            qa_pipeline = load_qa_model()
            result = qa_pipeline(question=question, context=context)
            st.subheader("Answer")
            st.success(result['answer'])
            st.subheader("Details")
            st.write(f"Confidence: {result['score']:.2f}")
            if use_voice:
                # Speak the answer back when the question came in by voice.
                tts = gTTS(result['answer'])  # Convert text answer to speech
                # `tempfile` was used without being imported; the import
                # block now provides it.
                temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
                temp_audio.close()  # close the handle so save/remove work on all platforms
                tts.save(temp_audio.name)
                # Play the answer
                st.audio(temp_audio.name, format="audio/mp3")
                # Cleanup temp file
                os.remove(temp_audio.name)
with tab3:
    st.header("Advanced Features")
    st.write("Explore additional functionalities to enhance your news analysis.")

    # Sentiment Analysis
    st.subheader("π Sentiment Analysis")
    sentiment_text = st.text_area("Enter text for sentiment analysis:", height=100)
    if st.button("Analyze Sentiment"):
        # Guard: don't invoke the model on empty input (the original did).
        if not sentiment_text.strip():
            st.warning("Please enter some text to analyze.")
        else:
            with st.spinner("Analyzing sentiment..."):
                sentiment_pipeline = pipeline("sentiment-analysis")
                result = sentiment_pipeline(sentiment_text)[0]
                st.write(f"Label: {result['label']}")
                st.write(f"Confidence: {result['score']:.2f}")
                if result['label'] == 'POSITIVE':
                    st.success("This text appears positive!")
                else:
                    st.warning("This text appears negative.")

    # Named Entity Recognition
    st.subheader("π·οΈ Named Entity Recognition")
    ner_text = st.text_area("Enter text for entity recognition:", height=100)
    if st.button("Extract Entities"):
        if not ner_text.strip():
            st.warning("Please enter some text to analyze.")
        else:
            with st.spinner("Identifying entities..."):
                ner_pipeline = pipeline("ner", grouped_entities=True)
                results = ner_pipeline(ner_text)
                entities = [
                    {
                        "Entity": entity['entity_group'],
                        "Word": entity['word'],
                        "Score": entity['score'],
                    }
                    for entity in results
                ]
                st.table(pd.DataFrame(entities))

    # Text Summarization
    st.subheader("βοΈ Text Summarization")
    summary_text = st.text_area("Enter text to summarize:", height=150)
    if st.button("Generate Summary"):
        if not summary_text.strip():
            st.warning("Please enter some text to summarize.")
        else:
            with st.spinner("Generating summary..."):
                summarizer = pipeline("summarization")
                summary = summarizer(summary_text, max_length=130, min_length=30)
                st.write(summary[0]['summary_text'])
# Sidebar with additional info
with st.sidebar:
    st.image("https://via.placeholder.com/150x50?text=Daily+Mirror", width=150)
    st.title("About")
    st.write("""
    This app helps analyze news content using AI-powered tools:
    - Classify news into categories
    - Answer questions about news content
    - Perform advanced text analysis
    """)
    st.title("Instructions")
    # FIX: the instructions said 'excerpt', but the classifier tab reads the
    # 'content' column of the uploaded CSV — keep the user-facing docs in
    # sync with the code.
    st.write("""
    1. Upload a CSV file with 'content' column
    2. Click classify to categorize news
    3. Download results as CSV
    4. Use Q&A tab to ask questions
    """)
    st.title("Model Information")
    st.write("""
    - Classification: Fine-tuned DistilBERT
    - Q&A: RoBERTa-base
    - Sentiment: DistilBERT-base
    """)
    st.markdown("[View model on Hugging Face](https://huggingface.co/your-username/daily-mirror-news-classifier)")

# Footer
st.markdown("---")
st.markdown("Β© 2023 Daily Mirror News Analyzer | Powered by Hugging Face Transformers")