Spaces:
Build error
Build error
"""Streamlit app that runs LDA topic modeling on user-pasted news articles.

The user pastes one article per line; the app vectorizes the articles with
TF-IDF, fits a Latent Dirichlet Allocation model, lists the top words of each
topic, and plots the topic mixture of the first article.
"""
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import matplotlib.pyplot as plt

# Model / display settings.
N_TOPICS = 5               # number of LDA components
TOP_WORDS_PER_TOPIC = 10   # words listed for each topic
MAX_FEATURES = 5000        # vocabulary cap for the TF-IDF vectorizer

# Title and tabs
st.title("Topic Modeling for News Articles")
tab1, tab2 = st.tabs(["LDA Topic Modeling", "About the Dataset"])

# First Tab: Topic Modeling
with tab1:
    st.header("Input Data")

    # Text input for articles
    st.write("Paste your news articles (one article per line):")
    user_input = st.text_area("Enter articles here", height=200)

    if st.button("Analyze Topics"):
        # Convert input into a list of articles, dropping blank lines.
        articles = [
            line.strip() for line in user_input.split("\n") if line.strip()
        ]

        if not articles:
            st.warning("Please input some articles to analyze.")
        else:
            # TF-IDF Vectorization
            vectorizer = TfidfVectorizer(
                stop_words="english", max_features=MAX_FEATURES
            )
            try:
                tfidf_matrix = vectorizer.fit_transform(articles)
            except ValueError:
                # Raised when no term survives tokenization / stop-word
                # removal (empty vocabulary); report it instead of crashing.
                st.error(
                    "No usable words were found after removing English stop "
                    "words. Please enter longer or more varied articles."
                )
            else:
                # LDA Topic Modeling. fit_transform fits the model and
                # returns the per-article topic distribution in one call.
                lda = LatentDirichletAllocation(
                    n_components=N_TOPICS, random_state=42
                )
                topic_distribution = lda.fit_transform(tfidf_matrix)

                # Display topics
                st.subheader("Identified Topics")
                feature_names = vectorizer.get_feature_names_out()
                for idx, topic in enumerate(lda.components_):
                    # argsort is ascending: take the last N indices and
                    # reverse them so the strongest word is listed first.
                    top_indices = topic.argsort()[-TOP_WORDS_PER_TOPIC:][::-1]
                    st.write(
                        f"**Topic {idx + 1}:**",
                        ", ".join(feature_names[i] for i in top_indices),
                    )

                # Visualize topic distribution
                st.subheader("Topic Distribution")
                first_article = topic_distribution[0]
                # 1-based topic numbers so the chart matches the headings.
                topic_numbers = list(range(1, len(first_article) + 1))

                # Use an explicit figure rather than pyplot's global state,
                # and close it afterwards so figures do not pile up across
                # Streamlit reruns.
                fig, ax = plt.subplots(figsize=(10, 5))
                ax.bar(topic_numbers, first_article)
                ax.set_xticks(topic_numbers)
                ax.set_xlabel("Topics")
                ax.set_ylabel("Contribution")
                ax.set_title("Topic Distribution for the First Article")
                st.pyplot(fig)
                plt.close(fig)

# Second Tab: About the Dataset
with tab2:
    st.header("About")
    st.write("This app performs topic modeling on news articles using Latent Dirichlet Allocation (LDA).")
    st.write("Paste articles in the text area, and the app will identify underlying topics.")