# Spaces: Sleeping
"""Streamlit app: train an LDA topic model on a small built-in corpus.

The page shows the corpus, lets the user choose the number of topics,
trains a ``LatentDirichletAllocation`` model on demand, lists the top
words of every topic and can pickle the trained model to disk.

Streamlit re-executes this script from top to bottom on every widget
interaction, so anything that has to outlive a single run (the trained
model and its topic summary) is kept in ``st.session_state``.
"""
import pickle

import streamlit as st
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer

# File the trained model is pickled to (relative to the working directory).
MODEL_PATH = "lda_model.pkl"
# Number of highest-weighted words listed for each topic.
TOP_WORDS_PER_TOPIC = 5


def describe_topics(model, feature_names, top_n=TOP_WORDS_PER_TOPIC):
    """Return one ``"Topic k: w1, w2, ..."`` line per topic of *model*.

    Args:
        model: Fitted estimator exposing ``components_`` with one row of
            word weights per topic.
        feature_names: Sequence mapping a column index of ``components_``
            to the corresponding vocabulary term.
        top_n: How many of the highest-weighted terms to list per topic.

    Returns:
        A list of strings, one per topic, numbered from 1.
    """
    lines = []
    for topic_number, weights in enumerate(model.components_, start=1):
        # argsort is ascending; reverse it and keep the first top_n indices
        # to get the heaviest terms in descending order of weight.
        strongest = weights.argsort()[::-1][:top_n]
        words = ", ".join(feature_names[i] for i in strongest)
        lines.append(f"Topic {topic_number}: {words}")
    return lines


# Title
st.title("Unsupervised Text Analysis App with Training")
st.subheader("Train an LDA Model for Topic Modeling")

# Initialize Session State
if "lda_model" not in st.session_state:
    st.session_state.lda_model = None
if "lda_topics" not in st.session_state:
    # Topic summary lines of the model currently held in session state.
    st.session_state.lda_topics = []

# Built-in Dataset
st.write("### Dataset:")
texts = [
    "The economy is experiencing significant growth this year.",
    "Climate change is one of the most pressing global challenges.",
    "Artificial intelligence is transforming industries worldwide.",
    "Renewable energy sources are becoming more popular and cost-effective.",
    "Sports events bring people together and promote cultural exchange.",
    "Advances in medicine have greatly improved life expectancy.",
    "Education plays a critical role in shaping the future of societies.",
    "Travel and tourism contribute significantly to the global economy.",
    "Space exploration inspires innovation and collaboration.",
    "Social media platforms influence public opinion and behavior.",
]

# Display dataset
st.write(texts)

# Input: Number of Topics
st.subheader("Training Parameters")
num_topics = st.slider("Select the number of topics for training", 2, 10, 3)

# Vectorization: word counts with English stop words removed, vocabulary
# capped at 1000 terms.
vectorizer = CountVectorizer(stop_words="english", max_features=1000)
doc_term_matrix = vectorizer.fit_transform(texts)

# Train LDA Model
st.subheader("Training the LDA Model")
if st.button("Train Model"):
    with st.spinner("Training the LDA model..."):
        lda = LatentDirichletAllocation(n_components=num_topics, random_state=42)
        lda.fit(doc_term_matrix)

    # Keep the model and its topic summary in session state: the button is
    # only True for the rerun triggered by the click, so results rendered
    # solely inside this branch would vanish on the next interaction.
    st.session_state.lda_model = lda
    st.session_state.lda_topics = describe_topics(
        lda, vectorizer.get_feature_names_out()
    )
    st.success("Training Completed!")

# Display Topics of the model currently held in session state (this is the
# model that "Save Model" below will write out).
if st.session_state.lda_topics:
    st.write("### Identified Topics:")
    for topic_line in st.session_state.lda_topics:
        st.write(topic_line)

# Save the Trained Model
st.subheader("Save the Trained Model")
if st.button("Save Model"):
    if st.session_state.lda_model is not None:
        # NOTE: only the LDA estimator is pickled; the fitted CountVectorizer
        # is not, so a consumer of the file needs the same vocabulary to
        # transform new text before calling the model.
        with open(MODEL_PATH, "wb") as f:
            pickle.dump(st.session_state.lda_model, f)
        st.success(f"Model saved as `{MODEL_PATH}`.")
    else:
        st.error("Please train the model first before saving.")