Spaces:
Runtime error
Runtime error
| from bertopic import BERTopic | |
| import streamlit as st | |
| import streamlit.components.v1 as components | |
| from datasets import load_dataset | |
| import pandas as pd | |
| from sentence_transformers import SentenceTransformer | |
| from umap import UMAP | |
| from hdbscan import HDBSCAN | |
| from sklearn.feature_extraction.text import CountVectorizer | |
# Page chrome: browser-tab title and layout options, followed by a centred
# HTML heading (raw HTML requires unsafe_allow_html=True).
st.set_page_config(
    page_title='eRupt Topic Trendy (e-Commerce x Social Media)',
    page_icon=None,
    layout='centered',
    initial_sidebar_state='auto',
)
st.markdown(
    "<h1 style='text-align: center;'>Topic Trendy</h1>",
    unsafe_allow_html=True,
)
| #BerTopic_model = BERTopic.load("my_topics_model") | |
| #sentence_model = SentenceTransformer("all-MiniLM-L6-v2") | |
| #umap_model = UMAP(n_neighbors=15, n_components=2, min_dist=0.1, metric="cosine") | |
| #hdbscan_model = HDBSCAN(min_cluster_size=5, min_samples = 3, metric="euclidean", prediction_data=True) | |
| #vectorizer_model = CountVectorizer(lowercase = True, ngram_range=(1, 3), analyzer="word", max_df=1.0, min_df=0.5, stop_words="english") | |
| #kw_model = BERTopic(embedding_model=sentence_model, umap_model = umap_model, hdbscan_model = hdbscan_model, vectorizer_model = vectorizer_model, nr_topics = "auto", calculate_probabilities = True) | |
| #BerTopic_model = kw_model | |
# Read the TikTok CSV and pull its `date` and `text` columns out as plain
# Python lists; `tiktok` is the document list later passed to fit_transform.
topic = pd.read_csv('./Data/tiktok_utf8.csv')
timestamps = topic["date"].to_list()
tiktok = topic["text"].to_list()

# Bag-of-words vectorizer with English stop words removed, handed to BERTopic
# for its topic representations.
vectorizer_model = CountVectorizer(stop_words="english")
topic_model = BERTopic(
    verbose=True,
    vectorizer_model=vectorizer_model,
)
def fit_transform(model, docs):
    """Fit ``model`` on ``docs`` and return its ``(topics, probs)`` pair.

    Args:
        model: Any object exposing ``fit_transform(docs)`` that returns
            exactly two values (the script passes a BERTopic instance).
        docs: The documents to fit on.

    Returns:
        A tuple ``(topics, probs)`` exactly as produced by the model.
    """
    topics, probs = model.fit_transform(docs)
    return topics, probs
| #topics, probs = fit_transform(topic_model, tiktok) | |
| #topics_over_times = topic_model.topics_over_time(tiktok, topics, timestamps, nr_bins=20) | |
| #topic_model.visualize_topics_over_time(topics_over_times, top_n_topics=30) | |
| #topics, probs = topic_model.fit_transform(tiktok) | |
| #placeholder = st.empty() | |
| #text_input = placeholder.text_area("Enter product topic here", height=300) | |
| #text_input = st.text_area("Enter product topic here", value = "motor") | |
# Sidebar form: widgets inside a form only trigger a rerun when the submit
# button is pressed.
form = st.sidebar.form("Main Settings")
form.header("Main Settings")
ebay_topic = form.selectbox(
    "eBay Products Topic Selection",
    ["Motor", "Bicycle", "Beauty", "Basketball", "Fitness"],
)
# NOTE(review): this value is passed to find_topics(top_n=...), i.e. it is the
# number of similar topics to retrieve, not a text length -- the label is kept
# as-is but looks misleading. min_value=1 prevents zero/negative counts from
# reaching find_topics; integer value/step keep the returned value an int.
top_n = form.number_input(
    "What's the max length of the text?",
    min_value=1,
    value=10,
    step=1,
)
form.form_submit_button("Run")
# Every option runs the same pipeline; only the search term differs, and it is
# always the selected option itself. Anything outside the known options
# (e.g. no selection) falls back to "Motor", as the original else-branch did.
_KNOWN_TOPICS = ("Motor", "Bicycle", "Beauty", "Basketball", "Fitness")
search_term = ebay_topic if ebay_topic in _KNOWN_TOPICS else "Motor"

# Build a fresh model, fit it on the TikTok documents, then look up the
# topics most similar to the chosen product term.
topic_model = BERTopic(verbose=True, vectorizer_model=vectorizer_model)
topics, probs = fit_transform(topic_model, tiktok)
similar_topics, similarity = topic_model.find_topics(search_term, top_n=top_n)
# Show the best-matching topic. All uses of `answer_as_string` stay inside the
# guard so an empty result can never reach the widgets with the name undefined.
if similar_topics:
    # The script treats the first entry as the best match.
    most_similar = similar_topics[0]
    # Despite its name this holds whatever get_topic returns, not a string;
    # it is passed to the text area unchanged.
    answer_as_string = topic_model.get_topic(most_similar)
    st.info("Extracted Topic")
    st.text_area("Most Similar Topic List is Here", answer_as_string, key="topic_list")
else:
    st.warning("No similar topic was found for the selected product.")

st.image('https://freepngimg.com/download/keyboard/6-2-keyboard-png-file.png', use_column_width=True)
| #st.markdown("<h6 style='text-align: center; color: #808080;'>Created By LiHE</a></h6>", unsafe_allow_html=True) | |