Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- app.py +53 -0
- doc2bow.sav +0 -0
- ldamodel.sav +0 -0
- requirements.txt +6 -0
app.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import joblib
|
| 4 |
+
from gensim import corpora, models
|
| 5 |
+
from PIL import Image
|
| 6 |
+
|
| 7 |
+
# Load the saved models and data
|
| 8 |
+
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only ship/load artifacts from a trusted source.
# get_topics uses this object to turn a token list into a bag-of-words vector;
# given the file name it is presumably a pickled doc2bow callable rather than a
# bare gensim Dictionary -- TODO confirm against the training script.
dictionary = joblib.load('doc2bow.sav')
|
| 9 |
+
# Topic model used elsewhere in this file through indexing (lda_model[bow])
# and lda_model.show_topic(...); presumably a gensim LdaModel -- TODO confirm.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only ship/load artifacts from a trusted source.
lda_model = joblib.load('ldamodel.sav')
|
| 10 |
+
|
| 11 |
+
# Function to preprocess input text and get topic distribution
|
| 12 |
+
def get_topics(text):
    """Return the topic distribution of ``text`` under the loaded LDA model.

    The text is tokenized with a plain whitespace split (no lowercasing or
    other normalization is applied here), converted to a bag-of-words vector,
    and looked up in the module-level ``lda_model``.

    Args:
        text: Raw input string.

    Returns:
        The result of ``lda_model[bow_vector]``. The caller in this file
        iterates it as ``(topic_id, score)`` pairs.
    """
    tokens = text.split()
    # The original code called the object loaded from 'doc2bow.sav' directly,
    # which only works when that object is itself callable (e.g. a pickled
    # bound ``doc2bow`` method). Prefer a ``doc2bow`` attribute when the
    # loaded object exposes one (e.g. a gensim Dictionary) and fall back to
    # calling the object as before, so both kinds of artifact are accepted.
    to_bow = getattr(dictionary, "doc2bow", dictionary)
    bow_vector = to_bow(tokens)
    return lda_model[bow_vector]
|
| 16 |
+
|
| 17 |
+
# Function to get top keywords for a topic
|
| 18 |
+
def get_top_keywords(topic, num_keywords=10):
    """Build display strings for the leading words of one topic.

    Args:
        topic: Topic identifier forwarded to ``lda_model.show_topic``.
        num_keywords: How many words to request (passed as ``topn``).

    Returns:
        A list of strings shaped like ``"word (0.123)"``, one per
        ``(word, weight)`` pair returned by ``lda_model.show_topic``.
    """
    formatted = []
    for term, score in lda_model.show_topic(topic, topn=num_keywords):
        # Weight is rendered with three decimal places.
        formatted.append(f"{term} ({score:.3f})")
    return formatted
|
| 22 |
+
|
| 23 |
+
# Streamlit app
|
| 24 |
+
def main():
    """Render the Streamlit page for the topic-clustering demo.

    Shows a title, a sidebar with a short description and footer, and a text
    area with a Submit button. On submit, the entered text is passed to
    ``get_topics`` and each returned topic is listed with its score and its
    top keywords from ``get_top_keywords``.
    """
    st.title("Web Berita Topic Clustering Untuk Program Kedaireka UMKM📰")

    # Sidebar with title and description
    st.sidebar.title("Topic Clustering")
    st.sidebar.write("Discover topics in news articles.")

    # Input text area for user to enter their text
    user_input = st.text_area("Enter your text here:", "")

    if st.button("Submit"):
        # Whitespace-only input tokenizes to nothing, so treat it as empty.
        if user_input.strip():
            topics = get_topics(user_input)

            # The section is titled "Top Topics", so list the strongest topic
            # first instead of relying on the order the model returns.
            ranked = sorted(topics, key=lambda pair: pair[1], reverse=True)

            st.subheader("🔥Top Topics🔥")
            for topic_id, score in ranked:
                # Topic ids are shown 1-based to the user.
                st.write(f"**📍Topic {topic_id + 1}** (Score: {score:.4f})")
                st.markdown(", ".join(get_top_keywords(topic_id)))
                st.write("---")
        else:
            # Give feedback rather than silently ignoring the click.
            st.warning("Please enter some text before submitting.")

    # Footer
    st.sidebar.markdown("---")
    st.sidebar.write("© 2023 Web Berita Topic Clustering")
|
| 51 |
+
|
| 52 |
+
# Build the page only when this file is executed as the entry script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
doc2bow.sav
ADDED
|
Binary file (3.87 kB). View file
|
|
|
ldamodel.sav
ADDED
|
Binary file (14.4 kB). View file
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
scikit-learn
|
| 2 |
+
streamlit
|
| 3 |
+
pandas
|
| 4 |
+
joblib
|
| 5 |
+
gensim
|
| 6 |
+
plotly
|