alzami committed on
Commit
1f9552e
·
1 Parent(s): aeb1f93

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +53 -0
  2. doc2bow.sav +0 -0
  3. ldamodel.sav +0 -0
  4. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import joblib
4
+ from gensim import corpora, models
5
+ from PIL import Image
6
+
7
+ # Load the saved models and data
8
# Deserialize the two artifacts shipped next to this script, in this order:
# the bag-of-words converter first, then the trained LDA model.
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code
# while loading, so these files must come from a trusted source.
dictionary, lda_model = map(joblib.load, ('doc2bow.sav', 'ldamodel.sav'))
10
+
11
+ # Function to preprocess input text and get topic distribution
12
def get_topics(text):
    """Return the LDA topic distribution for a piece of raw text.

    The text is tokenized on whitespace only (no lowercasing or other
    normalization), converted to a bag-of-words vector, and looked up in the
    module-level ``lda_model``.

    Args:
        text: Raw input string.

    Returns:
        The result of ``lda_model[bow_vector]``; the caller iterates it as
        ``(topic_id, score)`` pairs.
    """
    tokens = text.split()
    # NOTE(review): 'doc2bow.sav' may hold either a gensim Dictionary or a
    # pickled bound ``doc2bow`` method -- confirm which. A Dictionary is not
    # callable, so prefer its ``doc2bow`` method and fall back to calling the
    # loaded object directly when no such attribute exists.
    to_bow = getattr(dictionary, "doc2bow", dictionary)
    bow_vector = to_bow(tokens)
    return lda_model[bow_vector]
16
+
17
+ # Function to get top keywords for a topic
18
def get_top_keywords(topic, num_keywords=10):
    """Format the leading words of one LDA topic for display.

    Args:
        topic: Topic id forwarded to ``lda_model.show_topic``.
        num_keywords: Number of words to request (passed as ``topn``).

    Returns:
        A list of strings of the form ``"word (0.123)"``, one per
        ``(word, weight)`` pair returned by the model, with the weight
        rendered to three decimal places.
    """
    formatted = []
    for term, score in lda_model.show_topic(topic, topn=num_keywords):
        formatted.append(f"{term} ({score:.3f})")
    return formatted
22
+
23
+ # Streamlit app
24
def main():
    """Render the Streamlit page for topic lookup.

    Draws the title and sidebar, collects free text from the user and, once
    "Submit" is pressed with non-empty text, lists every topic returned by
    ``get_topics`` together with its score and top keywords. A sidebar
    footer is always rendered last.
    """
    st.title("Web Berita Topic Clustering Untuk Program Kedaireka UMKM📰")

    # Sidebar heading and short description.
    sidebar = st.sidebar
    sidebar.title("Topic Clustering")
    sidebar.write("Discover topics in news articles.")

    # Free-text input, empty by default.
    user_input = st.text_area("Enter your text here:", "")

    # The button is always rendered; results appear only when it was pressed
    # and the text area is non-empty.
    submitted = st.button("Submit")
    if submitted and user_input:
        topics = get_topics(user_input)

        st.subheader("🔥Top Topics🔥")
        for entry in topics:
            topic_id, score = entry[0], entry[1]
            # Topic ids are shown 1-based to the user.
            st.write(f"**📍Topic {topic_id + 1}** (Score: {score:.4f})")
            st.markdown(", ".join(get_top_keywords(topic_id)))
            st.write("---")

    # Footer.
    sidebar.markdown("---")
    sidebar.write("© 2023 Web Berita Topic Clustering")
51
+
52
# Build the UI only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
doc2bow.sav ADDED
Binary file (3.87 kB). View file
 
ldamodel.sav ADDED
Binary file (14.4 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ scikit-learn
2
+ streamlit
3
+ pandas
4
+ joblib
5
+ gensim
6
+ plotly