Spaces:
Build error
Build error
| import streamlit as st | |
| import re | |
| from kiwipiepy import Kiwi | |
| kiwi = Kiwi() | |
| from collections import Counter | |
| from wordcloud import WordCloud | |
| import matplotlib.pyplot as plt | |
# Part-of-speech tags kept when only noun-like morphemes are requested.
_NOUN_TAGS = frozenset({"NNG", "NNP", "NP", "NR"})
# Tags kept otherwise: the noun-like tags plus predicates (V*), determiners
# (MM) and general adverbs (MAG).
_CONTENT_TAGS = _NOUN_TAGS | {
    "VV", "VX", "VCP", "VCN", "VA", "VA-I", "MM", "MAG",
}
# Matches every character that is NOT whitespace, a Hangul syllable
# (U+AC00-U+D7A3), an ASCII letter or a digit. Escapes are used instead of
# literal Hangul so the pattern survives a mis-encoded copy of this file.
_NOISE_RE = re.compile(r"[^\s\uAC00-\uD7A3a-zA-Z0-9]")
# Dictionary-form ending ("다") appended to predicate stems.
_VERB_ENDING = "\ub2e4"


def kiwi_tokenize(txt, nouns=True, remove1=False, stopwords=None):
    """Tokenize Korean text with Kiwi and return the selected morphemes.

    The text is cleaned (noise characters become spaces), analysed with the
    module-level ``kiwi`` analyzer, filtered by part-of-speech tag, and then
    optionally filtered by token length and by a stopword list.

    Args:
        txt: Text to analyse. Anything that is not a ``str`` yields ``[]``.
        nouns: If true, keep only morphemes tagged NNG, NNP, NP or NR.
            Otherwise also keep predicates, determiners and general adverbs;
            morphemes whose tag starts with ``V`` get the dictionary-form
            ending appended to their stem.
        remove1: If true, drop tokens that are one character long.
        stopwords: Optional iterable of tokens to exclude from the result.

    Returns:
        A list of token strings. The list is empty when the input is not a
        string, contains nothing but noise/whitespace, or the analyzer fails.
    """
    # Best-effort contract: bad input produces no tokens instead of raising.
    if not isinstance(txt, str):
        return []

    cleaned = _NOISE_RE.sub(" ", txt)
    if not cleaned.strip():
        return []

    try:
        morphs = kiwi.tokenize(cleaned)
    except Exception:
        # Keep the best-effort behaviour for analyzer failures, but no longer
        # swallow KeyboardInterrupt/SystemExit as a bare ``except`` would.
        return []

    # Each morpheme is indexed as (form, tag, ...).
    if nouns:
        tokens = [m[0] for m in morphs if m[1] in _NOUN_TAGS]
    else:
        # Stemming applies only here: on the noun path the items are already
        # plain strings and must not be indexed again.
        tokens = [
            m[0] + _VERB_ENDING if m[1].startswith("V") else m[0]
            for m in morphs
            if m[1] in _CONTENT_TAGS
        ]

    if remove1:
        tokens = [t for t in tokens if len(t) > 1]

    if stopwords:
        blocked = set(stopwords)  # built once for O(1) membership tests
        tokens = [t for t in tokens if t not in blocked]

    return tokens
def generate_wordcloud(text):
    """Render a word cloud of the most frequent nouns in *text* via Streamlit.

    The text is tokenized with ``kiwi_tokenize`` (nouns only, one-character
    tokens removed), the 100 most common tokens are kept, and the resulting
    cloud is drawn on a Matplotlib figure that is handed to ``st.pyplot``.

    Args:
        text: Raw text to visualise.

    Returns:
        None. Output is written to the Streamlit page; if no tokens can be
        extracted a warning is shown instead of a figure.
    """
    tokens = kiwi_tokenize(text, nouns=True, remove1=True, stopwords=[])
    frequencies = dict(Counter(tokens).most_common(100))

    # WordCloud raises ValueError when given no words, so stop early.
    if not frequencies:
        st.warning("No words could be extracted from the text.")
        return

    cloud = WordCloud(
        font_path="NanumGothic-Regular.ttf",  # font file path (Hangul-capable)
        background_color="white",
        colormap="Accent_r",  # any Matplotlib colormap name
        width=1500,
        height=1000,  # image size in pixels
    ).generate_from_frequencies(frequencies)

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 8))
    # Draw on the explicit axes rather than pyplot's implicit "current" axes.
    ax.imshow(cloud, interpolation="bilinear")
    ax.axis("off")
    st.pyplot(fig)
    # Release the figure so repeated runs do not accumulate open figures.
    plt.close(fig)
def main():
    """Build the Streamlit page: a text box and a button that draws the cloud.

    When the button is pressed, the entered text is passed to
    ``generate_wordcloud``; if the text box is empty or holds only
    whitespace, a warning is shown instead.
    """
    st.title("워드클라우드(Word Cloud) 만들기")
    st.write("가공할 텍스트를 입력하세요:")
    text_input = st.text_area("텍스트", "")

    if st.button("워드클라우드 시작"):
        # Whitespace-only input carries no words, so treat it as empty.
        if text_input and text_input.strip():
            generate_wordcloud(text_input)
        else:
            st.warning("Please enter some text.")
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()