Spaces:
Sleeping
Sleeping
| import re | |
| import os | |
| import requests | |
| import jieba | |
| from PIL import Image | |
| from wordcloud import WordCloud | |
| import gradio as gr | |
| from typing import List | |
| FONT_PATH = "NotoSansCJK-Regular.otf" | |
| FONT_URL = "https://github.com/adobe-fonts/source-han-sans/raw/release/OTF/SimplifiedChinese/SourceHanSansSC-Regular.otf" | |
| if not os.path.exists(FONT_PATH): | |
| print("Downloading font...") | |
| with open(FONT_PATH, "wb") as f: | |
| f.write(requests.get(FONT_URL).content) | |
| def tokenize_text(text:str) -> List: | |
| """ | |
| Tokenize the text by utilising the regex, and then segment the text by jieba. | |
| Args: | |
| text: a string mixed with chinese and english. | |
| Returns: | |
| a list of tokens. | |
| """ | |
| compiler = re.findall(r'[\u4e00-\u9fa5]+|[a-zA-Z0-9]+',text) | |
| segmented = [] | |
| for token in compiler: | |
| if re.match(r'[\u4e00-\u9fa5]', token): | |
| segmented += jieba.cut(token) | |
| else: | |
| segmented.append(token.lower()) | |
| return segmented | |
| def generate_world_cloud(text:str) -> Image.Image: | |
| """ | |
| Generate a world cloud from the text. | |
| Args: | |
| text: a string mixed with chinese and english. | |
| Returns: | |
| Image: PIL.Image.Image | |
| """ | |
| segmented = tokenize_text(text) | |
| wc = WordCloud( | |
| font_path=FONT_PATH, | |
| width=800, | |
| height=400, | |
| background_color='white' | |
| ) | |
| return wc.generate(" ".join(segmented)).to_image() | |
| def generate_word_cloud_from_file(file:str) -> Image.Image: | |
| """ | |
| Generate a word cloud from a file. | |
| Args: | |
| file: a file path. | |
| Returns: | |
| Image: PIL.Image.Image | |
| """ | |
| text = open(file, "r", encoding="utf-8").read() | |
| return generate_world_cloud(text) | |
| with gr.Blocks(title="World Cloud Generator") as demo: | |
| gr.Markdown("## Word Cloud Generator\nSupports mixed Chinese-English input via **text** or **.txt file upload**.") | |
| with gr.Tab("Text Input"): | |
| with gr.Row(): | |
| text_input = gr.Textbox(lines=10, label="Input texts") | |
| text_button = gr.Button("Generate Word Cloud") | |
| text_output = gr.Image(type="pil", label="world cloud") | |
| text_button.click(generate_world_cloud, inputs=text_input, outputs=text_output) | |
| with gr.Tab("File Input"): | |
| with gr.Row(): | |
| file_input = gr.File(label="Upload .txt file") | |
| file_button = gr.Button("Generate Word Cloud") | |
| file_output = gr.Image(type="pil", label="world cloud") | |
| file_button.click(generate_word_cloud_from_file, inputs=file_input, outputs=file_output) | |
| demo.launch() | |