"""
wordcloud_service.py — Generate a word-cloud image from a list of texts.
Stripped from the original Colab notebook; only the generation function remains.
"""
| from __future__ import annotations |
|
|
| import io |
| import os |
| import re |
| import numpy as np |
| import matplotlib |
| matplotlib.use("Agg") |
| import matplotlib.pyplot as plt |
| from wordcloud import WordCloud |
|
|
| |
# Optional stopword providers. Each one is best-effort: any failure while
# importing the package or loading its word list downgrades that source to
# an empty list instead of breaking the import of this module.
try:
    from stop_words import get_stop_words
    _stopwords_id = get_stop_words("indonesian")
except Exception:
    _stopwords_id = []

try:
    from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
    _sastrawi_sw = StopWordRemoverFactory().get_stop_words()
except Exception:
    _sastrawi_sw = []

# Hand-picked extra tokens to exclude, on top of the provider lists
# (these look like informal Indonesian particles and abbreviations).
_EXTRA_STOPWORDS = [
    "yg", "ga", "gak", "nggak", "aja", "saja", "nya",
    "oke", "ok", "bgt", "jg",
    "deh", "sih", "kok", "dong", "udah", "ya", "banget",
    "pakai", "jadi", "baru",
]

# Union of every stopword source above.
_BLOCKLIST = {*_stopwords_id, *_sastrawi_sw, *_EXTRA_STOPWORDS}

# The 26 lowercase ASCII letters, so stray one-letter tokens are excluded too.
_SINGLE_LETTERS = {chr(code) for code in range(ord("a"), ord("z") + 1)}

# Final stopword set used by the token filter and passed to WordCloud.
WORDCLOUD_STOPWORDS = _BLOCKLIST.union(_SINGLE_LETTERS)
|
|
|
|
| |
|
|
def _merge_texts(texts: list) -> str:
    """Merge *texts* into one lowercase, space-separated string of kept tokens.

    Items are stringified, joined, lowercased and split on whitespace. A token
    is kept only if it consists solely of the letters a-z, is longer than two
    characters, and is not in ``WORDCLOUD_STOPWORDS``.

    Args:
        texts: Iterable of text items. Falsy items (``None``, ``""``, ``0``)
            and float NaN values are skipped. A single bare string is treated
            as a one-element list.

    Returns:
        The surviving tokens joined by single spaces; ``""`` if none survive.
    """
    # A bare string would otherwise be iterated character by character, and
    # every resulting one-letter "token" would then be filtered out.
    if isinstance(texts, str):
        texts = [texts]

    joined = " ".join(
        str(item)
        for item in texts
        # Float NaN is truthy, so without the explicit check (NaN != NaN) it
        # would be stringified and counted as the word "nan".
        if item and not (isinstance(item, float) and item != item)
    )

    # Compiled once per call rather than looked up for every token.
    alphabetic = re.compile(r'^[a-z]+$')
    kept = [
        word
        for word in joined.lower().split()
        if len(word) > 2
        and word not in WORDCLOUD_STOPWORDS
        and alphabetic.match(word)
    ]
    return " ".join(kept)
|
|
|
|
| def _circular_mask(size: int = 400) -> np.ndarray: |
| x, y = np.ogrid[:size, :size] |
| center = size // 2 |
| radius = center - 10 |
| mask = (x - center) ** 2 + (y - center) ** 2 > radius ** 2 |
| return (255 * mask).astype(np.uint8) |
|
|
|
|
| |
|
|
def generate_wordcloud(texts: list, output_dest) -> bool:
    """
    Generate a circular word cloud from a list of text strings.

    Args:
        texts: list of strings (raw or pre-processed).
        output_dest: where the PNG goes — a filesystem path (``str`` or
            ``os.PathLike``) or a writable binary buffer such as ``BytesIO``.
            For a path, missing parent directories are created and the PNG
            is saved to disk. For a buffer, the PNG bytes are written to it
            and no file is created on disk.

    Returns:
        True on success. False when no texts are given, when every token is
        filtered out, or when preparing the destination, rendering or saving
        raises an exception (the error is printed, not re-raised).
    """
    if not texts:
        print("[WordCloud] No texts provided.")
        return False

    text_data = _merge_texts(texts)
    if not text_data.strip():
        print("[WordCloud] All text was filtered out by stopwords; nothing to plot.")
        return False

    # pathlib-style paths are handled exactly like plain string paths.
    is_path = isinstance(output_dest, (str, os.PathLike))

    fig = None
    try:
        # Inside the try so that a directory-creation failure is reported as
        # False (as documented) instead of escaping as an OSError.
        if is_path:
            output_dir = os.path.dirname(output_dest)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

        # NOTE(review): the wordcloud docs state that width/height are ignored
        # when a mask is supplied, so the layout would follow the 400x400 mask
        # rather than 800x800 — confirm this resolution is intended.
        mask = _circular_mask(400)
        wc = WordCloud(
            width=800,
            height=800,
            background_color="white",
            colormap="viridis",
            mask=mask,
            contour_width=2,
            contour_color="steelblue",
            stopwords=WORDCLOUD_STOPWORDS,
            max_words=100,
        ).generate(text_data)

        fig, ax = plt.subplots(figsize=(8, 8))
        ax.imshow(wc, interpolation="bilinear")
        ax.axis("off")
        # Operate on the figure object itself instead of pyplot's implicit
        # "current figure" state.
        fig.tight_layout(pad=0)
        fig.savefig(output_dest, dpi=150, bbox_inches="tight", format="png")
    except Exception as e:
        print(f"[WordCloud] Error generating wordcloud: {e}")
        return False
    finally:
        # Always release the figure, even when rendering or saving failed;
        # otherwise pyplot keeps it registered and it is never freed.
        if fig is not None:
            plt.close(fig)

    if is_path:
        print(f"[WordCloud] Saved to {output_dest}")
    else:
        print("[WordCloud] Written to in-memory buffer (temporal).")
    return True