from datetime import datetime, timezone
from typing import Optional

import numpy as np
import pandas as pd

# Location of the optional sample dataset, relative to the working directory.
_SAMPLE_CSV_PATH = "data/sample_multilingual_reviews.csv"

# Built-in reviews used when the sample CSV cannot be loaded.
_FALLBACK_REVIEWS = (
    "音質は良いがアプリが使いづらい",
    "Great battery life, app UX is confusing",
    "El micrófono capta demasiado viento en bici",
    "ノイズキャンセリングは強力だが風の音に弱い",
    "앱의 초기 튜토리얼が分かりづらい",
)

# Added to every row norm so that all-zero rows do not divide by zero.
_NORM_EPS = 1e-12


def now_utc_str() -> str:
    """Return the current UTC time formatted as ``YYYYMMDDTHHMMSSZ``."""
    # datetime.utcnow() is deprecated (Python 3.12+) and yields a naive
    # datetime; an aware UTC "now" produces the same formatted string.
    return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")


def text_clean(s: Optional[str]) -> str:
    """Collapse every run of whitespace in *s* into a single space.

    Leading and trailing whitespace is removed. ``None`` and the empty
    string both yield ``""``.
    """
    # str.split() without a separator splits on any Unicode whitespace
    # (including the ideographic space U+3000 and newlines) and discards
    # leading/trailing runs, so no separate strip()/replace() is needed.
    return " ".join((s or "").split())


def load_sample_df() -> pd.DataFrame:
    """Load the sample reviews CSV, falling back to built-in examples.

    Returns:
        The DataFrame read from ``data/sample_multilingual_reviews.csv``,
        or, if reading it raises any exception, a DataFrame with a single
        ``"text"`` column holding the built-in sample reviews.
    """
    try:
        return pd.read_csv(_SAMPLE_CSV_PATH)
    except Exception:
        # Deliberate best-effort: whatever goes wrong while reading the file
        # (missing, unreadable, malformed), serve the built-in samples.
        return pd.DataFrame({"text": list(_FALLBACK_REVIEWS)})


def normalize_rows(x: np.ndarray) -> np.ndarray:
    """Divide *x* by its L2 norm taken along axis 1.

    A small epsilon is added to each norm before dividing, so an all-zero
    row stays all-zero instead of becoming NaN.

    Args:
        x: Array with at least two dimensions; for a 2-D array each row is
            scaled by its own Euclidean norm.

    Returns:
        A new array with the same shape as *x*.
    """
    norms = np.linalg.norm(x, axis=1, keepdims=True) + _NORM_EPS
    return x / norms