File size: 923 Bytes
63fc25c d6a1cb7 63fc25c d6a1cb7 9d1441e edf2758 d6a1cb7 9d1441e d6a1cb7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 | from datetime import datetime
import pandas as pd
import numpy as np
def now_utc_str() -> str:
    """Return the current UTC time as a compact timestamp string.

    The format is ``YYYYMMDDTHHMMSSZ`` (for example ``20240131T235959Z``),
    which sorts lexicographically in chronological order.

    Returns:
        The formatted UTC timestamp.
    """
    # Imported locally because the module-level import only brings in
    # the ``datetime`` class.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated since Python 3.12 and returns a
    # naive object; an aware UTC datetime formats to the same string.
    return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
def text_clean(s: str) -> str:
    """Normalize whitespace in a piece of text.

    Falsy input (``None`` or ``""``) is treated as an empty string.
    Leading and trailing whitespace is removed, ideographic spaces
    (U+3000) and newlines are turned into plain spaces, and every run of
    whitespace is collapsed to a single space.

    Args:
        s: The text to clean.

    Returns:
        The cleaned, single-spaced text.
    """
    cleaned = (s if s else "").strip()
    # Map full-width spaces and line breaks to ordinary spaces first.
    for separator in ("\u3000", "\n"):
        cleaned = cleaned.replace(separator, " ")
    tokens = cleaned.split()
    return " ".join(tokens)
def load_sample_df(path: str = "data/sample_multilingual_reviews.csv") -> pd.DataFrame:
    """Load the sample multilingual reviews, with a built-in fallback.

    Tries to read the CSV at ``path``. If that fails for any reason, a
    small hard-coded DataFrame with a single ``text`` column is returned
    instead, so callers always get data to work with.

    Args:
        path: Location of the CSV file to read. Defaults to the bundled
            sample file.

    Returns:
        The DataFrame read from ``path``, or the built-in five-row sample
        when the file cannot be loaded.
    """
    # Imported locally so this function does not depend on a module-level
    # ``logging`` import.
    import logging

    try:
        return pd.read_csv(path)
    except Exception as exc:
        # Deliberately best-effort: any load failure falls back to the
        # built-in sample, but the reason is logged rather than silently
        # discarded so a corrupt or misplaced file can be noticed.
        logging.getLogger(__name__).warning(
            "Could not load %s (%s); using built-in sample reviews", path, exc
        )
        return pd.DataFrame({"text": [
            "音質は良いがアプリが使いづらい",
            "Great battery life, app UX is confusing",
            "El micrófono capta demasiado viento en bici",
            "ノイズキャンセリングは強力だが風の音に弱い",
            "앱의 초기 튜토리얼が分かりづらい",
        ]})
def normalize_rows(x: np.ndarray) -> np.ndarray:
    """Scale each row of ``x`` to (approximately) unit Euclidean length.

    The norm is taken along axis 1 and a small epsilon is added to it
    before dividing, so all-zero rows yield zeros rather than NaNs.

    Args:
        x: Array whose rows are to be normalized.

    Returns:
        An array of the same shape as ``x`` with each row divided by its
        norm plus ``1e-12``.
    """
    row_norms = np.linalg.norm(x, axis=1, keepdims=True)
    return x / (row_norms + 1e-12)
|