Corin1998's picture
Update lib/utils.py
d6a1cb7 verified
raw
history blame contribute delete
923 Bytes
from datetime import datetime
import pandas as pd
import numpy as np
def now_utc_str() -> str:
    """Return the current UTC time as a compact timestamp string.

    The format is ``YYYYMMDDTHHMMSSZ`` (for example ``20240131T235959Z``).

    Returns:
        The formatted timestamp; always 16 characters long.
    """
    # Local import keeps this function self-contained; the module header
    # only imports ``datetime`` itself.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12; an aware "now" in
    # UTC yields the same wall-clock fields, so the output is unchanged.
    return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
def text_clean(s: str) -> str:
    """Collapse all whitespace in *s* into single ASCII spaces.

    Args:
        s: Text to clean. Any falsy value (``None``, ``""``) is treated as
            an empty string.

    Returns:
        The text with ideographic spaces (U+3000) and newlines turned into
        spaces, every run of whitespace squeezed to one space, and no
        leading or trailing whitespace.
    """
    cleaned = s if s else ""
    for unwanted in ("\u3000", "\n"):
        cleaned = cleaned.replace(unwanted, " ")
    # split() with no separator discards leading/trailing whitespace and
    # treats any whitespace run as one delimiter.
    tokens = cleaned.split()
    return " ".join(tokens)
def load_sample_df() -> pd.DataFrame:
    """Load the sample review data, falling back to built-in examples.

    Tries to read ``data/sample_multilingual_reviews.csv`` (relative to the
    current working directory). If reading fails for any reason, a small
    hard-coded DataFrame with a single ``text`` column is returned instead.

    Returns:
        The DataFrame parsed from the CSV, or the five-row fallback frame.
    """
    csv_path = "data/sample_multilingual_reviews.csv"
    try:
        frame = pd.read_csv(csv_path)
    except Exception:
        # Best-effort on purpose: a missing or unreadable sample file must
        # not stop the caller, so any failure switches to the built-in rows.
        fallback_texts = [
            "音質は良いがアプリが使いづらい",
            "Great battery life, app UX is confusing",
            "El micrófono capta demasiado viento en bici",
            "ノイズキャンセリングは強力だが風の音に弱い",
            "앱의 초기 튜토리얼が分かりづらい",
        ]
        frame = pd.DataFrame({"text": fallback_texts})
    return frame
def normalize_rows(x: np.ndarray) -> np.ndarray:
    """Scale *x* so that each slice along axis 1 has (near) unit L2 norm.

    Args:
        x: Array with at least two dimensions; norms are taken along axis 1.

    Returns:
        ``x`` divided by its per-row Euclidean norm. A tiny epsilon is added
        to every norm, so all-zero rows come back as zeros instead of
        triggering a division by zero.
    """
    eps = 1e-12
    row_lengths = np.linalg.norm(x, axis=1, keepdims=True)
    # keepdims=True leaves the norms shaped (rows, 1) so they broadcast
    # across the columns of x.
    return x / (row_lengths + eps)