Corin1998 committed on
Commit
d6a1cb7
·
verified ·
1 Parent(s): 770385f

Update lib/utils.py

Browse files
Files changed (1) hide show
  1. lib/utils.py +10 -4
lib/utils.py CHANGED
@@ -1,6 +1,6 @@
1
  from datetime import datetime
2
  import pandas as pd
3
- import os
4
 
def now_utc_str() -> str:
    """Return the current UTC time as a compact timestamp string.

    The format is ``YYYYMMDDTHHMMSSZ`` (for example ``20240131T235959Z``).

    Returns:
        The formatted current UTC time.
    """
    # Local import: the module header only imports ``datetime`` itself.
    from datetime import timezone

    # datetime.utcnow() is deprecated (Python 3.12+) and yields a naive value;
    # an aware "now" in UTC formats to exactly the same string.
    return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
@@ -11,11 +11,17 @@ def text_clean(s: str) -> str:
11
  return " ".join(s.split())
12
 
def load_sample_df() -> pd.DataFrame:
    """Return the sample review data as a DataFrame.

    Reads ``data/sample_multilingual_reviews.csv`` (resolved against the
    current working directory) when the file is present; otherwise returns a
    small built-in frame with a single ``text`` column.

    Returns:
        The parsed CSV, or the three-row built-in fallback.
    """
    path = "data/sample_multilingual_reviews.csv"
    try:
        # Read directly rather than testing for existence first: the file
        # cannot disappear between a check and the read, and only one
        # filesystem access is needed.
        return pd.read_csv(path)
    except FileNotFoundError:
        return pd.DataFrame({"text": [
            "音質は良いがアプリが使いづらい",
            "Great battery life, app UX is confusing",
            "El micrófono capta demasiado viento en bici",
        ]})
 
 
 
 
1
  from datetime import datetime
2
  import pandas as pd
3
+ import numpy as np
4
 
def now_utc_str() -> str:
    """Return the current UTC time as a compact timestamp string.

    The format is ``YYYYMMDDTHHMMSSZ`` (for example ``20240131T235959Z``).

    Returns:
        The formatted current UTC time.
    """
    # Local import: the module header only imports ``datetime`` itself.
    from datetime import timezone

    # datetime.utcnow() is deprecated (Python 3.12+) and yields a naive value;
    # an aware "now" in UTC formats to exactly the same string.
    return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
 
11
  return " ".join(s.split())
12
 
def load_sample_df() -> pd.DataFrame:
    """Return the sample review data as a DataFrame.

    Tries to read ``data/sample_multilingual_reviews.csv`` (resolved against
    the current working directory). If the read fails for any reason, a small
    built-in frame with a single ``text`` column is returned instead, so the
    function never raises because of the CSV.

    Returns:
        The parsed CSV, or the five-row built-in fallback.
    """
    import logging

    path = "data/sample_multilingual_reviews.csv"
    try:
        return pd.read_csv(path)
    except FileNotFoundError:
        # A missing file is the expected case for the fallback; stay quiet.
        pass
    except Exception:
        # Still best-effort, but an unreadable or malformed file is worth
        # surfacing instead of being swallowed without a trace.
        logging.getLogger(__name__).warning(
            "Could not read %s; using built-in sample reviews",
            path,
            exc_info=True,
        )
    return pd.DataFrame({"text": [
        "音質は良いがアプリが使いづらい",
        "Great battery life, app UX is confusing",
        "El micrófono capta demasiado viento en bici",
        "ノイズキャンセリングは強力だが風の音に弱い",
        "앱의 초기 튜토리얼が分かりづらい",
    ]})
24
+
def normalize_rows(x: np.ndarray) -> np.ndarray:
    """Scale each row of ``x`` to (approximately) unit Euclidean length.

    Args:
        x: Array or array-like of vectors. For input with two or more
            dimensions the norm is taken along axis 1; a 1-D input is treated
            as a single vector.

    Returns:
        An array with the same shape as ``x`` where every vector has been
        divided by its L2 norm plus a small epsilon. All-zero vectors stay
        all-zero instead of producing a division by zero.
    """
    arr = np.asarray(x)
    # A lone 1-D vector has no axis 1; normalize it along its only axis.
    axis = 1 if arr.ndim > 1 else 0
    # The epsilon keeps the divisor non-zero for all-zero vectors.
    norms = np.linalg.norm(arr, axis=axis, keepdims=True) + 1e-12
    return arr / norms