Create utils.py
Browse files- lib/utils.py +18 -0
lib/utils.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
def now_utc_str() -> str:
    """Return the current UTC time as a compact timestamp string.

    The result has the form ``YYYYMMDDTHHMMSSZ`` (for example
    ``20240131T235959Z``).

    Returns:
        The current UTC time formatted with ``"%Y%m%dT%H%M%SZ"``.
    """
    # Imported locally so this function does not depend on the module-level
    # import line also bringing ``timezone`` into scope.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated since Python 3.12 and yields a naive
    # datetime; an aware "now" in UTC formats to the same string.
    return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
|
| 7 |
+
|
| 8 |
+
def text_clean(s: str) -> str:
    """Lightly normalize whitespace in a piece of text.

    Leading and trailing whitespace is removed, ideographic spaces
    (U+3000) and newlines are turned into ordinary spaces, and every run
    of whitespace is collapsed to a single space.

    Args:
        s: Text to clean. Any falsy value (``None``, ``""``) is treated
            as empty text.

    Returns:
        The cleaned text; an empty string when ``s`` is falsy.
    """
    if not s:
        return ""

    cleaned = s.strip()
    # Light cleansing: map full-width spaces and line breaks to plain spaces.
    for separator in ("\u3000", "\n"):
        cleaned = cleaned.replace(separator, " ")

    # Splitting on whitespace and re-joining collapses repeated spaces.
    tokens = cleaned.split()
    return " ".join(tokens)
|
| 13 |
+
|
| 14 |
+
def load_sample_df() -> pd.DataFrame:
    """Load the sample multilingual review data.

    Reads ``data/sample_multilingual_reviews.csv`` (a relative path, so it
    is resolved against the current working directory). When no file or
    directory exists at that path, a small built-in sample is returned
    instead.

    Returns:
        The CSV contents as a DataFrame, or a two-row DataFrame with a
        single ``text`` column when the CSV path does not exist.
    """
    csv_path = "data/sample_multilingual_reviews.csv"

    if os.path.exists(csv_path):
        return pd.read_csv(csv_path)

    # Built-in fallback: one Japanese and one English review.
    fallback_reviews = [
        "音質は良いがアプリが使いづらい",
        "Great battery life, app UX is confusing",
    ]
    return pd.DataFrame({"text": fallback_reviews})
|