Spaces:
Sleeping
Sleeping
wenbemi
committed on
Update chat_a.py
Browse files
chat_a.py
CHANGED
|
@@ -2,7 +2,45 @@
|
|
| 2 |
# coding: utf-8
|
| 3 |
|
| 4 |
# In[10]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
import pandas as pd
|
| 8 |
import torch
|
|
@@ -44,14 +82,14 @@ def load_csv_any(p):
|
|
| 44 |
# st.error("TRIPDATA_URL ๋ฏธ์ค์ : Streamlit Secrets์ URL์ ๋ฃ์ด์ฃผ์ธ์.")
|
| 45 |
# st.stop()
|
| 46 |
|
| 47 |
-
travel_df =
|
| 48 |
-
festival_df =
|
| 49 |
-
external_score_df =
|
| 50 |
external_score_df.columns = external_score_df.columns.str.strip()
|
| 51 |
-
weather_df =
|
| 52 |
-
package_df =
|
| 53 |
package_df.columns = package_df.columns.str.strip()
|
| 54 |
-
master_df =
|
| 55 |
|
| 56 |
countries = travel_df["์ฌํ๋๋ผ"].dropna().unique().tolist()
|
| 57 |
cities = travel_df["์ฌํ๋์"].dropna().unique().tolist()
|
|
|
|
| 2 |
# coding: utf-8
|
| 3 |
|
| 4 |
# In[10]:
|
| 5 |
+
# Runtime dependencies: pandas for CSV parsing, huggingface_hub for
# fetching data files from the Hub.
import io
import os

import pandas as pd
from huggingface_hub import hf_hub_download

# The CSV data lives in a Hugging Face *dataset* repo; both knobs can be
# overridden through environment variables (e.g. in Streamlit Secrets).
HF_DATASET_REPO = os.getenv("HF_DATASET_REPO", "emisdfde/moai-travel-data")  # your own repo
HF_DATASET_REV = os.getenv("HF_DATASET_REV", "main")
|
| 10 |
+
|
| 11 |
+
def _is_pointer_bytes(b: bytes) -> bool:
|
| 12 |
+
head = b[:2048].decode(errors="ignore").lower()
|
| 13 |
+
# git-lfs / xet ํฌ์ธํฐ ํ
์คํธ ํจํด ๋ชจ๋ ๊ฐ์ง
|
| 14 |
+
return (
|
| 15 |
+
"version https://git-lfs.github.com/spec/v1" in head or
|
| 16 |
+
"git-lfs" in head or
|
| 17 |
+
"xet" in head or # e.g. "Xet backed hash"
|
| 18 |
+
"pointer size" in head
|
| 19 |
+
)
|
| 20 |
|
| 21 |
+
def _read_csv_bytes(b: bytes) -> pd.DataFrame:
|
| 22 |
+
try:
|
| 23 |
+
return pd.read_csv(io.BytesIO(b), encoding="utf-8")
|
| 24 |
+
except UnicodeDecodeError:
|
| 25 |
+
return pd.read_csv(io.BytesIO(b), encoding="cp949")
|
| 26 |
+
|
| 27 |
+
def load_csv_smart(local_path: str, hub_filename: "str | None" = None,
                   repo_id: str = HF_DATASET_REPO, repo_type: str = "dataset",
                   revision: str = HF_DATASET_REV) -> pd.DataFrame:
    """Load a CSV, preferring a usable local copy over a Hub download.

    Parameters
    ----------
    local_path : path of the CSV on local disk (checked first).
    hub_filename : file name inside the Hub repo. Defaults to the
        basename of ``local_path`` so callers may pass a single name.
    repo_id, repo_type, revision : forwarded to ``hf_hub_download``.

    Returns a ``pandas.DataFrame``. Raises whatever ``hf_hub_download``
    or ``pd.read_csv`` raise when both sources fail.
    """
    if hub_filename is None:
        # Fix: call sites pass only one name; the original signature made
        # hub_filename mandatory and crashed with TypeError at import.
        hub_filename = os.path.basename(local_path)

    # 1) Local file wins — but only if it holds real data, not a
    #    git-lfs / xet pointer stub.
    if os.path.exists(local_path):
        with open(local_path, "rb") as f:
            data = f.read()
        if not _is_pointer_bytes(data):
            return _read_csv_bytes(data)
        # Pointer stub: fall through to the Hub download.

    # 2) Download from the Hub (hf_hub_download reuses its local cache).
    cached = hf_hub_download(repo_id=repo_id, filename=hub_filename,
                             repo_type=repo_type, revision=revision)
    try:
        return pd.read_csv(cached, encoding="utf-8")
    except UnicodeDecodeError:
        # Legacy Korean exports may be CP949-encoded.
        return pd.read_csv(cached, encoding="cp949")
|
| 44 |
|
| 45 |
import pandas as pd
|
| 46 |
import torch
|
|
|
|
| 82 |
# st.error("TRIPDATA_URL ๋ฏธ์ค์ : Streamlit Secrets์ URL์ ๋ฃ์ด์ฃผ์ธ์.")
|
| 83 |
# st.stop()
|
| 84 |
|
| 85 |
+
# Load every data set through load_csv_smart(local_path, hub_filename):
# a real local CSV wins; git-lfs/xet pointer stubs fall back to the Hub.
# Fix: load_csv_smart requires two positional arguments, but these calls
# passed only one (TypeError at import) — the hub filename is now explicit.
travel_df = load_csv_smart("trip_emotions.csv", "trip_emotions.csv")
festival_df = load_csv_smart("festivals.csv", "festivals.csv")
external_score_df = load_csv_smart("external_scores.csv", "external_scores.csv")
external_score_df.columns = external_score_df.columns.str.strip()  # guard against stray header spaces
weather_df = load_csv_smart("weather.csv", "weather.csv")
package_df = load_csv_smart("packages.csv", "packages.csv")
package_df.columns = package_df.columns.str.strip()
master_df = load_csv_smart("countries_cities.csv", "countries_cities.csv")

# NOTE(review): the column-name literals below look mojibake'd by the page
# scrape; they are kept byte-for-byte. Presumably Korean for
# "travel country" / "travel city" — confirm against the real data file.
countries = travel_df["์ฌํ๋๋ผ"].dropna().unique().tolist()
cities = travel_df["์ฌํ๋์"].dropna().unique().tolist()
|