wenbemi committed on
Commit
9f04a9f
·
verified ·
1 Parent(s): 61be846

Update chat_a.py

Browse files
Files changed (1) hide show
  1. chat_a.py +44 -6
chat_a.py CHANGED
@@ -2,7 +2,45 @@
2
  # coding: utf-8
3
 
4
  # In[10]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  import pandas as pd
8
  import torch
@@ -44,14 +82,14 @@ def load_csv_any(p):
44
  # st.error("TRIPDATA_URL ๋ฏธ์„ค์ •: Streamlit Secrets์— URL์„ ๋„ฃ์–ด์ฃผ์„ธ์š”.")
45
  # st.stop()
46
 
47
- travel_df = pd.read_csv("ํŠธ๋ฆฝ๋‹ท์ปด_๊ฐ์ •_ํ…Œ๋งˆ_ํ•œ์ค„์„ค๋ช…_ํ†ตํ•ฉ_07_08.csv")
48
- festival_df = pd.read_csv("์ „์ฒ˜๋ฆฌ_ํ†ตํ•ฉ์ง€์—ญ์ถ•์ œ.csv")
49
- external_score_df = pd.read_csv("ํด๋Ÿฌ์Šคํ„ฐ_ํฌํ•จ_์™ธ๋ถ€์š”์ธ_์ข…ํ•ฉ์ ์ˆ˜_๊ฒฐ๊ณผ_์ตœ์ข….csv")
50
  external_score_df.columns = external_score_df.columns.str.strip()
51
- weather_df = pd.read_csv("์ „์ฒ˜๋ฆฌ_๋‚ ์”จ_ํ†ตํ•ฉ_07_08.csv")
52
- package_df = pd.read_csv("๋ชจ๋‘ํˆฌ์–ด_์ปฌ๋Ÿผ๋ณ„_๊ฐœ์ˆ˜_07_08.csv")
53
  package_df.columns = package_df.columns.str.strip()
54
- master_df = pd.read_csv("๋‚˜๋ผ_๋„์‹œ_๋ฆฌ์ŠคํŠธ.csv")
55
 
56
  countries = travel_df["์—ฌํ–‰๋‚˜๋ผ"].dropna().unique().tolist()
57
  cities = travel_df["์—ฌํ–‰๋„์‹œ"].dropna().unique().tolist()
 
2
  # coding: utf-8
3
 
4
  # In[10]:
5
# Imports for the smart CSV loader (stdlib first, then third-party).
import io
import os

import pandas as pd
from huggingface_hub import hf_hub_download

# Hugging Face dataset repo that hosts the CSVs — point this at your own
# repo; both values can be overridden through environment variables.
HF_DATASET_REPO = os.getenv("HF_DATASET_REPO", "emisdfde/moai-travel-data")
HF_DATASET_REV = os.getenv("HF_DATASET_REV", "main")
10
+
11
+ def _is_pointer_bytes(b: bytes) -> bool:
12
+ head = b[:2048].decode(errors="ignore").lower()
13
+ # git-lfs / xet ํฌ์ธํ„ฐ ํ…์ŠคํŠธ ํŒจํ„ด ๋ชจ๋‘ ๊ฐ์ง€
14
+ return (
15
+ "version https://git-lfs.github.com/spec/v1" in head or
16
+ "git-lfs" in head or
17
+ "xet" in head or # e.g. "Xet backed hash"
18
+ "pointer size" in head
19
+ )
20
 
21
+ def _read_csv_bytes(b: bytes) -> pd.DataFrame:
22
+ try:
23
+ return pd.read_csv(io.BytesIO(b), encoding="utf-8")
24
+ except UnicodeDecodeError:
25
+ return pd.read_csv(io.BytesIO(b), encoding="cp949")
26
+
27
def load_csv_smart(local_path: str, hub_filename: str = None,
                   repo_id: str = HF_DATASET_REPO, repo_type: str = "dataset",
                   revision: str = HF_DATASET_REV) -> pd.DataFrame:
    """Load a CSV, preferring a genuine local file, else the HF Hub.

    A local file that is really a Git-LFS/Xet pointer (not real data) is
    ignored and the file is downloaded from the Hub instead. Both local
    and downloaded files are parsed as UTF-8 with a CP949 fallback.

    Parameters
    ----------
    local_path : path of the CSV on local disk, tried first.
    hub_filename : filename inside the Hub repo. Defaults to the basename
        of ``local_path`` — previously this parameter was required, which
        made every one-argument call site raise TypeError at import.
    repo_id / repo_type / revision : Hub download coordinates.

    Raises
    ------
    UnicodeDecodeError if the file decodes as neither UTF-8 nor CP949;
    whatever ``hf_hub_download`` raises on a failed download.
    """
    if hub_filename is None:
        hub_filename = os.path.basename(local_path)

    # 1) Local file first.
    if os.path.exists(local_path):
        with open(local_path, "rb") as f:
            data = f.read()
        if not _is_pointer_bytes(data):
            return _read_csv_bytes(data)
        # File is only an LFS/Xet pointer — fall through to the Hub.

    # 2) Download from the Hub (cached by huggingface_hub).
    cached = hf_hub_download(repo_id=repo_id, filename=hub_filename,
                             repo_type=repo_type, revision=revision)
    try:
        return pd.read_csv(cached, encoding="utf-8")
    except UnicodeDecodeError:
        return pd.read_csv(cached, encoding="cp949")
44
 
45
  import pandas as pd
46
  import torch
 
82
  # st.error("TRIPDATA_URL ๋ฏธ์„ค์ •: Streamlit Secrets์— URL์„ ๋„ฃ์–ด์ฃผ์„ธ์š”.")
83
  # st.stop()
84
 
85
# Dataset loads: each call prefers a real local CSV and falls back to the
# HF dataset repo when the file is missing or is only an LFS/Xet pointer.
# Both arguments are passed explicitly (hub filename mirrors the local
# name) — load_csv_smart's second parameter is otherwise required, and
# one-argument calls would raise TypeError at import time.
travel_df = load_csv_smart("trip_emotions.csv", "trip_emotions.csv")
festival_df = load_csv_smart("festivals.csv", "festivals.csv")
external_score_df = load_csv_smart("external_scores.csv", "external_scores.csv")
external_score_df.columns = external_score_df.columns.str.strip()  # drop padded header whitespace
weather_df = load_csv_smart("weather.csv", "weather.csv")
package_df = load_csv_smart("packages.csv", "packages.csv")
package_df.columns = package_df.columns.str.strip()  # drop padded header whitespace
master_df = load_csv_smart("countries_cities.csv", "countries_cities.csv")

# NOTE(review): these column keys look mojibake-encoded (Korean headers
# shown through a wrong decode); they must match the CSV headers
# byte-for-byte, so they are kept exactly as-is — verify against the data.
countries = travel_df["์—ฌํ–‰๋‚˜๋ผ"].dropna().unique().tolist()
cities = travel_df["์—ฌํ–‰๋„์‹œ"].dropna().unique().tolist()