Spaces:
Sleeping
Sleeping
wenbemi
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,45 +1,54 @@
|
|
| 1 |
-
#
|
| 2 |
-
#
|
| 3 |
-
import os, pathlib, io
|
| 4 |
APP_DIR = pathlib.Path(__file__).parent.resolve()
|
| 5 |
|
| 6 |
-
#
|
| 7 |
-
os.environ
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
os.environ
|
| 11 |
-
os.environ
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
|
|
|
|
| 13 |
|
| 14 |
-
# 디렉터리 보장
|
| 15 |
-
for p in ["/tmp/hf-home", "/tmp/hf-cache", "/tmp/torch-cache", "/tmp/xdg-cache"]:
|
| 16 |
-
os.makedirs(p, exist_ok=True)
|
| 17 |
-
|
| 18 |
from huggingface_hub import hf_hub_download
|
| 19 |
import pandas as pd
|
| 20 |
-
import
|
| 21 |
-
import
|
| 22 |
-
|
| 23 |
-
APP_DIR = pathlib.Path(__file__).parent.resolve()
|
| 24 |
-
os.environ.setdefault("HOME", str(APP_DIR)) # '~'가 /가 아니라 /app으로 가도록
|
| 25 |
-
CONFIG_DIR = APP_DIR / ".streamlit"
|
| 26 |
-
CONFIG_DIR.mkdir(parents=True, exist_ok=True)
|
| 27 |
|
| 28 |
-
|
| 29 |
-
os.environ["STREAMLIT_SERVER_HEADLESS"] = "true"
|
| 30 |
-
os.environ["STREAMLIT_BROWSER_GATHER_USAGE_STATS"] = "false" # 선택: metrics 파일 생성 줄이기
|
| 31 |
|
| 32 |
-
|
|
|
|
| 33 |
HF_DATASET_REV = os.getenv("HF_DATASET_REV", "main")
|
| 34 |
|
| 35 |
def _is_pointer_bytes(b: bytes) -> bool:
|
| 36 |
head = b[:2048].decode(errors="ignore").lower()
|
| 37 |
-
# git-lfs / xet 포인터 텍스트 패턴 모두 감지
|
| 38 |
return (
|
| 39 |
-
"version https://git-lfs.github.com/spec/v1" in head
|
| 40 |
-
"git-lfs" in head
|
| 41 |
-
"xet" in head
|
| 42 |
-
"pointer size" in head
|
| 43 |
)
|
| 44 |
|
| 45 |
def _read_csv_bytes(b: bytes) -> pd.DataFrame:
|
|
@@ -52,88 +61,59 @@ def load_csv_smart(local_path: str,
|
|
| 52 |
hub_filename: str | None = None,
|
| 53 |
repo_id: str = HF_DATASET_REPO,
|
| 54 |
repo_type: str = "dataset",
|
| 55 |
-
revision: str = HF_DATASET_REV):
|
|
|
|
| 56 |
if hub_filename is None:
|
| 57 |
hub_filename = os.path.basename(local_path)
|
|
|
|
| 58 |
if os.path.exists(local_path):
|
| 59 |
with open(local_path, "rb") as f:
|
| 60 |
data = f.read()
|
| 61 |
if not _is_pointer_bytes(data):
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
except UnicodeDecodeError:
|
| 65 |
-
return pd.read_csv(io.BytesIO(data), encoding="cp949")
|
| 66 |
cached = hf_hub_download(repo_id=repo_id, filename=hub_filename,
|
| 67 |
repo_type=repo_type, revision=revision)
|
| 68 |
try:
|
| 69 |
return pd.read_csv(cached, encoding="utf-8")
|
| 70 |
except UnicodeDecodeError:
|
| 71 |
return pd.read_csv(cached, encoding="cp949")
|
| 72 |
-
|
| 73 |
-
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
print(f"Caching {file_path}...") # 캐시가 언제 실행되는지 확인용
|
| 91 |
-
return pd.read_csv(file_path)
|
| 92 |
-
|
| 93 |
-
@st.cache_data
|
| 94 |
-
def load_json_data(file_path):
|
| 95 |
-
print(f"Caching {file_path}...")
|
| 96 |
-
with open(file_path, "r", encoding="utf-8") as f:
|
| 97 |
return json.load(f)
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
# βββββββββββββββββββββββββββββββββββββ λ°μ΄ν° λ‘λ
|
| 118 |
-
# trip_url = st.secrets.get("TRIPDATA_URL")
|
| 119 |
-
# if not trip_url:
|
| 120 |
-
# st.error("TRIPDATA_URL 미설정: Streamlit Secrets에 URL을 넣어주세요.")
|
| 121 |
-
# st.stop()
|
| 122 |
-
|
| 123 |
-
travel_df = load_csv_smart("trip_emotions.csv")
|
| 124 |
-
external_score_df = load_csv_smart("external_scores.csv")
|
| 125 |
-
festival_df = load_csv_smart("festivals.csv")
|
| 126 |
-
weather_df = load_csv_smart("weather.csv")
|
| 127 |
-
package_df = load_csv_smart("packages.csv")
|
| 128 |
-
master_df = load_csv_smart("countries_cities.csv")
|
| 129 |
-
theme_title_phrases = load_json_data("theme_title_phrases.json")
|
| 130 |
-
|
| 131 |
-
# travel_df가 성공적으로 로드되었는지 최종 확인
|
| 132 |
-
if 'μ¬νλλΌ' not in travel_df.columns:
|
| 133 |
-
st.error(f"λ°μ΄ν° λ‘λ© νμλ 'travel_df'μ 'μ¬νλλΌ' 컬λΌμ΄ μμ΅λλ€. μ€μ 컬λΌ: {travel_df.columns.tolist()}")
|
| 134 |
-
st.stop()
|
| 135 |
-
|
| 136 |
from chat_a import (
|
|
|
|
| 137 |
analyze_emotion,
|
| 138 |
detect_intent,
|
| 139 |
extract_themes,
|
|
@@ -157,6 +137,17 @@ from chat_a import (
|
|
| 157 |
format_summary_tags_custom,
|
| 158 |
make_companion_age_message
|
| 159 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
# βββββββββββββββββββββββββββββββββββββ streamlitμ© ν¨μ
|
| 161 |
def init_session():
|
| 162 |
if "chat_log" not in st.session_state:
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
# ──────────────────────────────── BOOTSTRAP (must be first) ────────────────────────────────
|
| 3 |
+
import os, pathlib, io, json, random
|
| 4 |
APP_DIR = pathlib.Path(__file__).parent.resolve()
|
| 5 |
|
| 6 |
+
# Streamlit 홈/설정
|
| 7 |
+
os.environ["HOME"] = str(APP_DIR)
|
| 8 |
+
CONFIG_DIR = APP_DIR / ".streamlit"
|
| 9 |
+
CONFIG_DIR.mkdir(parents=True, exist_ok=True)
|
| 10 |
+
os.environ["STREAMLIT_HOME"] = str(CONFIG_DIR)
|
| 11 |
+
os.environ["STREAMLIT_SERVER_HEADLESS"] = "true"
|
| 12 |
+
os.environ["STREAMLIT_BROWSER_GATHER_USAGE_STATS"] = "false"
|
| 13 |
+
|
| 14 |
+
# HF/Transformers 캐시를 **/data**로 고정 (Spaces에서 쓰기 가능)
|
| 15 |
+
CACHE_ROOT = os.environ.get("HF_CACHE_ROOT", "/data")
|
| 16 |
+
ENV_DIRS = {
|
| 17 |
+
"HF_HOME": f"{CACHE_ROOT}/hf-home",
|
| 18 |
+
"TRANSFORMERS_CACHE": f"{CACHE_ROOT}/hf-cache",
|
| 19 |
+
"HUGGINGFACE_HUB_CACHE": f"{CACHE_ROOT}/hf-cache",
|
| 20 |
+
"TORCH_HOME": f"{CACHE_ROOT}/torch-cache",
|
| 21 |
+
"XDG_CACHE_HOME": f"{CACHE_ROOT}/xdg-cache",
|
| 22 |
+
}
|
| 23 |
+
for k, v in ENV_DIRS.items():
|
| 24 |
+
os.environ[k] = v
|
| 25 |
+
os.makedirs(v, exist_ok=True)
|
| 26 |
+
try:
|
| 27 |
+
os.chmod(v, 0o777)
|
| 28 |
+
except Exception:
|
| 29 |
+
pass
|
| 30 |
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
|
| 31 |
+
os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
from huggingface_hub import hf_hub_download
|
| 34 |
import pandas as pd
|
| 35 |
+
import streamlit as st
|
| 36 |
+
from streamlit.components.v1 import html
|
| 37 |
+
from css import render_message, render_chip_buttons, log_and_render, replay_log
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
+
st.success("π μ±μ΄ μ±κ³΅μ μΌλ‘ μμλμμ΅λλ€! λΌμ΄λΈλ¬λ¦¬ μ€μΉ μ±κ³΅!")
|
|
|
|
|
|
|
| 40 |
|
| 41 |
+
# ──────────────────────────────── Dataset Repo 설정 ────────────────────────────────
|
| 42 |
+
HF_DATASET_REPO = os.getenv("HF_DATASET_REPO", "emisdfde/moai-travel-data")
|
| 43 |
HF_DATASET_REV = os.getenv("HF_DATASET_REV", "main")
|
| 44 |
|
| 45 |
def _is_pointer_bytes(b: bytes) -> bool:
|
| 46 |
head = b[:2048].decode(errors="ignore").lower()
|
|
|
|
| 47 |
return (
|
| 48 |
+
"version https://git-lfs.github.com/spec/v1" in head
|
| 49 |
+
or "git-lfs" in head
|
| 50 |
+
or "xet" in head # e.g. xet 포인터
|
| 51 |
+
or "pointer size" in head
|
| 52 |
)
|
| 53 |
|
| 54 |
def _read_csv_bytes(b: bytes) -> pd.DataFrame:
|
|
|
|
| 61 |
hub_filename: str | None = None,
|
| 62 |
repo_id: str = HF_DATASET_REPO,
|
| 63 |
repo_type: str = "dataset",
|
| 64 |
+
revision: str = HF_DATASET_REV) -> pd.DataFrame:
|
| 65 |
+
# hub_filename 생략 시 로컬 파일명 사용
|
| 66 |
if hub_filename is None:
|
| 67 |
hub_filename = os.path.basename(local_path)
|
| 68 |
+
# 1) 로컬 우선
|
| 69 |
if os.path.exists(local_path):
|
| 70 |
with open(local_path, "rb") as f:
|
| 71 |
data = f.read()
|
| 72 |
if not _is_pointer_bytes(data):
|
| 73 |
+
return _read_csv_bytes(data)
|
| 74 |
+
# 2) 허브 다운로드
|
|
|
|
|
|
|
| 75 |
cached = hf_hub_download(repo_id=repo_id, filename=hub_filename,
|
| 76 |
repo_type=repo_type, revision=revision)
|
| 77 |
try:
|
| 78 |
return pd.read_csv(cached, encoding="utf-8")
|
| 79 |
except UnicodeDecodeError:
|
| 80 |
return pd.read_csv(cached, encoding="cp949")
|
|
|
|
|
|
|
| 81 |
|
| 82 |
+
def load_json_smart(local_path: str,
|
| 83 |
+
hub_filename: str | None = None,
|
| 84 |
+
repo_id: str = HF_DATASET_REPO,
|
| 85 |
+
repo_type: str = "dataset",
|
| 86 |
+
revision: str = HF_DATASET_REV):
|
| 87 |
+
if hub_filename is None:
|
| 88 |
+
hub_filename = os.path.basename(local_path)
|
| 89 |
+
if os.path.exists(local_path):
|
| 90 |
+
with open(local_path, "rb") as f:
|
| 91 |
+
data = f.read()
|
| 92 |
+
if not _is_pointer_bytes(data):
|
| 93 |
+
return json.loads(data.decode("utf-8"))
|
| 94 |
+
cached = hf_hub_download(repo_id=repo_id, filename=hub_filename,
|
| 95 |
+
repo_type=repo_type, revision=revision)
|
| 96 |
+
with open(cached, "r", encoding="utf-8") as f:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
return json.load(f)
|
| 98 |
|
| 99 |
+
# ββββββββββββββββββββββββββββββββ λ°μ΄ν° λ‘λ ββββββββββββββββββββββββββββββββ
|
| 100 |
+
travel_df = load_csv_smart("trip_emotions.csv", "trip_emotions.csv")
|
| 101 |
+
external_score_df = load_csv_smart("external_scores.csv", "external_scores.csv")
|
| 102 |
+
festival_df = load_csv_smart("festivals.csv", "festivals.csv")
|
| 103 |
+
weather_df = load_csv_smart("weather.csv", "weather.csv")
|
| 104 |
+
package_df = load_csv_smart("packages.csv", "packages.csv")
|
| 105 |
+
master_df = load_csv_smart("countries_cities.csv", "countries_cities.csv")
|
| 106 |
+
theme_title_phrases = load_json_smart("theme_title_phrases.json", "theme_title_phrases.json")
|
| 107 |
+
|
| 108 |
+
# 필수 컬럼 가드
|
| 109 |
+
for col in ("μ¬νλλΌ", "μ¬νλμ", "μ¬νμ§"):
|
| 110 |
+
if col not in travel_df.columns:
|
| 111 |
+
st.error(f"'travel_df'μ '{col}' 컬λΌμ΄ μμ΅λλ€. μ€μ 컬λΌ: {travel_df.columns.tolist()}")
|
| 112 |
+
st.stop()
|
| 113 |
+
|
| 114 |
+
# ──────────────────────────────── chat_a import & 초기화 ────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
from chat_a import (
|
| 116 |
+
init_datasets, # ⬅️ 새로 추가된 지연 초기화 함수
|
| 117 |
analyze_emotion,
|
| 118 |
detect_intent,
|
| 119 |
extract_themes,
|
|
|
|
| 137 |
format_summary_tags_custom,
|
| 138 |
make_companion_age_message
|
| 139 |
)
|
| 140 |
+
|
| 141 |
+
# 지연 초기화: import 시점에는 데이터 접근 금지, 여기서 한 번만 주입
|
| 142 |
+
init_datasets(
|
| 143 |
+
travel_df=travel_df,
|
| 144 |
+
festival_df=festival_df,
|
| 145 |
+
external_score_df=external_score_df,
|
| 146 |
+
weather_df=weather_df,
|
| 147 |
+
package_df=package_df,
|
| 148 |
+
master_df=master_df,
|
| 149 |
+
theme_title_phrases=theme_title_phrases,
|
| 150 |
+
)
|
| 151 |
# βββββββββββββββββββββββββββββββββββββ streamlitμ© ν¨μ
|
| 152 |
def init_session():
|
| 153 |
if "chat_log" not in st.session_state:
|