Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -66,6 +66,22 @@ AUTO_CONT_NEW_TOKENS = 256 # tokens per continuation step
|
|
| 66 |
def dlog(tag, msg):
    """Print ``[tag] msg`` to stdout, but only while the module-level DEBUG flag is truthy."""
    if not DEBUG:
        return
    print(f"[{tag}] {msg}")
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
# ================== HEAVY IMPORTS ==================
|
| 70 |
import faiss
|
| 71 |
from sentence_transformers import SentenceTransformer
|
|
@@ -916,53 +932,80 @@ def enter_app(first_name, last_name, state):
|
|
| 916 |
state["last_name"] = last_name
|
| 917 |
return gr.update(visible=False), gr.update(visible=True), state, f"Welcome, {first_name}! You can start chatting."
|
| 918 |
|
| 919 |
-
|
| 920 |
-
|
| 921 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 922 |
if not os.path.exists(local_path):
|
| 923 |
-
dlog("UPLOAD", f"Skip: {local_path} does not exist")
|
| 924 |
-
|
| 925 |
-
|
| 926 |
-
dlog("UPLOAD", "Skip: SPACE_REPO_ID not set")
|
| 927 |
-
return
|
| 928 |
if not HF_WRITE_TOKEN:
|
| 929 |
-
dlog("UPLOAD", "Skip: HF_WRITE_TOKEN not set")
|
| 930 |
-
return
|
| 931 |
try:
|
| 932 |
api = HfApi(token=HF_WRITE_TOKEN)
|
| 933 |
api.upload_file(
|
| 934 |
path_or_fileobj=local_path,
|
| 935 |
-
path_in_repo=repo_path,
|
| 936 |
-
repo_id=
|
| 937 |
-
repo_type="
|
| 938 |
commit_message=f"Update {repo_path}",
|
| 939 |
)
|
| 940 |
-
dlog("UPLOAD", f"Uploaded {repo_path} to
|
| 941 |
except Exception as e:
|
| 942 |
dlog("UPLOAD", f"Upload failed: {e}")
|
| 943 |
|
|
|
|
| 944 |
def _push_feedback_to_hub() -> None:
|
| 945 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 946 |
|
| 947 |
def _log_turn(state: Dict[str, Any], question: str, answer: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 948 |
rec = {
|
| 949 |
"timestamp_utc": _now_iso(),
|
| 950 |
"session_id": state.get("session_id", ""),
|
| 951 |
-
"first_name":
|
| 952 |
-
"last_name":
|
| 953 |
-
"question":
|
| 954 |
-
"answer":
|
| 955 |
}
|
|
|
|
|
|
|
| 956 |
try:
|
|
|
|
| 957 |
with open(TRANSCRIPT_PATH, "a", encoding="utf-8") as f:
|
| 958 |
f.write(json.dumps(rec, ensure_ascii=False) + "\n")
|
| 959 |
-
except Exception:
|
| 960 |
-
|
| 961 |
-
|
| 962 |
-
|
| 963 |
-
|
| 964 |
-
|
| 965 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 966 |
|
| 967 |
def save_feedback(rating, comment, state):
|
| 968 |
if rating is None:
|
|
|
|
| 66 |
def dlog(tag, msg):
    """Print ``[tag] msg`` to stdout, but only while the module-level DEBUG flag is truthy."""
    if not DEBUG:
        return
    print(f"[{tag}] {msg}")
|
| 68 |
|
| 69 |
+
|
| 70 |
+
# --- Logging/upload configuration ---
# Both push switches are opt-in: each is True only when its environment
# variable is exactly the string "1"; unset or any other value means OFF.
PUSH_TRANSCRIPTS = os.getenv("PUSH_TRANSCRIPTS", "0") == "1"
PUSH_FEEDBACK = os.getenv("PUSH_FEEDBACK", "0") == "1"

# Write token: HF_WRITE_TOKEN is used whenever that variable is present;
# otherwise HF_TOKEN is the fallback, and "" when neither is set.
HF_WRITE_TOKEN = os.getenv("HF_WRITE_TOKEN", os.getenv("HF_TOKEN", ""))

# Repo id of the dataset that receives uploaded logs; "" means not configured.
LOGS_DATASET_ID = os.getenv("LOGS_DATASET_ID", "")
|
| 75 |
+
|
| 76 |
+
def _log_path(prefix: str, ext: str) -> str:
    """Return a date-stamped file name of the form ``<prefix>-<YYYYMMDD>.<ext>``.

    The date is the current UTC calendar day, e.g. ``transcripts-20250929.jsonl``.

    Args:
        prefix: Leading part of the file name (placed before the dash).
        ext: File extension, given without the leading dot.

    Returns:
        The composed file name; no directory component is added.
    """
    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC "now"
    # formats to exactly the same YYYYMMDD string.
    today = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d")
    return f"{prefix}-{today}.{ext}"
|
| 80 |
+
|
| 81 |
+
# Default log file names carry the current UTC date; setting TRANSCRIPT_PATH or
# FEEDBACK_PATH in the environment replaces the dated default.
# NOTE(review): the dated defaults are computed once, when this module is
# imported, so a process that stays up past midnight UTC keeps using the
# start-day file names -- confirm whether per-write rotation is intended.
TRANSCRIPT_PATH = os.getenv("TRANSCRIPT_PATH", _log_path("transcripts", "jsonl"))
FEEDBACK_PATH = os.getenv("FEEDBACK_PATH", _log_path("feedback", "csv"))
|
| 84 |
+
|
| 85 |
# ================== HEAVY IMPORTS ==================
|
| 86 |
import faiss
|
| 87 |
from sentence_transformers import SentenceTransformer
|
|
|
|
| 932 |
state["last_name"] = last_name
|
| 933 |
return gr.update(visible=False), gr.update(visible=True), state, f"Welcome, {first_name}! You can start chatting."
|
| 934 |
|
| 935 |
+
from huggingface_hub import HfApi
|
| 936 |
+
|
| 937 |
+
def _push_file_to_dataset(local_path: str, repo_path: str) -> None:
    """Best-effort upload of one local file into the logs *dataset* repo.

    The target is a dataset repo rather than the Space's own repo, so that a
    commit does not make the Space rebuild.
    Requires: HF_WRITE_TOKEN, LOGS_DATASET_ID

    Args:
        local_path: Path of the file on disk to upload.
        repo_path: Destination path inside the dataset repo.

    Returns:
        None. A missing file, missing configuration, or an error raised by
        the upload is reported through dlog() instead of being raised.
    """
    # Preconditions are checked in a fixed order; the first one that fails is
    # logged and ends the call.
    if not os.path.exists(local_path):
        dlog("UPLOAD", f"Skip: {local_path} does not exist")
        return
    if not LOGS_DATASET_ID:
        dlog("UPLOAD", "Skip: LOGS_DATASET_ID not set")
        return
    if not HF_WRITE_TOKEN:
        dlog("UPLOAD", "Skip: HF_WRITE_TOKEN not set")
        return

    try:
        upload_args = {
            "path_or_fileobj": local_path,
            "path_in_repo": repo_path,  # keep short names at the root
            "repo_id": LOGS_DATASET_ID,
            "repo_type": "dataset",  # dataset, not space
            "commit_message": f"Update {repo_path}",
        }
        HfApi(token=HF_WRITE_TOKEN).upload_file(**upload_args)
        dlog("UPLOAD", f"Uploaded {repo_path} to dataset {LOGS_DATASET_ID}")
    except Exception as e:
        # Deliberately broad: a failed log upload must not break the caller.
        dlog("UPLOAD", f"Upload failed: {e}")
|
| 960 |
|
| 961 |
+
|
| 962 |
def _push_feedback_to_hub() -> None:
    """Upload the feedback file to the logs dataset repo when PUSH_FEEDBACK is on.

    The name is kept for backward compatibility; the upload goes to a
    *dataset* repo so the Space won't rebuild.
    Requires: PUSH_FEEDBACK=1, HF_WRITE_TOKEN, LOGS_DATASET_ID, and
    _push_file_to_dataset().

    Returns:
        None. Does nothing when PUSH_FEEDBACK is false; any exception raised
        while attempting the upload is logged through dlog() and suppressed.
    """
    if not PUSH_FEEDBACK:
        return
    try:
        # The remote name is the bare file name of FEEDBACK_PATH.
        remote_name = os.path.basename(FEEDBACK_PATH)
        _push_file_to_dataset(FEEDBACK_PATH, remote_name)
        # _push_file_to_dataset() logs its own skip/success/failure and does
        # not raise on upload errors, so claiming "uploaded" here could be
        # false; record only that the attempt was made.
        dlog("UPLOAD", f"Feedback upload attempted: {remote_name}")
    except Exception as e:
        dlog("UPLOAD", f"Feedback upload failed: {e}")
|
| 976 |
+
|
| 977 |
|
| 978 |
def _log_turn(state: Dict[str, Any], question: str, answer: str):
    """Append one chat turn to the local transcript, then optionally upload it.

    The record is written locally first as a single JSON line. When
    PUSH_TRANSCRIPTS is on, the whole transcript file is then handed to
    _push_file_to_dataset(), which targets a *dataset* repo so that logging a
    turn does not commit to the Space repo and trigger a rebuild.

    Args:
        state: Session dict; ``session_id``, ``first_name`` and ``last_name``
            are read with ``""`` as the default for missing keys.
        question: The user's message for this turn.
        answer: The reply produced for this turn.

    Returns:
        None. Failures of the local write or of the upload attempt are logged
        through dlog() and suppressed.
    """
    rec = {
        "timestamp_utc": _now_iso(),
        "session_id": state.get("session_id", ""),
        "first_name": state.get("first_name", ""),
        "last_name": state.get("last_name", ""),
        "question": question,
        "answer": answer,
    }

    # Local append (create the parent directory first; "." when the path has none).
    try:
        os.makedirs(os.path.dirname(TRANSCRIPT_PATH) or ".", exist_ok=True)
        with open(TRANSCRIPT_PATH, "a", encoding="utf-8") as f:
            f.write(json.dumps(rec, ensure_ascii=False) + "\n")
    except Exception as e:
        dlog("LOG", f"Failed to write transcript locally: {e}")

    # Optional remote upload (dataset repo -> no Space rebuild).
    if PUSH_TRANSCRIPTS:
        try:
            remote_name = os.path.basename(TRANSCRIPT_PATH)
            _push_file_to_dataset(TRANSCRIPT_PATH, remote_name)
            # _push_file_to_dataset() logs its own skip/success/failure and
            # does not raise on upload errors, so claiming "uploaded" here
            # could be false; record only that the attempt was made.
            dlog("UPLOAD", f"Transcript upload attempted: {remote_name}")
        except Exception as e:
            dlog("UPLOAD", f"Transcript upload failed: {e}")
|
| 1008 |
+
|
| 1009 |
|
| 1010 |
def save_feedback(rating, comment, state):
|
| 1011 |
if rating is None:
|