Slaiwala committed on
Commit
7c85f49
·
verified ·
1 Parent(s): e10654a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -26
app.py CHANGED
@@ -66,6 +66,22 @@ AUTO_CONT_NEW_TOKENS = 256 # tokens per continuation step
66
  def dlog(tag, msg):
67
  if DEBUG: print(f"[{tag}] {msg}")
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  # ================== HEAVY IMPORTS ==================
70
  import faiss
71
  from sentence_transformers import SentenceTransformer
@@ -916,53 +932,80 @@ def enter_app(first_name, last_name, state):
916
  state["last_name"] = last_name
917
  return gr.update(visible=False), gr.update(visible=True), state, f"Welcome, {first_name}! You can start chatting."
918
 
919
- def _push_file_to_hub(local_path: str, repo_path: str) -> None:
920
- if not PUSH_FEEDBACK:
921
- return
 
 
 
 
922
  if not os.path.exists(local_path):
923
- dlog("UPLOAD", f"Skip: {local_path} does not exist")
924
- return
925
- if not SPACE_REPO_ID:
926
- dlog("UPLOAD", "Skip: SPACE_REPO_ID not set")
927
- return
928
  if not HF_WRITE_TOKEN:
929
- dlog("UPLOAD", "Skip: HF_WRITE_TOKEN not set")
930
- return
931
  try:
932
  api = HfApi(token=HF_WRITE_TOKEN)
933
  api.upload_file(
934
  path_or_fileobj=local_path,
935
- path_in_repo=repo_path,
936
- repo_id=SPACE_REPO_ID,
937
- repo_type="space",
938
  commit_message=f"Update {repo_path}",
939
  )
940
- dlog("UPLOAD", f"Uploaded {repo_path} to Hub")
941
  except Exception as e:
942
  dlog("UPLOAD", f"Upload failed: {e}")
943
 
 
944
  def _push_feedback_to_hub() -> None:
945
- _push_file_to_hub(FEEDBACK_PATH, "analytics/feedback.csv")
 
 
 
 
 
 
 
 
 
 
 
 
 
946
 
947
  def _log_turn(state: Dict[str, Any], question: str, answer: str):
 
 
 
 
948
  rec = {
949
  "timestamp_utc": _now_iso(),
950
  "session_id": state.get("session_id", ""),
951
- "first_name": state.get("first_name", ""),
952
- "last_name": state.get("last_name", ""),
953
- "question": question,
954
- "answer": answer,
955
  }
 
 
956
  try:
 
957
  with open(TRANSCRIPT_PATH, "a", encoding="utf-8") as f:
958
  f.write(json.dumps(rec, ensure_ascii=False) + "\n")
959
- except Exception:
960
- pass
961
- try:
962
- if PUSH_TRANSCRIPTS:
963
- _push_file_to_hub(TRANSCRIPT_PATH, "analytics/transcripts.jsonl")
964
- except Exception:
965
- pass
 
 
 
 
 
966
 
967
  def save_feedback(rating, comment, state):
968
  if rating is None:
 
66
  def dlog(tag, msg):
67
  if DEBUG: print(f"[{tag}] {msg}")
68
 
69
+
70
+ # --- Logging/upload config (OFF by default unless env says "1")
71
+ PUSH_TRANSCRIPTS = os.environ.get("PUSH_TRANSCRIPTS", "0") == "1"
72
+ PUSH_FEEDBACK = os.environ.get("PUSH_FEEDBACK", "0") == "1"
73
+ HF_WRITE_TOKEN = os.environ.get("HF_WRITE_TOKEN", os.environ.get("HF_TOKEN", ""))
74
+ LOGS_DATASET_ID = os.environ.get("LOGS_DATASET_ID", "")
75
+
76
+ def _log_path(prefix: str, ext: str) -> str:
77
+ # rotate daily, e.g., transcripts-20250929.jsonl
78
+ today = datetime.datetime.utcnow().strftime("%Y%m%d")
79
+ return f"{prefix}-{today}.{ext}"
80
+
81
+ # dated default names unless overridden via env (evaluated once at import, so the date only changes on restart)
82
+ TRANSCRIPT_PATH = os.environ.get("TRANSCRIPT_PATH", _log_path("transcripts", "jsonl"))
83
+ FEEDBACK_PATH = os.environ.get("FEEDBACK_PATH", _log_path("feedback", "csv"))
84
+
85
  # ================== HEAVY IMPORTS ==================
86
  import faiss
87
  from sentence_transformers import SentenceTransformer
 
932
  state["last_name"] = last_name
933
  return gr.update(visible=False), gr.update(visible=True), state, f"Welcome, {first_name}! You can start chatting."
934
 
935
+ from huggingface_hub import HfApi
936
+
937
+ def _push_file_to_dataset(local_path: str, repo_path: str) -> None:
938
+ """
939
+ Upload a local file to a *dataset* repo so the Space doesn't rebuild.
940
+ Requires: HF_WRITE_TOKEN, LOGS_DATASET_ID
941
+ """
942
  if not os.path.exists(local_path):
943
+ dlog("UPLOAD", f"Skip: {local_path} does not exist"); return
944
+ if not LOGS_DATASET_ID:
945
+ dlog("UPLOAD", "Skip: LOGS_DATASET_ID not set"); return
 
 
946
  if not HF_WRITE_TOKEN:
947
+ dlog("UPLOAD", "Skip: HF_WRITE_TOKEN not set"); return
 
948
  try:
949
  api = HfApi(token=HF_WRITE_TOKEN)
950
  api.upload_file(
951
  path_or_fileobj=local_path,
952
+ path_in_repo=repo_path, # keep short names at the root
953
+ repo_id=LOGS_DATASET_ID,
954
+ repo_type="dataset", # <- key change: dataset, not space
955
  commit_message=f"Update {repo_path}",
956
  )
957
+ dlog("UPLOAD", f"Uploaded {repo_path} to dataset {LOGS_DATASET_ID}")
958
  except Exception as e:
959
  dlog("UPLOAD", f"Upload failed: {e}")
960
 
961
+
962
  def _push_feedback_to_hub() -> None:
963
+ """
964
+ Back-compat name, but uploads to a *dataset* repo so the Space won't rebuild.
965
+ Requires: PUSH_FEEDBACK=1, HF_WRITE_TOKEN, LOGS_DATASET_ID, and _push_file_to_dataset().
966
+ """
967
+ if not PUSH_FEEDBACK:
968
+ return
969
+ try:
970
+ # Remote name is the basename of FEEDBACK_PATH (dated default or env override)
971
+ remote_name = os.path.basename(FEEDBACK_PATH)
972
+ _push_file_to_dataset(FEEDBACK_PATH, remote_name)
973
+ dlog("UPLOAD", f"Feedback uploaded: {remote_name}")
974
+ except Exception as e:
975
+ dlog("UPLOAD", f"Feedback upload failed: {e}")
976
+
977
 
978
  def _log_turn(state: Dict[str, Any], question: str, answer: str):
979
+ """
980
+ Append locally first, then (optionally) upload to a *dataset* repo.
981
+ This avoids commits to the Space repo and prevents rebuilds after each turn.
982
+ """
983
  rec = {
984
  "timestamp_utc": _now_iso(),
985
  "session_id": state.get("session_id", ""),
986
+ "first_name": state.get("first_name", ""),
987
+ "last_name": state.get("last_name", ""),
988
+ "question": question,
989
+ "answer": answer,
990
  }
991
+
992
+ # Local append (ensure dir exists)
993
  try:
994
+ os.makedirs(os.path.dirname(TRANSCRIPT_PATH) or ".", exist_ok=True)
995
  with open(TRANSCRIPT_PATH, "a", encoding="utf-8") as f:
996
  f.write(json.dumps(rec, ensure_ascii=False) + "\n")
997
+ except Exception as e:
998
+ dlog("LOG", f"Failed to write transcript locally: {e}")
999
+
1000
+ # Optional remote upload (dataset repo -> no Space rebuild)
1001
+ if PUSH_TRANSCRIPTS:
1002
+ try:
1003
+ remote_name = os.path.basename(TRANSCRIPT_PATH)
1004
+ _push_file_to_dataset(TRANSCRIPT_PATH, remote_name)
1005
+ dlog("UPLOAD", f"Transcript uploaded: {remote_name}")
1006
+ except Exception as e:
1007
+ dlog("UPLOAD", f"Transcript upload failed: {e}")
1008
+
1009
 
1010
  def save_feedback(rating, comment, state):
1011
  if rating is None: