# tomoni.helps / collect_feedback.py
# (Hugging Face Space file — commit 6639133 "Update collect_feedback.py" by ai-tomoni)
# collect_feedback.py
import os, json, pandas as pd, fsspec
def collect_feedback():
    """Merge all NDJSON log files from the HF dataset repo into one CSV.

    Reads every ``flat_logs/**/*.ndjson`` file in the dataset repo named by
    the ``HF_DATASET_REPO`` env var (authenticating with ``HF_TOKEN`` if set),
    tags each record with its source path, writes ``all_logs.csv`` in the
    current directory, and returns the DataFrame.

    Returns:
        pandas.DataFrame with one row per log record, or ``None`` when no
        records were found (no CSV is written in that case).
    """
    repo = os.environ.get("HF_DATASET_REPO", "ai-tomoni/julia-dialog-logs")
    hf_token = os.environ.get("HF_TOKEN")  # set in Space secrets if private
    fs = fsspec.filesystem("hf", token=hf_token)
    paths = fs.glob(f"datasets/{repo}/flat_logs/**/*.ndjson")
    print("paths", paths)
    rows = []
    skipped = 0  # malformed lines we tolerated (best-effort ingest)
    for p in paths:
        with fs.open(p, "rb") as f:
            for line in f:
                if not line.strip():
                    continue  # blank/trailing lines are not records
                try:
                    rec = json.loads(line)
                except ValueError:  # covers JSONDecodeError / UnicodeDecodeError
                    skipped += 1
                    continue
                # keep full dict + path for later filtering
                rec["_file"] = p
                rows.append(rec)
    if skipped:
        print(f"⚠️ Skipped {skipped} malformed line(s).")
    if not rows:
        print("⚠️ No records found.")
        return None
    df = pd.DataFrame(rows)
    # optional: sort by timestamp if available
    if "ts" in df.columns:
        df = df.sort_values("ts", na_position="last")
    df.to_csv("all_logs.csv", index=False)
    print(f"✅ all_logs.csv written with {len(df)} records and {len(df.columns)} columns.")
    return df
from huggingface_hub import HfApi, CommitOperationAdd
def push_csv_to_space():
    """Commit the locally generated ``all_logs.csv`` to the Space repo.

    Authenticates with the ``HF_TOKEN`` env var (Space secret) and pushes the
    file to the root of the ``ai-tomoni/tomoni.helps`` Space repository.

    Raises:
        FileNotFoundError: if ``all_logs.csv`` does not exist locally — fail
            fast with a clear message instead of erroring inside the Hub call.
    """
    csv_path = "all_logs.csv"
    if not os.path.exists(csv_path):
        raise FileNotFoundError(
            f"{csv_path} not found — run collect_feedback() first."
        )
    hf_token = os.environ.get("HF_TOKEN")  # <- uses the secret
    api = HfApi(token=hf_token)
    repo_id = "ai-tomoni/tomoni.helps"  # your Space repo id
    api.create_commit(
        repo_id=repo_id,
        repo_type="space",
        operations=[
            CommitOperationAdd(
                path_in_repo="all_logs.csv",
                path_or_fileobj=csv_path,
            )
        ],
        commit_message="Add/update all_logs.csv",
    )
    print("✅ all_logs.csv pushed to repo")
# Script entry point: rebuild the merged CSV when run directly.
if __name__ == "__main__":
    collect_feedback()