# tomoni.helps / collect_feedback.py
# (Hugging Face Space file — commit 6639133 "Update collect_feedback.py" by ai-tomoni)
# collect_feedback.py
import os, json, pandas as pd, fsspec
def collect_feedback():
    """Merge all NDJSON log files from the HF dataset repo into one CSV.

    Reads every ``flat_logs/**/*.ndjson`` file in the dataset repo named by
    the ``HF_DATASET_REPO`` env var (authenticating with ``HF_TOKEN`` if set),
    tags each record with its source path, writes ``all_logs.csv`` in the
    current directory, and returns the DataFrame.

    Returns:
        pandas.DataFrame with one row per log record, or ``None`` when no
        records were found (no CSV is written in that case).
    """
    repo = os.environ.get("HF_DATASET_REPO", "ai-tomoni/julia-dialog-logs")
    hf_token = os.environ.get("HF_TOKEN")  # set in Space secrets if private
    fs = fsspec.filesystem("hf", token=hf_token)
    paths = fs.glob(f"datasets/{repo}/flat_logs/**/*.ndjson")
    print("paths", paths)
    rows = []
    skipped = 0  # malformed lines we tolerated (best-effort ingest)
    for p in paths:
        with fs.open(p, "rb") as f:
            for line in f:
                if not line.strip():
                    continue  # blank/trailing lines are not records
                try:
                    rec = json.loads(line)
                except ValueError:  # covers JSONDecodeError / UnicodeDecodeError
                    skipped += 1
                    continue
                # keep full dict + path for later filtering
                rec["_file"] = p
                rows.append(rec)
    if skipped:
        print(f"⚠️ Skipped {skipped} malformed line(s).")
    if not rows:
        print("⚠️ No records found.")
        return None
    df = pd.DataFrame(rows)
    # optional: sort by timestamp if available
    if "ts" in df.columns:
        df = df.sort_values("ts", na_position="last")
    df.to_csv("all_logs.csv", index=False)
    print(f"✅ all_logs.csv written with {len(df)} records and {len(df.columns)} columns.")
    return df
from huggingface_hub import HfApi, CommitOperationAdd
def push_csv_to_space():
    """Commit the locally generated ``all_logs.csv`` to the Space repo.

    Authenticates with the ``HF_TOKEN`` env var (Space secret) and pushes the
    file to the root of the ``ai-tomoni/tomoni.helps`` Space repository.

    Raises:
        FileNotFoundError: if ``all_logs.csv`` does not exist locally — fail
            fast with a clear message instead of erroring inside the Hub call.
    """
    csv_path = "all_logs.csv"
    if not os.path.exists(csv_path):
        raise FileNotFoundError(
            f"{csv_path} not found — run collect_feedback() first."
        )
    hf_token = os.environ.get("HF_TOKEN")  # <- uses the secret
    api = HfApi(token=hf_token)
    repo_id = "ai-tomoni/tomoni.helps"  # your Space repo id
    api.create_commit(
        repo_id=repo_id,
        repo_type="space",
        operations=[
            CommitOperationAdd(
                path_in_repo="all_logs.csv",
                path_or_fileobj=csv_path,
            )
        ],
        commit_message="Add/update all_logs.csv",
    )
    print("✅ all_logs.csv pushed to repo")
# Script entry point: rebuild the merged CSV when run directly.
if __name__ == "__main__":
    collect_feedback()