| "Log chat messages and feedback to a dataset." | |
| from typing import Tuple | |
| import os | |
| import tempfile | |
| import ujson | |
| import uuid | |
| import huggingface_hub | |
| import pandas as pd | |
# Hugging Face Hub repo id of the dataset repository that log files are uploaded to
# (passed as `repo_id` together with `repo_type="dataset"`).
# NOTE: the name is misspelled ("DATSET"); kept as-is because it is a module-level
# name that other code may reference.
| LOGS_DATSET_PATH = "logikon/benjamin-logs" | |
async def log_messages(
    messages: Tuple[str, str],
    conversation_id: str,
    step: int,
    metadata: dict = None
):
    """Upload one human/AI message pair as a JSON file to the logs dataset.

    The record is written to
    ``data/<today's date>/<conversation_id>/step_<step>.json`` in the
    dataset repository named by ``LOGS_DATSET_PATH``.

    Args:
        messages: Pair whose first item is stored under ``"human"`` and
            whose second item is stored under ``"ai"``.
        conversation_id: Identifier stored in the record and used as a
            directory name in the repository path.
        step: Step number stored in the record and used in the file name.
        metadata: Optional mapping; stored as a list of ``(key, value)``
            pairs, or as an empty list when missing or empty.

    Raises:
        KeyError: If the ``HF_DATASETS_TOKEN`` environment variable is unset.

    Note:
        ``upload_file`` is called synchronously (it is not awaited), so this
        coroutine does not yield control until the upload call returns.
    """
    record = {
        "conversation_id": conversation_id,
        "step": step,
        "human": messages[0],
        "ai": messages[1],
        "metadata": list(metadata.items()) if metadata else [],
    }

    # Repository paths use "/" regardless of platform; os.path.join would
    # produce backslashes on Windows.
    path_in_repo = "/".join(
        (
            "data",
            pd.Timestamp.now().date().isoformat(),
            conversation_id,
            f"step_{step}.json",
        )
    )

    # Upload the serialized record as in-memory bytes. This avoids handing
    # over a temporary file handle whose position sits at EOF after writing.
    payload = ujson.dumps(record).encode("utf-8")

    api = huggingface_hub.HfApi()
    api.upload_file(
        path_or_fileobj=payload,
        path_in_repo=path_in_repo,
        repo_id=LOGS_DATSET_PATH,
        repo_type="dataset",
        token=os.environ["HF_DATASETS_TOKEN"],
    )
async def log_feedback(
    liked: bool,
    conversation_id: str,
    step: int,
    metadata: dict = None
):
    """Upload a like/dislike feedback record as a JSON file to the logs dataset.

    The record is written to
    ``data/<today's date>/<conversation_id>/feedback_<step>_<uuid4>.json`` in
    the dataset repository named by ``LOGS_DATSET_PATH``. The random UUID
    keeps repeated feedback for the same step from sharing a file name.

    Args:
        liked: Feedback flag stored under ``"liked"``.
        conversation_id: Identifier stored in the record and used as a
            directory name in the repository path.
        step: Step the feedback refers to; stored unchanged in the record.
            May be a plain int or an indexable value (e.g. a tuple), in which
            case its first element is used in the file name.
        metadata: Optional mapping; stored as a list of ``(key, value)``
            pairs, or as an empty list when missing or empty.

    Raises:
        KeyError: If the ``HF_DATASETS_TOKEN`` environment variable is unset.

    Note:
        ``upload_file`` is called synchronously (it is not awaited), so this
        coroutine does not yield control until the upload call returns.
    """
    record = {
        "conversation_id": conversation_id,
        "step": step,
        "liked": liked,
        "metadata": list(metadata.items()) if metadata else [],
    }

    # The original indexed ``step[0]`` unconditionally, which raises TypeError
    # for the annotated ``int``. Keep the indexing for callers that pass a
    # sequence and fall back to the value itself otherwise.
    try:
        step_label = step[0]
    except TypeError:
        step_label = step

    # Repository paths use "/" regardless of platform; os.path.join would
    # produce backslashes on Windows.
    path_in_repo = "/".join(
        (
            "data",
            pd.Timestamp.now().date().isoformat(),
            conversation_id,
            f"feedback_{step_label}_{str(uuid.uuid4())}.json",
        )
    )

    # Upload the serialized record as in-memory bytes. This avoids handing
    # over a temporary file handle whose position sits at EOF after writing.
    payload = ujson.dumps(record).encode("utf-8")

    api = huggingface_hub.HfApi()
    api.upload_file(
        path_or_fileobj=payload,
        path_in_repo=path_in_repo,
        repo_id=LOGS_DATSET_PATH,
        repo_type="dataset",
        token=os.environ["HF_DATASETS_TOKEN"],
    )