loan-collection / src /storage.py
utkarshshukla2912's picture
Deploy LLM comparison playground
5ceed35
Raw
History Blame Contribute Delete
3.28 kB
"""Persistence for comparison sessions.
Outputs under config.DATA_DIR (set DATA_DIR=/data on Hugging Face Spaces with
persistent storage so they survive restarts):
- sessions/<session_id>.json : the full session, overwritten every turn. Always
complete (even if the user never picks a preferred response).
- requests.jsonl : one line per turn, EVERY request that is made
(appended unconditionally, including turns that errored).
- comparisons.jsonl : one line per turn the user explicitly saves to the
dataset, capturing the chosen `preferred`.
- issues.jsonl : one line per response the user flags as having an
issue.
"""
import json
import os
from datetime import datetime
import config
def _sessions_dir() -> str:
    """Return the ``sessions`` directory under ``config.DATA_DIR``.

    The directory (and any missing parents) is created on demand, so the
    returned path can be written to immediately.
    """
    sessions_root = os.path.join(config.DATA_DIR, "sessions")
    # exist_ok=True keeps repeated calls harmless once the directory exists.
    os.makedirs(sessions_root, exist_ok=True)
    return sessions_root
def _data_path(filename: str) -> str:
    """Return the path of ``filename`` directly inside ``config.DATA_DIR``.

    The data directory is created first if it does not exist yet; the file
    itself is not touched.
    """
    data_root = config.DATA_DIR
    os.makedirs(data_root, exist_ok=True)
    return os.path.join(data_root, filename)
def new_session(system_prompt: str, intro: str, temperature: float, max_tokens: int) -> dict:
    """Build a new in-memory session record; nothing is written to disk.

    The session id is derived from the current local time down to the
    microsecond, and ``created_at`` is the ISO-8601 form of that same instant.
    A trimmed snapshot of every entry in ``config.BACKENDS`` is stored with the
    session; ``turns`` starts out empty.
    """
    created = datetime.now()

    # Only these descriptive fields are copied from each backend entry; a
    # field missing from an entry is recorded as None (via ``.get``).
    exported_fields = ("key", "label", "type", "model", "endpoint_id", "deployment")
    backend_snapshots = [
        {field: backend.get(field) for field in exported_fields}
        for backend in config.BACKENDS
    ]

    return dict(
        session_id=created.strftime("%Y%m%d-%H%M%S-%f"),
        created_at=created.isoformat(),
        system_prompt=system_prompt,
        intro=intro,
        temperature=temperature,
        max_tokens=max_tokens,
        backends=backend_snapshots,
        turns=[],
    )
def save_session(session: dict) -> str:
    """Overwrite the session JSON on disk and return the file path.

    The JSON is first written to a sibling ``<name>.json.tmp`` file and then
    moved over the real file with ``os.replace``. Opening the target directly
    in ``"w"`` mode would truncate the previously saved session before
    serialization starts, so a failure part-way through (for example a
    non-JSON-serializable value in a turn) would destroy the last good copy.

    Args:
        session: Session record; must contain ``"session_id"`` and be
            JSON-serializable.

    Returns:
        Path of the session file that was written.

    Raises:
        KeyError: If ``session`` has no ``"session_id"``.
        TypeError, ValueError: If ``session`` cannot be serialized to JSON.
        OSError: If the file cannot be written or moved into place.
    """
    path = os.path.join(_sessions_dir(), f"{session['session_id']}.json")
    # Same directory as the target so the final rename stays on one filesystem.
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(session, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, path)
    except Exception:
        # Best-effort cleanup of the partial temp file; the original error is
        # what the caller needs to see, so a failed cleanup is ignored.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return path
def _record_for(session: dict, turn: dict) -> dict:
"""Flatten one turn with its session-level context for a JSONL line."""
return {
"session_id": session["session_id"],
"system_prompt": session["system_prompt"],
"intro": session["intro"],
"temperature": session["temperature"],
"max_tokens": session["max_tokens"],
**turn,
}
def append_request_log(session: dict, turn: dict) -> str:
    """Append one turn as a single JSON line to ``requests.jsonl``.

    The line is the turn flattened together with its session-level context.
    Returns the path of the log file.
    """
    log_path = _data_path("requests.jsonl")
    with open(log_path, "a", encoding="utf-8") as log_file:
        entry = _record_for(session, turn)
        serialized = json.dumps(entry, ensure_ascii=False)
        log_file.write(f"{serialized}\n")
    return log_path
def append_to_dataset(session: dict, turn: dict) -> str:
    """Append one turn as a single JSON line to ``comparisons.jsonl``.

    The turn is flattened together with its session-level context, so whatever
    the turn dict carries (such as the preferred pick) ends up in the line.
    Returns the path of the dataset file.
    """
    dataset_path = _data_path("comparisons.jsonl")
    with open(dataset_path, "a", encoding="utf-8") as dataset_file:
        row = _record_for(session, turn)
        serialized = json.dumps(row, ensure_ascii=False)
        dataset_file.write(f"{serialized}\n")
    return dataset_path
def append_issue(record: dict) -> str:
    """Append ``record`` as a single JSON line to ``issues.jsonl``.

    The record is written as given, without adding any session context.
    Returns the path of the issues file.
    """
    issues_path = _data_path("issues.jsonl")
    with open(issues_path, "a", encoding="utf-8") as issues_file:
        serialized = json.dumps(record, ensure_ascii=False)
        issues_file.write(f"{serialized}\n")
    return issues_path