File size: 2,000 Bytes
7d0fa43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""
Inference logger.
Writes one JSON line per query to logs/inference.jsonl.
Called as a FastAPI BackgroundTask — it does not block the response.

WHY two-layer logging?
HF Spaces containers are ephemeral — local files are wiped on restart.
Local JSONL is fast for same-session analytics.
In future, add HF Dataset API push here for durable storage.
"""

import hashlib
import json
import logging
import os
from datetime import datetime, timezone

logger = logging.getLogger(__name__)

LOG_PATH = os.getenv("LOG_PATH", "logs/inference.jsonl")


def ensure_log_dir(path=None):
    """
    Create the parent directory of *path* (default: LOG_PATH) if it is missing.

    Bug fix: when the log path has no directory component (e.g. the LOG_PATH
    env var is set to a bare filename like "inference.jsonl"),
    os.path.dirname() returns "" and os.makedirs("") raises FileNotFoundError.
    Skip directory creation in that case — the file will land in the CWD.

    Args:
        path: Log file path whose parent dir should exist. Defaults to the
            module-level LOG_PATH (kept as a lazy lookup so the original
            zero-argument call sites work unchanged).
    """
    if path is None:
        path = LOG_PATH
    parent = os.path.dirname(path)
    if parent:  # "" means no directory component — nothing to create
        os.makedirs(parent, exist_ok=True)


def log_inference(
    query: str,
    session_id: str,
    answer: str,
    num_sources: int,
    verification_status,
    entities: dict,
    latency_ms: float,
    stage: str = "",
    truncated: bool = False,
    out_of_domain: bool = False,
):
    """
    Append one inference record (JSON line) to the log file at LOG_PATH.

    Called as a FastAPI BackgroundTask in api/main.py. It must never block
    or crash the main response, so every failure is swallowed and reported
    only at WARNING level.

    Privacy note: neither the query text nor the answer text is stored —
    only their lengths and a truncated query fingerprint.

    Args:
        query: Raw user query (length + hash recorded, text discarded).
        session_id: Opaque identifier for grouping records per session.
        answer: Generated answer (only its length is recorded).
        num_sources: Number of retrieved sources behind the answer.
        verification_status: Arbitrary status value; stringified for the
            record, and counted as verified when it is True or "verified".
        entities: Mapping of entity type -> extracted values; only the
            keys and the key count are recorded.
        latency_ms: End-to-end latency in milliseconds.
        stage: Optional pipeline-stage label.
        truncated: Whether the answer was truncated.
        out_of_domain: Whether the query was judged out of domain.
    """
    try:
        ensure_log_dir()
        # Bug fix: the builtin hash() is randomized per process
        # (PYTHONHASHSEED), so the old `hash(query) % 100000` produced a
        # different fingerprint for the same query after every restart,
        # defeating the cross-session analytics this log exists for.
        # A truncated SHA-256 is deterministic across runs and processes.
        query_fingerprint = (
            int(hashlib.sha256(query.encode("utf-8")).hexdigest(), 16) % 100000
        )
        record = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "session_id": session_id,
            "query_length": len(query),
            "query_hash": query_fingerprint,
            "num_sources": num_sources,
            "verification_status": str(verification_status),
            "verified": verification_status is True or verification_status == "verified",
            "entities_found": list(entities.keys()) if entities else [],
            "num_entity_types": len(entities) if entities else 0,
            "latency_ms": latency_ms,
            "stage": stage,
            "truncated": truncated,
            "out_of_domain": out_of_domain,
            "answer_length": len(answer),
        }
        with open(LOG_PATH, "a", encoding="utf-8") as f:
            f.write(json.dumps(record) + "\n")
    except Exception as e:
        # Deliberate best-effort: logging must never take down the response.
        logger.warning(f"Inference logging failed: {e}")