hicxai-condition-2 / src /data_logger.py
Suvh
Update to v1.1-chatty-luna (2025-12-07)
070061f
"""
Data Logger for HicXAI Research
Tracks user interactions, application data, and behavior metrics
Saves to private GitHub repository: hicxai-data-private
"""
import json
import os
from datetime import datetime
from typing import Optional, Dict, Any, List
import streamlit as st
import requests
class DataLogger:
    """Collect one participant session and persist it as a JSON document.

    The logger accumulates interaction events, application form values and
    simple behaviour counters in memory. ``save_to_github`` uploads the
    assembled record to the private data repository and falls back to a
    local file under ``data/sessions`` whenever the upload is not possible.
    """

    # Interaction types that increment exactly one counter in behavior_metrics.
    _COUNTER_BY_TYPE = {
        "help_click": "help_clicks",
        "explanation_request": "explanation_requests",
        "progress_check": "progress_checks",
    }

    # Characters allowed verbatim in file-name components built from IDs.
    _FILENAME_SAFE_CHARS = frozenset(
        "abcdefghijklmnopqrstuvwxyz"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "0123456789-_"
    )

    _GITHUB_REPO = "ksauka/hicxai-data-private"

    # Upper bound for each GitHub request so a stalled connection cannot
    # block the caller indefinitely.
    _HTTP_TIMEOUT_SECONDS = 10

    def __init__(self, prolific_id: str, condition: int, session_id: str):
        """Start a new, empty session record.

        Args:
            prolific_id: Participant identifier (stored as given).
            condition: Experimental condition number.
            session_id: Identifier of the current session.
        """
        self.prolific_id = prolific_id
        self.condition = condition
        self.session_id = session_id
        self.session_start = datetime.now().isoformat()
        self.interactions: List[Dict] = []
        self.application_data: Dict = {}
        # Initialised here so the attribute always exists, even when
        # set_feedback() is never called.
        self.feedback_data: Optional[Dict[str, Any]] = None
        self.behavior_metrics = {
            "total_messages": 0,
            "typed_responses": 0,
            "clicked_responses": 0,
            "help_clicks": 0,
            "explanation_requests": 0,
            "progress_checks": 0,
            "fields_changed": 0,
        }

    @classmethod
    def _safe_name(cls, value: Any) -> str:
        """Return ``value`` reduced to characters that are safe in a file name.

        Every character outside ASCII letters, digits, ``-`` and ``_`` is
        replaced by ``_`` so an ID taken from the URL cannot inject path
        separators or ``..`` segments. An empty result becomes ``"unknown"``.
        """
        cleaned = "".join(
            ch if ch in cls._FILENAME_SAFE_CHARS else "_" for ch in str(value)
        )
        return cleaned or "unknown"

    def log_interaction(self, interaction_type: str, content: Dict[str, Any]):
        """Append one interaction event and update the behaviour counters.

        Args:
            interaction_type: Event kind, e.g. ``"user_message"``.
            content: Extra event fields merged into the stored record. Keys
                named ``"timestamp"`` or ``"type"`` in ``content`` override
                the generated values because ``content`` is merged last.
        """
        self.interactions.append({
            "timestamp": datetime.now().isoformat(),
            "type": interaction_type,
            **content,
        })

        metrics = self.behavior_metrics
        if interaction_type == "user_message":
            metrics["total_messages"] += 1
            input_method = content.get("input_method")
            if input_method == "typed":
                metrics["typed_responses"] += 1
            elif input_method == "clicked":
                metrics["clicked_responses"] += 1
            return

        counter = self._COUNTER_BY_TYPE.get(interaction_type)
        if counter is not None:
            metrics[counter] += 1

    def update_application_data(self, field: str, value: Any):
        """Store ``value`` for ``field``, counting changes to existing fields.

        ``fields_changed`` is incremented only when the field already had a
        value and the new value differs from it.
        """
        previous = self.application_data
        if field in previous and previous[field] != value:
            self.behavior_metrics["fields_changed"] += 1
        previous[field] = value

    def set_prediction(self, prediction: str, probability: float):
        """Record the final prediction and its probability."""
        self.application_data["prediction"] = prediction
        self.application_data["prediction_probability"] = probability

    def set_feedback(self, feedback_data: Dict[str, Any]):
        """Record the participant's feedback."""
        self.feedback_data = feedback_data

    def build_final_data(self) -> Dict[str, Any]:
        """Assemble the complete session record that gets saved.

        Returns:
            A dict with session identifiers, A/B configuration, timestamps
            (including the duration in seconds), application data,
            interactions, behaviour metrics and feedback (``None`` when no
            feedback was set).
        """
        end_dt = datetime.now()
        start_dt = datetime.fromisoformat(self.session_start)
        duration = (end_dt - start_dt).total_seconds()

        # A/B configuration is best-effort: when ab_config cannot be imported
        # or lacks an attribute, placeholder values are recorded instead.
        try:
            from ab_config import config
            ab_version = config.version
            assistant_name = config.assistant_name
            has_shap = config.show_shap_visualizations
        except Exception:
            ab_version = "unknown"
            assistant_name = "unknown"
            has_shap = False

        return {
            "session_id": self.session_id,
            "prolific_id": self.prolific_id,
            "condition": self.condition,
            "ab_version": ab_version,
            "assistant_name": assistant_name,
            "has_shap_visualizations": has_shap,
            "timestamps": {
                "session_start": self.session_start,
                "session_end": end_dt.isoformat(),
                "duration_seconds": duration,
            },
            "application_data": self.application_data,
            "interactions": self.interactions,
            "behavior_metrics": self.behavior_metrics,
            # getattr tolerates instances that bypassed __init__.
            "feedback": getattr(self, "feedback_data", None),
        }

    def _github_token(self) -> Optional[str]:
        """Return the GitHub token from Streamlit secrets or the environment.

        The ``GITHUB_TOKEN`` environment variable is consulted whenever the
        secrets lookup yields nothing, not only when it raises, so a secrets
        file without the key does not hide the environment variable.
        """
        token = None
        try:
            token = (
                st.secrets.get("GITHUB_DATA_TOKEN")
                or st.secrets.get("GITHUB_TOKEN")
            )
        except Exception:
            # Accessing st.secrets failed; fall through to the environment.
            token = None
        return token or os.getenv('GITHUB_TOKEN')

    def save_to_github(self) -> bool:
        """Upload the session record to the private GitHub repository.

        Falls back to ``_save_local`` when no token is available, when the
        API does not answer with 200/201, or when any error occurs.

        Returns:
            ``True`` if the record was stored on GitHub or locally,
            ``False`` if both attempts failed.
        """
        import base64

        github_token = self._github_token()
        if not github_token:
            return self._save_local()

        try:
            # One clock reading keeps the date folder and timestamp aligned.
            now = datetime.now()
            date_str = now.strftime('%Y-%m-%d')
            timestamp = now.strftime('%Y%m%d_%H%M%S')
            safe_id = self._safe_name(self.prolific_id)
            filename = (
                f"sessions/{date_str}/{safe_id}_{self.condition}_{timestamp}.json"
            )

            content = json.dumps(self.build_final_data(), indent=2)

            url = f"https://api.github.com/repos/{self._GITHUB_REPO}/contents/{filename}"
            headers = {
                "Authorization": f"token {github_token}",
                "Accept": "application/vnd.github.v3+json",
            }

            # An existing file can only be updated when its current sha is
            # sent along, so look it up first.
            response = requests.get(
                url, headers=headers, timeout=self._HTTP_TIMEOUT_SECONDS
            )
            sha = response.json().get("sha") if response.status_code == 200 else None

            payload = {
                "message": f"Session data: {self.prolific_id} condition {self.condition}",
                "content": base64.b64encode(content.encode()).decode(),
            }
            if sha:
                payload["sha"] = sha

            response = requests.put(
                url,
                headers=headers,
                json=payload,
                timeout=self._HTTP_TIMEOUT_SECONDS,
            )
            if response.status_code in [200, 201]:
                return True

            print(f"GitHub save failed: HTTP {response.status_code}")
            return self._save_local()
        except Exception as e:
            print(f"GitHub save failed: {e}")
            return self._save_local()

    def _save_local(self) -> bool:
        """Fallback: write the session record to ``data/sessions``.

        Returns:
            ``True`` on success, ``False`` if the file could not be written.
        """
        try:
            os.makedirs('data/sessions', exist_ok=True)
            now = datetime.now()
            date_str = now.strftime('%Y-%m-%d')
            timestamp = now.strftime('%Y%m%d_%H%M%S')
            safe_id = self._safe_name(self.prolific_id)
            filename = (
                f"data/sessions/{date_str}_{safe_id}_{self.condition}_{timestamp}.json"
            )
            data = self.build_final_data()
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)
            return True
        except Exception as e:
            print(f"Local save failed: {e}")
            return False
def init_logger() -> Optional[DataLogger]:
    """Return the session's DataLogger, creating it from the URL on first use.

    A logger already stored in ``st.session_state`` is reused. Otherwise the
    participant ID is read from the ``pid`` or ``PROLIFIC_PID`` query
    parameter (default ``"unknown"``), the condition from ``cond`` (default
    ``0``), and the session ID from ``ab_config.config``.

    Returns:
        The logger, or ``None`` if it could not be created.
    """
    if "data_logger" in st.session_state:
        return st.session_state.data_logger

    def _as_str(v):
        # Query values arrive either as a plain string or as a list of
        # strings depending on which Streamlit API supplied them.
        if isinstance(v, list) and v:
            return v[0]
        return v if isinstance(v, str) else ""

    try:
        try:
            qs = dict(st.query_params)
        except Exception:
            # st.query_params is unavailable; use the older accessor.
            qs = st.experimental_get_query_params()

        # An empty value is treated like a missing one.
        prolific_id = (
            _as_str(qs.get("pid") or qs.get("PROLIFIC_PID", "unknown"))
            or "unknown"
        )

        # isdecimal() only accepts characters that int() can parse, whereas
        # isdigit() also accepts e.g. superscript digits that make int() raise.
        condition_str = _as_str(qs.get("cond", "0"))
        condition = int(condition_str) if condition_str.isdecimal() else 0

        from ab_config import config
        session_id = config.session_id

        logger = DataLogger(prolific_id, condition, session_id)
        st.session_state.data_logger = logger
        return logger
    except Exception as e:
        print(f"Failed to initialize logger: {e}")
        return None