Spaces:
Sleeping
Sleeping
| """ | |
| Data Logger for HicXAI Research | |
| Tracks user interactions, application data, and behavior metrics | |
| Saves to private GitHub repository: hicxai-data-private | |
| """ | |
| import json | |
| import os | |
| from datetime import datetime | |
| from typing import Optional, Dict, Any, List | |
| import streamlit as st | |
| import requests | |
class DataLogger:
    """Collect one participant session and persist it as a JSON document.

    The logger accumulates interaction events, application form data and
    simple behavior counters in memory.  ``save_to_github`` uploads the
    assembled record through the GitHub contents API and falls back to a
    local file under ``data/sessions`` whenever the upload is not possible.
    """

    # Repository ("owner/name") that receives the session files.
    GITHUB_REPO = "ksauka/hicxai-data-private"

    # Seconds to wait for each GitHub request.  Without a timeout a stalled
    # connection would block the caller indefinitely.
    REQUEST_TIMEOUT = 15

    # Interaction types that increment exactly one behavior counter.
    _COUNTER_BY_TYPE = {
        "help_click": "help_clicks",
        "explanation_request": "explanation_requests",
        "progress_check": "progress_checks",
    }

    def __init__(self, prolific_id: str, condition: int, session_id: str):
        """Start a new session record.

        Args:
            prolific_id: Participant identifier (stored and used in file names).
            condition: Experimental condition number.
            session_id: Identifier of this session.
        """
        self.prolific_id = prolific_id
        self.condition = condition
        self.session_id = session_id
        self.session_start = datetime.now().isoformat()
        self.interactions: List[Dict] = []
        self.application_data: Dict = {}
        # Initialised here so the attribute always exists, even when
        # set_feedback() is never called.
        self.feedback_data: Optional[Dict[str, Any]] = None
        self.behavior_metrics = {
            "total_messages": 0,
            "typed_responses": 0,
            "clicked_responses": 0,
            "help_clicks": 0,
            "explanation_requests": 0,
            "progress_checks": 0,
            "fields_changed": 0,
        }

    def log_interaction(self, interaction_type: str, content: Dict[str, Any]):
        """Append one interaction event and update the matching counters.

        Args:
            interaction_type: Event kind, e.g. ``"user_message"``.
            content: Extra fields merged into the stored event.  Keys named
                ``"timestamp"`` or ``"type"`` override the generated values.
        """
        self.interactions.append({
            "timestamp": datetime.now().isoformat(),
            "type": interaction_type,
            **content,
        })

        metrics = self.behavior_metrics
        if interaction_type == "user_message":
            metrics["total_messages"] += 1
            input_method = content.get("input_method")
            if input_method == "typed":
                metrics["typed_responses"] += 1
            elif input_method == "clicked":
                metrics["clicked_responses"] += 1
            return

        counter = self._COUNTER_BY_TYPE.get(interaction_type)
        if counter is not None:
            metrics[counter] += 1

    def update_application_data(self, field: str, value: Any):
        """Store a form field value.

        ``fields_changed`` is incremented only when an already recorded field
        receives a different value; the first assignment does not count.
        """
        data = self.application_data
        if field in data and data[field] != value:
            self.behavior_metrics["fields_changed"] += 1
        data[field] = value

    def set_prediction(self, prediction: str, probability: float):
        """Record the final prediction and its probability."""
        self.application_data["prediction"] = prediction
        self.application_data["prediction_probability"] = probability

    def set_feedback(self, feedback_data: Dict[str, Any]):
        """Attach the participant's feedback to the session record."""
        self.feedback_data = feedback_data

    def build_final_data(self) -> Dict[str, Any]:
        """Assemble the complete session record.

        Returns:
            A dict with session identifiers, A/B configuration, start/end
            timestamps with duration in seconds, application data, the
            interaction list, behavior counters and feedback (``None`` when
            no feedback was set).
        """
        end_dt = datetime.now()
        start_dt = datetime.fromisoformat(self.session_start)
        duration = (end_dt - start_dt).total_seconds()

        # A/B metadata is best-effort: a missing or broken ab_config module
        # must never prevent the session from being saved.
        try:
            from ab_config import config
            ab_version = config.version
            assistant_name = config.assistant_name
            has_shap = config.show_shap_visualizations
        except Exception:
            ab_version = "unknown"
            assistant_name = "unknown"
            has_shap = False

        return {
            "session_id": self.session_id,
            "prolific_id": self.prolific_id,
            "condition": self.condition,
            "ab_version": ab_version,
            "assistant_name": assistant_name,
            "has_shap_visualizations": has_shap,
            "timestamps": {
                "session_start": self.session_start,
                "session_end": end_dt.isoformat(),
                "duration_seconds": duration,
            },
            "application_data": self.application_data,
            "interactions": self.interactions,
            "behavior_metrics": self.behavior_metrics,
            "feedback": self.feedback_data,
        }

    @staticmethod
    def _safe_component(value: Any) -> str:
        """Return *value* reduced to ASCII letters, digits, ``-`` and ``_``.

        The participant id originates from URL query parameters, so it must
        not be able to inject path separators or ``..`` into file names or
        into the GitHub API URL.  Every other character becomes ``_``; an
        empty result becomes ``"unknown"``.
        """
        cleaned = "".join(
            ch if (ch.isascii() and ch.isalnum()) or ch in "-_" else "_"
            for ch in str(value)
        )
        return cleaned or "unknown"

    @staticmethod
    def _resolve_github_token() -> Optional[str]:
        """Return the GitHub token from Streamlit secrets or the environment.

        Secrets are consulted first.  The environment is used both when the
        secrets lookup raises (e.g. no secrets configured for local
        development) and when it succeeds without containing a token.
        """
        try:
            token = st.secrets.get("GITHUB_DATA_TOKEN") or st.secrets.get("GITHUB_TOKEN")
        except Exception:
            # Any failure to read secrets is treated as "no secret available".
            token = None
        return token or os.getenv("GITHUB_DATA_TOKEN") or os.getenv("GITHUB_TOKEN")

    def save_to_github(self) -> bool:
        """Upload the session record to the private GitHub repository.

        Falls back to ``_save_local`` when no token is available, when GitHub
        answers with an unexpected status, or when any error occurs.

        Returns:
            ``True`` if the record was stored (remotely or locally), ``False``
            if the local fallback failed as well.
        """
        github_token = self._resolve_github_token()
        if not github_token:
            return self._save_local()

        try:
            import base64

            # One clock reading so the date folder and the timestamp agree.
            now = datetime.now()
            date_str = now.strftime('%Y-%m-%d')
            timestamp = now.strftime('%Y%m%d_%H%M%S')
            safe_id = self._safe_component(self.prolific_id)
            filename = f"sessions/{date_str}/{safe_id}_{self.condition}_{timestamp}.json"

            content = json.dumps(self.build_final_data(), indent=2)

            url = f"https://api.github.com/repos/{self.GITHUB_REPO}/contents/{filename}"
            headers = {
                "Authorization": f"token {github_token}",
                "Accept": "application/vnd.github.v3+json",
            }

            # Updating an existing file requires its current blob sha.
            response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
            sha = response.json().get("sha") if response.status_code == 200 else None

            payload = {
                "message": f"Session data: {self.prolific_id} condition {self.condition}",
                "content": base64.b64encode(content.encode()).decode(),
            }
            if sha:
                payload["sha"] = sha

            response = requests.put(
                url, headers=headers, json=payload, timeout=self.REQUEST_TIMEOUT
            )
            if response.status_code in (200, 201):
                return True
            print(f"GitHub save failed: HTTP {response.status_code}")
        except Exception as e:
            print(f"GitHub save failed: {e}")

        return self._save_local()

    def _save_local(self) -> bool:
        """Fallback: write the session record to ``data/sessions``.

        Returns:
            ``True`` on success, ``False`` if the file could not be written.
        """
        try:
            os.makedirs('data/sessions', exist_ok=True)
            now = datetime.now()
            date_str = now.strftime('%Y-%m-%d')
            timestamp = now.strftime('%Y%m%d_%H%M%S')
            safe_id = self._safe_component(self.prolific_id)
            filename = f"data/sessions/{date_str}_{safe_id}_{self.condition}_{timestamp}.json"

            data = self.build_final_data()
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)
            return True
        except Exception as e:
            print(f"Local save failed: {e}")
            return False
def init_logger() -> Optional[DataLogger]:
    """Return the session's DataLogger, creating it on first use.

    The logger is cached in ``st.session_state.data_logger``.  On creation the
    participant id is read from the ``pid`` (or ``PROLIFIC_PID``) query
    parameter and the condition from ``cond``; missing or malformed values
    fall back to ``"unknown"`` and ``0``.  The session id is taken from
    ``ab_config.config.session_id``.

    Returns:
        The logger, or ``None`` if initialization failed (the error is
        printed).
    """
    if "data_logger" in st.session_state:
        return st.session_state.data_logger

    def _as_str(v):
        # The legacy query-params API yields lists of values, the current one
        # plain strings; anything else is treated as missing.
        if isinstance(v, list) and v:
            return v[0]
        return v if isinstance(v, str) else ""

    try:
        # Prefer the current query-params API and fall back to the legacy
        # one when it is unavailable.
        try:
            qs = dict(st.query_params)
        except Exception:
            qs = st.experimental_get_query_params()

        # Extract Prolific ID and condition.
        raw_id = qs.get("pid") or qs.get("PROLIFIC_PID", "unknown")
        prolific_id = _as_str(raw_id) or "unknown"

        condition_str = _as_str(qs.get("cond", "0"))
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits, which int() rejects with ValueError and
        # which would abort the whole initialization.
        condition = int(condition_str) if condition_str.isdecimal() else 0

        # The session id is owned by the A/B configuration.
        from ab_config import config
        session_id = config.session_id

        logger = DataLogger(prolific_id, condition, session_id)
        st.session_state.data_logger = logger
        return logger
    except Exception as e:
        print(f"Failed to initialize logger: {e}")
        return None