"""
User Study: Firestore logging and study configuration.

STORAGE: Google Cloud Firestore (Firebase).
If a firebase_admin app has already been initialised it is reused. Otherwise
the service account credentials are loaded from:
  1. The file path in FIREBASE_KEY_PATH env var, OR
  2. The default path: steering-vision-language-model-firebase-adminsdk-fbsvc-b7d95b30a2.json
     (relative to this file's directory), OR
  3. The JSON string in FIREBASE_SERVICE_ACCOUNT_JSON env var (for HF Spaces secrets).

Firestore collections:
  participants, interaction_log, image_annotations,
  method_comparison, survey_responses, final_selections, final_survey
"""

import json
import os
import re
from pathlib import Path
from datetime import datetime, timezone

# ── Firestore initialisation ────────────────────────────────────────────────

_firestore_db = None
_firestore_init_done = False

# Default key file path (same directory as this module).
# NOTE(review): this is a service-account secret; confirm it is excluded from
# version control and from any published artefact.
_DEFAULT_KEY_FILE = (
    Path(__file__).parent
    / "steering-vision-language-model-firebase-adminsdk-fbsvc-b7d95b30a2.json"
)


def _init_firestore():
    """Lazy-init Firestore. Called once; sets _firestore_db or raises.

    The "done" flag is set *before* the attempt, so a failed initialisation
    is not retried: the first call propagates the real error and later
    ``_get_db()`` calls raise "Firestore is not initialised.".

    Raises:
        RuntimeError: if no credential source is available.
        json.JSONDecodeError: if FIREBASE_SERVICE_ACCOUNT_JSON is not valid JSON.
    """
    global _firestore_db, _firestore_init_done
    if _firestore_init_done:
        return
    _firestore_init_done = True

    # Imported lazily so that importing this module does not require
    # firebase_admin to be installed.
    import firebase_admin
    from firebase_admin import credentials, firestore

    # Already initialised by another module?
    if firebase_admin._apps:
        _firestore_db = firestore.client()
        print("[study_utils] Firestore: reusing existing app")
        return

    # Option 1: explicit file path from env var
    key_path = os.environ.get("FIREBASE_KEY_PATH", "").strip()
    if key_path:
        if Path(key_path).exists():
            cred = credentials.Certificate(key_path)
            firebase_admin.initialize_app(cred)
            _firestore_db = firestore.client()
            print(f"[study_utils] Firestore: initialised from FIREBASE_KEY_PATH={key_path}")
            return
        # Do not fail here (the other sources may still work), but make the
        # misconfiguration visible instead of silently ignoring it.
        print(
            f"[study_utils] Firestore: FIREBASE_KEY_PATH={key_path} does not exist; "
            "trying other credential sources"
        )

    # Option 2: default key file next to this module
    if _DEFAULT_KEY_FILE.exists():
        cred = credentials.Certificate(str(_DEFAULT_KEY_FILE))
        firebase_admin.initialize_app(cred)
        _firestore_db = firestore.client()
        print(f"[study_utils] Firestore: initialised from {_DEFAULT_KEY_FILE.name}")
        return

    # Option 3: JSON string in env var (for HF Spaces secrets)
    sa_json = os.environ.get("FIREBASE_SERVICE_ACCOUNT_JSON", "").strip()
    if sa_json:
        info = json.loads(sa_json)
        cred = credentials.Certificate(info)
        firebase_admin.initialize_app(cred)
        _firestore_db = firestore.client()
        print("[study_utils] Firestore: initialised from FIREBASE_SERVICE_ACCOUNT_JSON")
        return

    raise RuntimeError(
        "No Firebase credentials found. Place the service account JSON file "
        f"at {_DEFAULT_KEY_FILE}, point FIREBASE_KEY_PATH at an existing key file, "
        "or set FIREBASE_SERVICE_ACCOUNT_JSON env var."
    )


def _get_db():
    """Return the Firestore client, initialising if needed.

    Raises:
        RuntimeError: if no client is available after the (single)
            initialisation attempt.
    """
    _init_firestore()
    if _firestore_db is None:
        raise RuntimeError("Firestore is not initialised.")
    return _firestore_db


def firestore_add(collection: str, data: dict) -> None:
    """Add a document (auto-generated id) to a Firestore collection."""
    db = _get_db()
    db.collection(collection).add(data)


def firestore_batch_add(items: list[tuple[str, dict]]) -> None:
    """Add many documents efficiently using Firestore batch writes.

    Args:
        items: list of (collection_name, data_dict) tuples.

    Firestore batches support up to 500 ops each; this function
    auto-splits into multiple batches. An empty ``items`` list commits
    nothing.
    """
    db = _get_db()
    BATCH_LIMIT = 450  # stay under Firestore's 500-op limit
    for start in range(0, len(items), BATCH_LIMIT):
        batch = db.batch()
        for collection, data in items[start:start + BATCH_LIMIT]:
            # document() with no argument allocates a new auto-id reference.
            ref = db.collection(collection).document()
            batch.set(ref, data)
        batch.commit()


def firestore_query_exists(collection: str, field: str, value) -> bool:
    """Return True if at least one document matches field == value."""
    db = _get_db()
    docs = (
        db.collection(collection)
        .where(field, "==", value)
        .limit(1)  # existence check only: one hit is enough
        .get()
    )
    return len(docs) > 0


# ── 22 study queries ────────────────────────────────────────────────────────
# Each entry is a (query text, dataset key) pair.

STUDY_QUERIES = [
    # Stanford Dogs (7)
    ("a golden retriever", "stanford_dogs"),
    ("Dog on the beach", "stanford_dogs"),
    ("Dog looking guilty", "stanford_dogs"),
    ("friendly looking dog", "stanford_dogs"),
    ("aggressive looking dog", "stanford_dogs"),
    ("nervous looking dog", "stanford_dogs"),
    ("Hyper active dog", "stanford_dogs"),
    # Flickr (7)
    ("a person riding a bicycle", "flickr"),
    ("A dog playing", "flickr"),
    ("an exciting action scene", "flickr"),
    ("a joyful moment", "flickr"),
    ("A kid having fun", "flickr"),
    ("peaceful scene", "flickr"),
    ("a photo with motion", "flickr"),
    # CelebA (8)
    ("wearing eyeglasses", "celeba"),
    ("a person smiling", "celeba"),
    ("looking guilty", "celeba"),
    ("looking happy", "celeba"),
    ("looking sad", "celeba"),
    ("looking suspicious", "celeba"),
    ("looking tired", "celeba"),
    ("looking confident", "celeba"),
]

NUM_QUERIES = len(STUDY_QUERIES)
# Not referenced in this module; presumably the per-query round cap used by
# the study UI (verify against the caller).
MAX_ROUNDS = 3

# ── Helpers ──────────────────────────────────────────────────────────────────

# Compiled once at import time; used by validate_email().
_EMAIL_RE = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")


def _iso_ts() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


def validate_email(email: str) -> bool:
    """Return True if *email* looks like an address (after strip + lowercase).

    ``None``, empty strings and non-string values are rejected rather than
    raising.
    """
    if not email or not isinstance(email, str):
        return False
    email = email.strip().lower()
    return bool(_EMAIL_RE.match(email))


def participant_exists(participant_id: str) -> bool:
    """Return True if participant_id already registered in Firestore.

    The id is stripped and lower-cased before the lookup. Non-string values
    (e.g. ``None``) are reported as not registered without querying
    Firestore.
    """
    if not isinstance(participant_id, str):
        return False
    return firestore_query_exists(
        "participants", "participant_id", participant_id.strip().lower()
    )


def register_participant(email: str, gender: str, age_range: str) -> tuple[bool, str]:
    """
    Write participant to Firestore.
    Returns (success: bool, error_message: str).

    The normalised (stripped, lower-cased) email doubles as participant_id.
    """
    # Validate before normalising: validate_email() tolerates None and
    # non-string input, whereas calling .strip() first would raise.
    if not validate_email(email):
        return False, "Please enter a valid email address."
    email = email.strip().lower()

    if participant_exists(email):
        return False, "This email is already registered. Use a different one or contact the researchers."

    row = {
        "participant_id": email,
        "email": email,
        "gender": (gender or "").strip(),
        "age_range": (age_range or "").strip(),
        "timestamp": _iso_ts(),
    }
    firestore_add("participants", row)
    return True, ""


def log_interaction(
    participant_id: str,
    query_id: int,
    query_text: str,
    method: str,
    round_number: int,
    attributes_used: str,
    retrieved_image_ids: str,
    time_elapsed: float,
    user_satisfied: int,
) -> None:
    """Append one timestamped record to the ``interaction_log`` collection."""
    firestore_add("interaction_log", {
        "participant_id": participant_id,
        "query_id": query_id,
        "query_text": query_text,
        "method": method,
        "round_number": round_number,
        "attributes_used": attributes_used,
        "retrieved_image_ids": retrieved_image_ids,
        "time_elapsed": time_elapsed,
        "user_satisfied": user_satisfied,
        "timestamp": _iso_ts(),
    })


def log_image_annotations(
    participant_id: str,
    query_id: int,
    image_id: str,
    method: str,
    meets_intent: int,
) -> None:
    """Append one timestamped record to the ``image_annotations`` collection."""
    firestore_add("image_annotations", {
        "participant_id": participant_id,
        "query_id": query_id,
        "image_id": image_id,
        "method": method,
        "meets_intent": meets_intent,
        "timestamp": _iso_ts(),
    })


def log_method_comparison(
    participant_id: str,
    query_id: int,
    linear_better: str,
) -> None:
    """Append one timestamped record to the ``method_comparison`` collection."""
    firestore_add("method_comparison", {
        "participant_id": participant_id,
        "query_id": query_id,
        "linear_better": linear_better,
        "timestamp": _iso_ts(),
    })


def log_survey_responses(
    participant_id: str,
    query_id: int,
    alignment_score: int,
    agency_score: int,
    satisfaction_score: int,
    frustration_score: int,
    round_satisfied: int,
    time_elapsed: float,
) -> None:
    """Append one timestamped record to the ``survey_responses`` collection."""
    firestore_add("survey_responses", {
        "participant_id": participant_id,
        "query_id": query_id,
        "alignment_score": alignment_score,
        "agency_score": agency_score,
        "satisfaction_score": satisfaction_score,
        "frustration_score": frustration_score,
        "round_satisfied": round_satisfied,
        "time_elapsed": time_elapsed,
        "timestamp": _iso_ts(),
    })


def log_final_selections(
    participant_id: str,
    query_id: int,
    baseline_final_image_ids: str,
    linear_final_image_ids: str,
    round_satisfied: int,
    time_elapsed: float,
) -> None:
    """Append one timestamped record to the ``final_selections`` collection."""
    firestore_add("final_selections", {
        "participant_id": participant_id,
        "query_id": query_id,
        "baseline_final_image_ids": baseline_final_image_ids,
        "linear_final_image_ids": linear_final_image_ids,
        "round_satisfied": round_satisfied,
        "time_elapsed": time_elapsed,
        "timestamp": _iso_ts(),
    })


def log_final_survey(
    participant_id: str,
    preferred_system: str,
    concept_changed: str,
    open_feedback: str,
) -> None:
    """Append one timestamped record to the ``final_survey`` collection.

    ``open_feedback`` is stored stripped; ``None`` is stored as "".
    """
    firestore_add("final_survey", {
        "participant_id": participant_id,
        "preferred_system": preferred_system,
        "concept_changed": concept_changed,
        "open_feedback": (open_feedback or "").strip(),
        "timestamp": _iso_ts(),
    })