| """ |
| User Study: Firestore logging and study configuration. |
| |
| STORAGE: Google Cloud Firestore (Firebase). |
| The service account JSON file is loaded from: |
| 1. The file path in FIREBASE_KEY_PATH env var, OR |
| 2. The default path: steering-vision-language-model-firebase-adminsdk-fbsvc-b7d95b30a2.json |
| (relative to this file's directory), OR |
| 3. The JSON string in FIREBASE_SERVICE_ACCOUNT_JSON env var (for HF Spaces secrets). |
| |
| Firestore collections: |
| participants, interaction_log, image_annotations, |
| method_comparison, survey_responses, final_selections, final_survey |
| """ |
|
|
| import json |
| import os |
| import re |
| from pathlib import Path |
| from datetime import datetime, timezone |
|
|
| |
|
|
# Shared Firestore client; remains None until _init_firestore() assigns one.
_firestore_db = None

# Guard flag consulted by _init_firestore() to skip repeated initialisation.
_firestore_init_done = False

# Service-account key file expected to sit in the same directory as this
# source file.
_DEFAULT_KEY_FILE = Path(__file__).with_name(
    "steering-vision-language-model-firebase-adminsdk-fbsvc-b7d95b30a2.json"
)
|
|
|
|
def _init_firestore():
    """Lazily create the module-wide Firestore client.

    If a default Firebase app already exists in this process it is reused.
    Otherwise credentials are looked up in this order:

    1. the file named by the ``FIREBASE_KEY_PATH`` env var (if it exists),
    2. the default key file ``_DEFAULT_KEY_FILE``,
    3. the JSON document stored in the ``FIREBASE_SERVICE_ACCOUNT_JSON``
       env var.

    On success ``_firestore_db`` holds the client and
    ``_firestore_init_done`` is set, so later calls return immediately.
    The flag is only set after a client was actually obtained: a failed
    attempt is retried on the next call and re-raises its real cause instead
    of silently leaving ``_firestore_db`` as ``None``.

    Raises:
        RuntimeError: if none of the credential sources is available.
        Exceptions raised by ``firebase_admin`` or ``json.loads`` propagate
        unchanged.
    """
    global _firestore_db, _firestore_init_done
    if _firestore_init_done:
        return

    # Imported here rather than at module top, so firebase_admin is only
    # loaded when Firestore is first used.
    import firebase_admin
    from firebase_admin import credentials, firestore

    # Reuse the default app if one was already initialised. get_app() is the
    # public API for this check and raises ValueError when no default app
    # exists (the private firebase_admin._apps registry is not consulted).
    try:
        firebase_admin.get_app()
    except ValueError:
        pass
    else:
        _firestore_db = firestore.client()
        _firestore_init_done = True
        print("[study_utils] Firestore: reusing existing app")
        return

    cred = None
    origin = ""

    # 1. Explicit key file path from the environment.
    key_path = os.environ.get("FIREBASE_KEY_PATH", "").strip()
    if key_path:
        if Path(key_path).exists():
            cred = credentials.Certificate(key_path)
            origin = f"initialised from FIREBASE_KEY_PATH={key_path}"
        else:
            # Make the misconfiguration visible instead of skipping silently.
            print(
                f"[study_utils] Firestore: FIREBASE_KEY_PATH={key_path} "
                "does not exist; trying other credential sources"
            )

    # 2. Default key file next to this module.
    if cred is None and _DEFAULT_KEY_FILE.exists():
        cred = credentials.Certificate(str(_DEFAULT_KEY_FILE))
        origin = f"initialised from {_DEFAULT_KEY_FILE.name}"

    # 3. Service-account JSON passed directly through the environment.
    if cred is None:
        sa_json = os.environ.get("FIREBASE_SERVICE_ACCOUNT_JSON", "").strip()
        if sa_json:
            cred = credentials.Certificate(json.loads(sa_json))
            origin = "initialised from FIREBASE_SERVICE_ACCOUNT_JSON"

    if cred is None:
        raise RuntimeError(
            "No Firebase credentials found. Place the service account JSON file "
            f"at {_DEFAULT_KEY_FILE}, or set the FIREBASE_KEY_PATH or "
            "FIREBASE_SERVICE_ACCOUNT_JSON env var."
        )

    firebase_admin.initialize_app(cred)
    _firestore_db = firestore.client()
    _firestore_init_done = True
    print(f"[study_utils] Firestore: {origin}")
|
|
|
|
def _get_db():
    """Hand back the shared Firestore client, initialising it on first use.

    Raises:
        RuntimeError: if no client is available after ``_init_firestore()``
            has run.
    """
    _init_firestore()
    client = _firestore_db
    if client is not None:
        return client
    raise RuntimeError("Firestore is not initialised.")
|
|
|
|
def firestore_add(collection: str, data: dict) -> None:
    """Store ``data`` as a new document in the named Firestore collection.

    Args:
        collection: Name of the target collection.
        data: Field/value mapping to write.
    """
    target = _get_db().collection(collection)
    target.add(data)
|
|
|
|
def firestore_batch_add(items: list[tuple[str, dict]]) -> None:
    """Write many documents using Firestore batch writes.

    The input is committed in consecutive chunks, one batch per chunk, so
    any number of items can be passed.

    Args:
        items: ``(collection_name, data_dict)`` pairs; each pair becomes one
            new document in the named collection.
    """
    db = _get_db()
    # Firestore batches support up to 500 ops each; stay below that limit.
    chunk_size = 450
    total = len(items)
    offset = 0
    while offset < total:
        writer = db.batch()
        for coll_name, payload in items[offset:offset + chunk_size]:
            new_doc = db.collection(coll_name).document()
            writer.set(new_doc, payload)
        writer.commit()
        offset += chunk_size
|
|
|
|
def firestore_query_exists(collection: str, field: str, value) -> bool:
    """Tell whether any document in ``collection`` has ``field == value``.

    The query is limited to a single result, since only existence matters.
    """
    query = _get_db().collection(collection).where(field, "==", value).limit(1)
    matches = query.get()
    return len(matches) >= 1
|
|
|
|
| |
|
|
# Fixed list of study prompts as (query_text, dataset_name) pairs.
STUDY_QUERIES: list[tuple[str, str]] = [
    # Queries tagged with the "stanford_dogs" dataset.
    ("a golden retriever", "stanford_dogs"),
    ("Dog on the beach", "stanford_dogs"),
    ("Dog looking guilty", "stanford_dogs"),
    ("friendly looking dog", "stanford_dogs"),
    ("aggressive looking dog", "stanford_dogs"),
    ("nervous looking dog", "stanford_dogs"),
    ("Hyper active dog", "stanford_dogs"),
    # Queries tagged with the "flickr" dataset.
    ("a person riding a bicycle", "flickr"),
    ("A dog playing", "flickr"),
    ("an exciting action scene", "flickr"),
    ("a joyful moment", "flickr"),
    ("A kid having fun", "flickr"),
    ("peaceful scene", "flickr"),
    ("a photo with motion", "flickr"),
    # Queries tagged with the "celeba" dataset.
    ("wearing eyeglasses", "celeba"),
    ("a person smiling", "celeba"),
    ("looking guilty", "celeba"),
    ("looking happy", "celeba"),
    ("looking sad", "celeba"),
    ("looking suspicious", "celeba"),
    ("looking tired", "celeba"),
    ("looking confident", "celeba"),
]


# Number of entries in STUDY_QUERIES.
NUM_QUERIES: int = len(STUDY_QUERIES)
# NOTE(review): presumably the cap on rounds per query; not used in the
# visible part of this file, so confirm against callers.
MAX_ROUNDS: int = 3
|
|
|
|
| |
|
|
def _iso_ts() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    now_utc = datetime.now(tz=timezone.utc)
    return now_utc.isoformat()
|
|
|
|
# Address pattern used by validate_email(); applied to the stripped,
# lower-cased input, so only lower-case letters need to be listed.
_EMAIL_PATTERN = re.compile(
    # Local part: runs of allowed characters separated by single dots
    # (no leading, trailing or consecutive dots).
    r"[a-z0-9_%+-]+(?:\.[a-z0-9_%+-]+)*"
    r"@"
    # Domain labels: start and end with a letter or digit, hyphens only
    # inside; each label is followed by a dot.
    r"(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+"
    # Top-level domain: at least two letters.
    r"[a-z]{2,}"
)


def validate_email(email: str) -> bool:
    """Return True if ``email`` looks like a well-formed email address.

    Surrounding whitespace and letter case are ignored. Non-string or empty
    input yields False. Addresses with empty domain labels (``a@b..com``),
    labels that start or end with a hyphen, or misplaced dots in the local
    part are rejected.

    Args:
        email: Candidate address; any non-string value is treated as invalid.

    Returns:
        True when the whole normalised string matches the address pattern.
    """
    if not isinstance(email, str) or not email:
        return False
    candidate = email.strip().lower()
    # fullmatch anchors both ends, so nothing may trail the address.
    return _EMAIL_PATTERN.fullmatch(candidate) is not None
|
|
|
|
def participant_exists(participant_id: str) -> bool:
    """Check whether ``participant_id`` is already stored in ``participants``.

    The id is stripped and lower-cased before the lookup.
    """
    normalised_id = participant_id.strip().lower()
    return firestore_query_exists("participants", "participant_id", normalised_id)
|
|
|
|
def register_participant(email: str, gender: str, age_range: str) -> tuple[bool, str]:
    """Register a new participant in the ``participants`` collection.

    The stripped, lower-cased email is used both as ``participant_id`` and
    as ``email``. ``gender`` and ``age_range`` are stored stripped, with
    ``None`` treated as an empty string.

    Args:
        email: Participant email; must pass ``validate_email``.
        gender: Free-form gender value (may be None or empty).
        age_range: Free-form age-range value (may be None or empty).

    Returns:
        ``(True, "")`` on success, otherwise ``(False, error_message)``.
    """
    invalid_email = (False, "Please enter a valid email address.")

    # Non-string input (e.g. None) would raise AttributeError on .strip();
    # report it as an invalid address instead.
    if not isinstance(email, str):
        return invalid_email

    email = email.strip().lower()
    if not validate_email(email):
        return invalid_email

    # NOTE(review): the existence check and the insert below are separate
    # operations, so two concurrent registrations of the same email could
    # both pass this check.
    if participant_exists(email):
        return False, "This email is already registered. Use a different one or contact the researchers."

    row = {
        "participant_id": email,
        "email": email,
        "gender": (gender or "").strip(),
        "age_range": (age_range or "").strip(),
        "timestamp": _iso_ts(),
    }
    firestore_add("participants", row)
    return True, ""
|
|
|
|
def log_interaction(
    participant_id: str,
    query_id: int,
    query_text: str,
    method: str,
    round_number: int,
    attributes_used: str,
    retrieved_image_ids: str,
    time_elapsed: float,
    user_satisfied: int,
) -> None:
    """Append one record to the ``interaction_log`` collection.

    Every argument is stored under a key equal to its parameter name, and a
    ``timestamp`` produced by ``_iso_ts()`` is added.
    """
    record = {
        "participant_id": participant_id,
        "query_id": query_id,
        "query_text": query_text,
        "method": method,
        "round_number": round_number,
        "attributes_used": attributes_used,
        "retrieved_image_ids": retrieved_image_ids,
        "time_elapsed": time_elapsed,
        "user_satisfied": user_satisfied,
    }
    record["timestamp"] = _iso_ts()
    firestore_add("interaction_log", record)
|
|
|
|
def log_image_annotations(
    participant_id: str,
    query_id: int,
    image_id: str,
    method: str,
    meets_intent: int,
) -> None:
    """Append one record to the ``image_annotations`` collection.

    Every argument is stored under a key equal to its parameter name, and a
    ``timestamp`` produced by ``_iso_ts()`` is added.
    """
    record = {
        "participant_id": participant_id,
        "query_id": query_id,
        "image_id": image_id,
        "method": method,
        "meets_intent": meets_intent,
    }
    record["timestamp"] = _iso_ts()
    firestore_add("image_annotations", record)
|
|
|
|
def log_method_comparison(
    participant_id: str,
    query_id: int,
    linear_better: str,
) -> None:
    """Append one record to the ``method_comparison`` collection.

    Every argument is stored under a key equal to its parameter name, and a
    ``timestamp`` produced by ``_iso_ts()`` is added.
    """
    record = {
        "participant_id": participant_id,
        "query_id": query_id,
        "linear_better": linear_better,
    }
    record["timestamp"] = _iso_ts()
    firestore_add("method_comparison", record)
|
|
|
|
def log_survey_responses(
    participant_id: str,
    query_id: int,
    alignment_score: int,
    agency_score: int,
    satisfaction_score: int,
    frustration_score: int,
    round_satisfied: int,
    time_elapsed: float,
) -> None:
    """Append one record to the ``survey_responses`` collection.

    Every argument is stored under a key equal to its parameter name, and a
    ``timestamp`` produced by ``_iso_ts()`` is added.
    """
    record = {
        "participant_id": participant_id,
        "query_id": query_id,
        "alignment_score": alignment_score,
        "agency_score": agency_score,
        "satisfaction_score": satisfaction_score,
        "frustration_score": frustration_score,
        "round_satisfied": round_satisfied,
        "time_elapsed": time_elapsed,
    }
    record["timestamp"] = _iso_ts()
    firestore_add("survey_responses", record)
|
|
|
|
def log_final_selections(
    participant_id: str,
    query_id: int,
    baseline_final_image_ids: str,
    linear_final_image_ids: str,
    round_satisfied: int,
    time_elapsed: float,
) -> None:
    """Append one record to the ``final_selections`` collection.

    Every argument is stored under a key equal to its parameter name, and a
    ``timestamp`` produced by ``_iso_ts()`` is added.
    """
    record = {
        "participant_id": participant_id,
        "query_id": query_id,
        "baseline_final_image_ids": baseline_final_image_ids,
        "linear_final_image_ids": linear_final_image_ids,
        "round_satisfied": round_satisfied,
        "time_elapsed": time_elapsed,
    }
    record["timestamp"] = _iso_ts()
    firestore_add("final_selections", record)
|
|
|
|
def log_final_survey(
    participant_id: str,
    preferred_system: str,
    concept_changed: str,
    open_feedback: str,
) -> None:
    """Append one record to the ``final_survey`` collection.

    ``open_feedback`` is stored stripped, with a falsy value (e.g. None)
    saved as an empty string; the other arguments are stored as given. A
    ``timestamp`` produced by ``_iso_ts()`` is added.
    """
    feedback_text = (open_feedback or "").strip()
    record = {
        "participant_id": participant_id,
        "preferred_system": preferred_system,
        "concept_changed": concept_changed,
        "open_feedback": feedback_text,
    }
    record["timestamp"] = _iso_ts()
    firestore_add("final_survey", record)
|
|