"""
User Management Module
======================
Handles user database storage in a Hugging Face repository instead of local JSON.

Stores users_db.json in: Hamza4100/multi-pdf-storage/users_db.json
"""

import hashlib
import hmac
import json
import os
import tempfile
from datetime import datetime
from typing import Any, Dict, Optional, Tuple

from huggingface_hub import HfApi, hf_hub_download, login


class HFUserManager:
    """Manages a user database stored as JSON in a Hugging Face repository.

    The whole database is held in memory in ``users_data`` (username -> user
    record) and re-uploaded as ``users_db.json`` after every mutation.
    Per-user JSON files are stored under ``users/<user_id>/<filename>``.
    """

    def __init__(self, hf_token: Optional[str], hf_repo: str):
        """
        Initialize HF User Manager.

        When both a token and a repo are given, logs in, creates the API
        client and loads the users database. Any exception during that
        sequence disables the manager instead of propagating.

        Args:
            hf_token: Hugging Face API token with write access
            hf_repo: HF repository ID (e.g., "Hamza4100/multi-pdf-storage")
        """
        self.hf_token = hf_token
        self.hf_repo = hf_repo
        self.enabled = bool(hf_token and hf_repo)
        self.api = None
        self.users_db_file = "users_db.json"  # File stored at repo root
        self.users_data: Dict[str, Any] = {}

        if not self.enabled:
            print("⚠️ HF User Manager disabled (HF_TOKEN or HF_REPO not set)")
            return

        try:
            login(token=hf_token, add_to_git_credential=True)
            self.api = HfApi()
            print(f"✅ HF User Manager initialized: {hf_repo}")
            # Load users database on init
            self._load_users_from_hf()
        except Exception as e:
            print(f"⚠️ HF User Manager initialization failed: {e}")
            self.enabled = False

    # ============================================
    # INTERNAL HELPERS
    # ============================================

    @staticmethod
    def _derive_user_id(api_key: Any) -> Optional[str]:
        """
        Derive the storage user_id from an API key.

        Args:
            api_key: API key string

        Returns:
            First 12 hex characters of the SHA-256 digest of the key, or
            None if the key cannot be encoded (e.g. it is not a string).
        """
        try:
            return hashlib.sha256(api_key.encode()).hexdigest()[:12]
        except (AttributeError, TypeError, UnicodeError):
            return None

    def _upload_json(
        self,
        data: Any,
        path_in_repo: str,
        commit_message: str,
        ensure_ascii: bool = True,
    ) -> None:
        """
        Serialize ``data`` to a temporary JSON file and upload it to the repo.

        The temporary file is written as UTF-8 and is removed whether or not
        serialization or the upload succeeds. Exceptions propagate to the
        caller.

        Args:
            data: JSON-serializable object to upload
            path_in_repo: Destination path inside the HF repository
            commit_message: Commit message for the upload
            ensure_ascii: Passed through to ``json.dump``
        """
        tmp = tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False, encoding="utf-8"
        )
        tmp_path = tmp.name
        try:
            # Close the file before uploading so the content is flushed.
            with tmp:
                json.dump(data, tmp, indent=2, ensure_ascii=ensure_ascii)
            self.api.upload_file(
                path_or_fileobj=tmp_path,
                path_in_repo=path_in_repo,
                repo_id=self.hf_repo,
                token=self.hf_token,
                repo_type="model",
                commit_message=commit_message,
            )
        finally:
            # delete=False means we own the cleanup, including on failure.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    def _load_users_from_hf(self) -> bool:
        """
        Load users_db.json from HF repository.

        Records that have an ``api_key`` but no ``user_id`` get one derived
        from the key; if any record was changed, the database is saved back.

        NOTE(review): every failure (not only "file does not exist yet")
        resets the in-memory database to empty while the manager stays
        enabled, so a later save would upload that smaller database.
        Confirm this is acceptable for transient download errors.

        Returns:
            bool: True if loaded successfully, False otherwise
        """
        if not self.enabled:
            return False

        try:
            print(f"📥 Loading users database from {self.hf_repo}/{self.users_db_file}")

            # Download users_db.json from HF repo
            downloaded_path = hf_hub_download(
                repo_id=self.hf_repo,
                filename=self.users_db_file,
                token=self.hf_token,
                repo_type="model",
            )

            with open(downloaded_path, "r", encoding="utf-8") as f:
                self.users_data = json.load(f)

            print(f"✅ Loaded {len(self.users_data)} user(s) from HF repo")

            # Backfill missing user_id fields (derive from api_key) and save
            modified = False
            for udata in self.users_data.values():
                if "user_id" in udata or not udata.get("api_key"):
                    continue
                user_id = self._derive_user_id(udata["api_key"])
                if user_id is None:
                    continue
                udata["user_id"] = user_id
                modified = True

            if modified:
                # Save back to HF to persist user_id fields
                self._save_users_to_hf(
                    commit_message="Backfill user_id for existing users"
                )

            return True

        except Exception as e:
            # File might not exist yet (first run is okay)
            print(f"⚠️ Could not load users database from HF: {str(e)[:100]}")
            print(" Starting with empty database (will be created on first signup)")
            self.users_data = {}
            return False

    def _save_users_to_hf(self, commit_message: str = "Update users database") -> bool:
        """
        Save users_db.json to HF repository.

        Args:
            commit_message: Commit message for the upload

        Returns:
            bool: True if saved successfully, False otherwise
        """
        if not self.enabled:
            print("⚠️ HF User Manager disabled, cannot save to HF")
            return False

        try:
            print(f"📤 Saving users database to {self.hf_repo}/{self.users_db_file}")
            self._upload_json(self.users_data, self.users_db_file, commit_message)
            print("✅ Users database saved to HF repo")
            return True
        except Exception as e:
            print(f"❌ Failed to save users database to HF: {e}")
            return False

    # ============================================
    # USER RECORDS
    # ============================================

    def get_user(self, username: str) -> Optional[Dict[str, Any]]:
        """
        Get user by username.

        Args:
            username: Username to retrieve

        Returns:
            User data dict (the stored object, not a copy) or None if not found
        """
        return self.users_data.get(username)

    def user_exists(self, username: str) -> bool:
        """
        Check if user exists.

        Args:
            username: Username to check

        Returns:
            True if user exists, False otherwise
        """
        return username in self.users_data

    def create_user(
        self,
        username: str,
        password_hash: str,
        email: str,
        api_key: str
    ) -> bool:
        """
        Create a new user account.

        Args:
            username: Username
            password_hash: Hashed password
            email: User email
            api_key: API key for this user

        Returns:
            bool: True if the user was created and saved to HF. False if the
            user already exists or the save failed; on a failed save the new
            record remains in memory.
        """
        if self.user_exists(username):
            return False

        self.users_data[username] = {
            "password_hash": password_hash,
            "email": email,
            "api_key": api_key,
            # Stable user_id derived from the API key for storage isolation
            "user_id": self._derive_user_id(api_key),
            "created_at": datetime.now().isoformat(),
        }

        # Save to HF
        success = self._save_users_to_hf(
            commit_message=f"Add new user: {username}"
        )
        if success:
            print(f"✅ User '{username}' created and saved to HF")
        return success

    def update_user(self, username: str, updates: Dict[str, Any]) -> bool:
        """
        Update user data.

        Args:
            username: Username to update
            updates: Dictionary with updates, merged into the stored record

        Returns:
            bool: True if updated and saved to HF. False if the user was not
            found or the save failed; on a failed save the in-memory record
            keeps the updates.
        """
        if not self.user_exists(username):
            return False

        self.users_data[username].update(updates)

        # Save to HF
        success = self._save_users_to_hf(
            commit_message=f"Update user: {username}"
        )
        if success:
            print(f"✅ User '{username}' updated in HF")
        return success

    def delete_user(self, username: str) -> bool:
        """
        Delete a user account.

        Args:
            username: Username to delete

        Returns:
            bool: True if deleted and saved to HF. False if the user was not
            found or the save failed; on a failed save the user is still
            removed from memory.
        """
        if not self.user_exists(username):
            return False

        del self.users_data[username]

        # Save to HF
        success = self._save_users_to_hf(
            commit_message=f"Delete user: {username}"
        )
        if success:
            print(f"✅ User '{username}' deleted from HF")
        return success

    def get_all_users(self) -> Dict[str, Any]:
        """
        Get all users data.

        Returns:
            Shallow copy of the users dictionary (records themselves are shared)
        """
        return self.users_data.copy()

    def get_user_by_api_key(self, api_key: str) -> Optional[Tuple[str, Dict[str, Any]]]:
        """
        Find user by API key.

        Args:
            api_key: API key to search for

        Returns:
            Tuple of (username, user_data) for the first match, or None if not found
        """
        for username, user_data in self.users_data.items():
            if user_data.get("api_key") == api_key:
                return (username, user_data)
        return None

    def get_user_by_email(self, email: str) -> Optional[Tuple[str, Dict[str, Any]]]:
        """
        Find user by email.

        Args:
            email: Email to search for

        Returns:
            Tuple of (username, user_data) for the first match, or None if not found
        """
        for username, user_data in self.users_data.items():
            if user_data.get("email") == email:
                return (username, user_data)
        return None

    def verify_password(self, username: str, password_hash: str) -> bool:
        """
        Verify user password hash.

        Args:
            username: Username
            password_hash: Password hash to verify

        Returns:
            bool: True if password matches, False otherwise
        """
        user = self.get_user(username)
        if not user:
            return False

        stored_hash = user.get("password_hash")
        if isinstance(stored_hash, str) and isinstance(password_hash, str):
            # Constant-time comparison; encode first because compare_digest
            # only accepts ASCII-only str or bytes.
            return hmac.compare_digest(
                stored_hash.encode("utf-8"), password_hash.encode("utf-8")
            )
        # Non-string values (e.g. a missing stored hash) keep plain equality.
        return stored_hash == password_hash

    # ============================================
    # USER STORAGE FILE HELPERS
    # ============================================

    def _path_in_repo_for_user(self, user_id: str, filename: str) -> str:
        """Return the repo path of ``filename`` inside the user's folder."""
        return f"users/{user_id}/{filename}"

    def save_user_json(
        self,
        user_id: str,
        filename: str,
        data: dict,
        commit_message: Optional[str] = None,
    ) -> bool:
        """
        Save a JSON file into the user's storage folder in the HF repo.

        Args:
            user_id: User ID whose folder receives the file
            filename: File name inside the user's folder
            data: JSON-serializable content (written as UTF-8, non-ASCII kept as-is)
            commit_message: Commit message; defaults to "Update <user_id>/<filename>"

        Returns:
            bool: True if saved successfully, False otherwise
        """
        if not self.enabled:
            print("⚠️ HF User Manager disabled, cannot save user file")
            return False

        if commit_message is None:
            commit_message = f"Update {user_id}/{filename}"

        try:
            path_in_repo = self._path_in_repo_for_user(user_id, filename)
            self._upload_json(data, path_in_repo, commit_message, ensure_ascii=False)
            print(f"✅ Saved {path_in_repo} to HF repo")
            return True
        except Exception as e:
            print(f"❌ Failed to save user file to HF: {e}")
            return False

    def load_user_json(self, user_id: str, filename: str) -> Optional[dict]:
        """
        Load a JSON file from the user's storage folder in the HF repo.

        Args:
            user_id: User ID whose folder holds the file
            filename: File name inside the user's folder

        Returns:
            The parsed JSON dict, or None if the manager is disabled, the
            file is not found, or any error occurs.
        """
        if not self.enabled:
            return None

        path_in_repo = self._path_in_repo_for_user(user_id, filename)
        try:
            downloaded_path = hf_hub_download(
                repo_id=self.hf_repo,
                filename=path_in_repo,
                token=self.hf_token,
                repo_type="model",
            )
            with open(downloaded_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except Exception:
            # Not found or error: deliberately reported as "no data"
            return None


# ============================================
# CONVENIENCE FUNCTIONS
# ============================================

def create_hf_user_manager(
    hf_token: Optional[str] = None,
    hf_repo: Optional[str] = None
) -> HFUserManager:
    """
    Create and return an HF User Manager instance.

    Args:
        hf_token: HF token (read from the HF_TOKEN env var if not provided)
        hf_repo: HF repo ID (read from the HF_REPO env var if not provided,
            falling back to "Hamza4100/multi-pdf-storage")

    Returns:
        HFUserManager instance
    """
    if hf_token is None:
        hf_token = os.environ.get("HF_TOKEN")
    if hf_repo is None:
        hf_repo = os.environ.get("HF_REPO", "Hamza4100/multi-pdf-storage")

    return HFUserManager(hf_token=hf_token, hf_repo=hf_repo)