# utils/metadata_utils.py
from datetime import datetime
from urllib.parse import urlparse

# Registered domain used to recognise American University pages.
_AU_DOMAIN = "american.edu"


def _hostname_of(url):
    """Return the lower-cased hostname of *url*, or "" when it has none."""
    if not isinstance(url, str):
        # A missing/None/bytes URL carries no usable host information.
        return ""
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. a bad IPv6 bracket).
        return ""
    # .hostname strips userinfo and port and is already lower-cased.
    return parsed.hostname or ""


def enhance_metadata(metadata: dict) -> dict:
    """
    Enhance metadata with inferred fields based on known patterns.

    The input is shallow-copied and never modified. Every inferred field is
    added with ``setdefault``, so values already present in ``metadata`` are
    kept as they are.

    Fields that may be added:
        institution / institution_short: set when the URL's host is
            ``american.edu`` or one of its subdomains.
        source_system: "Course Catalog" when the host contains "catalog",
            otherwise "University Website".
        timestamp: the current local time (naive) in ISO 8601 format.

    Args:
        metadata (dict): Original metadata dictionary. The "url" key is
            optional; a missing, non-string or unparsable URL is treated as
            having no host.

    Returns:
        dict: Enhanced metadata with additional standardized fields
    """
    enhanced = metadata.copy()
    host = _hostname_of(enhanced.get("url"))

    # Set institution and short name. Match the exact domain or a true
    # subdomain so look-alike hosts ("notamerican.edu",
    # "american.edu.evil.com") are not mislabelled.
    if host == _AU_DOMAIN or host.endswith("." + _AU_DOMAIN):
        enhanced.setdefault("institution", "American University")
        enhanced.setdefault("institution_short", "AU")

    # Determine source system from the host name.
    if "catalog" in host:
        enhanced.setdefault("source_system", "Course Catalog")
    else:
        enhanced.setdefault("source_system", "University Website")

    # Standardized timestamp (ISO 8601). Kept as naive local time so the
    # stored format stays the same for existing consumers.
    enhanced.setdefault("timestamp", datetime.now().isoformat())

    return enhanced