# utils/metadata_utils.py
from datetime import datetime
from urllib.parse import urlparse
def enhance_metadata(metadata: dict) -> dict:
    """
    Enhance metadata with inferred fields based on known patterns.

    The input dictionary is not modified; a shallow copy is enriched and
    returned. Every inferred field is written with ``setdefault``, so values
    already present in ``metadata`` are never overwritten.

    Args:
        metadata (dict): Original metadata dictionary. Its optional ``"url"``
            entry drives the inference; a missing, ``None`` or non-string URL
            is treated as an empty URL.

    Returns:
        dict: Enhanced metadata with additional standardized fields:
            ``institution`` / ``institution_short`` (only when the URL host is
            ``american.edu`` or one of its subdomains), ``source_system`` and
            ``timestamp``.
    """
    enhanced = metadata.copy()

    # A present-but-None (or otherwise non-string) URL would make urlparse
    # return bytes components and break the string checks below.
    url = enhanced.get("url")
    if not isinstance(url, str):
        url = ""

    # ``hostname`` is lower-cased and excludes userinfo and port, unlike the
    # raw ``netloc``. A malformed URL (e.g. an unterminated IPv6 bracket)
    # raises ValueError; treat it as "host unknown" rather than failing.
    try:
        hostname = (urlparse(url).hostname or "").rstrip(".")
    except ValueError:
        hostname = ""

    # Set institution and short name. Match the exact domain or a true
    # subdomain so look-alikes such as "notamerican.edu" or
    # "american.edu.example.com" are not misattributed.
    if hostname == "american.edu" or hostname.endswith(".american.edu"):
        enhanced.setdefault("institution", "American University")
        enhanced.setdefault("institution_short", "AU")

    # Determine source system from the host name (e.g. a "catalog" subdomain).
    if "catalog" in hostname:
        enhanced.setdefault("source_system", "Course Catalog")
    else:
        enhanced.setdefault("source_system", "University Website")

    # Standardized timestamp (ISO 8601). NOTE: this is naive local time, kept
    # as-is for compatibility with previously generated records.
    enhanced.setdefault("timestamp", datetime.now().isoformat())
    return enhanced