demo10 / utils /metadata_utils.py
chaaim123's picture
Create utils/metadata_utils.py
14fe1d7 verified
# utils/metadata_utils.py
from datetime import datetime
from urllib.parse import urlparse
def enhance_metadata(metadata):
    """
    Enhance metadata with inferred fields based on known patterns.

    The input mapping is shallow-copied and never mutated. Every inferred
    field is written with ``setdefault``, so values already present in
    ``metadata`` always win over inferred ones.

    Args:
        metadata (dict): Original metadata dictionary. Its optional
            ``"url"`` entry drives the inference; a missing or non-string
            URL is treated as an empty URL.

    Returns:
        dict: Enhanced metadata with additional standardized fields:
            ``institution`` / ``institution_short`` (only when the URL host
            is ``american.edu`` or one of its subdomains),
            ``source_system`` and ``timestamp``.
    """
    enhanced = metadata.copy()

    url = enhanced.get("url", "")
    if not isinstance(url, str):
        # None (or any other non-string) would make urlparse return bytes
        # components or fail outright; treat it as "no URL".
        url = ""

    # Use hostname rather than netloc: it excludes userinfo and port and is
    # already lower-cased, so "user@American.edu:443" compares correctly.
    # A URL without a scheme ("american.edu/x") has no host and infers nothing.
    host = urlparse(url).hostname or ""

    # Set institution and short name. Match the registered domain exactly or
    # as a dot-separated suffix so look-alikes such as "notamerican.edu" or
    # "american.edu.evil.com" are not misattributed.
    if host == "american.edu" or host.endswith(".american.edu"):
        enhanced.setdefault("institution", "American University")
        enhanced.setdefault("institution_short", "AU")

    # Determine source system from the host name (the path is not inspected).
    if "catalog" in host:
        enhanced.setdefault("source_system", "Course Catalog")
    else:
        enhanced.setdefault("source_system", "University Website")

    # Standardized timestamp (ISO 8601).
    # NOTE(review): this is naive local time with no UTC offset; confirm
    # whether consumers expect a timezone-aware value before changing it.
    enhanced.setdefault("timestamp", datetime.now().isoformat())
    return enhanced