File size: 1,045 Bytes
14fe1d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# utils/metadata_utils.py

from datetime import datetime
from urllib.parse import urlparse

def enhance_metadata(metadata: dict) -> dict:
    """
    Enhance metadata with inferred fields based on known patterns.

    The input is shallow-copied and never mutated. Every inferred field is
    added with ``setdefault``, so values already present in ``metadata`` are
    preserved.

    Args:
        metadata (dict): Original metadata dictionary. The optional ``"url"``
            entry drives the inference; a missing, non-string or unparsable
            URL simply yields no institution fields.

    Returns:
        dict: Enhanced copy of the metadata. For URLs hosted on
        ``american.edu`` (the domain itself or any subdomain) it gains
        ``institution``, ``institution_short`` and ``source_system``; a
        ``timestamp`` is always ensured.
    """
    enhanced = metadata.copy()

    url = enhanced.get("url")
    try:
        # ``hostname`` is lower-cased and excludes userinfo and port, unlike
        # the raw ``netloc``. Non-string values (e.g. None) have no host.
        hostname = urlparse(url).hostname if isinstance(url, str) else None
    except ValueError:
        # urlparse rejects malformed authorities (e.g. an unbalanced "[");
        # enrichment is best-effort, so treat these as "no host".
        hostname = None
    # Drop the trailing dot of a fully-qualified name ("american.edu.").
    host = (hostname or "").rstrip(".")

    # Set institution and short name. Match the exact domain or a genuine
    # subdomain so look-alikes such as "notamerican.edu" or
    # "american.edu.evil.com" are not misattributed.
    if host == "american.edu" or host.endswith(".american.edu"):
        enhanced.setdefault("institution", "American University")
        enhanced.setdefault("institution_short", "AU")

        # Determine source system from the host name (e.g. a "catalog"
        # subdomain); the URL path is not inspected.
        if "catalog" in host:
            enhanced.setdefault("source_system", "Course Catalog")
        else:
            enhanced.setdefault("source_system", "University Website")

    # Standardized timestamp (ISO 8601).
    # NOTE(review): this is naive local time with no UTC offset; kept as-is
    # so the emitted format does not change for existing consumers.
    enhanced.setdefault("timestamp", datetime.now().isoformat())

    return enhanced