chaaim123 committed on
Commit
14fe1d7
·
verified ·
1 Parent(s): a8949a8

Create utils/metadata_utils.py

Browse files
Files changed (1) hide show
  1. utils/metadata_utils.py +35 -0
utils/metadata_utils.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # utils/metadata_utils.py
2
+
3
+ from datetime import datetime
4
+ from urllib.parse import urlparse
5
+
6
def enhance_metadata(metadata):
    """
    Enhance metadata with inferred fields based on known patterns.

    Fields are only added when absent (``setdefault``): values already present
    in ``metadata`` are never overwritten, and the input dict is not mutated.

    Args:
        metadata (dict): Original metadata dictionary. ``None`` is treated as
            an empty dictionary. The optional ``"url"`` entry drives the
            institution and source-system inference; a missing, non-string or
            unparsable URL is treated as having no host.

    Returns:
        dict: Shallow copy of ``metadata`` with additional standardized
        fields: ``institution`` and ``institution_short`` (only for hosts on
        american.edu), ``source_system`` and ``timestamp``.
    """
    enhanced = dict(metadata) if metadata else {}

    url = enhanced.get("url")
    if not isinstance(url, str):
        # urlparse() on None/bytes yields bytes fields, which would make the
        # string membership tests below raise TypeError.
        url = ""

    try:
        # .hostname is lower-cased and excludes userinfo and port, unlike
        # .netloc, so "WWW.American.EDU:443" is recognised as well.
        host = urlparse(url).hostname or ""
    except ValueError:
        # Malformed URLs (e.g. an unbalanced IPv6 bracket) are best-effort:
        # skip host-based inference rather than fail the whole record.
        host = ""
    host = host.rstrip(".")

    # Set institution and short name. Match the registered domain or one of
    # its subdomains; a plain substring test would also accept look-alike
    # hosts such as "notamerican.edu" or "american.edu.evil.example".
    if host == "american.edu" or host.endswith(".american.edu"):
        enhanced.setdefault("institution", "American University")
        enhanced.setdefault("institution_short", "AU")

    # Determine source system from the hostname (e.g. a "catalog" subdomain).
    if "catalog" in host:
        enhanced.setdefault("source_system", "Course Catalog")
    else:
        enhanced.setdefault("source_system", "University Website")

    # Standardized timestamp (ISO 8601).
    # NOTE(review): this is naive local time with no UTC offset; confirm
    # whether consumers expect a timezone-aware value before changing it.
    enhanced.setdefault("timestamp", datetime.now().isoformat())

    return enhanced