File size: 1,926 Bytes
67e93c9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 | import re
# Trailing identifiers some sources append after the well name: either a
# "W-<digits>" id or a UUID, each preceded by whitespace.
_TRAILING_ID_RE = re.compile(
    r'(?:\s+(?:W-\d+'
    r'|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}))+$',
    re.IGNORECASE,
)

# A normalized quadrant/block prefix such as "15/9".
_BLOCK_RE = re.compile(r'\d+/\d+')


def _rejoin_well_parts(parts: list) -> str:
    """Rebuild a well name from tokens that were split on underscores/hyphens.

    Tokens up to and including the well number (the first token after the
    quadrant/block that starts with a digit) are joined with hyphens; anything
    after it is a suffix and is joined with spaces:
    ["15/9", "F", "1", "C"] -> "15/9-F-1 C", ["15/9", "19", "A"] -> "15/9-19 A".

    If the first token is not a quadrant/block or no well number is found, the
    first three tokens are hyphenated and the rest space-joined.
    """
    number_at = None
    if _BLOCK_RE.fullmatch(parts[0]):
        number_at = next(
            (i for i, tok in enumerate(parts) if i > 0 and tok[:1].isdigit()),
            None,
        )
    if number_at is None:
        number_at = 2

    head = "-".join(parts[:number_at + 1])
    tail = parts[number_at + 1:]
    return f"{head} {' '.join(tail)}" if tail else head


def normalize_well_name(raw_name: str) -> str:
    """
    Normalize well names from various sources (WITSML, DDR, EDM) into a
    canonical format.

    Steps, in order:
      1. Strip surrounding whitespace and a leading "NO " / "NO-" prefix.
      2. Drop everything after the first run of two or more whitespace
         characters, then drop trailing UUID / "W-<digits>" tokens.
      3. Rewrite a leading "<quadrant>_<block>" or "<quadrant>-<block>" as
         "<quadrant>/<block>".
      4. If underscores remain, rebuild the name so that the part up to the
         well number is hyphenated and suffixes are space-separated.
      5. Turn leftover underscores into spaces and squeeze whitespace.

    E.g.:
        "15/9-F-5 W-508420" -> "15/9-F-5"
        "NO 15/9-F-1 C 1bf1cc58-83af-4e13-9696-4fae2f9294ae" -> "15/9-F-1 C"
        "15-9-F-1" -> "15/9-F-1"
        "15_9-F-1" -> "15/9-F-1"
        "15_9_F_1_C" -> "15/9-F-1 C"
        "15/9-19_A" -> "15/9-19 A"

    Args:
        raw_name: The well name as received from the source system.

    Returns:
        The canonical well name, or "UNKNOWN" when the input is not a string,
        is blank, or is empty after normalization.
    """
    if not isinstance(raw_name, str) or not raw_name.strip():
        return "UNKNOWN"

    s = raw_name.strip()

    # Remove leading "NO " or "NO-"
    s = re.sub(r'^NO[\s\-]+', '', s, flags=re.IGNORECASE)

    # A run of 2+ whitespace characters separates the name from trailing data.
    s = re.split(r'\s{2,}', s, maxsplit=1)[0]
    # Recognised IDs may also follow a single space. Match them explicitly so a
    # legitimate suffix such as the "C" in "15/9-F-1 C" is not discarded.
    s = _TRAILING_ID_RE.sub('', s)

    # Standardize the block/quadrant: 15_9 or 15-9 -> 15/9
    s = re.sub(r'^(\d+)[_\-](\d+)', r'\1/\2', s)

    # Underscore-separated names (e.g. 15/9_F_1_C or 15/9-19_A): rebuild with
    # hyphens up to the well number and spaces before any suffix.
    if '_' in s and '/' in s:
        parts = re.split(r'[_\-]+', s)
        if len(parts) >= 3:
            s = _rejoin_well_parts(parts)

    # Any remaining underscore is a suffix separator ("F-11_A", "F-1_ST2"),
    # so one blanket replacement covers every such case.
    s = s.replace('_', ' ')

    # Squeeze multiple spaces
    s = re.sub(r'\s+', ' ', s).strip()

    # Never hand back an empty name (e.g. the input was only "NO -").
    return s or "UNKNOWN"
# Characters replaced by "_" in filenames: both path separators, the
# characters reserved on Windows, plus space, hyphen and common whitespace.
_UNSAFE_FILENAME_TABLE = str.maketrans(
    {ch: "_" for ch in '/\\:*?"<>| -\t\n\r'}
)


def safe_filename(name: str) -> str:
    """Convert a canonical name to a safe filename string.

    Each path separator, reserved character, space, hyphen, tab or newline is
    replaced one-for-one by an underscore (runs are not collapsed), e.g.
    "15/9-F-1 C" -> "15_9_F_1_C".

    Args:
        name: The canonical well name.

    Returns:
        The name with every unsafe character replaced by "_".
    """
    return name.translate(_UNSAFE_FILENAME_TABLE)
|