# Odin/src/data_pipeline/utils.py
# ODIN
# Initial commit: ODIN multi-agent drilling intelligence system
# 67e93c9
import re
def normalize_well_name(raw_name: str) -> str:
    """
    Normalize a well name from various sources (WITSML, DDR, EDM) into a canonical format.

    The canonical form is ``<quadrant>/<block>-<well>`` followed by an optional
    space-separated suffix, e.g. ``15/9-F-1 C`` or ``15/9-19 A``.

    E.g.:
        "15/9-F-5 W-508420" -> "15/9-F-5"
        "NO 15/9-F-1 C 1bf1cc58-83af-4e13-9696-4fae2f9294ae" -> "15/9-F-1 C"
        "15-9-F-1" -> "15/9-F-1"
        "15_9-F-1" -> "15/9-F-1"
        "15_9_F_1_C" -> "15/9-F-1 C"
        "15_9-19_A" -> "15/9-19 A"

    Args:
        raw_name: The well name as found in the source data.

    Returns:
        The normalized name, or "UNKNOWN" when ``raw_name`` is not a string
        or contains only whitespace.
    """
    if not isinstance(raw_name, str) or not raw_name.strip():
        return "UNKNOWN"

    s = raw_name.strip()

    # Remove leading "NO " or "NO-"
    s = re.sub(r'^NO[\s\-]+', '', s, flags=re.IGNORECASE)

    # WITSML usually separates the name from its ID with a double space, so
    # everything after the first run of 2+ whitespace characters is dropped.
    # A single space must NOT be used as the cut point: it is the legitimate
    # separator before a suffix such as " C".
    s = re.split(r'\s{2,}', s, maxsplit=1)[0]

    # Also strip a trailing "W-<digits>" ID or UUID that is separated by a
    # single space only, leaving any suffix before it intact.
    s = re.sub(
        r'\s+(?:W-\d+|[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12})$',
        '',
        s,
        flags=re.IGNORECASE,
    )

    # Standardize the block/quadrant: 15_9 or 15-9 -> 15/9
    s = re.sub(r'^(\d+)[_\-](\d+)', r'\1/\2', s)

    # Underscore-separated names (e.g. 15/9_F_1_C) are rebuilt from their parts.
    if '_' in s and '/' in s:
        parts = re.split(r'[_\-]+', s)
        # A numeric part right after the block is the well number itself
        # ("15/9-19 A"): the dashed base has two parts. Otherwise it is taken
        # as a platform/slot pair ("15/9-F-1 C"): the base has three parts.
        base_len = 2 if parts[1].isdigit() else 3
        if len(parts) >= base_len:
            # Dashes inside the base, spaces before every remaining suffix.
            s = ' '.join(['-'.join(parts[:base_len]), *parts[base_len:]])

    # Any underscore still present (e.g. "F-11_A" without a block prefix)
    # becomes a space; this also covers the "_A" / "_ST2" / "_T2" suffix forms.
    s = s.replace('_', ' ')

    # Squeeze multiple spaces
    s = re.sub(r'\s+', ' ', s)

    return s.strip()
def safe_filename(name: str) -> str:
    """Return ``name`` with every "/", " " and "-" replaced by "_"."""
    # One translation pass maps each separator character to an underscore.
    separators_to_underscore = str.maketrans("/ -", "___")
    return name.translate(separators_to_underscore)