mikeboone Claude Sonnet 4.6 committed on
Commit
56e1c8e
·
1 Parent(s): 89d99d9

fix: prevent string values from being inserted into numeric columns

Browse files

Add _events/_sessions/_views etc. to generic_patterns in config.py so
FEATURE_USAGE_EVENTS and similar numeric columns aren't misclassified as
AI_GEN (which generates text). Add a string→numeric coercion guard in
convert_value() as a second safety net for any future misclassification.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

legitdata_bridge.py CHANGED
@@ -233,7 +233,13 @@ class KeyPairSnowflakeWriter:
233
  value = value[:max_len]
234
 
235
  # Clamp numbers to fit DECIMAL precision
236
- elif 'NUMBER' in data_type or 'DECIMAL' in data_type or 'NUMERIC' in data_type:
 
 
 
 
 
 
237
  precision = info.get('precision', 38)
238
  scale = info.get('scale', 0)
239
  if isinstance(value, (int, float)):
 
233
  value = value[:max_len]
234
 
235
  # Clamp numbers to fit DECIMAL precision
236
+ elif 'NUMBER' in data_type or 'DECIMAL' in data_type or 'NUMERIC' in data_type or 'INT' in data_type or 'FLOAT' in data_type:
237
+ # If a string landed in a numeric column (AI misclassification), coerce or null it
238
+ if isinstance(value, str):
239
+ try:
240
+ value = float(value)
241
+ except (ValueError, TypeError):
242
+ return None # Can't coerce — use NULL rather than crash
243
  precision = info.get('precision', 38)
244
  scale = info.get('scale', 0)
245
  if isinstance(value, (int, float)):
legitdata_project/legitdata/config.py CHANGED
@@ -110,6 +110,20 @@ COLUMN_HINTS = {
110
  r"created.*",
111
  r"updated.*",
112
  r"modified.*",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  ]
114
  }
115
 
 
110
  r"created.*",
111
  r"updated.*",
112
  r"modified.*",
113
+ # Numeric event/activity columns — must be numbers, not text
114
+ r".*_events$",
115
+ r".*_sessions$",
116
+ r".*_views$",
117
+ r".*_clicks$",
118
+ r".*_impressions$",
119
+ r".*_conversions$",
120
+ r".*_installs$",
121
+ r".*_signups$",
122
+ r".*_leads$",
123
+ r"num_.*",
124
+ r".*_num$",
125
+ r".*_usage$",
126
+ r".*_volume$",
127
  ]
128
  }
129