Codex committed on
Commit
18389fc
·
1 Parent(s): 36a8733

Fix Circa strikeouts table parsing

Browse files
Files changed (2) hide show
  1. src/market-scanner.js +117 -2
  2. test/market-scanner.test.js +15 -0
src/market-scanner.js CHANGED
@@ -758,6 +758,31 @@ function normalizeCircaHalfLine(value) {
758
  return Number.isFinite(numeric) ? numeric : null;
759
  }
760
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
761
  function isReasonableCircaOdds(oddsInput, section) {
762
  if (!oddsInput) {
763
  return false;
@@ -1278,6 +1303,93 @@ function extractCircaOverUnderTableEntries(segment, section, state) {
1278
  return [];
1279
  }
1280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1281
  export function parseCircaOcrText(text) {
1282
  const entries = [];
1283
  const rawLines = String(text ?? '')
@@ -1344,7 +1456,10 @@ export function parseCircaOcrText(text) {
1344
  if (activeSections.length === 1) {
1345
  const section = activeSections[0];
1346
  for (const segment of segments) {
1347
- entries.push(...parseCircaSegmentWithSection(segment.text, section, overUnderStates, `single-${section.type}`));
 
 
 
1348
  }
1349
  continue;
1350
  }
@@ -1478,7 +1593,7 @@ function parseCircaSegmentWithSection(segment, section, overUnderStates, stateKe
1478
  pendingUnderOdds: null,
1479
  };
1480
  existingState.section = section;
1481
- const parsed = extractCircaOverUnderTableEntries(segment, section, existingState);
1482
  overUnderStates.set(stateKey, existingState);
1483
  return parsed;
1484
  }
 
758
  return Number.isFinite(numeric) ? numeric : null;
759
  }
760
 
761
/**
 * Extracts the numeric line value that sits immediately before an odds-like
 * token in a Circa table segment (for example the `3½` in `3½ +105`).
 *
 * The segment is upper-cased and the letters `I` / `O` are rewritten to the
 * digits `1` / `0` before matching. When several candidates are present, the
 * last one in the segment wins.
 *
 * Interpretation of the matched number:
 * - `6`            -> 6
 * - `6.0`, `6.00`  -> 6   (explicit decimal is parsed literally)
 * - `3.5`          -> 3.5 (explicit decimal is parsed literally)
 * - `3½`, `3%`, …  -> 3.5 (any 1-3 trailing non-digit, non-space characters
 *                          are treated as a half marker)
 *
 * @param {string} segment - Raw text of one table segment.
 * @returns {number|null} The parsed line value, or `null` when no number is
 *   followed by an odds-like token.
 */
function extractCircaTableLineValue(segment) {
  const normalized = normalizeWhitespace(segment)
    .toUpperCase()
    .replace(/I/g, '1')
    .replace(/O/g, '0');

  // Group 1: whole part. Group 2: explicit decimal digits ("3.5", "6.0").
  // Group 3: non-digit suffix such as "½". The decimal alternative must come
  // first; otherwise "3.5 +105" would be skipped at "3" and re-matched at the
  // fractional digit, yielding 5 instead of 3.5.
  const linePattern = /(\d+)(?:\.(\d+)|([^0-9\s]{1,3}))?(?=\s+[+\-~]?[0-9BOSI]{2,4}\b)/g;
  const matches = [...normalized.matchAll(linePattern)];
  if (matches.length === 0) {
    return null;
  }

  const [, wholeDigits, decimalDigits, halfMarker] = matches[matches.length - 1];
  const whole = Number(wholeDigits);
  if (!Number.isFinite(whole)) {
    return null;
  }

  if (decimalDigits !== undefined) {
    const decimalValue = Number(`${wholeDigits}.${decimalDigits}`);
    return Number.isFinite(decimalValue) ? decimalValue : null;
  }

  if (halfMarker) {
    return whole + 0.5;
  }

  return whole;
}
785
+
786
  function isReasonableCircaOdds(oddsInput, section) {
787
  if (!oddsInput) {
788
  return false;
 
1303
  return [];
1304
  }
1305
 
1306
/**
 * Parses one segment of an over/under table and returns any entries that
 * became complete as a result.
 *
 * The function keeps a "pending" row on `state` (player, team, line value,
 * over odds, under odds). A segment containing a `NAME (TEAM)` pattern starts
 * a new pending row (flushing the previous one first); a later segment with a
 * line value and at least one odds token supplies the under odds and flushes.
 *
 * @param {string} segment - Raw text of the segment.
 * @param {{type: *, label: *}} section - Section descriptor; `type` and
 *   `label` are copied onto each entry as `marketType` / `marketLabel`.
 * @param {object} state - Mutable pending-row state; its `pending*` fields
 *   are read and reset by this function.
 * @returns {object[]} Entries emitted by this call (possibly empty).
 */
function extractCircaOverUnderTableEntriesV2(segment, section, state) {
  const cleaned = normalizeWhitespace(segment)
    .toUpperCase()
    .replace(/[~]/g, '-')
    .replace(/\s+/g, ' ');
  const playerHit = cleaned.match(/([A-Z][A-Z.'\- ]{2,40}\([A-Z]{2,3}\))/);
  const parsedLine = extractCircaTableLineValue(cleaned);
  const odds = extractOddsTokensFromSegment(cleaned);

  // Wipes every pending field so the next player row starts from scratch.
  const clearPending = () => {
    state.pendingPlayer = null;
    state.pendingTeam = null;
    state.pendingLineValue = null;
    state.pendingOverOdds = null;
    state.pendingUnderOdds = null;
  };

  // Builds a single entry for one side, or returns null when the odds are
  // missing, rejected by isReasonableCircaOdds, or have no implied probability.
  const buildEntry = (side, oddsInput) => {
    if (!oddsInput || !isReasonableCircaOdds(oddsInput, { mode: 'over_under' })) {
      return null;
    }

    const impliedProbability = americanToImpliedProbability(oddsInput);
    if (impliedProbability === null) {
      return null;
    }

    const entry = {
      source: 'circa',
      book: 'Circa',
      eventName: 'Circa MLB',
      eventId: null,
      team: state.pendingTeam,
      playerName: state.pendingPlayer,
      playerKey: normalizePlayerName(state.pendingPlayer),
      marketType: section.type,
      marketLabel: section.label,
      side,
      lineValue: state.pendingLineValue,
      oddsInput,
      impliedProbability,
      rawLabel: cleaned,
    };
    entry.marketKey = buildMarketKey(entry);
    return entry;
  };

  // Emits entries for the pending row (if it has player, team and a finite
  // line value) and always resets the pending state afterwards.
  const flushPending = () => {
    const emitted = [];
    const isComplete = Boolean(state.pendingPlayer)
      && Boolean(state.pendingTeam)
      && Number.isFinite(state.pendingLineValue);

    if (isComplete) {
      const sides = [
        ['over', state.pendingOverOdds],
        ['under', state.pendingUnderOdds ?? null],
      ];
      for (const [side, oddsInput] of sides) {
        const entry = buildEntry(side, oddsInput);
        if (entry !== null) {
          emitted.push(entry);
        }
      }
    }

    clearPending();
    return emitted;
  };

  if (!cleaned) {
    return flushPending();
  }

  if (!playerHit) {
    // Continuation row: only meaningful when a player is already pending and
    // this segment carries both a line value and at least one odds token.
    const isContinuation = state.pendingPlayer && Number.isFinite(parsedLine) && odds.length > 0;
    if (!isContinuation) {
      return [];
    }
    state.pendingUnderOdds = normalizeOcrOddsTokenForSide(odds[0], 'under');
    return flushPending();
  }

  // New player row: emit whatever was pending, then stage the new row.
  const results = flushPending();
  const { playerName, team } = extractPlayerAndTeam(playerHit[1]);
  state.pendingPlayer = playerName;
  state.pendingTeam = team;
  state.pendingLineValue = parsedLine;

  if (odds[0]) {
    state.pendingOverOdds = normalizeOcrOddsTokenForSide(odds[0], 'over');
  } else {
    state.pendingOverOdds = null;
  }

  if (odds[1]) {
    state.pendingUnderOdds = normalizeOcrOddsTokenForSide(odds[1], 'under');
  } else {
    state.pendingUnderOdds = null;
  }

  // Both sides were on the same row, so the row is already complete.
  if (state.pendingUnderOdds) {
    results.push(...flushPending());
  }
  return results;
}
1392
+
1393
  export function parseCircaOcrText(text) {
1394
  const entries = [];
1395
  const rawLines = String(text ?? '')
 
1456
  if (activeSections.length === 1) {
1457
  const section = activeSections[0];
1458
  for (const segment of segments) {
1459
+ const stateKey = section.mode === 'over_under_table'
1460
+ ? `single-${section.type}-col-${segment.index}`
1461
+ : `single-${section.type}`;
1462
+ entries.push(...parseCircaSegmentWithSection(segment.text, section, overUnderStates, stateKey));
1463
  }
1464
  continue;
1465
  }
 
1593
  pendingUnderOdds: null,
1594
  };
1595
  existingState.section = section;
1596
+ const parsed = extractCircaOverUnderTableEntriesV2(segment, section, existingState);
1597
  overUnderStates.set(stateKey, existingState);
1598
  return parsed;
1599
  }
test/market-scanner.test.js CHANGED
@@ -209,6 +209,21 @@ test('parses table-style total bases and strikeouts sections', () => {
209
  assert.ok(entries.some((entry) => entry.marketType === 'pitcher_strikeouts_generic' && entry.playerName === 'BUBBA CHANDLER' && entry.side === 'under' && entry.lineValue === 5.5 && entry.oddsInput === '-155'));
210
  });
211
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  test('ranks discrepancy, width, and circa alerts', () => {
213
  const entries = [
214
  {
 
209
  assert.ok(entries.some((entry) => entry.marketType === 'pitcher_strikeouts_generic' && entry.playerName === 'BUBBA CHANDLER' && entry.side === 'under' && entry.lineValue === 5.5 && entry.oddsInput === '-155'));
210
  });
211
 
212
// A single "Total Strikeouts" header followed by rows that each hold two
// player columns; the under odds for each column arrive on the following row.
test('parses single-header two-column strikeouts table', () => {
  const ocrText = [
    'Total Strikeouts',
    '85001 GERMAN MARQUEZ (SD) 3½ +105 85029 LOGAN GILBERT (SEA) 6½ +105',
    '3½ -125 6½ -125',
    '85003 BUBBA CHANDLER (PIT) 5½ +135 85031 JACOB DEGROM (TEX) 6½ -145',
    '5½ -155 6½ +125',
  ].join('\n');
  const entries = parseCircaOcrText(ocrText);

  // One representative side per player, covering both columns of both rows.
  const expectedEntries = [
    { playerName: 'GERMAN MARQUEZ', side: 'over', lineValue: 3.5, oddsInput: '+105' },
    { playerName: 'LOGAN GILBERT', side: 'under', lineValue: 6.5, oddsInput: '-125' },
    { playerName: 'BUBBA CHANDLER', side: 'under', lineValue: 5.5, oddsInput: '-155' },
    { playerName: 'JACOB DEGROM', side: 'over', lineValue: 6.5, oddsInput: '-145' },
  ];

  for (const expected of expectedEntries) {
    const isPresent = entries.some((entry) => (
      entry.marketType === 'pitcher_strikeouts_generic'
      && entry.playerName === expected.playerName
      && entry.side === expected.side
      && entry.lineValue === expected.lineValue
      && entry.oddsInput === expected.oddsInput
    ));
    assert.ok(isPresent);
  }
});
226
+
227
  test('ranks discrepancy, width, and circa alerts', () => {
228
  const entries = [
229
  {