Codex committed on
Commit ·
18389fc
1
Parent(s): 36a8733
Fix Circa strikeouts table parsing
Browse files
- src/market-scanner.js +117 -2
- test/market-scanner.test.js +15 -0
src/market-scanner.js
CHANGED
|
@@ -758,6 +758,31 @@ function normalizeCircaHalfLine(value) {
|
|
| 758 |
return Number.isFinite(numeric) ? numeric : null;
|
| 759 |
}
|
| 760 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 761 |
function isReasonableCircaOdds(oddsInput, section) {
|
| 762 |
if (!oddsInput) {
|
| 763 |
return false;
|
|
@@ -1278,6 +1303,93 @@ function extractCircaOverUnderTableEntries(segment, section, state) {
|
|
| 1278 |
return [];
|
| 1279 |
}
|
| 1280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1281 |
export function parseCircaOcrText(text) {
|
| 1282 |
const entries = [];
|
| 1283 |
const rawLines = String(text ?? '')
|
|
@@ -1344,7 +1456,10 @@ export function parseCircaOcrText(text) {
|
|
| 1344 |
if (activeSections.length === 1) {
|
| 1345 |
const section = activeSections[0];
|
| 1346 |
for (const segment of segments) {
|
| 1347 |
-
|
|
|
|
|
|
|
|
|
|
| 1348 |
}
|
| 1349 |
continue;
|
| 1350 |
}
|
|
@@ -1478,7 +1593,7 @@ function parseCircaSegmentWithSection(segment, section, overUnderStates, stateKe
|
|
| 1478 |
pendingUnderOdds: null,
|
| 1479 |
};
|
| 1480 |
existingState.section = section;
|
| 1481 |
-
const parsed =
|
| 1482 |
overUnderStates.set(stateKey, existingState);
|
| 1483 |
return parsed;
|
| 1484 |
}
|
|
|
|
| 758 |
return Number.isFinite(numeric) ? numeric : null;
|
| 759 |
}
|
| 760 |
|
| 761 |
+
/**
 * Pulls the over/under line value (e.g. 3.5 out of "3½ +105") from one
 * OCR'd table segment.
 *
 * A candidate is a run of digits, optionally followed by either a decimal
 * part (".5", ".0") or up to three non-digit, non-space characters (how OCR
 * tends to render "½"), that is itself followed by whitespace and an
 * odds-like token.
 *
 * Candidate selection:
 *  - Candidates immediately preceded by "+", "-" or "~" are treated as odds
 *    prices rather than lines, so "5½ +135 -155" yields 5.5 and not 135.
 *  - Among the remaining candidates the last one wins (the line sits right
 *    before the odds, after any leading rotation number).
 *  - If every candidate is sign-prefixed, the last candidate is still used,
 *    which keeps the previous behaviour for such input.
 *
 * Suffix interpretation:
 *  - no suffix, or an all-zero decimal (".0", ".00") -> whole number
 *  - anything else (".5", "½", OCR noise)            -> whole number + 0.5
 *
 * @param {*} segment Raw segment text; it is passed through normalizeWhitespace.
 * @returns {number|null} The line value, or null when no candidate is found.
 */
function extractCircaTableLineValue(segment) {
  // OCR commonly confuses I/1 and O/0; only digits matter here, so the
  // replacement is applied to the whole segment.
  const normalized = normalizeWhitespace(segment)
    .toUpperCase()
    .replace(/I/g, '1')
    .replace(/O/g, '0');

  // `\.\d+` is tried first so that "6.5 -125" is captured as 6 + ".5"
  // instead of being matched from the fractional digit onwards.
  const candidates = [
    ...normalized.matchAll(/(\d+)(\.\d+|[^0-9\s]{0,3})?(?=\s+[+\-~]?[0-9BOSI]{2,4}\b)/g),
  ];
  if (candidates.length === 0) {
    return null;
  }

  // A number glued to a leading sign is an odds price, not a line.
  const isSignPrefixed = ({ index }) => index > 0 && /[+\-~]/.test(normalized[index - 1]);
  const lineCandidates = candidates.filter((candidate) => !isSignPrefixed(candidate));
  const pool = lineCandidates.length > 0 ? lineCandidates : candidates;
  const match = pool[pool.length - 1];

  const whole = Number(match[1]);
  if (!Number.isFinite(whole)) {
    return null;
  }

  const suffix = String(match[2] ?? '');
  if (suffix === '' || /^\.0+$/.test(suffix)) {
    return whole;
  }

  return whole + 0.5;
}
|
| 785 |
+
|
| 786 |
function isReasonableCircaOdds(oddsInput, section) {
|
| 787 |
if (!oddsInput) {
|
| 788 |
return false;
|
|
|
|
| 1303 |
return [];
|
| 1304 |
}
|
| 1305 |
|
| 1306 |
+
/**
 * Consumes one segment of an over/under table and returns any entries that
 * became complete as a result.
 *
 * Pending data (player, team, line, over odds, under odds) is carried between
 * calls on `state`:
 *  - an empty segment flushes whatever is pending;
 *  - a segment containing "NAME (TEAM)" flushes the previous player, then
 *    stores the new player with the line and up to two odds tokens found in
 *    the same segment; if an under price is already present the new player is
 *    flushed immediately as well;
 *  - any other segment that has a finite line value and at least one odds
 *    token while a player is pending supplies that player's under price and
 *    flushes.
 *
 * @param {*} segment Raw segment text; it is passed through normalizeWhitespace.
 * @param {{type: *, label: *}} section Supplies marketType / marketLabel for built entries.
 * @param {object} state Mutable holder of the pending* fields; reset to null on every flush.
 * @returns {object[]} Entries built by this call (possibly empty).
 */
function extractCircaOverUnderTableEntriesV2(segment, section, state) {
  const rowText = normalizeWhitespace(segment)
    .toUpperCase()
    .replace(/[~]/g, '-')
    .replace(/\s+/g, ' ');
  const playerHit = rowText.match(/([A-Z][A-Z.'\- ]{2,40}\([A-Z]{2,3}\))/);
  const rowLineValue = extractCircaTableLineValue(rowText);
  const rowOddsTokens = extractOddsTokensFromSegment(rowText);

  // Wipes every pending field so the next player starts from a clean slate.
  const clearPending = () => {
    state.pendingPlayer = null;
    state.pendingTeam = null;
    state.pendingLineValue = null;
    state.pendingOverOdds = null;
    state.pendingUnderOdds = null;
  };

  // Turns the pending player into zero, one or two entries, then clears it.
  const drainPending = () => {
    const isComplete =
      state.pendingPlayer && state.pendingTeam && Number.isFinite(state.pendingLineValue);
    if (!isComplete) {
      clearPending();
      return [];
    }

    const sidePrices = [
      ['over', state.pendingOverOdds],
      ['under', state.pendingUnderOdds ?? null],
    ];

    const emitted = [];
    sidePrices.forEach(([side, oddsInput]) => {
      if (!oddsInput) {
        return;
      }
      if (!isReasonableCircaOdds(oddsInput, { mode: 'over_under' })) {
        return;
      }

      const impliedProbability = americanToImpliedProbability(oddsInput);
      if (impliedProbability === null) {
        return;
      }

      const entry = {
        source: 'circa',
        book: 'Circa',
        eventName: 'Circa MLB',
        eventId: null,
        team: state.pendingTeam,
        playerName: state.pendingPlayer,
        playerKey: normalizePlayerName(state.pendingPlayer),
        marketType: section.type,
        marketLabel: section.label,
        side,
        lineValue: state.pendingLineValue,
        oddsInput,
        impliedProbability,
        rawLabel: rowText,
      };
      // The key is derived from the finished entry, so it is attached last.
      entry.marketKey = buildMarketKey(entry);
      emitted.push(entry);
    });

    clearPending();
    return emitted;
  };

  if (rowText.length === 0) {
    return drainPending();
  }

  if (playerHit) {
    // Close out the previous player before overwriting the pending fields.
    const previous = drainPending();
    const { playerName, team } = extractPlayerAndTeam(playerHit[1]);

    state.pendingPlayer = playerName;
    state.pendingTeam = team;
    state.pendingLineValue = rowLineValue;
    state.pendingOverOdds = rowOddsTokens[0]
      ? normalizeOcrOddsTokenForSide(rowOddsTokens[0], 'over')
      : null;
    state.pendingUnderOdds = rowOddsTokens[1]
      ? normalizeOcrOddsTokenForSide(rowOddsTokens[1], 'under')
      : null;

    // Both prices on the player's own row: nothing more to wait for.
    return state.pendingUnderOdds ? previous.concat(drainPending()) : previous;
  }

  const isUnderRow =
    state.pendingPlayer && Number.isFinite(rowLineValue) && rowOddsTokens.length > 0;
  if (!isUnderRow) {
    return [];
  }

  state.pendingUnderOdds = normalizeOcrOddsTokenForSide(rowOddsTokens[0], 'under');
  return drainPending();
}
|
| 1392 |
+
|
| 1393 |
export function parseCircaOcrText(text) {
|
| 1394 |
const entries = [];
|
| 1395 |
const rawLines = String(text ?? '')
|
|
|
|
| 1456 |
if (activeSections.length === 1) {
|
| 1457 |
const section = activeSections[0];
|
| 1458 |
for (const segment of segments) {
|
| 1459 |
+
const stateKey = section.mode === 'over_under_table'
|
| 1460 |
+
? `single-${section.type}-col-${segment.index}`
|
| 1461 |
+
: `single-${section.type}`;
|
| 1462 |
+
entries.push(...parseCircaSegmentWithSection(segment.text, section, overUnderStates, stateKey));
|
| 1463 |
}
|
| 1464 |
continue;
|
| 1465 |
}
|
|
|
|
| 1593 |
pendingUnderOdds: null,
|
| 1594 |
};
|
| 1595 |
existingState.section = section;
|
| 1596 |
+
const parsed = extractCircaOverUnderTableEntriesV2(segment, section, existingState);
|
| 1597 |
overUnderStates.set(stateKey, existingState);
|
| 1598 |
return parsed;
|
| 1599 |
}
|
test/market-scanner.test.js
CHANGED
|
@@ -209,6 +209,21 @@ test('parses table-style total bases and strikeouts sections', () => {
|
|
| 209 |
assert.ok(entries.some((entry) => entry.marketType === 'pitcher_strikeouts_generic' && entry.playerName === 'BUBBA CHANDLER' && entry.side === 'under' && entry.lineValue === 5.5 && entry.oddsInput === '-155'));
|
| 210 |
});
|
| 211 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
test('ranks discrepancy, width, and circa alerts', () => {
|
| 213 |
const entries = [
|
| 214 |
{
|
|
|
|
| 209 |
assert.ok(entries.some((entry) => entry.marketType === 'pitcher_strikeouts_generic' && entry.playerName === 'BUBBA CHANDLER' && entry.side === 'under' && entry.lineValue === 5.5 && entry.oddsInput === '-155'));
|
| 210 |
});
|
| 211 |
|
| 212 |
+
// One "Total Strikeouts" header followed by rows that each hold two players
// side by side; every player's second price arrives on the following row.
test('parses single-header two-column strikeouts table', () => {
  const ocrText = [
    'Total Strikeouts',
    '85001 GERMAN MARQUEZ (SD) 3½ +105 85029 LOGAN GILBERT (SEA) 6½ +105',
    '3½ -125 6½ -125',
    '85003 BUBBA CHANDLER (PIT) 5½ +135 85031 JACOB DEGROM (TEX) 6½ -145',
    '5½ -155 6½ +125',
  ].join('\n');
  const entries = parseCircaOcrText(ocrText);

  // One expectation per player, checked in the same order as before.
  const expectations = [
    { playerName: 'GERMAN MARQUEZ', side: 'over', lineValue: 3.5, oddsInput: '+105' },
    { playerName: 'LOGAN GILBERT', side: 'under', lineValue: 6.5, oddsInput: '-125' },
    { playerName: 'BUBBA CHANDLER', side: 'under', lineValue: 5.5, oddsInput: '-155' },
    { playerName: 'JACOB DEGROM', side: 'over', lineValue: 6.5, oddsInput: '-145' },
  ];

  for (const expected of expectations) {
    const found = entries.some(
      (entry) =>
        entry.marketType === 'pitcher_strikeouts_generic' &&
        entry.playerName === expected.playerName &&
        entry.side === expected.side &&
        entry.lineValue === expected.lineValue &&
        entry.oddsInput === expected.oddsInput,
    );
    assert.ok(found);
  }
});
|
| 226 |
+
|
| 227 |
test('ranks discrepancy, width, and circa alerts', () => {
|
| 228 |
const entries = [
|
| 229 |
{
|