Spaces:
Running
Running
github-actions[bot] commited on
Commit ·
4c52012
1
Parent(s): 5186e8e
🚀 Auto-deploy backend from GitHub (8c85dcd)
Browse files
main.py
CHANGED
|
@@ -1366,10 +1366,10 @@ async def call_hf_chat_async(
|
|
| 1366 |
effective_task = (task_type or "default").strip().lower()
|
| 1367 |
request_tag = f"{effective_task}-async-{int(time.time() * 1000)}"
|
| 1368 |
|
| 1369 |
-
|
| 1370 |
messages=messages,
|
| 1371 |
model=model,
|
| 1372 |
-
task_type=
|
| 1373 |
request_tag=request_tag,
|
| 1374 |
max_new_tokens=max_tokens,
|
| 1375 |
temperature=temperature,
|
|
@@ -1377,168 +1377,12 @@ async def call_hf_chat_async(
|
|
| 1377 |
repetition_penalty=repetition_penalty,
|
| 1378 |
timeout_sec=timeout,
|
| 1379 |
)
|
| 1380 |
-
selected_model, _ = client._resolve_primary_model(selection_req)
|
| 1381 |
-
model_chain = client._model_chain_for_task(effective_task, selected_model)
|
| 1382 |
-
provider_chain = client._provider_chain_for_task(effective_task)
|
| 1383 |
-
last_error: Optional[Exception] = None
|
| 1384 |
-
retryable_status = {408, 429, 500, 502, 503, 504}
|
| 1385 |
-
|
| 1386 |
-
for fallback_depth, model_name in enumerate(model_chain):
|
| 1387 |
-
request_for_model = InferenceRequest(
|
| 1388 |
-
messages=messages,
|
| 1389 |
-
model=model_name,
|
| 1390 |
-
task_type=task_type,
|
| 1391 |
-
request_tag=request_tag,
|
| 1392 |
-
max_new_tokens=max_tokens,
|
| 1393 |
-
temperature=temperature,
|
| 1394 |
-
top_p=top_p,
|
| 1395 |
-
repetition_penalty=repetition_penalty,
|
| 1396 |
-
timeout_sec=timeout,
|
| 1397 |
-
)
|
| 1398 |
-
for provider in provider_chain:
|
| 1399 |
-
route = client._resolve_route_label(provider, effective_task)
|
| 1400 |
-
if provider == "local_space":
|
| 1401 |
-
try:
|
| 1402 |
-
text = await _run_hf_blocking(
|
| 1403 |
-
client._generate_with_provider,
|
| 1404 |
-
request_for_model,
|
| 1405 |
-
provider,
|
| 1406 |
-
fallback_depth,
|
| 1407 |
-
)
|
| 1408 |
-
return _strip_repetition(text)
|
| 1409 |
-
except Exception as exc:
|
| 1410 |
-
last_error = exc
|
| 1411 |
-
logger.warning(
|
| 1412 |
-
"⚠️ Async local fallback failed: task=%s model=%s depth=%s error=%s",
|
| 1413 |
-
effective_task,
|
| 1414 |
-
model_name,
|
| 1415 |
-
fallback_depth,
|
| 1416 |
-
str(exc)[:180],
|
| 1417 |
-
)
|
| 1418 |
-
continue
|
| 1419 |
-
|
| 1420 |
-
stream_model = model_name if ":" in model_name else f"{model_name}:fastest"
|
| 1421 |
-
timeout_sec = client._timeout_for(request_for_model, provider)
|
| 1422 |
-
max_retries, backoff_sec = client._retry_profile(effective_task)
|
| 1423 |
-
headers = {
|
| 1424 |
-
"Authorization": f"Bearer {client.hf_token}",
|
| 1425 |
-
"Content-Type": "application/json",
|
| 1426 |
-
"X-MathPulse-Task": effective_task,
|
| 1427 |
-
}
|
| 1428 |
-
if route == "pro-priority" and client.pro_route_header_name.strip():
|
| 1429 |
-
headers[client.pro_route_header_name.strip()] = client.pro_route_header_value
|
| 1430 |
-
|
| 1431 |
-
payload: Dict[str, object] = {
|
| 1432 |
-
"model": stream_model,
|
| 1433 |
-
"messages": messages,
|
| 1434 |
-
"stream": False,
|
| 1435 |
-
"max_tokens": max_tokens,
|
| 1436 |
-
"temperature": temperature,
|
| 1437 |
-
"top_p": top_p,
|
| 1438 |
-
}
|
| 1439 |
-
|
| 1440 |
-
async_client = await _get_hf_async_http_client()
|
| 1441 |
-
for attempt in range(1, max_retries + 1):
|
| 1442 |
-
start = time.perf_counter()
|
| 1443 |
-
client._record_attempt(
|
| 1444 |
-
task_type=effective_task,
|
| 1445 |
-
provider=provider,
|
| 1446 |
-
route=route,
|
| 1447 |
-
fallback_depth=fallback_depth,
|
| 1448 |
-
)
|
| 1449 |
-
try:
|
| 1450 |
-
response = await async_client.post(
|
| 1451 |
-
client.hf_chat_url,
|
| 1452 |
-
headers=headers,
|
| 1453 |
-
json=payload,
|
| 1454 |
-
timeout=_resolve_async_hf_timeout(timeout_sec),
|
| 1455 |
-
)
|
| 1456 |
-
latency_ms = (time.perf_counter() - start) * 1000
|
| 1457 |
-
client._bump_bucket("status_code_counts", str(response.status_code), 1)
|
| 1458 |
-
|
| 1459 |
-
if response.status_code in retryable_status and attempt < max_retries:
|
| 1460 |
-
log_model_call(
|
| 1461 |
-
logger,
|
| 1462 |
-
provider=provider,
|
| 1463 |
-
model=model_name,
|
| 1464 |
-
endpoint=client.hf_chat_url,
|
| 1465 |
-
latency_ms=latency_ms,
|
| 1466 |
-
input_tokens=None,
|
| 1467 |
-
output_tokens=None,
|
| 1468 |
-
status="error",
|
| 1469 |
-
error_class="HTTPRetry",
|
| 1470 |
-
error_message=f"status={response.status_code}",
|
| 1471 |
-
task_type=effective_task,
|
| 1472 |
-
request_tag=request_tag,
|
| 1473 |
-
retry_attempt=attempt,
|
| 1474 |
-
fallback_depth=fallback_depth,
|
| 1475 |
-
route=route,
|
| 1476 |
-
)
|
| 1477 |
-
client._bump_metric("retries_total", 1)
|
| 1478 |
-
await asyncio.sleep(_hf_retry_sleep_seconds(backoff_sec, attempt))
|
| 1479 |
-
continue
|
| 1480 |
-
|
| 1481 |
-
if response.status_code != 200:
|
| 1482 |
-
client._bump_metric("requests_error", 1)
|
| 1483 |
-
raise RuntimeError(
|
| 1484 |
-
f"HF inference error {response.status_code}: {response.text[:280]}"
|
| 1485 |
-
)
|
| 1486 |
-
|
| 1487 |
-
data = response.json()
|
| 1488 |
-
text = client._extract_text(data)
|
| 1489 |
-
log_model_call(
|
| 1490 |
-
logger,
|
| 1491 |
-
provider=provider,
|
| 1492 |
-
model=model_name,
|
| 1493 |
-
endpoint=client.hf_chat_url,
|
| 1494 |
-
latency_ms=latency_ms,
|
| 1495 |
-
input_tokens=None,
|
| 1496 |
-
output_tokens=None,
|
| 1497 |
-
status="ok",
|
| 1498 |
-
task_type=effective_task,
|
| 1499 |
-
request_tag=request_tag,
|
| 1500 |
-
retry_attempt=attempt,
|
| 1501 |
-
fallback_depth=fallback_depth,
|
| 1502 |
-
route=route,
|
| 1503 |
-
)
|
| 1504 |
-
client._bump_metric("requests_ok", 1)
|
| 1505 |
-
return _strip_repetition(text)
|
| 1506 |
-
except Exception as exc:
|
| 1507 |
-
latency_ms = (time.perf_counter() - start) * 1000
|
| 1508 |
-
last_error = exc
|
| 1509 |
-
if attempt < max_retries:
|
| 1510 |
-
log_model_call(
|
| 1511 |
-
logger,
|
| 1512 |
-
provider=provider,
|
| 1513 |
-
model=model_name,
|
| 1514 |
-
endpoint=client.hf_chat_url,
|
| 1515 |
-
latency_ms=latency_ms,
|
| 1516 |
-
input_tokens=None,
|
| 1517 |
-
output_tokens=None,
|
| 1518 |
-
status="error",
|
| 1519 |
-
error_class=exc.__class__.__name__,
|
| 1520 |
-
error_message=str(exc),
|
| 1521 |
-
task_type=effective_task,
|
| 1522 |
-
request_tag=request_tag,
|
| 1523 |
-
retry_attempt=attempt,
|
| 1524 |
-
fallback_depth=fallback_depth,
|
| 1525 |
-
route=route,
|
| 1526 |
-
)
|
| 1527 |
-
client._bump_metric("retries_total", 1)
|
| 1528 |
-
await asyncio.sleep(_hf_retry_sleep_seconds(backoff_sec, attempt))
|
| 1529 |
-
continue
|
| 1530 |
-
|
| 1531 |
-
client._bump_metric("requests_error", 1)
|
| 1532 |
-
logger.warning(
|
| 1533 |
-
"⚠️ Async HF attempt failed: task=%s provider=%s model=%s depth=%s error=%s",
|
| 1534 |
-
effective_task,
|
| 1535 |
-
provider,
|
| 1536 |
-
model_name,
|
| 1537 |
-
fallback_depth,
|
| 1538 |
-
str(exc)[:180],
|
| 1539 |
-
)
|
| 1540 |
|
| 1541 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1542 |
|
| 1543 |
|
| 1544 |
async def call_hf_chat_stream_async(
|
|
@@ -1574,204 +1418,15 @@ async def call_hf_chat_stream_async(
|
|
| 1574 |
yield str(chunk)
|
| 1575 |
|
| 1576 |
client = get_inference_client()
|
| 1577 |
-
|
| 1578 |
-
|
| 1579 |
-
|
| 1580 |
-
selection_req = InferenceRequest(
|
| 1581 |
-
messages=messages,
|
| 1582 |
-
model=model,
|
| 1583 |
-
task_type=task_type,
|
| 1584 |
-
request_tag=request_tag,
|
| 1585 |
-
max_new_tokens=max_tokens,
|
| 1586 |
temperature=temperature,
|
| 1587 |
top_p=top_p,
|
| 1588 |
-
|
| 1589 |
-
|
| 1590 |
-
|
| 1591 |
-
|
| 1592 |
-
provider_chain = client._provider_chain_for_task(effective_task)
|
| 1593 |
-
last_error: Optional[Exception] = None
|
| 1594 |
-
retryable_status = {408, 429, 500, 502, 503, 504}
|
| 1595 |
-
|
| 1596 |
-
for fallback_depth, model_name in enumerate(model_chain):
|
| 1597 |
-
request_for_model = InferenceRequest(
|
| 1598 |
-
messages=messages,
|
| 1599 |
-
model=model_name,
|
| 1600 |
-
task_type=task_type,
|
| 1601 |
-
request_tag=request_tag,
|
| 1602 |
-
max_new_tokens=max_tokens,
|
| 1603 |
-
temperature=temperature,
|
| 1604 |
-
top_p=top_p,
|
| 1605 |
-
timeout_sec=timeout,
|
| 1606 |
-
)
|
| 1607 |
-
for provider in provider_chain:
|
| 1608 |
-
if provider == "local_space":
|
| 1609 |
-
last_error = RuntimeError("Streaming is not supported for local_space provider")
|
| 1610 |
-
continue
|
| 1611 |
-
|
| 1612 |
-
route = client._resolve_route_label(provider, effective_task)
|
| 1613 |
-
stream_model = model_name if ":" in model_name else f"{model_name}:fastest"
|
| 1614 |
-
timeout_sec = client._timeout_for(request_for_model, provider)
|
| 1615 |
-
max_retries, backoff_sec = client._retry_profile(effective_task)
|
| 1616 |
-
|
| 1617 |
-
headers = {
|
| 1618 |
-
"Authorization": f"Bearer {client.hf_token}",
|
| 1619 |
-
"Content-Type": "application/json",
|
| 1620 |
-
"X-MathPulse-Task": effective_task,
|
| 1621 |
-
}
|
| 1622 |
-
if route == "pro-priority" and client.pro_route_header_name.strip():
|
| 1623 |
-
headers[client.pro_route_header_name.strip()] = client.pro_route_header_value
|
| 1624 |
-
|
| 1625 |
-
payload: Dict[str, object] = {
|
| 1626 |
-
"model": stream_model,
|
| 1627 |
-
"messages": messages,
|
| 1628 |
-
"stream": True,
|
| 1629 |
-
"max_tokens": max_tokens,
|
| 1630 |
-
"temperature": temperature,
|
| 1631 |
-
"top_p": top_p,
|
| 1632 |
-
}
|
| 1633 |
-
|
| 1634 |
-
async_client = await _get_hf_async_http_client()
|
| 1635 |
-
for attempt in range(1, max_retries + 1):
|
| 1636 |
-
start = time.perf_counter()
|
| 1637 |
-
client._record_attempt(
|
| 1638 |
-
task_type=effective_task,
|
| 1639 |
-
provider=provider,
|
| 1640 |
-
route=route,
|
| 1641 |
-
fallback_depth=fallback_depth,
|
| 1642 |
-
)
|
| 1643 |
-
try:
|
| 1644 |
-
async with async_client.stream(
|
| 1645 |
-
"POST",
|
| 1646 |
-
client.hf_chat_url,
|
| 1647 |
-
headers=headers,
|
| 1648 |
-
json=payload,
|
| 1649 |
-
timeout=_resolve_async_hf_timeout(timeout_sec),
|
| 1650 |
-
) as response:
|
| 1651 |
-
client._bump_bucket("status_code_counts", str(response.status_code), 1)
|
| 1652 |
-
|
| 1653 |
-
if response.status_code in retryable_status and attempt < max_retries:
|
| 1654 |
-
body = await response.aread()
|
| 1655 |
-
body_preview = body[:220].decode("utf-8", errors="replace")
|
| 1656 |
-
latency_ms = (time.perf_counter() - start) * 1000
|
| 1657 |
-
log_model_call(
|
| 1658 |
-
logger,
|
| 1659 |
-
provider=provider,
|
| 1660 |
-
model=model_name,
|
| 1661 |
-
endpoint=client.hf_chat_url,
|
| 1662 |
-
latency_ms=latency_ms,
|
| 1663 |
-
input_tokens=None,
|
| 1664 |
-
output_tokens=None,
|
| 1665 |
-
status="error",
|
| 1666 |
-
error_class="HTTPRetry",
|
| 1667 |
-
error_message=f"status={response.status_code} body={body_preview}",
|
| 1668 |
-
task_type=effective_task,
|
| 1669 |
-
request_tag=request_tag,
|
| 1670 |
-
retry_attempt=attempt,
|
| 1671 |
-
fallback_depth=fallback_depth,
|
| 1672 |
-
route=route,
|
| 1673 |
-
)
|
| 1674 |
-
client._bump_metric("retries_total", 1)
|
| 1675 |
-
await asyncio.sleep(_hf_retry_sleep_seconds(backoff_sec, attempt))
|
| 1676 |
-
continue
|
| 1677 |
-
|
| 1678 |
-
if response.status_code != 200:
|
| 1679 |
-
body = await response.aread()
|
| 1680 |
-
body_preview = body[:280].decode("utf-8", errors="replace")
|
| 1681 |
-
client._bump_metric("requests_error", 1)
|
| 1682 |
-
raise RuntimeError(
|
| 1683 |
-
f"HF stream error {response.status_code}: {body_preview}"
|
| 1684 |
-
)
|
| 1685 |
-
|
| 1686 |
-
emitted_any = False
|
| 1687 |
-
async for raw_line in response.aiter_lines():
|
| 1688 |
-
if not raw_line:
|
| 1689 |
-
continue
|
| 1690 |
-
line = raw_line.strip()
|
| 1691 |
-
if not line.startswith("data:"):
|
| 1692 |
-
continue
|
| 1693 |
-
|
| 1694 |
-
data_raw = line.split("data:", 1)[1].strip()
|
| 1695 |
-
if data_raw == "[DONE]":
|
| 1696 |
-
continue
|
| 1697 |
-
|
| 1698 |
-
try:
|
| 1699 |
-
payload_obj = json.loads(data_raw)
|
| 1700 |
-
except json.JSONDecodeError:
|
| 1701 |
-
continue
|
| 1702 |
-
|
| 1703 |
-
choices = payload_obj.get("choices") or []
|
| 1704 |
-
if not choices:
|
| 1705 |
-
continue
|
| 1706 |
-
first = choices[0] if isinstance(choices[0], dict) else {}
|
| 1707 |
-
delta = first.get("delta") or {}
|
| 1708 |
-
chunk = delta.get("content")
|
| 1709 |
-
if not chunk:
|
| 1710 |
-
msg = first.get("message") or {}
|
| 1711 |
-
chunk = msg.get("content")
|
| 1712 |
-
if not chunk:
|
| 1713 |
-
continue
|
| 1714 |
-
|
| 1715 |
-
emitted_any = True
|
| 1716 |
-
yield str(chunk)
|
| 1717 |
-
|
| 1718 |
-
if emitted_any:
|
| 1719 |
-
latency_ms = (time.perf_counter() - start) * 1000
|
| 1720 |
-
log_model_call(
|
| 1721 |
-
logger,
|
| 1722 |
-
provider=provider,
|
| 1723 |
-
model=model_name,
|
| 1724 |
-
endpoint=client.hf_chat_url,
|
| 1725 |
-
latency_ms=latency_ms,
|
| 1726 |
-
input_tokens=None,
|
| 1727 |
-
output_tokens=None,
|
| 1728 |
-
status="ok",
|
| 1729 |
-
task_type=effective_task,
|
| 1730 |
-
request_tag=request_tag,
|
| 1731 |
-
retry_attempt=attempt,
|
| 1732 |
-
fallback_depth=fallback_depth,
|
| 1733 |
-
route=route,
|
| 1734 |
-
)
|
| 1735 |
-
client._bump_metric("requests_ok", 1)
|
| 1736 |
-
return
|
| 1737 |
-
|
| 1738 |
-
raise RuntimeError("HF stream ended without content")
|
| 1739 |
-
except Exception as exc:
|
| 1740 |
-
latency_ms = (time.perf_counter() - start) * 1000
|
| 1741 |
-
last_error = exc
|
| 1742 |
-
if attempt < max_retries:
|
| 1743 |
-
log_model_call(
|
| 1744 |
-
logger,
|
| 1745 |
-
provider=provider,
|
| 1746 |
-
model=model_name,
|
| 1747 |
-
endpoint=client.hf_chat_url,
|
| 1748 |
-
latency_ms=latency_ms,
|
| 1749 |
-
input_tokens=None,
|
| 1750 |
-
output_tokens=None,
|
| 1751 |
-
status="error",
|
| 1752 |
-
error_class=exc.__class__.__name__,
|
| 1753 |
-
error_message=str(exc),
|
| 1754 |
-
task_type=effective_task,
|
| 1755 |
-
request_tag=request_tag,
|
| 1756 |
-
retry_attempt=attempt,
|
| 1757 |
-
fallback_depth=fallback_depth,
|
| 1758 |
-
route=route,
|
| 1759 |
-
)
|
| 1760 |
-
client._bump_metric("retries_total", 1)
|
| 1761 |
-
await asyncio.sleep(_hf_retry_sleep_seconds(backoff_sec, attempt))
|
| 1762 |
-
continue
|
| 1763 |
-
|
| 1764 |
-
client._bump_metric("requests_error", 1)
|
| 1765 |
-
logger.warning(
|
| 1766 |
-
"⚠️ Async stream attempt failed: task=%s provider=%s model=%s depth=%s error=%s",
|
| 1767 |
-
effective_task,
|
| 1768 |
-
provider,
|
| 1769 |
-
model_name,
|
| 1770 |
-
fallback_depth,
|
| 1771 |
-
str(exc)[:180],
|
| 1772 |
-
)
|
| 1773 |
-
|
| 1774 |
-
raise last_error or RuntimeError("Streaming failed with empty model/provider chain")
|
| 1775 |
|
| 1776 |
|
| 1777 |
def load_local_math_model(model_name: str = "Qwen/Qwen2.5-Math-7B-Instruct"):
|
|
|
|
| 1366 |
effective_task = (task_type or "default").strip().lower()
|
| 1367 |
request_tag = f"{effective_task}-async-{int(time.time() * 1000)}"
|
| 1368 |
|
| 1369 |
+
req = InferenceRequest(
|
| 1370 |
messages=messages,
|
| 1371 |
model=model,
|
| 1372 |
+
task_type=effective_task,
|
| 1373 |
request_tag=request_tag,
|
| 1374 |
max_new_tokens=max_tokens,
|
| 1375 |
temperature=temperature,
|
|
|
|
| 1377 |
repetition_penalty=repetition_penalty,
|
| 1378 |
timeout_sec=timeout,
|
| 1379 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1380 |
|
| 1381 |
+
try:
|
| 1382 |
+
return await _run_hf_blocking(client._call_deepseek, req, 0)
|
| 1383 |
+
except Exception as exc:
|
| 1384 |
+
logger.error(f"DeepSeek chat failed: {exc}")
|
| 1385 |
+
raise RuntimeError("AI model service is temporarily unavailable. Please try again.")
|
| 1386 |
|
| 1387 |
|
| 1388 |
async def call_hf_chat_stream_async(
|
|
|
|
| 1418 |
yield str(chunk)
|
| 1419 |
|
| 1420 |
client = get_inference_client()
|
| 1421 |
+
async for chunk in client._call_deepseek_stream(
|
| 1422 |
+
messages,
|
| 1423 |
+
max_tokens=max_tokens,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1424 |
temperature=temperature,
|
| 1425 |
top_p=top_p,
|
| 1426 |
+
model=model,
|
| 1427 |
+
task_type=task_type,
|
| 1428 |
+
):
|
| 1429 |
+
yield str(chunk)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1430 |
|
| 1431 |
|
| 1432 |
def load_local_math_model(model_name: str = "Qwen/Qwen2.5-Math-7B-Instruct"):
|