Spaces:
Sleeping
Sleeping
Commit ·
b329f6a
1
Parent(s): d022d8f
fix: bump structured output token limit to 8192 + add noise words
Browse files
- Increase max_tokens from 4096 to 8192 to reduce fallback rate
(reasoning models burn ~2000 tokens thinking before structured output)
- Add FINRA, LISA, ATM, AMA, FDA, PHNX, IPG and other financial
acronyms to the noise word filter
Made-with: Cursor
- src/core/ticker_utils.py +2 -0
- src/llm.py +1 -1
src/core/ticker_utils.py
CHANGED
|
@@ -40,6 +40,8 @@ NOISE_WORDS = frozenset({
|
|
| 40 |
# Financial acronyms / index names that aren't tradeable tickers
|
| 41 |
"ROCE", "FTSE", "DJIA", "EBIT", "WACC", "CAGR", "ROIC", "REIT",
|
| 42 |
"SPAC", "NBER", "OPEC", "MSCI", "EMEA", "APAC", "OECD", "FIFO",
|
|
|
|
|
|
|
| 43 |
})
|
| 44 |
|
| 45 |
_MAX_TICKER_LEN = 8 # longest valid ticker with suffix: e.g. CHE.UN.TO
|
|
|
|
| 40 |
# Financial acronyms / index names that aren't tradeable tickers
|
| 41 |
"ROCE", "FTSE", "DJIA", "EBIT", "WACC", "CAGR", "ROIC", "REIT",
|
| 42 |
"SPAC", "NBER", "OPEC", "MSCI", "EMEA", "APAC", "OECD", "FIFO",
|
| 43 |
+
"FINRA", "SIPC", "FDIC", "LISA", "ISA", "ATM", "AMA", "FDA",
|
| 44 |
+
"PHNX", "IPG", "GAAP", "IFRS", "FASB", "IASB", "PCAOB",
|
| 45 |
})
|
| 46 |
|
| 47 |
_MAX_TICKER_LEN = 8 # longest valid ticker with suffix: e.g. CHE.UN.TO
|
src/llm.py
CHANGED
|
@@ -48,7 +48,7 @@ def get_llm() -> ChatOpenAI:
|
|
| 48 |
return _llm_instance
|
| 49 |
|
| 50 |
|
| 51 |
-
def get_structured_llm(max_tokens: int = 4096) -> ChatOpenAI:
|
| 52 |
"""Return an LLM instance configured for structured output.
|
| 53 |
|
| 54 |
Uses a capped ``max_tokens`` to prevent reasoning models from
|
|
|
|
| 48 |
return _llm_instance
|
| 49 |
|
| 50 |
|
| 51 |
+
def get_structured_llm(max_tokens: int = 8192) -> ChatOpenAI:
|
| 52 |
"""Return an LLM instance configured for structured output.
|
| 53 |
|
| 54 |
Uses a capped ``max_tokens`` to prevent reasoning models from
|