# models.py — provider pools + model registry
#
# This module only declares constants: per-provider API-key pools, the
# ordered model chains built from them, and a few inference limits.
# Add more keys by appending further env-var names (..._2, ..._3, etc.) to
# each list below.

# ── Per-provider key pools ────────────────────────────────────────────────────
# Each string is an env-var NAME (not the secret itself). Add as many accounts
# as you have.
# NOTE: the chains below store references to these same list objects, so
# mutating a pool in place is visible through every chain that uses it.
GROQ_KEYS = ["GROQ_API_KEY_1", "GROQ_API_KEY_2", "GROQ_API_KEY_3"]
CEREBRAS_KEYS = ["CEREBRAS_API_KEY_1", "CEREBRAS_API_KEY_2"]
MISTRAL_KEYS = ["MISTRAL_API_KEY_1"]
SAMBANOVA_KEYS = ["SAMBANOVA_API_KEY_1"]

# ── Global fallback chain (order = priority when primary is exhausted) ────────
# Every (model_str, key_pool) pair is tried in sequence.
FALLBACK_CHAIN: list[tuple[str, list[str]]] = [
    ("groq/llama-3.1-8b-instant", GROQ_KEYS),
    ("cerebras/llama3.1-8b", CEREBRAS_KEYS),
    ("mistral/mistral-small-latest", MISTRAL_KEYS),
    ("sambanova/Meta-Llama-3.3-70B-Instruct", SAMBANOVA_KEYS),
]

# ── Extraction council — each model has a preferred primary ───────────────────
# Key:   council member label.
# Value: (primary_model_str, primary_key_pool)
# Falls back to full FALLBACK_CHAIN automatically (see utils/llm.py).
EXTRACTION_MODELS: dict[str, tuple[str, list[str]]] = {
    "Llama-Groq": ("groq/llama-3.1-8b-instant", GROQ_KEYS),
    "Llama-Cerebras": ("cerebras/llama3.1-8b", CEREBRAS_KEYS),
    "Llama-Mistral": ("mistral/mistral-small-latest", MISTRAL_KEYS),
}

# ── Consolidation chain (larger models for better synthesis quality) ──────────
# Same (model_str, key_pool) shape as FALLBACK_CHAIN, listed in priority order.
CONSOLIDATION_CHAIN: list[tuple[str, list[str]]] = [
    ("groq/llama-3.3-70b-versatile", GROQ_KEYS),
    ("cerebras/llama3.1-70b", CEREBRAS_KEYS),
    ("mistral/mistral-large-latest", MISTRAL_KEYS),
]

# ── Display label used in app.py UI ───────────────────────────────────────────
# Arrows restored from mis-decoded text; they mirror the provider order of
# CONSOLIDATION_CHAIN.
CONSOLIDATION_MODEL = "Llama-3.3-70B (Groq → Cerebras → Mistral)"

# ── Inference settings ────────────────────────────────────────────────────────
MAX_NEW_TOKENS = 2048
MAX_PAPER_CHARS = 12_000  # ~3k tokens — fits every model's context window
MAX_SHEETS_CHARS = 4_000  # per-sheet truncation for consolidation prompt