File size: 2,406 Bytes
a7b7d8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1f25f7
 
a7b7d8d
 
 
 
 
 
 
 
 
 
 
f1f25f7
a7b7d8d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# models.py — provider pools + model registry
# To add more keys, append further env-var names (_2, _3, etc.) to each key list below.

# ── Per-provider key pools ────────────────────────────────────────────────────
# Every entry is the NAME of an environment variable, not a secret value.
# To register another account, append its env-var name to that provider's list.
GROQ_KEYS: list[str] = [
    "GROQ_API_KEY_1",
    "GROQ_API_KEY_2",
    "GROQ_API_KEY_3",
]
CEREBRAS_KEYS: list[str] = [
    "CEREBRAS_API_KEY_1",
    "CEREBRAS_API_KEY_2",
]
MISTRAL_KEYS: list[str] = [
    "MISTRAL_API_KEY_1",
]
SAMBANOVA_KEYS: list[str] = [
    "SAMBANOVA_API_KEY_1",
]

# ── Global fallback chain (order = priority when primary is exhausted) ────────
# Each entry pairs a model string (written here in "provider/model" form) with
# the key pool — a list of env-var names — to use for that model.
# Every (model_str, key_pool) pair is tried in sequence, top to bottom, so the
# position of an entry in this list is its priority.
# NOTE: the *_KEYS lists are referenced here, not copied; mutating a pool also
# changes what this chain sees.
FALLBACK_CHAIN: list[tuple[str, list[str]]] = [
    ("groq/llama-3.1-8b-instant", GROQ_KEYS),
    ("cerebras/llama3.1-8b", CEREBRAS_KEYS),
    ("mistral/mistral-small-latest", MISTRAL_KEYS),
    ("sambanova/Meta-Llama-3.3-70B-Instruct", SAMBANOVA_KEYS),
]

# ── Extraction council — each model has a preferred primary ───────────────────
# Key:   label identifying the council member.
# Value: (primary_model_str, primary_key_pool) — the model string to try first
#        and the list of env-var names holding that provider's keys.
# Falls back to full FALLBACK_CHAIN automatically (see utils/llm.py).
# NOTE(review): the "Llama-Mistral" member maps to a mistral-small model, not a
# Llama one — confirm the label is intentional.
EXTRACTION_MODELS: dict[str, tuple[str, list[str]]] = {
    "Llama-Groq": ("groq/llama-3.1-8b-instant", GROQ_KEYS),
    "Llama-Cerebras": ("cerebras/llama3.1-8b", CEREBRAS_KEYS),
    "Llama-Mistral": ("mistral/mistral-small-latest", MISTRAL_KEYS),
}

# ── Consolidation chain (70B models for better synthesis quality) ─────────────
# Same shape as FALLBACK_CHAIN: a list of (model_str, key_pool) pairs, where
# key_pool is a list of env-var names. Presumably tried in list order like the
# fallback chain — confirm against the consumer of this constant.
# NOTE(review): the section title says "70B", but the Mistral entry is
# "mistral-large-latest", whose name does not state a 70B size — verify.
CONSOLIDATION_CHAIN: list[tuple[str, list[str]]] = [
    ("groq/llama-3.3-70b-versatile", GROQ_KEYS),
    ("cerebras/llama3.1-70b", CEREBRAS_KEYS),
    ("mistral/mistral-large-latest", MISTRAL_KEYS),
]

# ── Display label used in app.py UI ──────────────────────────────────────────
# Human-readable label for the consolidation step; the arrows list the
# providers in the same order as the entries of CONSOLIDATION_CHAIN.
# The arrows are real U+2192 characters: the previous text held the mis-decoded
# byte sequence ("β†’"), which would be shown verbatim wherever it is displayed.
CONSOLIDATION_MODEL = "Llama-3.3-70B (Groq → Cerebras → Mistral)"

# ── Inference settings ────────────────────────────────────────────────────────
# Token budget; presumably the generation cap handed to the LLM call — confirm
# where this constant is consumed.
MAX_NEW_TOKENS: int = 2_048

# Character cap for paper text. By the author's estimate this is ~3k tokens —
# small enough to fit every model's context window.
MAX_PAPER_CHARS: int = 12_000

# Per-sheet truncation limit (in characters) for the consolidation prompt.
MAX_SHEETS_CHARS: int = 4_000