imtrt004 committed on
Commit
69975bb
Β·
1 Parent(s): 113b6c1

fix: update limit

Browse files
Files changed (1) hide show
  1. generation/cerebras_llm.py +8 -22
generation/cerebras_llm.py CHANGED
@@ -27,44 +27,30 @@ from typing import Generator
27
  from supabase import create_client
28
 
29
  # ── Cerebras model catalogue ──────────────────────────────────────────────────
30
- # Source: Cerebras Cloud β†’ Limits page (pay-as-you-go plan, March 2026)
31
  # { model_id: { label, context_length, rpm, rph, rpd, tpm } }
32
  CEREBRAS_MODELS: dict[str, dict] = {
33
  "gpt-oss-120b": {
34
  "label": "GPT-OSS 120B",
35
- "context": 131_000,
36
- "rpm": 1_000, "rph": 60_000, "rpd": 1_440_000,
37
- "tpm": 1_000_000,
38
  "status": "production",
39
  },
40
  "llama3.1-8b": {
41
  "label": "Llama 3.1 8B",
42
- "context": 32_768,
43
- "rpm": 2_000, "rph": 120_000, "rpd": 2_880_000,
44
- "tpm": 2_000_000,
45
  "status": "production",
46
  },
47
- "qwen-3-235b-a22b-instruct-2507": {
48
- "label": "Qwen 3 235B A22B",
49
- "context": 131_000,
50
- "rpm": 250, "rph": 15_000, "rpd": 360_000,
51
- "tpm": 250_000,
52
- "status": "preview",
53
- },
54
- "zai-glm-4.7": {
55
- "label": "ZAI GLM-4.7",
56
- "context": 131_072,
57
- "rpm": 500, "rph": 30_000, "rpd": 720_000,
58
- "tpm": 500_000,
59
- "status": "preview",
60
- },
61
  }
62
 
63
  DEFAULT_MODEL = os.environ.get("CEREBRAS_MODEL", "llama3.1-8b")
64
 
65
  SYSTEM_PROMPT = """You are DeepMind Super β€” an ultra-fast expert AI research assistant created by Md Tusar Akon.
66
  You are operating with the user's COMPLETE document loaded into context (all pages, every chunk).
67
- Your 131,000-token context window lets you see the ENTIRE uploaded file at once.
68
 
69
  CAPABILITIES:
70
  β€’ Solve every exam / problem-set question with detailed working and research-level interpretation
 
27
  from supabase import create_client
28
 
29
  # ── Cerebras model catalogue ──────────────────────────────────────────────────
30
+ # Source: Cerebras Cloud β†’ Limits page (Personal plan, March 2026)
31
  # { model_id: { label, context_length, rpm, rph, rpd, tpm } }
32
  CEREBRAS_MODELS: dict[str, dict] = {
33
  "gpt-oss-120b": {
34
  "label": "GPT-OSS 120B",
35
+ "context": 65_536,
36
+ "rpm": 30, "rph": 900, "rpd": 14_400,
37
+ "tpm": 64_000, "tpd": 1_000_000,
38
  "status": "production",
39
  },
40
  "llama3.1-8b": {
41
  "label": "Llama 3.1 8B",
42
+ "context": 8_192,
43
+ "rpm": 30, "rph": 900, "rpd": 14_400,
44
+ "tpm": 60_000, "tpd": 1_000_000,
45
  "status": "production",
46
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  }
48
 
49
  DEFAULT_MODEL = os.environ.get("CEREBRAS_MODEL", "llama3.1-8b")
50
 
51
  SYSTEM_PROMPT = """You are DeepMind Super β€” an ultra-fast expert AI research assistant created by Md Tusar Akon.
52
  You are operating with the user's COMPLETE document loaded into context (all pages, every chunk).
53
+ Your 65,536-token context window lets you see large documents in full at once.
54
 
55
  CAPABILITIES:
56
  β€’ Solve every exam / problem-set question with detailed working and research-level interpretation