Spaces:
Running
Running
imtrt004 committed on
Commit ·
69975bb
1
Parent(s): 113b6c1
fix: update limit
Browse files- generation/cerebras_llm.py +8 -22
generation/cerebras_llm.py
CHANGED
|
@@ -27,44 +27,30 @@ from typing import Generator
|
|
| 27 |
from supabase import create_client
|
| 28 |
|
| 29 |
# ── Cerebras model catalogue ──────────────────────────────────────────────────
|
| 30 |
-
# Source: Cerebras Cloud — Limits page (
|
| 31 |
# { model_id: { label, context_length, rpm, rph, rpd, tpm } }
|
| 32 |
CEREBRAS_MODELS: dict[str, dict] = {
|
| 33 |
"gpt-oss-120b": {
|
| 34 |
"label": "GPT-OSS 120B",
|
| 35 |
-
"context":
|
| 36 |
-
"rpm":
|
| 37 |
-
"tpm": 1_000_000,
|
| 38 |
"status": "production",
|
| 39 |
},
|
| 40 |
"llama3.1-8b": {
|
| 41 |
"label": "Llama 3.1 8B",
|
| 42 |
-
"context":
|
| 43 |
-
"rpm":
|
| 44 |
-
"tpm":
|
| 45 |
"status": "production",
|
| 46 |
},
|
| 47 |
-
"qwen-3-235b-a22b-instruct-2507": {
|
| 48 |
-
"label": "Qwen 3 235B A22B",
|
| 49 |
-
"context": 131_000,
|
| 50 |
-
"rpm": 250, "rph": 15_000, "rpd": 360_000,
|
| 51 |
-
"tpm": 250_000,
|
| 52 |
-
"status": "preview",
|
| 53 |
-
},
|
| 54 |
-
"zai-glm-4.7": {
|
| 55 |
-
"label": "ZAI GLM-4.7",
|
| 56 |
-
"context": 131_072,
|
| 57 |
-
"rpm": 500, "rph": 30_000, "rpd": 720_000,
|
| 58 |
-
"tpm": 500_000,
|
| 59 |
-
"status": "preview",
|
| 60 |
-
},
|
| 61 |
}
|
| 62 |
|
| 63 |
DEFAULT_MODEL = os.environ.get("CEREBRAS_MODEL", "llama3.1-8b")
|
| 64 |
|
| 65 |
SYSTEM_PROMPT = """You are DeepMind Super — an ultra-fast expert AI research assistant created by Md Tusar Akon.
|
| 66 |
You are operating with the user's COMPLETE document loaded into context (all pages, every chunk).
|
| 67 |
-
Your
|
| 68 |
|
| 69 |
CAPABILITIES:
|
| 70 |
• Solve every exam / problem-set question with detailed working and research-level interpretation
|
|
|
|
| 27 |
from supabase import create_client
|
| 28 |
|
| 29 |
# ── Cerebras model catalogue ──────────────────────────────────────────────────
|
| 30 |
+
# Source: Cerebras Cloud — Limits page (Personal plan, March 2026)
|
| 31 |
# { model_id: { label, context_length, rpm, rph, rpd, tpm } }
|
| 32 |
CEREBRAS_MODELS: dict[str, dict] = {
|
| 33 |
"gpt-oss-120b": {
|
| 34 |
"label": "GPT-OSS 120B",
|
| 35 |
+
"context": 65_536,
|
| 36 |
+
"rpm": 30, "rph": 900, "rpd": 14_400,
|
| 37 |
+
"tpm": 64_000, "tpd": 1_000_000,
|
| 38 |
"status": "production",
|
| 39 |
},
|
| 40 |
"llama3.1-8b": {
|
| 41 |
"label": "Llama 3.1 8B",
|
| 42 |
+
"context": 8_192,
|
| 43 |
+
"rpm": 30, "rph": 900, "rpd": 14_400,
|
| 44 |
+
"tpm": 60_000, "tpd": 1_000_000,
|
| 45 |
"status": "production",
|
| 46 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
}
|
| 48 |
|
| 49 |
DEFAULT_MODEL = os.environ.get("CEREBRAS_MODEL", "llama3.1-8b")
|
| 50 |
|
| 51 |
SYSTEM_PROMPT = """You are DeepMind Super — an ultra-fast expert AI research assistant created by Md Tusar Akon.
|
| 52 |
You are operating with the user's COMPLETE document loaded into context (all pages, every chunk).
|
| 53 |
+
Your 65,536-token context window lets you see large documents in full at once.
|
| 54 |
|
| 55 |
CAPABILITIES:
|
| 56 |
• Solve every exam / problem-set question with detailed working and research-level interpretation
|