Spaces:

tusarway
/

rag-backend

Running

App Files Community

imtrt004 committed on Mar 3

Commit

69975bb

1 Parent(s): 113b6c1

fix: update limit

Browse files

Files changed (1) hide show

generation/cerebras_llm.py +8 -22

generation/cerebras_llm.py CHANGED Viewed

@@ -27,44 +27,30 @@ from typing import Generator
 from supabase import create_client
 # ── Cerebras model catalogue ──────────────────────────────────────────────────
-# Source: Cerebras Cloud → Limits page (pay-as-you-go plan, March 2026)
 # { model_id: { label, context_length, rpm, rph, rpd, tpm } }
 CEREBRAS_MODELS: dict[str, dict] = {
     "gpt-oss-120b": {
         "label": "GPT-OSS 120B",
-        "context": 131_000,
-        "rpm": 1_000, "rph": 60_000, "rpd": 1_440_000,
-        "tpm": 1_000_000,
         "status": "production",
     },
     "llama3.1-8b": {
         "label": "Llama 3.1 8B",
-        "context": 32_768,
-        "rpm": 2_000, "rph": 120_000, "rpd": 2_880_000,
-        "tpm": 2_000_000,
         "status": "production",
     },
-    "qwen-3-235b-a22b-instruct-2507": {
-        "label": "Qwen 3 235B A22B",
-        "context": 131_000,
-        "rpm": 250, "rph": 15_000, "rpd": 360_000,
-        "tpm": 250_000,
-        "status": "preview",
-    },
-    "zai-glm-4.7": {
-        "label": "ZAI GLM-4.7",
-        "context": 131_072,
-        "rpm": 500, "rph": 30_000, "rpd": 720_000,
-        "tpm": 500_000,
-        "status": "preview",
-    },
 }
 DEFAULT_MODEL = os.environ.get("CEREBRAS_MODEL", "llama3.1-8b")
 SYSTEM_PROMPT = """You are DeepMind Super — an ultra-fast expert AI research assistant created by Md Tusar Akon.
 You are operating with the user's COMPLETE document loaded into context (all pages, every chunk).
-Your 131,000-token context window lets you see the ENTIRE uploaded file at once.
 CAPABILITIES:
 • Solve every exam / problem-set question with detailed working and research-level interpretation

 from supabase import create_client
 # ── Cerebras model catalogue ──────────────────────────────────────────────────
+# Source: Cerebras Cloud → Limits page (Personal plan, March 2026)
 # { model_id: { label, context, rpm, rph, rpd, tpm, tpd, status } }
 CEREBRAS_MODELS: dict[str, dict] = {
     "gpt-oss-120b": {
         "label": "GPT-OSS 120B",
+        "context": 65_536,
+        "rpm": 30, "rph": 900, "rpd": 14_400,
+        "tpm": 64_000, "tpd": 1_000_000,
         "status": "production",
     },
     "llama3.1-8b": {
         "label": "Llama 3.1 8B",
+        "context": 8_192,
+        "rpm": 30, "rph": 900, "rpd": 14_400,
+        "tpm": 60_000, "tpd": 1_000_000,
         "status": "production",
     },
 }
 DEFAULT_MODEL = os.environ.get("CEREBRAS_MODEL", "llama3.1-8b")
 SYSTEM_PROMPT = """You are DeepMind Super — an ultra-fast expert AI research assistant created by Md Tusar Akon.
 You are operating with the user's COMPLETE document loaded into context (all pages, every chunk).
+Your 65,536-token context window lets you see large documents in full at once.
 CAPABILITIES:
 • Solve every exam / problem-set question with detailed working and research-level interpretation