fix: bump up rate limits wkw
Browse files
app.py
CHANGED
|
@@ -99,14 +99,14 @@ def respond(
|
|
| 99 |
response = ""
|
| 100 |
for msg in client.chat_completion(
|
| 101 |
messages,
|
| 102 |
-
model="meta-llama/llama-4-scout
|
| 103 |
max_tokens=max_tokens,
|
| 104 |
stream=True,
|
| 105 |
temperature=temperature,
|
| 106 |
seed=random.randint(1, 1000),
|
| 107 |
top_p=top_p,
|
| 108 |
extra_body={
|
| 109 |
-
"models": ["meta-llama/llama-4-maverick
|
| 110 |
},
|
| 111 |
):
|
| 112 |
token = msg.choices[0].delta.content
|
|
|
|
| 99 |
response = ""
|
| 100 |
for msg in client.chat_completion(
|
| 101 |
messages,
|
| 102 |
+
model="meta-llama/llama-4-scout",
|
| 103 |
max_tokens=max_tokens,
|
| 104 |
stream=True,
|
| 105 |
temperature=temperature,
|
| 106 |
seed=random.randint(1, 1000),
|
| 107 |
top_p=top_p,
|
| 108 |
extra_body={
|
| 109 |
+
"models": ["meta-llama/llama-4-maverick", "google/gemma-3-1b-it"]
|
| 110 |
},
|
| 111 |
):
|
| 112 |
token = msg.choices[0].delta.content
|