Refactor token parameters to standardize naming across models and generation settings
src/lib/server/endpoints/openai/endpointOai.ts
CHANGED
@@ -119,11 +119,11 @@ export async function endpointOai(
             model: model.id ?? model.name,
             prompt,
             stream: true,
-            max_tokens: parameters?.
+            max_tokens: parameters?.max_tokens,
             stop: parameters?.stop,
             temperature: parameters?.temperature,
             top_p: parameters?.top_p,
-
+            frequency_penalty: parameters?.frequency_penalty,
             presence_penalty: parameters?.presence_penalty,
         };

@@ -168,12 +168,12 @@ export async function endpointOai(
             stream: streamingSupported,
             // Support two different ways of specifying token limits depending on the model
             ...(useCompletionTokens
-                ? { max_completion_tokens: parameters?.
-                : { max_tokens: parameters?.
+                ? { max_completion_tokens: parameters?.max_tokens }
+                : { max_tokens: parameters?.max_tokens }),
             stop: parameters?.stop,
             temperature: parameters?.temperature,
             top_p: parameters?.top_p,
-
+            frequency_penalty: parameters?.frequency_penalty,
             presence_penalty: parameters?.presence_penalty,
         };
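The second hunk is the interesting one: the same user-facing max_tokens setting is sent to the provider under either of two wire names, max_completion_tokens (required by newer OpenAI models) or max_tokens (older ones), selected by useCompletionTokens. A minimal standalone sketch of that conditional spread follows; the function name and the SamplingParameters shape are assumptions for illustration, not the endpoint's actual code.

// Sketch only: assumed shapes, not the endpoint's real types.
interface SamplingParameters {
    max_tokens?: number;
    stop?: string[];
    temperature?: number;
    top_p?: number;
    frequency_penalty?: number;
    presence_penalty?: number;
}

function buildChatCompletionBody(
    model: string,
    messages: { role: "system" | "user" | "assistant"; content: string }[],
    parameters: SamplingParameters | undefined,
    useCompletionTokens: boolean,
    streamingSupported: boolean
) {
    return {
        model,
        messages,
        stream: streamingSupported,
        // Same configured limit, two possible wire names depending on the model.
        ...(useCompletionTokens
            ? { max_completion_tokens: parameters?.max_tokens }
            : { max_tokens: parameters?.max_tokens }),
        stop: parameters?.stop,
        temperature: parameters?.temperature,
        top_p: parameters?.top_p,
        frequency_penalty: parameters?.frequency_penalty,
        presence_penalty: parameters?.presence_penalty,
    };
}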
src/lib/server/models.ts
CHANGED
@@ -60,11 +60,11 @@ const modelConfig = z.object({
         .object({
             temperature: z.number().min(0).max(2).optional(),
             truncate: z.number().int().positive().optional(),
-
+            max_tokens: z.number().int().positive().optional(),
             stop: z.array(z.string()).optional(),
             top_p: z.number().positive().optional(),
             top_k: z.number().positive().optional(),
-
+            frequency_penalty: z.number().min(-2).max(2).optional(),
             presence_penalty: z.number().min(-2).max(2).optional(),
         })
         .passthrough()
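For reference, the updated parameters schema can be exercised on its own. The snippet below mirrors the fields from the hunk above; the variable name and the sample config are made up for illustration.

import { z } from "zod";

const modelParameters = z
    .object({
        temperature: z.number().min(0).max(2).optional(),
        truncate: z.number().int().positive().optional(),
        max_tokens: z.number().int().positive().optional(),
        stop: z.array(z.string()).optional(),
        top_p: z.number().positive().optional(),
        top_k: z.number().positive().optional(),
        frequency_penalty: z.number().min(-2).max(2).optional(),
        presence_penalty: z.number().min(-2).max(2).optional(),
    })
    .passthrough(); // unrecognized keys are still kept, as before

// max_tokens and frequency_penalty are now validated instead of passing through untyped.
const parsed = modelParameters.parse({
    temperature: 0.7,
    max_tokens: 2048,
    frequency_penalty: 0.1,
});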
src/lib/server/textGeneration/generate.ts
CHANGED
@@ -111,7 +111,7 @@ If the user is just having a casual conversation that doesn't require explanatio
 
 Do not use prefixes such as Response: or Answer: when answering to the user.`,
         generateSettings: {
-
+            max_tokens: 1024,
         },
         modelId: model.id,
     });
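The hunk above hard-codes a 1024-token cap for the main answer at this call site. The diff does not show how such per-call settings combine with model-level defaults; one plausible reading, sketched below with assumed names and an assumed merge rule, is a shallow spread in which the call-site value wins.

// Assumed shape and merge rule, for illustration only.
interface GenerateSettings {
    max_tokens?: number;
    temperature?: number;
    top_p?: number;
    frequency_penalty?: number;
    presence_penalty?: number;
}

function mergeGenerateSettings(
    modelDefaults: GenerateSettings,
    callSite: GenerateSettings
): GenerateSettings {
    // later spread wins, so the per-call override takes precedence
    return { ...modelDefaults, ...callSite };
}

// e.g. a model configured with max_tokens: 4096 is still capped to 1024 for this call
const effective = mergeGenerateSettings(
    { max_tokens: 4096, temperature: 0.6 },
    { max_tokens: 1024 }
);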
src/lib/server/textGeneration/reasoning.ts
CHANGED
@@ -21,9 +21,9 @@ export async function generateSummaryOfReasoning(
         preprompt: `You are tasked with summarizing the latest reasoning steps. Never describe results of the reasoning, only the process. Remain vague in your summary.
 The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points.
 Example: "Thinking about life...", "Summarizing the results...", "Processing the input..."`,
-
-
-
+        generateSettings: {
+            max_tokens: 50,
+        },
         modelId,
     })
 );
src/lib/server/textGeneration/title.ts
CHANGED
@@ -44,9 +44,9 @@ Do not answer the question.
 Do not include the word prompt into your response.
 Do not include quotes, emojis, hashtags or trailing punctuation.
 Return ONLY the title text.`,
-
-
-
+        generateSettings: {
+            max_tokens: 30,
+        },
         modelId,
     })
 )
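Both auxiliary generations, the reasoning summary (50 tokens) and the conversation title (30 tokens), now pass a small explicit cap through generateSettings. A short sketch of the option objects they build follows; the type and helper functions are hypothetical, only the max_tokens values come from the hunks above.

// Hypothetical helpers; only the caps (50 and 30) are taken from the diff.
type ShortGenerationOptions = {
    preprompt: string;
    generateSettings: { max_tokens: number };
    modelId: string;
};

function reasoningSummaryOptions(preprompt: string, modelId: string): ShortGenerationOptions {
    // a one-sentence gerund summary fits comfortably in 50 tokens
    return { preprompt, generateSettings: { max_tokens: 50 }, modelId };
}

function titleOptions(preprompt: string, modelId: string): ShortGenerationOptions {
    // a few-word title fits comfortably in 30 tokens
    return { preprompt, generateSettings: { max_tokens: 30 }, modelId };
}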
src/lib/types/Assistant.ts
CHANGED
@@ -19,7 +19,7 @@ export interface Assistant extends Timestamps {
     generateSettings?: {
         temperature?: number;
         top_p?: number;
-
+        frequency_penalty?: number;
         top_k?: number;
     };
     dynamicPrompt?: boolean;
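With frequency_penalty added to the interface, a per-assistant override like the one below now type-checks. The import path assumes SvelteKit's $lib alias over the src/lib layout shown above, and the values are illustrative only.

import type { Assistant } from "$lib/types/Assistant"; // assumed $lib alias for src/lib

// Illustrative values only.
const assistantGenerateSettings: NonNullable<Assistant["generateSettings"]> = {
    temperature: 0.7,
    top_p: 0.95,
    frequency_penalty: 0.3,
    top_k: 50,
};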