victor HF Staff committed on
Commit
dd2acb7
·
1 Parent(s): d2390f1

Refactor token parameters to standardize naming across models and generation settings

Browse files
src/lib/server/endpoints/openai/endpointOai.ts CHANGED
@@ -119,11 +119,11 @@ export async function endpointOai(
119
  model: model.id ?? model.name,
120
  prompt,
121
  stream: true,
122
- max_tokens: parameters?.max_new_tokens,
123
  stop: parameters?.stop,
124
  temperature: parameters?.temperature,
125
  top_p: parameters?.top_p,
126
- frequency_penalty: parameters?.repetition_penalty,
127
  presence_penalty: parameters?.presence_penalty,
128
  };
129
 
@@ -168,12 +168,12 @@ export async function endpointOai(
168
  stream: streamingSupported,
169
  // Support two different ways of specifying token limits depending on the model
170
  ...(useCompletionTokens
171
- ? { max_completion_tokens: parameters?.max_new_tokens }
172
- : { max_tokens: parameters?.max_new_tokens }),
173
  stop: parameters?.stop,
174
  temperature: parameters?.temperature,
175
  top_p: parameters?.top_p,
176
- frequency_penalty: parameters?.repetition_penalty,
177
  presence_penalty: parameters?.presence_penalty,
178
  };
179
 
 
119
  model: model.id ?? model.name,
120
  prompt,
121
  stream: true,
122
+ max_tokens: parameters?.max_tokens,
123
  stop: parameters?.stop,
124
  temperature: parameters?.temperature,
125
  top_p: parameters?.top_p,
126
+ frequency_penalty: parameters?.frequency_penalty,
127
  presence_penalty: parameters?.presence_penalty,
128
  };
129
 
 
168
  stream: streamingSupported,
169
  // Support two different ways of specifying token limits depending on the model
170
  ...(useCompletionTokens
171
+ ? { max_completion_tokens: parameters?.max_tokens }
172
+ : { max_tokens: parameters?.max_tokens }),
173
  stop: parameters?.stop,
174
  temperature: parameters?.temperature,
175
  top_p: parameters?.top_p,
176
+ frequency_penalty: parameters?.frequency_penalty,
177
  presence_penalty: parameters?.presence_penalty,
178
  };
179
 
src/lib/server/models.ts CHANGED
@@ -60,11 +60,11 @@ const modelConfig = z.object({
60
  .object({
61
  temperature: z.number().min(0).max(2).optional(),
62
  truncate: z.number().int().positive().optional(),
63
- max_new_tokens: z.number().int().positive().optional(),
64
  stop: z.array(z.string()).optional(),
65
  top_p: z.number().positive().optional(),
66
  top_k: z.number().positive().optional(),
67
- repetition_penalty: z.number().min(-2).max(2).optional(),
68
  presence_penalty: z.number().min(-2).max(2).optional(),
69
  })
70
  .passthrough()
 
60
  .object({
61
  temperature: z.number().min(0).max(2).optional(),
62
  truncate: z.number().int().positive().optional(),
63
+ max_tokens: z.number().int().positive().optional(),
64
  stop: z.array(z.string()).optional(),
65
  top_p: z.number().positive().optional(),
66
  top_k: z.number().positive().optional(),
67
+ frequency_penalty: z.number().min(-2).max(2).optional(),
68
  presence_penalty: z.number().min(-2).max(2).optional(),
69
  })
70
  .passthrough()
src/lib/server/textGeneration/generate.ts CHANGED
@@ -111,7 +111,7 @@ If the user is just having a casual conversation that doesn't require explanatio
111
 
112
  Do not use prefixes such as Response: or Answer: when answering to the user.`,
113
  generateSettings: {
114
- max_new_tokens: 1024,
115
  },
116
  modelId: model.id,
117
  });
 
111
 
112
  Do not use prefixes such as Response: or Answer: when answering to the user.`,
113
  generateSettings: {
114
+ max_tokens: 1024,
115
  },
116
  modelId: model.id,
117
  });
src/lib/server/textGeneration/reasoning.ts CHANGED
@@ -21,9 +21,9 @@ export async function generateSummaryOfReasoning(
21
  preprompt: `You are tasked with summarizing the latest reasoning steps. Never describe results of the reasoning, only the process. Remain vague in your summary.
22
  The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points.
23
  Example: "Thinking about life...", "Summarizing the results...", "Processing the input..."`,
24
- generateSettings: {
25
- max_new_tokens: 50,
26
- },
27
  modelId,
28
  })
29
  );
 
21
  preprompt: `You are tasked with summarizing the latest reasoning steps. Never describe results of the reasoning, only the process. Remain vague in your summary.
22
  The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points.
23
  Example: "Thinking about life...", "Summarizing the results...", "Processing the input..."`,
24
+ generateSettings: {
25
+ max_tokens: 50,
26
+ },
27
  modelId,
28
  })
29
  );
src/lib/server/textGeneration/title.ts CHANGED
@@ -44,9 +44,9 @@ Do not answer the question.
44
  Do not include the word prompt into your response.
45
  Do not include quotes, emojis, hashtags or trailing punctuation.
46
  Return ONLY the title text.`,
47
- generateSettings: {
48
- max_new_tokens: 30,
49
- },
50
  modelId,
51
  })
52
  )
 
44
  Do not include the word prompt into your response.
45
  Do not include quotes, emojis, hashtags or trailing punctuation.
46
  Return ONLY the title text.`,
47
+ generateSettings: {
48
+ max_tokens: 30,
49
+ },
50
  modelId,
51
  })
52
  )
src/lib/types/Assistant.ts CHANGED
@@ -19,7 +19,7 @@ export interface Assistant extends Timestamps {
19
  generateSettings?: {
20
  temperature?: number;
21
  top_p?: number;
22
- repetition_penalty?: number;
23
  top_k?: number;
24
  };
25
  dynamicPrompt?: boolean;
 
19
  generateSettings?: {
20
  temperature?: number;
21
  top_p?: number;
22
+ frequency_penalty?: number;
23
  top_k?: number;
24
  };
25
  dynamicPrompt?: boolean;