File size: 8,697 Bytes
5c5b371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
import { Request } from "express";
import {
  API_REQUEST_VALIDATORS,
  API_REQUEST_TRANSFORMERS,
} from "../../../../shared/api-schemas";
import { BadRequestError } from "../../../../shared/errors";
import { fixMistralPrompt, isMistralVisionModel } from "../../../../shared/api-schemas/mistral-ai";
import {
  isImageGenerationRequest,
  isTextGenerationRequest,
} from "../../common";
import { RequestPreprocessor } from "../index";

/** Transforms an incoming request body to one that matches the target API. */
export const transformOutboundPayload: RequestPreprocessor = async (req) => {
  // Retried requests already went through this preprocessor; never run twice.
  if (req.retryCount > 0) {
    return;
  }

  const transformable =
    isTextGenerationRequest(req) || isImageGenerationRequest(req);
  if (!transformable) {
    // This is probably an indication of a bug in the proxy.
    const { inboundApi, outboundApi, method, path } = req;
    req.log.warn(
      { inboundApi, outboundApi, method, path },
      "`transformOutboundPayload` called on a non-transformable request."
    );
    return;
  }

  applyMistralPromptFixes(req);
  applyGoogleAIKeyTransforms(req);
  applyOpenAIResponsesTransform(req);

  // Native prompts are those which were already provided by the client in the
  // target API format. They only need validation, not translation.
  if (req.inboundApi === req.outboundApi) {
    req.body = API_REQUEST_VALIDATORS[req.inboundApi].parse(req.body);
    return;
  }

  // Prompt requires translation from one API format to another.
  const transformation = `${req.inboundApi}->${req.outboundApi}` as const;
  const transform = API_REQUEST_TRANSFORMERS[transformation];
  if (!transform) {
    throw new BadRequestError(
      `${transformation} proxying is not supported. Make sure your client is configured to send requests in the correct format and to the correct endpoint.`
    );
  }

  req.log.info({ transformation }, "Transforming request...");
  req.body = await transform(req);
};

/**
 * Rewrites a chat-completions-shaped body into the OpenAI Responses API
 * shape when the outbound API is "openai-responses". No-op otherwise.
 *
 * Differences handled here:
 *   1. Endpoint is /v1/responses instead of /v1/chat/completions (elsewhere).
 *   2. `messages` moves to `input.messages`.
 *   3. `conversation_id` becomes `previousResponseId`.
 *   4. `max_tokens` becomes `max_output_tokens`.
 *   5. Tools are given a default `type` if none of them carry a known type.
 */
function applyOpenAIResponsesTransform(req: Request): void {
  if (req.outboundApi !== "openai-responses") return;

  req.log.info("Transforming request to OpenAI Responses API format");

  const body = req.body;
  // Shallow copy so the post-transform log can still reference original fields.
  const originalBody = { ...body };

  // messages -> input.messages (only when the client didn't send `input`)
  if (body.messages && !body.input) {
    body.input = { messages: body.messages };
    delete body.messages;
  }

  // conversation_id -> previousResponseId (only when not already provided)
  if (body.conversation_id && !body.previousResponseId) {
    body.previousResponseId = body.conversation_id;
    delete body.conversation_id;
  }

  // max_tokens -> max_output_tokens (only when not already provided)
  if (body.max_tokens && !body.max_output_tokens) {
    body.max_output_tokens = body.max_tokens;
    delete body.max_tokens;
  }

  // If the tool list contains no recognized tool type at all, stamp each tool
  // with a type, defaulting to "function".
  if (body.tools) {
    const hasKnownType = body.tools.some(
      (tool: any) => tool.type === "function" || tool.type === "web_search"
    );
    if (!hasKnownType) {
      body.tools = body.tools.map((tool: any) => ({
        ...tool,
        type: tool.type || "function",
      }));
    }
  }

  req.log.info(
    {
      originalModel: originalBody.model,
      newFormat: "openai-responses",
    },
    "Successfully transformed request to Responses API format"
  );
}

/**
 * Handles weird Mistral cases that don't fit into our abstractions.
 *
 * For inbound `mistral-ai` requests: validates the body, normalizes
 * object-style `image_url` content items to plain strings, applies shared
 * prompt fixes, and — on the AWS service only — converts prefix-reliant chat
 * prompts into text-completion requests. No-op for other inbound APIs.
 */
function applyMistralPromptFixes(req: Request): void {
  if (req.inboundApi === "mistral-ai") {
    // Mistral Chat is very similar to OpenAI but not identical and many clients
    // don't properly handle the differences. We will try to validate the
    // mistral prompt and try to fix it if it fails. It will be re-validated
    // after this function returns.
    const result = API_REQUEST_VALIDATORS["mistral-ai"].parse(req.body);
    
    // Check if this is a vision model request
    // NOTE(review): isVisionModel is only used for logging below; the image
    // handling keys off of message content, not the model name.
    const isVisionModel = isMistralVisionModel(req.body.model);
    
    // Check if the request contains image content
    const hasImageContent = result.messages?.some((msg: {content: string | any[]}) => 
      Array.isArray(msg.content) && 
      msg.content.some((item: any) => item.type === "image_url")
    );
    
    // For vision requests, normalize the image_url format
    // NOTE(review): this mutates the validator's parsed output (`result`), not
    // req.body directly; the normalized messages only reach req.body through
    // the fixMistralPrompt(result.messages) assignment below — presumably the
    // validator returns a copy. Verify against the mistral-ai schema.
    if (hasImageContent && Array.isArray(result.messages)) {
      // Process each message with image content
      result.messages.forEach((msg: any) => {
        if (Array.isArray(msg.content)) {
          // Process each content item
          msg.content.forEach((item: any) => {
            if (item.type === "image_url") {
              // Normalize the image_url field to a string format that Mistral expects
              if (typeof item.image_url === "object") {
                // If it's an object, extract the URL or base64 data
                // (URL takes precedence when both are present).
                if (item.image_url.url) {
                  item.image_url = item.image_url.url;
                } else if (item.image_url.data) {
                  item.image_url = item.image_url.data;
                }
                
                req.log.info(
                  { model: req.body.model },
                  "Normalized object-format image_url to string format"
                );
              }
            }
          });
        }
      });
    }
    
    // Apply Mistral prompt fixes while preserving multimodal content
    req.body.messages = fixMistralPrompt(result.messages);
    req.log.info(
      { 
        n: req.body.messages.length, 
        prev: result.messages.length,
        isVisionModel,
        hasImageContent 
      },
      "Applied Mistral chat prompt fixes."
    );

    // If this is a vision model with image content, it MUST use the chat API
    // and cannot be converted to text completions
    if (hasImageContent) {
      req.log.info(
        { model: req.body.model },
        "Detected Mistral vision request with image content. Keeping as chat format."
      );
      return;
    }

    // If the prompt relies on `prefix: true` for the last message, we need to
    // convert it to a text completions request because AWS Mistral support for
    // this feature is broken.
    // On Mistral La Plateforme, we can't do this because they don't expose
    // a text completions endpoint.
    const { messages } = req.body;
    const lastMessage = messages && messages[messages.length - 1];
    if (lastMessage?.role === "assistant" && req.service === "aws") {
      // enable prefix if client forgot, otherwise the template will insert an
      // eos token which is very unlikely to be what the client wants.
      lastMessage.prefix = true;
      req.outboundApi = "mistral-text";
      req.log.info(
        "Native Mistral chat prompt relies on assistant message prefix. Converting to text completions request."
      );
    }
  }
}

/** Converts a snake_case key to camelCase (e.g. "max_tokens" -> "maxTokens"). */
function toCamelCase(str: string): string {
  return str.replace(/_([a-z])/g, (_match, ch: string) => ch.toUpperCase());
}

/**
 * Recursively rewrites all object keys from snake_case to camelCase.
 * Arrays are walked element-by-element; primitives pass through unchanged.
 * Sets `hasTransformed.value` to true if any key was actually renamed, so
 * the caller can log only when work was done.
 */
function transformKeysToCamelCase(obj: any, hasTransformed = { value: false }): any {
  if (Array.isArray(obj)) {
    const items = [];
    for (const element of obj) {
      items.push(transformKeysToCamelCase(element, hasTransformed));
    }
    return items;
  }

  if (obj === null || typeof obj !== 'object') {
    return obj;
  }

  const result: Record<string, any> = {};
  for (const [key, value] of Object.entries(obj)) {
    // snake_case -> camelCase (inlined equivalent of toCamelCase)
    const camelKey = key.replace(/_([a-z])/g, (_m, ch: string) => ch.toUpperCase());
    if (camelKey !== key) {
      hasTransformed.value = true;
    }
    result[camelKey] = transformKeysToCamelCase(value, hasTransformed);
  }
  return result;
}

/**
 * Normalizes all keys in a Google AI (Gemini) request body to camelCase.
 *
 * The Gemini API accepts both snake_case and camelCase for some params even
 * though the docs use snake_case. Some frontends (e.g. ST) mix the two, so we
 * normalize every key to camelCase. No-op for other outbound APIs.
 */
function applyGoogleAIKeyTransforms(req: Request): void {
  if (req.outboundApi === "google-ai") {
    const hasTransformed = { value: false };
    req.body = transformKeysToCamelCase(req.body, hasTransformed);
    if (hasTransformed.value) {
      // Fix: the message previously said "camelCase -> snake_case", which is
      // the reverse of the direction this transform actually applies.
      req.log.info("Applied Gemini snake_case -> camelCase transform");
    }
  }
}