evalstate HF Staff commited on
Commit
8559a03
·
verified ·
1 Parent(s): 1326efc

Upload folder using huggingface_hub

Browse files
Dockerfile ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ---- Build Stage ----
FROM node:18-alpine AS builder

# Install pnpm while still root. The version is kept in sync with the
# "packageManager" field of package.json so corepack does not have to fetch a
# second pnpm release when the install step runs inside the project.
RUN corepack enable && corepack prepare pnpm@10.28.0 --activate

# Everything below runs as the unprivileged "node" user
USER node
ENV HOME=/home/node \
    PATH=/home/node/.local/bin:$PATH

WORKDIR $HOME/app

# Install dependencies and build.
# The sources and tsconfig.json are copied before the install step because the
# package's "prepare" script runs the build during `pnpm install`.
COPY --chown=node package.json pnpm-lock.yaml* ./
COPY --chown=node tsconfig.json ./
COPY --chown=node src ./src
RUN pnpm install --frozen-lockfile
RUN pnpm run build

# ---- Production Stage ----
FROM node:18-alpine AS runner

# pnpm is not needed at runtime; run as the non-root "node" user for security
USER node
ENV HOME=/home/node \
    PATH=/home/node/.local/bin:$PATH

# Create app directory
WORKDIR $HOME/app

# Copy only the build output, the manifest and the installed modules from the builder
COPY --chown=node --from=builder /home/node/app/dist ./dist
COPY --chown=node --from=builder /home/node/app/package.json ./
COPY --chown=node --from=builder /home/node/app/node_modules ./node_modules

# Default port the server listens on (see src/index.ts)
EXPOSE 3000

CMD ["node", "dist/index.js"]
README.md CHANGED
@@ -1,12 +1,208 @@
1
  ---
2
- title: Openrespones
3
- emoji: 👀
4
- colorFrom: indigo
5
- colorTo: blue
6
- sdk: gradio
7
- sdk_version: 6.3.0
8
- app_file: app.py
9
  pinned: false
 
 
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: OpenResponses
3
+ emoji: 😻
4
+ colorFrom: red
5
+ colorTo: red
6
+ sdk: docker
 
 
7
  pinned: false
8
+ license: mit
9
+ short_description: Private space for OpenResponses
10
+ app_port: 3000
11
+ private: true
12
  ---
13
 
14
+
15
+ # responses.js
16
+
17
+ A lightweight Express.js server that implements a translation layer between the two main LLM APIs currently available: the Responses API and the Chat Completions API. It works with any Chat Completions API, whether local or remotely hosted.
18
+
19
+ ## 🎮 Live Demo
20
+
21
+ [**Try responses.js right now, no installation needed!**](https://huggingface.co/spaces/Wauplin/responses.js)
22
+
23
+ ## ✨ Features
24
+
25
+ - **ResponsesAPI**: Partial implementation of [OpenAI's Responses API](https://platform.openai.com/docs/api-reference/responses), on top of Chat Completion API
26
+ - **Provider Agnostic**: Works with any Chat Completion API (local or remote)
27
+ - **Streaming Support**: Support for streamed responses
28
+ - **Structured Output**: Support for structured data responses (e.g. jsonschema)
29
+ - **Function Calling**: Tool and function calling capabilities
30
+ - **Multi-modal Input**: Text and image input support
31
+ - **Remote MCP**: Execute MCP tool calls remotely
32
+ - **Demo UI**: Interactive web interface for testing
33
+
34
+ Not implemented: remote function calling, file upload, stateful API, etc.
35
+
36
+ ## 🚀 Quick Start
37
+
38
+ ### Prerequisites
39
+
40
+ - Node.js (v18 or higher)
41
+ - pnpm (recommended) or npm
42
+ - A Hugging Face token with inference permissions. Create one from your [user settings](https://huggingface.co/settings/tokens).
43
+
44
+ ### Installation & Setup
45
+
46
+ ```bash
47
+ # Clone the repository
48
+ git clone https://github.com/huggingface/responses.js.git
49
+ cd responses.js
50
+
51
+ # Install dependencies
52
+ pnpm install
53
+
54
+ # Start the development server
55
+ pnpm dev
56
+ ```
57
+
58
+ The server will be available at `http://localhost:3000`.
59
+
60
+ ### Running Examples
61
+
62
+ Explore the various capabilities with our example scripts located in the [./examples](./examples) folder:
63
+
64
+ ```bash
65
+ # Basic text input
66
+ pnpm run example text
67
+
68
+ # Multi-turn conversations
69
+ pnpm run example multi_turn
70
+
71
+ # Text + image input
72
+ pnpm run example image
73
+
74
+ # Streaming responses
75
+ pnpm run example streaming
76
+
77
+ # Structured output
78
+ pnpm run example structured_output
79
+ pnpm run example structured_output_streaming
80
+
81
+ # Function calling
82
+ pnpm run example function
83
+ pnpm run example function_streaming
84
+ ```
85
+
86
+ ## 🧪 Testing
87
+
88
+ ### Important Notes
89
+
90
+ - Server must be running (`pnpm dev`) on `http://localhost:3000`
91
+ - The `API_KEY` environment variable must be set to your LLM provider's API key
92
+ - Tests use real inference providers and may incur costs
93
+ - Tests are not run in CI due to billing requirements
94
+
95
+ ### Running Tests
96
+
97
+ ```bash
98
+ # Run all tests
99
+ pnpm test
100
+
101
+ # Run specific test patterns
102
+ pnpm test --grep "streaming"
103
+ pnpm test --grep "function"
104
+ pnpm test --grep "structured"
105
+ ```
106
+
107
+ ### Interactive Demo UI
108
+
109
+ Experience the API through our interactive web interface, adapted from the [openai-responses-starter-app](https://github.com/openai/openai-responses-starter-app).
110
+
111
+ [![Demo Video](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/responses.js/demo_mini.png)](https://youtu.be/F-tAUnW-nd0)
112
+
113
+
114
+ #### Setup
115
+
116
+ 1. Create a configuration file:
117
+
118
+ ```bash
119
+ # Create demo/.env
120
+ cat > demo/.env << EOF
121
+ MODEL="moonshotai/Kimi-K2-Instruct:groq"
122
+ OPENAI_BASE_URL=http://localhost:3000/v1
123
+ OPENAI_API_KEY=${HF_TOKEN:-<your-huggingface-token>}
124
+ EOF
125
+ ```
126
+
127
+ 2. Install demo dependencies:
128
+
129
+ ```bash
130
+ pnpm demo:install
131
+ ```
132
+
133
+ 3. Launch the demo:
134
+
135
+ ```bash
136
+ pnpm demo:dev
137
+ ```
138
+
139
+ The demo will be available at `http://localhost:3001`.
140
+
141
+ ## 🐳 Running with Docker
142
+
143
+ You can run the server in a production-ready container using Docker.
144
+
145
+ ### Build the Docker image
146
+
147
+ ```bash
148
+ docker build -t responses.js .
149
+ ```
150
+
151
+ ### Run the server
152
+
153
+ ```bash
154
+ docker run -p 3000:3000 responses.js
155
+ ```
156
+
157
+ The server will be available at `http://localhost:3000`.
158
+
159
+ ## 📁 Project Structure
160
+
161
+ ```
162
+ responses.js/
163
+ ├── demo/ # Interactive chat UI demo
164
+ ├── examples/ # Example scripts using openai-node client
165
+ ├── src/
166
+ │ ├── index.ts # Application entry point
167
+ │ ├── server.ts # Express app configuration and route definitions
168
+ │ ├── routes/ # API route implementations
169
+ │ ├── middleware/ # Middleware (validation, logging, etc.)
170
+ │ └── schemas/ # Zod validation schemas
171
+ ├── scripts/ # Utility and build scripts
172
+ ├── package.json # Package configuration and dependencies
173
+ └── README.md # This file
174
+ ```
175
+
176
+ ## 🛣️ Done / TODOs
177
+
178
+ > **Note**: This project is in active development. The roadmap below represents our current priorities and may evolve. Do not take anything for granted.
179
+
180
+ - [x] OpenAI types integration for consistent output
181
+ - [x] Streaming mode support
182
+ - [x] Structured output capabilities
183
+ - [x] Function calling implementation
184
+ - [x] Repository migration to dedicated responses.js repo
185
+ - [x] Basic development tooling setup
186
+ - [x] Demo application with comprehensive instructions
187
+ - [x] Multi-turn conversation fixes for text messages + tool calls
188
+ - [x] Correctly return "usage" field
189
+ - [x] MCP support (non-streaming)
190
+ - [x] MCP support (streaming)
191
+ - [ ] Tools execution (web search, file search, image generation, code interpreter)
192
+ - [ ] Background mode support
193
+ - [ ] Additional API routes (GET, DELETE, CANCEL, LIST responses)
194
+ - [ ] Reasoning capabilities
195
+
196
+ ## 🤝 Contributing
197
+
198
+ We welcome contributions! Please feel free to submit issues, feature requests, or pull requests.
199
+
200
+ ## 📄 License
201
+
202
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
203
+
204
+ ## 🙏 Acknowledgments
205
+
206
+ - Based on OpenAI's [Responses API specification](https://platform.openai.com/docs/api-reference/responses)
207
+ - Built on top of [OpenAI's nodejs client](https://github.com/openai/openai-node)
208
+ - Demo UI adapted from [openai-responses-starter-app](https://github.com/openai/openai-responses-starter-app)
package.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "@huggingface/responses.js",
3
+ "packageManager": "pnpm@10.28.0+sha512.05df71d1421f21399e053fde567cea34d446fa02c76571441bfc1c7956e98e363088982d940465fd34480d4d90a0668bc12362f8aa88000a64e83d0b0e47be48",
4
+ "version": "0.1.0",
5
+ "type": "module",
6
+ "description": "Server for handling AI responses",
7
+ "repository": "https://github.com/huggingface/huggingface.js.git",
8
+ "publishConfig": {
9
+ "access": "public"
10
+ },
11
+ "main": "./dist/index.js",
12
+ "module": "./dist/index.mjs",
13
+ "types": "./dist/index.d.ts",
14
+ "exports": {
15
+ ".": {
16
+ "types": "./dist/index.d.ts",
17
+ "require": "./dist/index.js",
18
+ "import": "./dist/index.mjs"
19
+ }
20
+ },
21
+ "engines": {
22
+ "node": ">=18"
23
+ },
24
+ "source": "index.ts",
25
+ "scripts": {
26
+ "build": "tsup src/*.ts --format cjs,esm --clean && tsc --emitDeclarationOnly --declaration",
27
+ "check": "tsc",
28
+ "dev": "tsx watch src/index.ts",
29
+ "format": "prettier --write .",
30
+ "format:check": "prettier --check .",
31
+ "lint": "eslint --quiet --fix --ext .cjs,.ts .",
32
+ "lint:check": "eslint --ext .cjs,.ts .",
33
+ "prepublishOnly": "pnpm run build",
34
+ "prepare": "pnpm run build",
35
+ "start": "node dist/index.js",
36
+ "example": "node examples/_run.js",
37
+ "demo:build": "cd demo && npm run build",
38
+ "demo:dev": "cd demo && npm run dev",
39
+ "demo:install": "cd demo && npm install",
40
+ "demo:lint": "cd demo && npm run lint",
41
+ "demo:format": "cd demo && npm run format",
42
+ "demo:start": "cd demo && npm run start",
43
+ "deploy:spaces": "./push_to_space.sh",
44
+ "test": "mocha --timeout 20000 \"tests/**/*.test.js\""
45
+ },
46
+ "files": [
47
+ "src",
48
+ "dist",
49
+ "tsconfig.json"
50
+ ],
51
+ "keywords": [
52
+ "huggingface",
53
+ "ai",
54
+ "llm",
55
+ "responses-api",
56
+ "server"
57
+ ],
58
+ "author": "Hugging Face",
59
+ "license": "MIT",
60
+ "dependencies": {
61
+ "@modelcontextprotocol/sdk": "^1.15.0",
62
+ "express": "^4.21.2",
63
+ "openai": "^5.8.2",
64
+ "zod": "^3.25.71"
65
+ },
66
+ "devDependencies": {
67
+ "@eslint/js": "^9.30.1",
68
+ "@types/express": "^4.17.23",
69
+ "@typescript-eslint/eslint-plugin": "^8.35.1",
70
+ "@typescript-eslint/parser": "^8.35.1",
71
+ "eslint": "^9.30.1",
72
+ "eslint-config-prettier": "^10.1.5",
73
+ "eslint-plugin-prettier": "^5.5.1",
74
+ "mocha": "^11.7.1",
75
+ "prettier": "^3.6.2",
76
+ "tsup": "^8.5.0",
77
+ "tsx": "^4.20.3",
78
+ "typescript": "^5.8.3"
79
+ }
80
+ }
pnpm-lock.yaml ADDED
The diff for this file is too large to render. See raw diff
 
src/index.ts ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { createApp } from "./server.js";
2
+
3
const app = createApp();
const port = process.env.PORT || 3000;

/** Prints the startup banner once the HTTP listener is up. */
const announceStartup = (): void => {
  console.log(`🚀 Server started at ${new Date().toISOString()}`);
  console.log(`🌐 Server is running on http://localhost:${port}`);
  console.log("─".repeat(60));
};

/** Logs the shutdown banner, then terminates the process with exit code 0. */
const logShutdownAndExit = (): void => {
  console.log("─".repeat(60));
  console.log(`🛑 Server shutting down at ${new Date().toISOString()}`);
  process.exit(0);
};

// Start server
app.listen(port, announceStartup);

// Both termination signals get the same shutdown logging
for (const signal of ["SIGINT", "SIGTERM"] as const) {
  process.on(signal, logShutdownAndExit);
}

export default app;
src/lib/McpResultFormatter.ts ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Vendored from `@huggingface/mcp-client`
3
+ *
4
+ * https://github.com/huggingface/huggingface.js/blob/main/packages/mcp-client/src/ResultFormatter.ts
5
+ */
6
+
7
+ import type {
8
+ TextResourceContents,
9
+ BlobResourceContents,
10
+ CompatibilityCallToolResult,
11
+ } from "@modelcontextprotocol/sdk/types";
12
+
13
/**
 * Renders the `content` entries of an MCP tool result as plain text.
 * Textual entries are passed through unchanged; binary entries (images, audio,
 * blob resources) are replaced by a one-line summary with their decoded size.
 */
export class McpResultFormatter {
  /**
   * Produces a single newline-joined string from a tool result.
   *
   * Entries whose `type` is not "text", "image", "audio" or "resource" are
   * skipped, as are resources that carry neither `text` nor `blob`.
   *
   * @param result The tool result whose `content` should be rendered
   * @returns The rendered text, or "[No content]" when `content` is missing,
   *   not an array, or empty
   */
  static format(result: CompatibilityCallToolResult): string {
    const entries = result.content;
    if (!entries || !Array.isArray(entries) || entries.length === 0) {
      return "[No content]";
    }

    const lines: string[] = [];

    for (const entry of entries) {
      switch (entry.type) {
        case "text":
          // Plain text is emitted verbatim
          lines.push(entry.text);
          break;

        case "image":
          lines.push(this.summarizeBinary(`: Image ${entry.mimeType}`, this.getBase64Size(entry.data)));
          break;

        case "audio":
          lines.push(this.summarizeBinary(`: Audio ${entry.mimeType}`, this.getBase64Size(entry.data)));
          break;

        case "resource": {
          // Embedded resources are either textual (`text`) or binary (`blob`)
          const { resource } = entry;
          if ("text" in resource) {
            lines.push((resource as TextResourceContents).text);
          } else if ("blob" in resource) {
            const blob = resource as BlobResourceContents;
            const byteCount = this.getBase64Size(blob.blob);
            const location = blob.uri ? ` (${blob.uri})` : "";
            const kind = blob.mimeType || "unknown type";
            lines.push(this.summarizeBinary(`${location}: ${kind}`, byteCount));
          }
          break;
        }
      }
    }

    return lines.join("\n");
  }

  /**
   * Builds the two-line placeholder used in place of binary payloads.
   *
   * @param descriptor Text inserted right after "Binary Content" (location and/or media type)
   * @param byteCount Approximate decoded size of the payload
   */
  private static summarizeBinary(descriptor: string, byteCount: number): string {
    return `[Binary Content${descriptor}, ${byteCount} bytes]\nThe task is complete and the content accessible to the User`;
  }

  /**
   * Estimates how many bytes a base64 string decodes to.
   * When the input contains a comma (e.g. a `data:image/png;base64,` header),
   * only the segment after the first comma is measured.
   */
  private static getBase64Size(base64: string): number {
    // split(",")[1] is undefined only when there is no comma at all
    const payload = base64.split(",")[1] ?? base64;

    // Every 4 base64 characters encode 3 bytes; trailing "=" marks unused bytes
    let padding = 0;
    if (payload.endsWith("==")) {
      padding = 2;
    } else if (payload.endsWith("=")) {
      padding = 1;
    }
    return Math.floor((payload.length * 3) / 4 - padding);
  }
}
src/lib/buildProviderScopedModel.ts ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Returns `model` with its provider suffix set to `provider`.
 *
 * Anything from the first ":" in `model` onwards is dropped and replaced by
 * `:<provider>` (trimmed). When `provider` is missing, empty, or only
 * whitespace, `model` is returned untouched.
 *
 * @param model Model identifier, optionally already carrying a ":suffix"
 * @param provider Provider name to scope the model to
 */
export function buildProviderScopedModel(model: string, provider?: string | null): string {
  const scope = provider?.trim();
  if (!scope) {
    return model;
  }

  const separatorAt = model.indexOf(":");
  const unscopedModel = separatorAt >= 0 ? model.slice(0, separatorAt) : model;

  return [unscopedModel, scope].join(":");
}
src/lib/generateUniqueId.ts ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * AI-generated file using Cursor + Claude 4
3
+ *
4
+ * Generate a unique ID for the response
5
+ */
6
+ import { randomBytes } from "crypto";
7
+
8
/**
 * Creates a random identifier made of 24 random bytes rendered as 48 hex characters.
 *
 * @param prefix Optional label; when non-empty the result is `<prefix>_<hex>`
 * @returns The bare hex string, or the prefixed form when a prefix is given
 */
export function generateUniqueId(prefix?: string): string {
  const hex = randomBytes(24).toString("hex");
  if (!prefix) {
    return hex;
  }
  return `${prefix}_${hex}`;
}
src/mcp.ts ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { Client } from "@modelcontextprotocol/sdk/client/index.js";
2
+ import { SSEClientTransport } from "@modelcontextprotocol/sdk/client/sse.js";
3
+ import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js";
4
+ import { version as packageVersion } from "../package.json";
5
+ import { URL } from "url";
6
+
7
+ import type { McpServerParams } from "./schemas";
8
+ import { McpResultFormatter } from "./lib/McpResultFormatter";
9
+
10
/**
 * Creates an MCP client and connects it to the server described by `mcpServer`.
 *
 * The streamable-HTTP transport is attempted first; if that connection attempt
 * throws, the same client is connected over SSE instead. An SSE failure
 * propagates to the caller.
 *
 * @param mcpServer Server description; `server_url` is required, `headers` are
 *   forwarded as request headers when present
 * @returns The connected client
 */
export async function connectMcpServer(mcpServer: McpServerParams): Promise<Client> {
  const client = new Client({ name: "@huggingface/responses.js", version: packageVersion });

  const { server_url: serverUrl, headers } = mcpServer;
  const endpoint = new URL(serverUrl);
  const transportOptions = {
    requestInit: headers ? { headers } : undefined,
  };

  try {
    // Preferred transport: streamable HTTP
    await client.connect(new StreamableHTTPClientTransport(endpoint, transportOptions));
  } catch {
    // Fallback transport: SSE
    await client.connect(new SSEClientTransport(endpoint, transportOptions));
  }

  console.log("Connected to MCP server", serverUrl);

  return client;
}
34
+
35
/**
 * Connects to an MCP server, invokes one tool and returns its formatted output.
 *
 * Never throws: any failure (connection, argument parsing, tool call) is
 * reported through the `error` field instead. The connection opened for the
 * call is always closed before returning so repeated calls do not leak
 * transports.
 *
 * @param mcpServer Server to connect to
 * @param toolName Name of the tool to call
 * @param argumentsString JSON-encoded tool arguments; the empty string means "no arguments"
 * @returns `{ output }` on success, `{ error }` with a non-empty description on failure
 */
export async function callMcpTool(
  mcpServer: McpServerParams,
  toolName: string,
  argumentsString: string
): Promise<{ error: string; output?: undefined } | { error?: undefined; output: string }> {
  let client: Client | undefined;
  try {
    client = await connectMcpServer(mcpServer);
    const toolArgs: Record<string, unknown> = argumentsString === "" ? {} : JSON.parse(argumentsString);
    console.log(`Calling MCP tool '${toolName}'`);
    const toolResponse = await client.callTool({ name: toolName, arguments: toolArgs });
    const formattedResult = McpResultFormatter.format(toolResponse);
    return {
      output: formattedResult,
    };
  } catch (error) {
    let errorMessage: string;
    if (error instanceof Error) {
      errorMessage = error.message;
    } else if (typeof error === "string") {
      errorMessage = error;
    } else {
      // JSON.stringify yields undefined for values such as `undefined` and
      // throws on circular structures; always end up with a real string.
      try {
        errorMessage = JSON.stringify(error) ?? String(error);
      } catch {
        errorMessage = String(error);
      }
    }
    return {
      error: errorMessage,
    };
  } finally {
    if (client) {
      try {
        await client.close();
      } catch {
        // Best effort: a failure while closing must not replace the call's result
      }
    }
  }
}
src/middleware/logging.ts ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * AI-generated file using Cursor + Claude 4
3
+ *
4
+ * Middleware to log all HTTP requests with duration, status code, method, and route
5
+ */
6
+ import { type Request, type Response, type NextFunction } from "express";
7
+
8
/** Data needed to render one log line; the last two fields exist only once a response has finished. */
interface LogContext {
  timestamp: string;
  method: string;
  url: string;
  statusCode?: number;
  duration?: number;
}

/**
 * Renders a log line for a request.
 *
 * Without a status code the line describes an incoming request. With one, it
 * describes a finished response and is tagged with an emoji chosen from the
 * status class (2xx, 4xx, 5xx, anything else).
 */
function formatLogMessage(context: LogContext): string {
  const stamp = `[${context.timestamp}]`;
  const target = `${context.method} ${context.url}`;
  const status = context.statusCode;

  if (status === undefined) {
    return `${stamp} 📥 ${target}`;
  }

  // Default covers 1xx, 3xx and anything outside the ranges below
  let badge = "ℹ️";
  if (status >= 500) {
    badge = "❌";
  } else if (status >= 400) {
    badge = "⚠️";
  } else if (status >= 200 && status < 300) {
    badge = "✅";
  }

  return `${stamp} ${badge} ${status} ${target} (${context.duration}ms)`;
}
33
+
34
/**
 * Builds an Express middleware that writes two console lines per request:
 * one when the request arrives and one when the response emits "finish",
 * the latter including the status code and the elapsed milliseconds.
 */
export function requestLogger() {
  return (req: Request, res: Response, next: NextFunction): void => {
    const receivedAt = Date.now();
    const method = req.method;
    const url = req.url;

    // Shared printer: `outcome` is absent for the arrival line
    const emit = (outcome?: { statusCode: number; duration: number }): void => {
      console.log(formatLogMessage({ timestamp: new Date().toISOString(), method, url, ...outcome }));
    };

    // Arrival line
    emit();

    // Completion line, written once the response has been fully handed off
    res.on("finish", () => {
      emit({ statusCode: res.statusCode, duration: Date.now() - receivedAt });
    });

    next();
  };
}
src/middleware/validation.ts ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * AI-generated file using Cursor + Claude 4
3
+ */
4
+
5
+ import { type Request, type Response, type NextFunction } from "express";
6
+ import { z } from "zod";
7
+
8
/**
 * Middleware factory that validates `req.body` against a Zod schema.
 *
 * On success `req.body` is replaced by the parsed value and `next()` is
 * called. On a Zod validation failure the request is answered with HTTP 400
 * and the list of issues; any other error raised while parsing is answered
 * with HTTP 500.
 *
 * `next()` is deliberately invoked outside the try/catch so that errors thrown
 * by downstream handlers are not mistaken for validation failures. The raw
 * request body is never logged because it may contain credentials; only the
 * validation issues are.
 *
 * @param schema - Zod schema to validate against
 * @returns Express middleware function
 */
export function validateBody<T extends z.ZodTypeAny>(schema: T) {
  return (req: Request, res: Response, next: NextFunction): void => {
    let validatedBody: z.infer<T>;
    try {
      validatedBody = schema.parse(req.body);
    } catch (error) {
      if (error instanceof z.ZodError) {
        console.log("Request body validation failed:", error.errors);
        res.status(400).json({
          success: false,
          error: error.errors,
          details: error.errors,
        });
      } else {
        res.status(500).json({
          success: false,
          error: "Internal server error",
        });
      }
      return;
    }

    req.body = validatedBody;
    next();
  };
}
36
+
37
/**
 * Express request whose `body` is typed as `T`.
 * Intended for handlers that run after a body-validation middleware.
 */
export interface ValidatedRequest<T> extends Request {
  body: T;
}
src/openai_patch.ts ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * This file patches OpenAI SDK types for OpenResponses spec compliance.
3
+ *
4
+ * The OpenAI SDK uses event type names like "response.reasoning_text.delta",
5
+ * but the OpenResponses spec uses "response.reasoning.delta" (without "_text").
6
+ * We maintain custom event types here to match the OpenResponses specification.
7
+ *
8
+ * Once the OpenAI SDK aligns with OpenResponses spec, this file can be simplified.
9
+ */
10
+ import type {
11
+ ResponseReasoningItem as OpenAIResponseReasoningItem,
12
+ ResponseStreamEvent as OpenAIResponseStreamEvent,
13
+ ResponseOutputRefusal,
14
+ ResponseOutputText,
15
+ ResponseContentPartAddedEvent,
16
+ ResponseContentPartDoneEvent,
17
+ ResponseTextDeltaEvent,
18
+ ResponseTextDoneEvent,
19
+ } from "openai/resources/responses/responses";
20
+
21
+ import type { ChatCompletionChunk } from "openai/resources/chat/completions";
22
+
23
/** One piece of raw reasoning text carried in a reasoning item's `content`. */
export interface ReasoningTextContent {
  type: "reasoning_text";
  text: string;
}

/** SDK reasoning item extended with the raw chain-of-thought (in addition to the summary). */
export type PatchedResponseReasoningItem = OpenAIResponseReasoningItem & {
  content: ReasoningTextContent[];
};

/** Content parts that the patched content-part events may carry. */
export type PatchedResponseContentPart = ResponseOutputText | ResponseOutputRefusal | ReasoningTextContent;

/** Fields shared by the hand-written events below: ordering plus the addressed content part. */
interface PatchedContentPartLocator {
  sequence_number: number;
  item_id: string;
  output_index: number;
  content_index: number;
}

// Reasoning events use the "response.reasoning.*" names rather than the SDK's "response.reasoning_text.*"
interface PatchedResponseReasoningDeltaEvent extends PatchedContentPartLocator {
  type: "response.reasoning.delta";
  delta: string;
}

interface PatchedResponseReasoningDoneEvent extends PatchedContentPartLocator {
  type: "response.reasoning.done";
  text: string;
}

// Content-part events whose `part` may also be a reasoning text part
interface PatchedResponseContentPartAddedEvent extends PatchedContentPartLocator {
  type: "response.content_part.added";
  part: PatchedResponseContentPart;
}

interface PatchedResponseContentPartDoneEvent extends PatchedContentPartLocator {
  type: "response.content_part.done";
  part: PatchedResponseContentPart;
}

/** SDK text delta event with a `logprobs` array declared on it. */
export interface PatchedResponseOutputTextDeltaEvent extends ResponseTextDeltaEvent {
  logprobs: unknown[];
}

/** SDK text done event with a `logprobs` array declared on it. */
export interface PatchedResponseOutputTextDoneEvent extends ResponseTextDoneEvent {
  logprobs: unknown[];
}

/** Every stream event this module knows about: the SDK's own plus the patched variants. */
export type PatchedResponseStreamEvent =
  | OpenAIResponseStreamEvent
  | PatchedResponseReasoningDeltaEvent
  | PatchedResponseReasoningDoneEvent
  | PatchedResponseContentPartAddedEvent
  | PatchedResponseContentPartDoneEvent
  | PatchedResponseOutputTextDeltaEvent
  | PatchedResponseOutputTextDoneEvent;

/**
 * Chat-completion chunk delta with two optional reasoning text fields.
 * NOTE(review): presumably different upstream providers use one field name or the other — confirm against callers.
 */
export type PatchedDeltaWithReasoning = ChatCompletionChunk.Choice.Delta & {
  reasoning?: string;
  reasoning_content?: string;
};
src/routes/health.ts ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import type { Request, Response } from "express";
2
+
3
/**
 * Health-check handler: answers every request with the body "OK".
 * The request object is not inspected.
 */
export function getHealth(_req: Request, res: Response): void {
  res.send("OK");
}
src/routes/index.ts ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ export { postCreateResponse } from "./responses.js";
2
+ export { getLandingPageHtml } from "./landingPageHtml.js";
3
+ export { getHealth } from "./health.js";
src/routes/landingPageHtml.ts ADDED
@@ -0,0 +1,724 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { Request, Response } from "express";
2
+
3
+ export function getLandingPageHtml(req: Request, res: Response): void {
4
+ const host = req.get("host");
5
+ const protocol = host && host.endsWith(".hf.space") ? "https" : req.protocol;
6
+ const baseUrl = `${protocol}://${host}/v1`;
7
+ res.setHeader("Content-Type", "text/html; charset=utf-8");
8
+ res.send(`
9
+ <!DOCTYPE html>
10
+ <html lang="en">
11
+ <head>
12
+ <meta charset="UTF-8">
13
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
14
+ <title>responses.js – OpenAI-compatible Responses API</title>
15
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap" rel="stylesheet">
16
+ <style>
17
+ :root {
18
+ --primary: #2563eb;
19
+ --primary-dark: #1e40af;
20
+ --accent: #fbbf24;
21
+ --bg: #f8fafc;
22
+ --card-bg: #fff;
23
+ --border: #e5e7eb;
24
+ --text: #1e293b;
25
+ --muted: #64748b;
26
+ --radius: 14px;
27
+ --shadow: 0 4px 24px #0002;
28
+ }
29
+ html, body {
30
+ height: 100%;
31
+ max-width: 100vw;
32
+ overflow-x: hidden;
33
+ }
34
+ body {
35
+ font-family: 'Inter', Arial, sans-serif;
36
+ background: var(--bg);
37
+ color: var(--text);
38
+ margin: 0;
39
+ min-height: 100vh;
40
+ display: flex;
41
+ flex-direction: column;
42
+ width: 100%;
43
+ box-sizing: border-box;
44
+ }
45
+ .header-inner, main, .hero, .api-endpoint-box, .features, .feature-card, .examples-section, .more-info-footer {
46
+ width: 100%;
47
+ box-sizing: border-box;
48
+ }
49
+ .sticky-header {
50
+ position: sticky;
51
+ top: 0;
52
+ z-index: 100;
53
+ background: linear-gradient(90deg, var(--primary) 0%, #60a5fa 100%);
54
+ color: #fff;
55
+ box-shadow: 0 2px 12px #0001;
56
+ width: 100%;
57
+ }
58
+ .header-inner {
59
+ max-width: 1100px;
60
+ margin: 0 auto;
61
+ display: flex;
62
+ align-items: center;
63
+ justify-content: space-between;
64
+ padding: 1.5rem 1.5rem 1.2rem 1.5rem;
65
+ width: 100%;
66
+ box-sizing: border-box;
67
+ }
68
+ .header-title {
69
+ display: flex;
70
+ align-items: center;
71
+ gap: 0.8rem;
72
+ min-width: 0;
73
+ }
74
+ .header-title svg {
75
+ height: 2.2rem;
76
+ width: 2.2rem;
77
+ display: block;
78
+ }
79
+ .header-title h1 {
80
+ font-size: 2.1rem;
81
+ font-weight: 700;
82
+ margin: 0;
83
+ letter-spacing: -1px;
84
+ white-space: pre-line;
85
+ word-break: break-word;
86
+ overflow-wrap: anywhere;
87
+ }
88
+ .github-btn {
89
+ background: #fff2;
90
+ color: #fff;
91
+ border: 1.5px solid #fff4;
92
+ border-radius: 8px;
93
+ padding: 0.6em 1.3em;
94
+ font-weight: 600;
95
+ font-size: 1.05em;
96
+ text-decoration: none;
97
+ display: flex;
98
+ align-items: center;
99
+ gap: 0.5em;
100
+ transition: background 0.2s, color 0.2s;
101
+ min-width: 0;
102
+ }
103
+ .github-btn:hover {
104
+ background: #fff;
105
+ color: var(--primary-dark);
106
+ }
107
+ main {
108
+ flex: 1;
109
+ max-width: 900px;
110
+ margin: 0 auto;
111
+ padding: 2.5rem 1.2rem 1.5rem 1.2rem;
112
+ display: flex;
113
+ flex-direction: column;
114
+ gap: 2.5rem;
115
+ width: 100%;
116
+ box-sizing: border-box;
117
+ }
118
+ .hero {
119
+ background: linear-gradient(120deg, #dbeafe 0%, #f0fdf4 100%);
120
+ border-radius: var(--radius);
121
+ box-shadow: var(--shadow);
122
+ padding: 2.5rem 2rem 2rem 2rem;
123
+ display: flex;
124
+ flex-direction: column;
125
+ align-items: center;
126
+ text-align: center;
127
+ position: relative;
128
+ overflow: hidden;
129
+ width: 100%;
130
+ box-sizing: border-box;
131
+ }
132
+ .hero h2 {
133
+ font-size: 2rem;
134
+ font-weight: 700;
135
+ margin: 0 0 0.7rem 0;
136
+ color: var(--primary-dark);
137
+ word-break: break-word;
138
+ overflow-wrap: anywhere;
139
+ }
140
+ .hero p {
141
+ font-size: 1.18rem;
142
+ color: var(--muted);
143
+ margin: 0 0 1.5rem 0;
144
+ word-break: break-word;
145
+ overflow-wrap: anywhere;
146
+ }
147
+ .api-endpoint-box {
148
+ background: #fff;
149
+ border: 2px solid var(--primary);
150
+ border-radius: 12px;
151
+ padding: 1.3rem 1.2rem 1.3rem 1.2rem;
152
+ margin: 1.5rem 0 1.5rem 0;
153
+ text-align: center;
154
+ font-size: 1.18rem;
155
+ box-shadow: 0 2px 8px #174ea610;
156
+ position: relative;
157
+ display: flex;
158
+ flex-direction: column;
159
+ align-items: center;
160
+ gap: 0.5em;
161
+ width: 100%;
162
+ box-sizing: border-box;
163
+ }
164
+ .api-endpoint-url {
165
+ display: inline-block;
166
+ background: #f1f5f9;
167
+ color: var(--primary-dark);
168
+ font-family: 'Fira Mono', 'Consolas', monospace;
169
+ font-size: 1.15em;
170
+ padding: 0.3em 0.7em;
171
+ border-radius: 6px;
172
+ border: 1px solid #cbd5e1;
173
+ margin: 0.5em 0 0.5em 0;
174
+ word-break: break-all;
175
+ overflow-wrap: anywhere;
176
+ max-width: 100%;
177
+ }
178
+ .copy-endpoint-btn {
179
+ position: absolute;
180
+ top: 16px;
181
+ right: 16px;
182
+ background: var(--primary);
183
+ color: #fff;
184
+ border: none;
185
+ border-radius: 4px;
186
+ padding: 0.3em 1em;
187
+ font-size: 1em;
188
+ cursor: pointer;
189
+ opacity: 0.85;
190
+ transition: background 0.2s, opacity 0.2s;
191
+ z-index: 2;
192
+ min-width: 0;
193
+ }
194
+ .copy-endpoint-btn:hover { background: var(--primary-dark); opacity: 1; }
195
+ .copy-endpoint-btn.copied { background: #388e3c; color: #fff; opacity: 1; }
196
+ .cta {
197
+ margin: 1.5rem auto 0 auto;
198
+ background: var(--primary);
199
+ color: #fff;
200
+ text-decoration: none;
201
+ font-weight: bold;
202
+ padding: 1rem 2.5rem;
203
+ border-radius: 8px;
204
+ font-size: 1.2rem;
205
+ transition: background 0.2s;
206
+ box-shadow: 0 2px 8px #2563eb20;
207
+ display: inline-block;
208
+ max-width: 100%;
209
+ }
210
+ .cta:hover { background: var(--primary-dark); }
211
+ .features {
212
+ display: grid;
213
+ grid-template-columns: repeat(2, 1fr); /* 2 columns for 2x2 grid */
214
+ gap: 1.5rem;
215
+ margin: 2rem 0 0 0;
216
+ width: 100%;
217
+ box-sizing: border-box;
218
+ }
219
+ .feature-card {
220
+ background: var(--card-bg);
221
+ border-radius: var(--radius);
222
+ box-shadow: 0 1px 6px #0001;
223
+ padding: 1.2rem 1.3rem;
224
+ border: 1.5px solid var(--border);
225
+ display: flex;
226
+ flex-direction: column;
227
+ align-items: flex-start;
228
+ gap: 0.5em;
229
+ min-height: 120px;
230
+ position: relative;
231
+ transition: box-shadow 0.2s, border 0.2s;
232
+ width: 100%;
233
+ box-sizing: border-box;
234
+ }
235
+ .feature-card:hover {
236
+ box-shadow: 0 4px 16px #2563eb22;
237
+ border: 1.5px solid var(--primary);
238
+ }
239
+ .feature-card b {
240
+ font-size: 1.08em;
241
+ color: var(--primary-dark);
242
+ }
243
+ .examples-section {
244
+ margin-top: 2.5rem;
245
+ width: 100%;
246
+ box-sizing: border-box;
247
+ }
248
+ .examples-tabs {
249
+ display: flex;
250
+ gap: 0.5em;
251
+ margin-bottom: 1.2em;
252
+ border-bottom: 2px solid #e5e7eb;
253
+ width: 100%;
254
+ box-sizing: border-box;
255
+ min-width: 0;
256
+ }
257
+ .examples-tab {
258
+ background: none;
259
+ border: none;
260
+ font-size: 1.08em;
261
+ font-weight: 600;
262
+ color: var(--muted);
263
+ padding: 0.7em 1.2em 0.5em 1.2em;
264
+ cursor: pointer;
265
+ border-radius: 8px 8px 0 0;
266
+ transition: color 0.2s, background 0.2s;
267
+ min-width: 0;
268
+ }
269
+ .examples-tab.active {
270
+ color: var(--primary-dark);
271
+ background: #fff;
272
+ border-bottom: 2px solid var(--primary);
273
+ }
274
+ .example-panel { display: none; }
275
+ .example-panel.active { display: block; }
276
+ pre {
277
+ background: #f4f4f8;
278
+ border-radius: 8px;
279
+ padding: 1.1rem 1rem 1.1rem 1rem;
280
+ overflow-x: auto;
281
+ font-size: 0.98rem;
282
+ position: relative;
283
+ margin: 0.5em 0 0.5em 0;
284
+ width: 100%;
285
+ box-sizing: border-box;
286
+ max-width: 100vw;
287
+ }
288
+ code {
289
+ font-family: 'Fira Mono', 'Consolas', monospace;
290
+ font-size: 1em;
291
+ background: none;
292
+ color: #222;
293
+ word-break: break-word;
294
+ overflow-wrap: anywhere;
295
+ max-width: 100%;
296
+ display: block;
297
+ }
298
+ .copy-btn {
299
+ position: absolute;
300
+ top: 10px;
301
+ right: 10px;
302
+ background: #e0e4ea;
303
+ border: none;
304
+ border-radius: 4px;
305
+ padding: 0.2em 0.7em;
306
+ font-size: 0.95em;
307
+ color: var(--primary-dark);
308
+ cursor: pointer;
309
+ opacity: 0.7;
310
+ transition: opacity 0.2s, background 0.2s;
311
+ z-index: 2;
312
+ min-width: 0;
313
+ }
314
+ .copy-btn:hover { opacity: 1; background: #c9d3e6; }
315
+ .copy-btn.copied { color: #388e3c; background: #d0f5dd; opacity: 1; }
316
+ .more-info-footer {
317
+ background: #f1f5f9;
318
+ border-top: 1.5px solid #e5e7eb;
319
+ margin-top: 3rem;
320
+ padding: 2rem 1rem 1.5rem 1rem;
321
+ border-radius: 0 0 var(--radius) var(--radius);
322
+ text-align: center;
323
+ color: var(--muted);
324
+ font-size: 1.08em;
325
+ width: 100%;
326
+ box-sizing: border-box;
327
+ }
328
+ .more-info-footer ul {
329
+ list-style: none;
330
+ padding: 0;
331
+ margin: 0.5em 0 0 0;
332
+ display: flex;
333
+ flex-wrap: wrap;
334
+ gap: 1.5em;
335
+ justify-content: center;
336
+ width: 100%;
337
+ box-sizing: border-box;
338
+ }
339
+ .more-info-footer a {
340
+ color: var(--primary-dark);
341
+ text-decoration: none;
342
+ font-weight: 500;
343
+ transition: color 0.2s;
344
+ }
345
+ .more-info-footer a:hover { color: var(--primary); }
346
+ @media (max-width: 700px) {
347
+ .header-inner {
348
+ flex-direction: row;
349
+ align-items: center;
350
+ gap: 1.2em;
351
+ width: 100%;
352
+ flex-wrap: nowrap;
353
+ }
354
+ .header-title { flex-shrink: 1; min-width: 0; }
355
+ .github-btn { margin-left: auto; }
356
+ .header-title h1 { font-size: 1.5rem; }
357
+ main { padding: 1.2rem; }
358
+ .hero { padding: 1.2rem 0.7rem 1.2rem 0.7rem; }
359
+ .features { grid-template-columns: 1fr; gap: 1.1rem; }
360
+ .feature-card { min-height: unset; font-size: 0.98em; }
361
+ .api-endpoint-box { padding: 1rem 0.7rem; font-size: 1em; }
362
+ .api-endpoint-url { font-size: 1em; }
363
+ .cta { padding: 0.8rem 1.5rem; font-size: 1rem; }
364
+ .examples-section { margin-top: 1.5rem; }
365
+ .examples-tabs { flex-wrap: wrap; gap: 0.2em; }
366
+ .examples-tab { font-size: 1em; padding: 0.5em 0.7em 0.4em 0.7em; }
367
+ pre { font-size: 0.92rem; padding: 0.8rem 0.5rem; }
368
+ .copy-btn { top: 6px; right: 6px; font-size: 0.9em; padding: 0.15em 0.5em; }
369
+ .api-endpoint-box > div[style*="font-size"] {
370
+ font-size: 0.95em !important;
371
+ white-space: normal;
372
+ word-break: break-word;
373
+ overflow-wrap: anywhere;
374
+ }
375
+ }
376
+ @media (max-width: 500px) {
377
+ .header-inner { padding: 1rem 0.5rem 1rem 0.5rem; }
378
+ .header-title h1 { font-size: 1.1rem; }
379
+ .header-title svg, .header-title img { height: 2.2rem !important; width: 2.2rem !important; }
380
+ main { padding: 0.5rem; }
381
+ .hero { padding: 0.7rem 0.2rem 0.7rem 0.2rem; }
382
+ .features { gap: 0.7rem; }
383
+ .feature-card { padding: 0.7rem 0.5rem; font-size: 0.92em; }
384
+ .api-endpoint-box { padding: 0.7rem 0.3rem; font-size: 0.95em; }
385
+ .api-endpoint-url { font-size: 0.95em; }
386
+ .cta { padding: 0.6rem 1rem; font-size: 0.95rem; }
387
+ .examples-section { margin-top: 1rem; }
388
+ .examples-tabs { gap: 0.1em; }
389
+ .examples-tab { font-size: 0.95em; padding: 0.4em 0.5em 0.3em 0.5em; }
390
+ pre { font-size: 0.88rem; padding: 0.6rem 0.2rem; }
391
+ .copy-btn { top: 4px; right: 4px; font-size: 0.85em; padding: 0.1em 0.3em; }
392
+ .more-info-footer { font-size: 0.98em; padding: 1rem 0.2rem 1rem 0.2rem; }
393
+ .api-endpoint-box > div[style*="font-size"] {
394
+ font-size: 0.88em !important;
395
+ }
396
+ }
397
+ /* Make code blocks and tabs horizontally scrollable on small screens */
398
+ @media (max-width: 700px) {
399
+ .examples-tabs { overflow-x: auto; }
400
+ pre { overflow-x: auto; }
401
+ }
402
+ </style>
403
+ <!-- Prism.js for syntax highlighting -->
404
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/prismjs@1.29.0/themes/prism.min.css">
405
+ <script src="https://cdn.jsdelivr.net/npm/prismjs@1.29.0/prism.min.js"></script>
406
+ <script src="https://cdn.jsdelivr.net/npm/prismjs@1.29.0/components/prism-javascript.min.js"></script>
407
+ <script src="https://cdn.jsdelivr.net/npm/prismjs@1.29.0/components/prism-python.min.js"></script>
408
+ <script>
409
+ function copyCode(btn) {
410
+ const pre = btn.parentElement;
411
+ const code = pre.querySelector('code');
412
+ if (!code) return;
413
+ const text = code.innerText;
414
+ navigator.clipboard.writeText(text).then(() => {
415
+ btn.textContent = 'Copied!';
416
+ btn.classList.add('copied');
417
+ setTimeout(() => {
418
+ btn.textContent = 'Copy';
419
+ btn.classList.remove('copied');
420
+ }, 1200);
421
+ });
422
+ }
423
+ function copyEndpointUrl(btn) {
424
+ const url = document.getElementById('api-endpoint-url').innerText;
425
+ navigator.clipboard.writeText(url).then(() => {
426
+ btn.textContent = 'Copied!';
427
+ btn.classList.add('copied');
428
+ setTimeout(() => {
429
+ btn.textContent = 'Copy';
430
+ btn.classList.remove('copied');
431
+ }, 1200);
432
+ });
433
+ }
434
+ // Tabs for examples
435
+ function showExampleTab(idx) {
436
+ document.querySelectorAll('.examples-tab').forEach((tab, i) => {
437
+ tab.classList.toggle('active', i === idx);
438
+ });
439
+ document.querySelectorAll('.example-panel').forEach((panel, i) => {
440
+ panel.classList.toggle('active', i === idx);
441
+ });
442
+ }
443
+ window.addEventListener('DOMContentLoaded', function() {
444
+ showExampleTab(0);
445
+ document.querySelectorAll('.examples-tab').forEach((tab, i) => {
446
+ tab.addEventListener('click', () => showExampleTab(i));
447
+ });
448
+ });
449
+ </script>
450
+ </head>
451
+ <body>
452
+ <header class="sticky-header">
453
+ <div class="header-inner">
454
+ <div class="header-title">
455
+ <img src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg" alt="Hugging Face Logo" style="height:4.0rem;width:4.0rem;display:block;"/>
456
+ <h1>open responses</h1>
457
+ </div>
458
+ <a href="https://github.com/huggingface/responses.js" target="_blank" aria-label="GitHub Repository" class="github-btn">
459
+ <svg height="20" width="20" viewBox="0 0 16 16" fill="currentColor" style="display: block;"><path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.19 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z"/></svg>
460
+ GitHub
461
+ </a>
462
+ </div>
463
+ </header>
464
+ <main>
465
+ <section class="hero">
466
+ <h2>Open Responses Compatible API</h2>
467
+ <p style="text-wrap: balance;">
468
+ <b>Open Responses</b> is an open-source specification and ecosystem for building multi-provider, interoperable LLM interfaces based on the OpenAI Responses API.
469
+ </p>
470
+ <div class="api-endpoint-box">
471
+ <button class="copy-endpoint-btn" onclick="copyEndpointUrl(this)">Copy</button>
472
+ <div><b>API Endpoint:</b></div>
473
+ <span class="api-endpoint-url" id="api-endpoint-url">${baseUrl}</span>
474
+ <div style="font-size:0.98em; color:#333; margin-top:0.5em;">Get started by sending requests to this endpoint</div>
475
+ </div>
476
+ <a class="cta" href="https://github.com/huggingface/responses.js" target="_blank">View on GitHub</a>
477
+ </section>
478
+ <section>
479
+ <div class="features">
480
+ <div class="feature-card">
481
+ <b>Open Responses Compatible</b><br>Connect to Hugging Face Inference Providers with <a href="https://openresponses.org" target="_blank">Open Responses</a>
482
+ </div>
483
+ <div class="feature-card">
484
+ <b>Provider Agnostic</b><br>Configurable to work with any Chat Completion API back-end (local or remote).
485
+ </div>
486
+ <div class="feature-card">
487
+ <b>Multi-modal, streaming, structured output</b><br>Supports text and image inputs, streaming output, JSON schema, and function calling.
488
+ </div>
489
+ <div class="feature-card">
490
+ <b>Remote MCP</b><br>Server-side MCP tool execution.
491
+ </div>
492
+ </div>
493
+ </section>
494
+ <section class="examples-section">
495
+ <h2 style="color:var(--primary-dark);margin-bottom:1.2em;">Examples</h2>
496
+ <div class="examples-tabs">
497
+ <button class="examples-tab active" type="button">Text</button>
498
+ <button class="examples-tab" type="button">Text + Image Input</button>
499
+ <button class="examples-tab" type="button">Multi-turn</button>
500
+ <button class="examples-tab" type="button">Streaming</button>
501
+ <button class="examples-tab" type="button">Function Calling</button>
502
+ <button class="examples-tab" type="button">Structured Output</button>
503
+ <button class="examples-tab" type="button">MCP</button>
504
+ <button class="examples-tab" type="button">Reasoning</button>
505
+ </div>
506
+ <div class="example-panel active">
507
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-python">from openai import OpenAI
508
+ import os
509
+
510
+ client = OpenAI(
511
+ base_url="${baseUrl}",
512
+ api_key=os.getenv("HF_TOKEN"), # visit https://huggingface.co/settings/tokens
513
+ )
514
+
515
+ response = client.responses.create(
516
+ model="moonshotai/Kimi-K2-Instruct:groq",
517
+ instructions="You are a helpful assistant.",
518
+ input="Tell me a three sentence bedtime story about a unicorn.",
519
+ )
520
+
521
+ print(response)
522
+ print(response.output_text)</code></pre>
523
+ </div>
524
+ <div class="example-panel">
525
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-python">from openai import OpenAI
526
+ import os
527
+
528
+ client = OpenAI(
529
+ base_url="${baseUrl}",
530
+ api_key=os.getenv("HF_TOKEN"), # visit https://huggingface.co/settings/tokens
531
+ )
532
+
533
+ response = client.responses.create(
534
+ model="Qwen/Qwen2.5-VL-7B-Instruct",
535
+ input=[
536
+ {
537
+ "role": "user",
538
+ "content": [
539
+ {"type": "input_text", "text": "what is in this image?"},
540
+ {
541
+ "type": "input_image",
542
+ "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
543
+ },
544
+ ],
545
+ }
546
+ ],
547
+ )
548
+
549
+ print(response)
550
+ print(response.output_text)</code></pre>
551
+ </div>
552
+ <div class="example-panel">
553
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-python">from openai import OpenAI
554
+ import os
555
+
556
+ client = OpenAI(
557
+ base_url="${baseUrl}",
558
+ api_key=os.getenv("HF_TOKEN"), # visit https://huggingface.co/settings/tokens
559
+ )
560
+
561
+ response = client.responses.create(
562
+ model="moonshotai/Kimi-K2-Instruct:groq",
563
+ input=[
564
+ {
565
+ "role": "developer",
566
+ "content": "Talk like a pirate.",
567
+ },
568
+ {
569
+ "role": "user",
570
+ "content": "Are semicolons optional in JavaScript?",
571
+ },
572
+ ],
573
+ )
574
+
575
+ print(response)
576
+ print(response.output_text)</code></pre>
577
+ </div>
578
+ <div class="example-panel">
579
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-python">from openai import OpenAI
580
+ import os
581
+
582
+ client = OpenAI(
583
+ base_url="${baseUrl}",
584
+ api_key=os.getenv("HF_TOKEN"), # visit https://huggingface.co/settings/tokens
585
+ )
586
+
587
+ stream = client.responses.create(
588
+ model="moonshotai/Kimi-K2-Instruct:groq",
589
+ input=[
590
+ {
591
+ "role": "user",
592
+ "content": "Say 'double bubble bath' ten times fast.",
593
+ },
594
+ ],
595
+ stream=True,
596
+ )
597
+
598
+ for event in stream:
599
+ print(event)</code></pre>
600
+ </div>
601
+ <div class="example-panel">
602
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-python">from openai import OpenAI
603
+ import os
604
+
605
+ client = OpenAI(
606
+ base_url="${baseUrl}",
607
+ api_key=os.getenv("HF_TOKEN"), # visit https://huggingface.co/settings/tokens
608
+ )
609
+
610
+ tools = [
611
+ {
612
+ "type": "function",
613
+ "name": "get_current_weather",
614
+ "description": "Get the current weather in a given location",
615
+ "parameters": {
616
+ "type": "object",
617
+ "properties": {
618
+ "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"},
619
+ "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
620
+ },
621
+ "required": ["location", "unit"],
622
+ },
623
+ }
624
+ ]
625
+
626
+ response = client.responses.create(
627
+ model="moonshotai/Kimi-K2-Instruct:groq",
628
+ tools=tools,
629
+ input="What is the weather like in Boston today?",
630
+ tool_choice="auto",
631
+ )
632
+
633
+ print(response)</code></pre>
634
+ </div>
635
+ <div class="example-panel">
636
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-python">from openai import OpenAI
637
+ from pydantic import BaseModel
638
+ import os
639
+
640
+ client = OpenAI(
641
+ base_url="${baseUrl}",
642
+ api_key=os.getenv("HF_TOKEN"), # visit https://huggingface.co/settings/tokens
643
+ )
644
+
645
+ class CalendarEvent(BaseModel):
646
+ name: str
647
+ date: str
648
+ participants: list[str]
649
+
650
+ response = client.responses.parse(
651
+ model="moonshotai/Kimi-K2-Instruct:groq",
652
+ input=[
653
+ {"role": "system", "content": "Extract the event information."},
654
+ {
655
+ "role": "user",
656
+ "content": "Alice and Bob are going to a science fair on Friday.",
657
+ },
658
+ ],
659
+ text_format=CalendarEvent,
660
+ )
661
+
662
+ print(response.output_parsed)</code></pre>
663
+ </div>
664
+ <div class="example-panel">
665
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-python">from openai import OpenAI
666
+ import os
667
+
668
+ client = OpenAI(
669
+ base_url="${baseUrl}",
670
+ api_key=os.getenv("HF_TOKEN"), # visit https://huggingface.co/settings/tokens
671
+ )
672
+
673
+ response = client.responses.create(
674
+ model="moonshotai/Kimi-K2-Instruct:groq",
675
+ input="how does tiktoken work?",
676
+ tools=[
677
+ {
678
+ "type": "mcp",
679
+ "server_label": "gitmcp",
680
+ "server_url": "https://gitmcp.io/openai/tiktoken",
681
+ "allowed_tools": ["search_tiktoken_documentation", "fetch_tiktoken_documentation"],
682
+ "require_approval": "never",
683
+ },
684
+ ],
685
+ )
686
+
687
+ for output in response.output:
688
+ print(output)</code></pre>
689
+ </div>
690
+ <div class="example-panel">
691
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-python">from openai import OpenAI
692
+ import os
693
+
694
+ client = OpenAI(
695
+ base_url="${baseUrl}",
696
+ api_key=os.getenv("HF_TOKEN"), # visit https://huggingface.co/settings/tokens
697
+ )
698
+
699
+ response = client.responses.create(
700
+ model="deepseek-ai/DeepSeek-R1",
701
+ instructions="You are a helpful assistant.",
702
+ input="Say hello to the world.",
703
+ reasoning={
704
+ "effort": "low",
705
+ }
706
+ )
707
+
708
+ for index, item in enumerate(response.output):
709
+ print(f"Output #{index}: {item.type}", item.content)</code></pre>
710
+ </div>
711
+ </section>
712
+ <footer class="more-info-footer">
713
+ <div style="font-weight:600; color:var(--primary-dark); font-size:1.13em; margin-bottom:0.5em;">More Info</div>
714
+ <ul>
715
+ <li><a href="https://github.com/huggingface/responses.js" target="_blank">GitHub Repository</a></li>
716
+ <li><a href="https://platform.openai.com/docs/api-reference/responses" target="_blank">OpenAI Responses API Docs</a></li>
717
+ <li><a href="https://huggingface.co/docs/inference-providers/index" target="_blank">Hugging Face Inference Providers</a></li>
718
+ </ul>
719
+ </footer>
720
+ </main>
721
+ </body>
722
+ </html>
723
+ `);
724
+ }
src/routes/responses.ts ADDED
@@ -0,0 +1,1161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { type Response as ExpressResponse } from "express";
2
+ import { type ValidatedRequest } from "../middleware/validation.js";
3
+ import type { CreateResponseParams, McpServerParams, McpApprovalRequestParams } from "../schemas.js";
4
+ import { generateUniqueId } from "../lib/generateUniqueId.js";
5
+ import { buildProviderScopedModel } from "../lib/buildProviderScopedModel.js";
6
+ import { OpenAI } from "openai";
7
+ import type {
8
+ Response,
9
+ ResponseContentPartAddedEvent,
10
+ ResponseOutputMessage,
11
+ ResponseFunctionToolCall,
12
+ ResponseOutputItem,
13
+ ResponseTextConfig,
14
+ } from "openai/resources/responses/responses";
15
+ import type {
16
+ PatchedResponseContentPart,
17
+ PatchedResponseStreamEvent,
18
+ ReasoningTextContent,
19
+ PatchedDeltaWithReasoning,
20
+ } from "../openai_patch";
21
+ import type {
22
+ ChatCompletionCreateParamsStreaming,
23
+ ChatCompletionMessageParam,
24
+ ChatCompletionTool,
25
+ ChatCompletion,
26
+ } from "openai/resources/chat/completions.js";
27
+ import type { FunctionParameters } from "openai/resources/shared.js";
28
+ import { callMcpTool, connectMcpServer } from "../mcp.js";
29
+
30
/**
 * Error subclass whose `name` is "StreamingError", so it can be told apart
 * from generic `Error`s (via `instanceof` or by inspecting `name`).
 */
class StreamingError extends Error {
	// Own property set right after `super()` returns, exactly like an assignment in the constructor body.
	public override name = "StreamingError";

	// Explicit constructor kept so `message` stays a required string argument.
	public constructor(message: string) {
		super(message);
	}
}
36
+
37
/**
 * Extra fields carried by the response object built in this file, layered on top of
 * the SDK's `Response` type (see `IncompleteResponse`).
 */
type SpecAugmentedFields = {
	// Fields that always hold a concrete value.
	frequency_penalty: number;
	presence_penalty: number;
	store: boolean;
	top_logprobs: number;

	// Fields that may be `null`.
	completed_at: number | null;
	max_tool_calls: number | null;
	prompt_cache_key: string | null;
	prompt_cache_retention: "in_memory" | "24h" | null;
	safety_identifier: string | null;
	user: string | null;
};

/**
 * The SDK `Response` shape with its own `user` field replaced by the
 * (nullable) `user` of `SpecAugmentedFields`, plus the other augmented fields.
 */
type IncompleteResponse = Omit<Response, "user"> & SpecAugmentedFields;

// NOTE(review): presumably a stand-in `sequence_number` for events whose real number
// is assigned later by the top-level stream — confirm at the usage sites.
const SEQUENCE_NUMBER_PLACEHOLDER = -1;
51
+
52
/**
 * Incoming request headers that must NOT be forwarded; every other header is
 * forwarded by default.
 *
 * The list covers:
 *  - headers describing this specific request/connection (`host`, `content-length`,
 *    `content-type`, `accept`, `accept-encoding`),
 *  - `authorization`, which is handled separately from the forwarded headers,
 *  - hop-by-hop headers, which are only meaningful for a single transport-level
 *    connection (`connection`, `keep-alive`, `te`, `trailer`, `transfer-encoding`,
 *    `upgrade`, and the `proxy-*` family). Forwarding `proxy-authorization` in
 *    particular would leak proxy credentials to a third party.
 *
 * Entries are lower-case; callers must lower-case a header name before calling `.has()`.
 */
const NOT_FORWARDED_HEADERS = new Set([
	"accept",
	"accept-encoding",
	"authorization",
	"connection",
	"content-length",
	"content-type",
	"host",
	"keep-alive",
	"proxy-authenticate",
	"proxy-authorization",
	"proxy-connection",
	"te",
	"trailer",
	"trailers",
	"transfer-encoding",
	"upgrade",
]);
68
+
69
/**
 * Express handler that creates a response.
 *
 * Every request is executed through the same event generator
 * (`runCreateResponseStream`); the `stream` flag of the request body only decides
 * how the events are delivered:
 *  - `stream` truthy: each event is written as a server-sent event (`data: <json>\n\n`)
 *    and the HTTP response is ended once the generator is exhausted.
 *  - otherwise: events are consumed silently and the `response` payload of the terminal
 *    `response.completed` / `response.failed` event is sent as a single JSON body.
 *
 * @param req - Validated request whose body holds the create-response parameters.
 * @param res - Express response used to deliver the result.
 */
export const postCreateResponse = async (
	req: ValidatedRequest<CreateResponseParams>,
	res: ExpressResponse
): Promise<void> => {
	if (process.env.DEBUG_TOOL_STREAM === "1") {
		console.debug("[responses.js] incoming request", JSON.stringify(req.body, null, 2));
	}
	// To avoid duplicated code, we run all requests as stream.
	const events = runCreateResponseStream(req, res);

	// Then we return in the correct format depending on the user 'stream' flag.
	if (req.body.stream) {
		res.setHeader("Content-Type", "text/event-stream");
		res.setHeader("Connection", "keep-alive");
		console.debug("Stream request");
		for await (const event of events) {
			console.debug(`Event #${event.sequence_number}: ${event.type}`);
			res.write(`data: ${JSON.stringify(event)}\n\n`);
		}
		res.end();
		return;
	}

	console.debug("Non-stream request");
	for await (const event of events) {
		if (event.type === "response.completed" || event.type === "response.failed") {
			console.debug(event.type);
			// The inner pipeline receives `res` and may already have answered on its own
			// (e.g. a 401 for a missing API key). Sending a second body would throw
			// ERR_HTTP_HEADERS_SENT and surface as an unhandled rejection, so only
			// reply when nothing has been sent yet.
			if (!res.headersSent) {
				res.json(event.response);
			}
		}
	}
};
99
+
100
/**
 * Top-level stream.
 *
 * Owns the response lifecycle and delegates the actual work (MCP list tools, MCP tool
 * calls, LLM call, etc.) to `innerRunStream`:
 *  1. emits `response.created` then `response.in_progress`,
 *  2. re-emits every inner event,
 *  3. emits `response.completed`, or `response.failed` if the inner stream throws.
 *
 * Sequence numbers are assigned here: whatever `sequence_number` an inner event
 * carries is overwritten with the next value of a counter starting at 0.
 *
 * @param req - Validated create-response request.
 * @param res - Express response, passed through to the inner stream.
 */
async function* runCreateResponseStream(
	req: ValidatedRequest<CreateResponseParams>,
	res: ExpressResponse
): AsyncGenerator<PatchedResponseStreamEvent> {
	const body = req.body;
	let nextSequenceNumber = 0;

	// Current time as whole seconds since the Unix epoch.
	const nowInSeconds = (): number => Math.floor(Date.now() / 1000);

	const resolvedModel = buildProviderScopedModel(body.model, body.provider);
	const responseTextConfig: ResponseTextConfig = body.text ?? { format: { type: "text" } };

	// NOTE: The OpenResponses response schema (schema/open_responses.md#response-object) requires these base fields to always be present,
	// so we eagerly hydrate them here even when they would otherwise be null or falsey.
	// Key order is significant for the serialized output, keep it as is.
	const responseObject: IncompleteResponse = {
		background: body.background ?? false,
		completed_at: null,
		created_at: nowInSeconds(),
		error: null,
		frequency_penalty: body.frequency_penalty ?? 0,
		id: generateUniqueId("resp"),
		incomplete_details: null,
		instructions: body.instructions,
		max_output_tokens: body.max_output_tokens,
		max_tool_calls: body.max_tool_calls ?? null,
		metadata: body.metadata,
		model: resolvedModel,
		object: "response",
		output: [],
		output_text: "",
		parallel_tool_calls: body.parallel_tool_calls ?? false,
		presence_penalty: body.presence_penalty ?? 0,
		previous_response_id: body.previous_response_id ?? null,
		prompt_cache_key: body.prompt_cache_key ?? null,
		prompt_cache_retention: body.prompt_cache_retention ?? null,
		reasoning: body.reasoning ?? null,
		safety_identifier: body.safety_identifier ?? null,
		service_tier: body.service_tier ?? "auto",
		status: "in_progress",
		store: body.store ?? false,
		text: responseTextConfig,
		tool_choice: body.tool_choice ?? "auto",
		tools: body.tools ?? [],
		temperature: body.temperature,
		top_logprobs: body.top_logprobs ?? 0,
		top_p: body.top_p,
		truncation: body.truncation ?? "disabled",
		usage: {
			input_tokens: 0,
			input_tokens_details: { cached_tokens: 0 },
			output_tokens: 0,
			output_tokens_details: { reasoning_tokens: 0 },
			total_tokens: 0,
		},
		user: body.user ?? null,
	};

	// Optional dump of the final response object, enabled with DEBUG_FINAL_RESPONSE=1.
	const dumpFinalResponse = (): void => {
		if (process.env.DEBUG_FINAL_RESPONSE === "1") {
			console.dir({ finalResponse: responseObject }, { depth: null });
		}
	};

	// Lifecycle: created, then in progress. Both events share the same (mutable) response object.
	yield {
		type: "response.created",
		response: responseObject as Response,
		sequence_number: nextSequenceNumber++,
	};
	yield {
		type: "response.in_progress",
		response: responseObject as Response,
		sequence_number: nextSequenceNumber++,
	};

	// Any events (LLM call, MCP call, list tools, etc.)
	try {
		for await (const innerEvent of innerRunStream(req, res, responseObject, resolvedModel)) {
			yield { ...innerEvent, sequence_number: nextSequenceNumber++ };
		}
	} catch (error) {
		// Error event => stop
		console.error("Error in stream:", error);

		// Use the thrown value's own message when it has a string one, otherwise a generic text.
		let message = "An error occurred in stream";
		if (typeof error === "object" && error !== null && "message" in error) {
			const candidate = (error as { message: unknown }).message;
			if (typeof candidate === "string") {
				message = candidate;
			}
		}

		responseObject.status = "failed";
		responseObject.completed_at = nowInSeconds();
		responseObject.error = {
			code: "server_error",
			message,
		};
		dumpFinalResponse();
		yield {
			type: "response.failed",
			response: responseObject as Response,
			sequence_number: nextSequenceNumber++,
		};
		return;
	}

	// Response completed event
	responseObject.status = "completed";
	responseObject.completed_at = nowInSeconds();
	dumpFinalResponse();
	yield {
		type: "response.completed",
		response: responseObject as Response,
		sequence_number: nextSequenceNumber++,
	};
}
218
+
219
/**
 * Drive one streamed response end to end.
 *
 * Steps, in order:
 *  1. Extract the API key from the `Authorization` header (401 + early return if absent).
 *  2. Collect the headers to forward upstream (everything not in NOT_FORWARDED_HEADERS).
 *  3. Build the Chat Completion tool list from "function" tools and from MCP servers
 *     (listing MCP tools over the network unless an `mcp_list_tools` item is already in the input).
 *  4. Convert the Responses-style input into Chat Completion messages.
 *  5. Execute approved MCP calls found in the input that have not been run yet.
 *  6. Call the LLM repeatedly while each turn appends new messages to the payload
 *     (capped at MAX_ITERATIONS turns).
 *
 * @param req - Validated request; `req.body` holds the create-response parameters.
 * @param res - Express response, only used to send the 401 when no API key is present.
 * @param responseObject - Response being built; output items are appended to it by the helpers.
 * @param resolvedModel - Model identifier sent to the upstream Chat Completion endpoint.
 * @yields Stream events produced while listing tools, running MCP calls and streaming LLM turns.
 * @throws Error when `reasoning.summary` is set to anything other than "auto".
 */
async function* innerRunStream(
	req: ValidatedRequest<CreateResponseParams>,
	res: ExpressResponse,
	responseObject: IncompleteResponse,
	resolvedModel: string
): AsyncGenerator<PatchedResponseStreamEvent> {
	// Retrieve API key from headers ("<scheme> <token>" => keep the token)
	const apiKey = req.headers.authorization?.split(" ")[1];
	if (!apiKey) {
		res.status(401).json({
			success: false,
			error: "Unauthorized",
		});
		return;
	}

	// Forward headers (except authorization handled separately)
	const defaultHeaders = Object.fromEntries(
		Object.entries(req.headers).filter(([key]) => !NOT_FORWARDED_HEADERS.has(key.toLowerCase()))
	) as Record<string, string>;

	// Return early if not supported param
	if (req.body.reasoning?.summary && req.body.reasoning?.summary !== "auto") {
		throw new Error(`Not implemented: only 'auto' summary is supported. Got '${req.body.reasoning?.summary}'`);
	}

	// List MCP tools from server (if required) + prepare tools for the LLM
	let tools: ChatCompletionTool[] | undefined = [];
	const mcpToolsMapping: Record<string, McpServerParams> = {};
	if (req.body.tools) {
		for (const tool of req.body.tools) {
			switch (tool.type) {
				case "function":
					tools?.push({
						type: tool.type,
						function: {
							name: tool.name,
							parameters: tool.parameters,
							description: tool.description,
							strict: tool.strict,
						},
					});
					break;
				case "mcp": {
					let mcpListTools: ResponseOutputItem.McpListTools | undefined;

					// If MCP list tools is already in the input, use it
					if (Array.isArray(req.body.input)) {
						for (const item of req.body.input) {
							if (item.type === "mcp_list_tools" && item.server_label === tool.server_label) {
								mcpListTools = item;
								console.debug(`Using MCP list tools from input for server '${tool.server_label}'`);
								break;
							}
						}
					}
					// Otherwise, list tools from MCP server
					if (!mcpListTools) {
						for await (const event of listMcpToolsStream(tool, responseObject)) {
							yield event;
						}
						// listMcpToolsStream pushes its output item last, so it is the tail of the output list
						mcpListTools = responseObject.output.at(-1) as ResponseOutputItem.McpListTools;
					}

					// Only allowed tools are forwarded to the LLM (an empty list means "no restriction")
					const allowedTools = tool.allowed_tools
						? Array.isArray(tool.allowed_tools)
							? tool.allowed_tools
							: tool.allowed_tools.tool_names
						: [];
					if (mcpListTools?.tools) {
						for (const mcpTool of mcpListTools.tools) {
							if (allowedTools.length === 0 || allowedTools.includes(mcpTool.name)) {
								tools?.push({
									type: "function" as const,
									function: {
										name: mcpTool.name,
										parameters: mcpTool.input_schema as FunctionParameters,
										description: mcpTool.description ?? undefined,
									},
								});
							}
							// Every listed tool is mapped to its server, whether or not it was forwarded
							mcpToolsMapping[mcpTool.name] = tool;
						}
						break;
					}
				}
			}
		}
	}
	if (tools.length === 0) {
		tools = undefined;
	}

	// Format input to Chat Completion format
	const messages: ChatCompletionMessageParam[] = req.body.instructions
		? [{ role: "system", content: req.body.instructions }]
		: [];
	if (Array.isArray(req.body.input)) {
		messages.push(
			...req.body.input
				.map((item) => {
					switch (item.type) {
						case "function_call":
							return {
								role: "tool" as const,
								content: item.arguments,
								tool_call_id: item.call_id,
							};
						case "function_call_output":
							return {
								role: "tool" as const,
								content: item.output,
								tool_call_id: item.call_id,
							};
						case "message":
						case undefined:
							if (item.role === "assistant" || item.role === "user" || item.role === "system") {
								const content =
									typeof item.content === "string"
										? item.content
										: item.content
												.map((content) => {
													switch (content.type) {
														case "input_image":
															return {
																type: "image_url" as const,
																image_url: {
																	url: content.image_url,
																},
															};
														case "output_text":
															return content.text
																? {
																		type: "text" as const,
																		text: content.text,
																	}
																: undefined;
														case "refusal":
															return undefined;
														case "input_text":
															return {
																type: "text" as const,
																text: content.text,
															};
													}
												})
												.filter((item) => {
													return item !== undefined;
												});
								// A single text part is flattened to a plain string
								const maybeFlatContent =
									content.length === 1 &&
									typeof content[0] === "object" &&
									"type" in content[0] &&
									content[0].type === "text"
										? content[0].text
										: content;
								return {
									role: item.role,
									content: maybeFlatContent,
								} as ChatCompletionMessageParam;
							}
							return undefined;
						case "mcp_list_tools": {
							return {
								role: "tool" as const,
								// Template literal (backticks) so the server label is actually interpolated
								content: `MCP list tools. Server: '${item.server_label}'.`,
								tool_call_id: "mcp_list_tools",
							};
						}
						case "mcp_call": {
							return {
								role: "tool" as const,
								content: `MCP call (${item.id}). Server: '${item.server_label}'. Tool: '${item.name}'. Arguments: '${item.arguments}'.`,
								tool_call_id: "mcp_call",
							};
						}
						case "mcp_approval_request": {
							return {
								role: "tool" as const,
								content: `MCP approval request (${item.id}). Server: '${item.server_label}'. Tool: '${item.name}'. Arguments: '${item.arguments}'.`,
								tool_call_id: "mcp_approval_request",
							};
						}
						case "mcp_approval_response": {
							return {
								role: "tool" as const,
								content: `MCP approval response (${item.id}). Approved: ${item.approve}. Reason: ${item.reason}.`,
								tool_call_id: "mcp_approval_response",
							};
						}
					}
				})
				.filter(
					// Drop unsupported items and messages whose content ended up empty
					(message): message is NonNullable<typeof message> =>
						message !== undefined &&
						(typeof message.content === "string" || (Array.isArray(message.content) && message.content.length !== 0))
				)
		);
	} else {
		messages.push({ role: "user", content: req.body.input } as const);
	}

	// Prepare payload for the LLM
	const payload: ChatCompletionCreateParamsStreaming = {
		// main params
		model: resolvedModel,
		messages,
		stream: true,
		// options
		max_tokens: req.body.max_output_tokens === null ? undefined : req.body.max_output_tokens,
		response_format: req.body.text?.format
			? req.body.text.format.type === "json_schema"
				? {
						type: "json_schema",
						json_schema: {
							description: req.body.text.format.description,
							name: req.body.text.format.name,
							schema: req.body.text.format.schema,
							strict: false, // req.body.text.format.strict,
						},
					}
				: { type: req.body.text.format.type }
			: undefined,
		reasoning_effort: req.body.reasoning?.effort,
		temperature: req.body.temperature,
		tool_choice:
			typeof req.body.tool_choice === "string"
				? req.body.tool_choice
				: req.body.tool_choice
					? {
							type: "function",
							function: {
								name: req.body.tool_choice.name,
							},
						}
					: undefined,
		tools,
		top_p: req.body.top_p,
	};
	if (process.env.DEBUG_TOOL_STREAM === "1") {
		console.debug("[responses.js] upstream request payload", JSON.stringify(payload, null, 2));
	}

	// Execute approved MCP calls that have not been run yet.
	// Their results are appended to the payload before the LLM loop below starts.
	if (Array.isArray(req.body.input)) {
		for (const item of req.body.input) {
			if (item.type === "mcp_approval_response" && item.approve) {
				const approvalRequest = req.body.input.find(
					(i) => i.type === "mcp_approval_request" && i.id === item.approval_request_id
				) as McpApprovalRequestParams | undefined;
				// The MCP call id is derived from the approval request id by swapping its prefix
				const mcpCallId = "mcp_" + item.approval_request_id.split("_")[1];
				const mcpCall = req.body.input.find((i) => i.type === "mcp_call" && i.id === mcpCallId);
				if (mcpCall) {
					// MCP call for that approval request has already been made, so we can skip it
					continue;
				}

				for await (const event of callApprovedMCPToolStream(
					item.approval_request_id,
					mcpCallId,
					approvalRequest,
					mcpToolsMapping,
					responseObject,
					payload
				)) {
					yield event;
				}
			}
		}
	}

	// Call the LLM until no new message is added to the payload.
	// New messages can be added if the LLM calls an MCP tool that is automatically run.
	// A maximum number of iterations is set to avoid infinite loops.
	let previousMessageCount: number;
	let currentMessageCount = payload.messages.length;
	const MAX_ITERATIONS = 5; // hard-coded
	let iterations = 0;
	do {
		previousMessageCount = currentMessageCount;

		for await (const event of handleOneTurnStream(apiKey, payload, responseObject, mcpToolsMapping, defaultHeaders)) {
			yield event;
		}

		currentMessageCount = payload.messages.length;
		iterations++;
	} while (currentMessageCount > previousMessageCount && iterations < MAX_ITERATIONS);
}
512
+
513
/**
 * List the tools exposed by an MCP server and stream the matching events.
 *
 * An `mcp_list_tools` output item is appended to `responseObject.output` first, then
 * filled with the tools returned by the server.
 *
 * @param tool - MCP server parameters (its `server_label` is used for the item and for error messages).
 * @param responseObject - Response being built; receives the new output item.
 * @yields `output_item.added`, `mcp_list_tools.in_progress`, then either
 *         `mcp_list_tools.completed` + `output_item.done`, or `mcp_list_tools.failed`.
 * @throws Error when connecting to the server or listing its tools fails.
 */
async function* listMcpToolsStream(
	tool: McpServerParams,
	responseObject: IncompleteResponse
): AsyncGenerator<PatchedResponseStreamEvent> {
	const listItem: ResponseOutputItem.McpListTools = {
		id: generateUniqueId("mcpl"),
		type: "mcp_list_tools",
		server_label: tool.server_label,
		tools: [],
	};
	responseObject.output.push(listItem);

	yield {
		type: "response.output_item.added",
		output_index: responseObject.output.length - 1,
		item: listItem,
		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
	};
	yield {
		type: "response.mcp_list_tools.in_progress",
		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
	};

	try {
		const client = await connectMcpServer(tool);
		const listing = await client.listTools();
		yield {
			type: "response.mcp_list_tools.completed",
			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
		};

		// Keep only the fields exposed on the output item
		listItem.tools = listing.tools.map(({ inputSchema, name, annotations, description }) => ({
			input_schema: inputSchema,
			name,
			annotations,
			description,
		}));
		yield {
			type: "response.output_item.done",
			output_index: responseObject.output.length - 1,
			item: listItem,
			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
		};
	} catch (error) {
		const reason = error instanceof Error ? error.message : "Unknown error";
		const errorMessage = `Failed to list tools from MCP server '${tool.server_label}': ${reason}`;
		console.error(errorMessage);
		yield {
			type: "response.mcp_list_tools.failed",
			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
		};
		throw new Error(errorMessage);
	}
}
566
+
567
/**
 * Call the LLM once and stream the response.
 *
 * Text and reasoning deltas are accumulated into an in-progress assistant message;
 * tool-call deltas create `function_call`, `mcp_call` or `mcp_approval_request` output items.
 * After the stream ends, tool calls only present in the final response are emitted through
 * `buildFinalToolCallFallbackEvents`, and the last output item is closed.
 *
 * @param apiKey - API key forwarded to the upstream endpoint.
 * @param payload - Chat Completion request; `closeLastOutputItem` may append messages to it.
 * @param responseObject - Response being built; output items and usage are updated in place.
 * @param mcpToolsMapping - Tool name => MCP server parameters, used to recognise MCP tools.
 * @param defaultHeaders - Headers forwarded to the upstream endpoint.
 * @yields Stream events for every added item, content part, delta and completion.
 * @throws StreamingError when a text delta targets a content part that is not `output_text`.
 */
async function* handleOneTurnStream(
	apiKey: string | undefined,
	payload: ChatCompletionCreateParamsStreaming,
	responseObject: IncompleteResponse,
	mcpToolsMapping: Record<string, McpServerParams>,
	defaultHeaders: Record<string, string>
): AsyncGenerator<PatchedResponseStreamEvent> {
	const client = new OpenAI({
		baseURL: process.env.OPENAI_BASE_URL ?? "https://router.huggingface.co/v1",
		apiKey: apiKey,
		defaultHeaders,
	});
	const stream = await client.chat.completions.create(payload);
	// Usage reported by previous turns; this turn's usage is added on top of it
	const previousInputTokens = responseObject.usage?.input_tokens ?? 0;
	const previousOutputTokens = responseObject.usage?.output_tokens ?? 0;
	const previousTotalTokens = responseObject.usage?.total_tokens ?? 0;
	const observedToolCallIds = new Set<string>();
	const debugToolStream = process.env.DEBUG_TOOL_STREAM === "1";

	for await (const chunk of stream) {
		if (chunk.usage) {
			// Overwrite usage with the latest chunk's usage
			responseObject.usage = {
				input_tokens: previousInputTokens + chunk.usage.prompt_tokens,
				input_tokens_details: { cached_tokens: 0 },
				output_tokens: previousOutputTokens + chunk.usage.completion_tokens,
				output_tokens_details: { reasoning_tokens: 0 },
				total_tokens: previousTotalTokens + chunk.usage.total_tokens,
			};
		}

		if (!chunk.choices[0]) {
			continue;
		}

		const delta = chunk.choices[0].delta as PatchedDeltaWithReasoning;
		if (debugToolStream) {
			const debugPayload = {
				finish_reason: chunk.choices[0].finish_reason,
				delta: chunk.choices[0].delta,
				tool_calls: chunk.choices[0].delta?.tool_calls,
			};
			console.debug("[responses.js] stream delta", JSON.stringify(debugPayload, null, 2));
		}
		const reasoningText = delta.reasoning ?? delta.reasoning_content;

		if (delta.content || reasoningText) {
			const currentOutputItem = responseObject.output.at(-1);
			if (currentOutputItem?.type !== "message" || currentOutputItem?.status !== "in_progress") {
				const outputObject: ResponseOutputMessage = {
					id: generateUniqueId("msg"),
					type: "message",
					role: "assistant",
					status: "in_progress",
					content: [],
				};
				responseObject.output.push(outputObject);

				// Response output item added event
				yield {
					type: "response.output_item.added",
					output_index: responseObject.output.length - 1,
					item: outputObject,
					sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
				};
			}

			const currentOutputMessage = responseObject.output.at(-1) as ResponseOutputMessage;
			const messageContent = currentOutputMessage.content as unknown as PatchedResponseContentPart[];
			const outputIndex = responseObject.output.length - 1;

			if (reasoningText) {
				let reasoningPart = messageContent.at(-1);
				if (!reasoningPart || reasoningPart.type !== "reasoning_text") {
					const part: ReasoningTextContent = {
						type: "reasoning_text",
						text: "",
					};
					messageContent.push(part as unknown as PatchedResponseContentPart);

					yield {
						type: "response.content_part.added",
						item_id: currentOutputMessage.id,
						output_index: outputIndex,
						content_index: messageContent.length - 1,
						part: part as unknown as PatchedResponseContentPart,
						sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
					};
					reasoningPart = part as unknown as PatchedResponseContentPart;
				}

				const reasoningContent = reasoningPart as ReasoningTextContent;
				reasoningContent.text += reasoningText as string;
				yield {
					type: "response.reasoning.delta",
					item_id: currentOutputMessage.id,
					output_index: outputIndex,
					content_index: messageContent.length - 1,
					delta: reasoningText as string,
					sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
				};
			}

			if (delta.content) {
				let textPart = messageContent.at(-1);
				if (!textPart || textPart.type !== "output_text") {
					const newPart: ResponseContentPartAddedEvent["part"] = {
						type: "output_text",
						text: "",
						annotations: [],
						logprobs: [],
					};
					messageContent.push(newPart as unknown as PatchedResponseContentPart);

					yield {
						type: "response.content_part.added",
						item_id: currentOutputMessage.id,
						output_index: outputIndex,
						content_index: messageContent.length - 1,
						part: newPart,
						sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
					};
					textPart = newPart as unknown as PatchedResponseContentPart;
				}

				if (!textPart || textPart.type !== "output_text") {
					throw new StreamingError(`Not implemented: expected output_text content part, got ${textPart?.type}`);
				}

				textPart.text += delta.content as string;
				responseObject.output_text += delta.content as string;
				yield {
					type: "response.output_text.delta",
					item_id: currentOutputMessage.id,
					output_index: outputIndex,
					content_index: messageContent.length - 1,
					delta: delta.content as string,
					logprobs: [],
					sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
				};
			}
		} else if (delta.tool_calls && delta.tool_calls.length > 0) {
			if (debugToolStream) {
				console.debug("[responses.js] delta tool_calls", JSON.stringify(delta.tool_calls, null, 2));
			}
			if (delta.tool_calls.length > 1) {
				console.log("Multiple tool calls are not supported. Only the first one will be processed.");
			}

			if (delta.tool_calls[0].function?.name) {
				// Tool call with a name => new tool call.
				// Close the previous output item only here: closing on argument-continuation chunks
				// would emit premature "done" events and run MCP tools with partial arguments.
				for await (const event of closeLastOutputItem(responseObject, payload, mcpToolsMapping)) {
					yield event;
				}

				const incomingToolCallId = delta.tool_calls[0].id;
				if (incomingToolCallId) {
					observedToolCallIds.add(incomingToolCallId);
				}
				const functionName = delta.tool_calls[0].function.name;
				let newOutputObject:
					| ResponseOutputItem.McpCall
					| ResponseFunctionToolCall
					| ResponseOutputItem.McpApprovalRequest;
				if (functionName in mcpToolsMapping) {
					if (requiresApproval(functionName, mcpToolsMapping)) {
						newOutputObject = {
							id: generateUniqueId("mcpr"),
							type: "mcp_approval_request",
							name: functionName,
							server_label: mcpToolsMapping[functionName].server_label,
							arguments: "",
						};
					} else {
						newOutputObject = {
							type: "mcp_call",
							id: generateUniqueId("mcp"),
							name: functionName,
							server_label: mcpToolsMapping[functionName].server_label,
							arguments: "",
						};
					}
				} else {
					newOutputObject = {
						type: "function_call",
						id: generateUniqueId("fc"),
						call_id: delta.tool_calls[0].id ?? "",
						name: functionName,
						arguments: "",
					};
				}

				// Response output item added event
				responseObject.output.push(newOutputObject);
				yield {
					type: "response.output_item.added",
					output_index: responseObject.output.length - 1,
					item: newOutputObject,
					sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
				};
				if (newOutputObject.type === "mcp_call") {
					yield {
						type: "response.mcp_call.in_progress",
						sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
						item_id: newOutputObject.id,
						output_index: responseObject.output.length - 1,
					};
				}
			}

			if (delta.tool_calls[0].function?.arguments) {
				// Current item is necessarily a tool call
				const currentOutputItem = responseObject.output.at(-1) as
					| ResponseOutputItem.McpCall
					| ResponseFunctionToolCall
					| ResponseOutputItem.McpApprovalRequest;
				currentOutputItem.arguments += delta.tool_calls[0].function.arguments;
				// No argument delta event is emitted for approval requests
				if (currentOutputItem.type === "mcp_call" || currentOutputItem.type === "function_call") {
					yield {
						type:
							currentOutputItem.type === "mcp_call"
								? ("response.mcp_call_arguments.delta" as "response.mcp_call.arguments_delta") // bug workaround (see https://github.com/openai/openai-node/issues/1562)
								: "response.function_call_arguments.delta",
						item_id: currentOutputItem.id as string,
						output_index: responseObject.output.length - 1,
						delta: delta.tool_calls[0].function.arguments,
						sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
					};
				}
			}
		}
	}

	// Emit tool calls that only appear in the final (non-streamed) response, if available
	const fallbackEvents = await buildFinalToolCallFallbackEvents(stream, responseObject, observedToolCallIds, debugToolStream);
	for (const event of fallbackEvents) {
		yield event;
	}

	for await (const event of closeLastOutputItem(responseObject, payload, mcpToolsMapping)) {
		yield event;
	}
}
812
+
813
/**
 * Run an MCP tool call that the user approved and stream the resulting events.
 *
 * The call is recorded as an `mcp_call` output item, executed through `callMcpTool`,
 * and its outcome is appended to `payload.messages` (assistant tool call + tool result)
 * so the next LLM call can see it.
 *
 * @param approval_request_id - Id of the approval request, used in the error message.
 * @param mcpCallId - Id given to the new `mcp_call` output item.
 * @param approvalRequest - Approval request holding the tool name, server label and arguments.
 * @param mcpToolsMapping - Tool name => MCP server parameters.
 * @param responseObject - Response being built; receives the new output item.
 * @param payload - Chat Completion request whose messages are extended.
 * @throws Error when `approvalRequest` is undefined.
 */
async function* callApprovedMCPToolStream(
	approval_request_id: string,
	mcpCallId: string,
	approvalRequest: McpApprovalRequestParams | undefined,
	mcpToolsMapping: Record<string, McpServerParams>,
	responseObject: IncompleteResponse,
	payload: ChatCompletionCreateParamsStreaming
): AsyncGenerator<PatchedResponseStreamEvent> {
	if (!approvalRequest) {
		throw new Error(`MCP approval request '${approval_request_id}' not found`);
	}

	const { name: toolName, server_label: serverLabel, arguments: toolArguments } = approvalRequest;
	const callItem: ResponseOutputItem.McpCall = {
		type: "mcp_call",
		id: mcpCallId,
		name: toolName,
		server_label: serverLabel,
		arguments: toolArguments,
	};
	responseObject.output.push(callItem);

	// Announce the new output item, then mark the call as running
	yield {
		type: "response.output_item.added",
		output_index: responseObject.output.length - 1,
		item: callItem,
		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
	};
	yield {
		type: "response.mcp_call.in_progress",
		item_id: callItem.id as string,
		output_index: responseObject.output.length - 1,
		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
	};

	const result = await callMcpTool(mcpToolsMapping[approvalRequest.name], approvalRequest.name, approvalRequest.arguments);

	if (result.error) {
		callItem.error = result.error;
		yield {
			type: "response.mcp_call.failed",
			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
		};
	} else {
		callItem.output = result.output;
		yield {
			type: "response.mcp_call.completed",
			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
		};
	}

	yield {
		type: "response.output_item.done",
		output_index: responseObject.output.length - 1,
		item: callItem,
		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
	};

	// Tool result text: the output if any, otherwise the error, otherwise an empty string
	const toolMessageContent = callItem.output || (callItem.error ? `Error: ${callItem.error}` : "");

	// Updating the payload for next LLM call
	payload.messages.push(
		{
			role: "assistant",
			tool_calls: [
				{
					id: callItem.id,
					type: "function",
					function: {
						name: callItem.name,
						arguments: callItem.arguments,
					},
				},
			],
		},
		{
			role: "tool",
			tool_call_id: callItem.id,
			content: toolMessageContent,
		}
	);
}
900
+
901
/**
 * Tell whether calling an MCP tool requires user approval.
 *
 * Resolution order:
 *  - unknown tool, or no `require_approval` policy => approval required;
 *  - `"always"` => required, `"never"` => not required;
 *  - object policy: listed under `always` => required, else listed under `never` => not required;
 *  - anything else => required (behavior is undefined in specs, so default to the safe side).
 *
 * @param toolName - Name of the tool the LLM wants to call.
 * @param mcpToolsMapping - Tool name => MCP server parameters.
 * @returns `true` when the call must be approved before it is executed.
 */
function requiresApproval(toolName: string, mcpToolsMapping: Record<string, McpServerParams>): boolean {
	// Optional chaining: a tool missing from the mapping must not crash the stream
	const policy = mcpToolsMapping[toolName]?.require_approval;
	if (policy == null || policy === "always") {
		return true;
	}
	if (policy === "never") {
		return false;
	}
	// `always` takes precedence over `never` when a tool is listed in both
	if (policy.always?.tool_names?.includes(toolName)) {
		return true;
	}
	if (policy.never?.tool_names?.includes(toolName)) {
		return false;
	}
	return true;
}
913
+
914
/**
 * Build events for function tool calls that appear in the stream's final response
 * but were not observed while streaming.
 *
 * For each such call a completed `function_call` item is appended to
 * `responseObject.output`, and its id is added to `observedToolCallIds`.
 *
 * @param stream - Chat Completion stream; used only if it exposes a `finalResponse()` method.
 * @param responseObject - Response being built; receives the new output items.
 * @param observedToolCallIds - Ids of tool calls already handled; updated in place.
 * @param debugToolStream - When true, logs the final response and each tool call.
 * @returns The events to emit, in order; empty when there is no final response or it cannot be read.
 */
async function buildFinalToolCallFallbackEvents(
	stream: { finalResponse?: () => Promise<ChatCompletion> } | AsyncIterable<unknown>,
	responseObject: IncompleteResponse,
	observedToolCallIds: Set<string>,
	debugToolStream: boolean
): Promise<PatchedResponseStreamEvent[]> {
	type WithFinalResponse = { finalResponse: () => Promise<ChatCompletion> };

	const collected: PatchedResponseStreamEvent[] = [];
	const candidate = stream as Partial<WithFinalResponse> | null | undefined;
	if (typeof candidate?.finalResponse !== "function") {
		return collected;
	}

	let finalResponse: ChatCompletion | null = null;
	try {
		// Invoked as a method so `this` stays bound to the stream
		finalResponse = await (stream as WithFinalResponse).finalResponse();
	} catch (error) {
		if (debugToolStream) {
			console.debug("[responses.js] finalResponse unavailable", error);
		}
		return collected;
	}

	const choices = finalResponse?.choices ?? [];
	if (debugToolStream) {
		console.debug("[responses.js] finalResponse", JSON.stringify(finalResponse, null, 2));
	}

	for (const choice of choices) {
		for (const toolCall of choice.message?.tool_calls ?? []) {
			if (debugToolStream) {
				console.debug("[responses.js] final tool_call", JSON.stringify(toolCall, null, 2));
			}
			const isFunctionCall = toolCall.type === "function" && Boolean(toolCall.function);
			const alreadySeen = Boolean(toolCall.id) && observedToolCallIds.has(toolCall.id);
			if (!isFunctionCall || alreadySeen) {
				continue;
			}

			const itemId = generateUniqueId("fc");
			const callId = toolCall.id ?? generateUniqueId("call");
			const callItem: ResponseFunctionToolCall = {
				id: itemId,
				type: "function_call",
				call_id: callId,
				name: toolCall.function.name ?? "function",
				arguments: toolCall.function.arguments ?? "",
				status: "completed",
			};
			responseObject.output.push(callItem);
			const outputIndex = responseObject.output.length - 1;

			collected.push({
				type: "response.output_item.added",
				output_index: outputIndex,
				item: callItem,
				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
			});

			// The arguments delta is skipped when there are no arguments
			if (callItem.arguments) {
				collected.push({
					type: "response.function_call_arguments.delta",
					item_id: callItem.id as string,
					output_index: outputIndex,
					delta: callItem.arguments,
					sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
				});
			}

			collected.push({
				type: "response.function_call_arguments.done",
				item_id: callItem.id as string,
				output_index: outputIndex,
				arguments: callItem.arguments,
				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
			});
			collected.push({
				type: "response.output_item.done",
				output_index: outputIndex,
				item: callItem,
				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
			});

			if (toolCall.id) {
				observedToolCallIds.add(toolCall.id);
			}
		}
	}

	return collected;
}
1007
+
1008
+
1009
/**
 * Close the last output item of the response and stream the matching "done" events.
 *
 * - `message`: one done event per content part, then the item is marked completed.
 * - `function_call`: arguments done, then the item is marked completed.
 * - `mcp_call`: arguments done, the MCP tool is executed, and the call plus its result
 *   are appended to `payload.messages` for the next LLM call.
 * - `mcp_approval_request` / `mcp_list_tools`: only `output_item.done`.
 *
 * Nothing is emitted when the response has no output item.
 *
 * @param responseObject - Response being built; its last output item is updated in place.
 * @param payload - Chat Completion request whose messages are extended after an MCP call.
 * @param mcpToolsMapping - Tool name => MCP server parameters.
 * @throws StreamingError on an unsupported content part type or output item type.
 */
async function* closeLastOutputItem(
	responseObject: IncompleteResponse,
	payload: ChatCompletionCreateParamsStreaming,
	mcpToolsMapping: Record<string, McpServerParams>
): AsyncGenerator<PatchedResponseStreamEvent> {
	const lastOutputItem = responseObject.output.at(-1);
	if (!lastOutputItem) {
		return;
	}

	switch (lastOutputItem.type) {
		case "message": {
			const parts = lastOutputItem.content as unknown as PatchedResponseContentPart[];
			const outputIndex = responseObject.output.length - 1;

			for (let partIndex = 0; partIndex < parts.length; partIndex++) {
				const contentPart = parts[partIndex];
				const partType = (contentPart as { type?: string }).type ?? "unknown";

				if (contentPart.type === "output_text") {
					yield {
						type: "response.output_text.done",
						item_id: lastOutputItem.id,
						output_index: outputIndex,
						content_index: partIndex,
						text: contentPart.text,
						logprobs: [],
						sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
					};
				} else if (contentPart.type === "reasoning_text") {
					yield {
						type: "response.reasoning.done",
						item_id: lastOutputItem.id,
						output_index: outputIndex,
						content_index: partIndex,
						text: contentPart.text,
						sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
					};
				} else if (contentPart.type !== "refusal") {
					// Refusals get no dedicated event, only the shared content_part.done below
					throw new StreamingError(`Unsupported content part type: ${partType}`);
				}

				yield {
					type: "response.content_part.done",
					item_id: lastOutputItem.id,
					output_index: outputIndex,
					content_index: partIndex,
					part: contentPart,
					sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
				};
			}

			lastOutputItem.status = "completed";
			yield {
				type: "response.output_item.done",
				output_index: outputIndex,
				item: lastOutputItem,
				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
			};
			return;
		}

		case "function_call": {
			yield {
				type: "response.function_call_arguments.done",
				item_id: lastOutputItem.id as string,
				output_index: responseObject.output.length - 1,
				arguments: lastOutputItem.arguments,
				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
			};

			lastOutputItem.status = "completed";
			yield {
				type: "response.output_item.done",
				output_index: responseObject.output.length - 1,
				item: lastOutputItem,
				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
			};
			return;
		}

		case "mcp_call": {
			yield {
				type: "response.mcp_call_arguments.done" as "response.mcp_call.arguments_done", // bug workaround (see https://github.com/openai/openai-node/issues/1562)
				item_id: lastOutputItem.id as string,
				output_index: responseObject.output.length - 1,
				arguments: lastOutputItem.arguments,
				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
			};

			// Call MCP tool
			const serverParams = mcpToolsMapping[lastOutputItem.name];
			const result = await callMcpTool(serverParams, lastOutputItem.name, lastOutputItem.arguments);
			if (result.error) {
				lastOutputItem.error = result.error;
				yield {
					type: "response.mcp_call.failed",
					sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
				};
			} else {
				lastOutputItem.output = result.output;
				yield {
					type: "response.mcp_call.completed",
					sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
				};
			}

			yield {
				type: "response.output_item.done",
				output_index: responseObject.output.length - 1,
				item: lastOutputItem,
				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
			};

			// Tool result text: the output if any, otherwise the error, otherwise an empty string
			const toolMessageContent =
				lastOutputItem.output || (lastOutputItem.error ? `Error: ${lastOutputItem.error}` : "");

			// Updating the payload for next LLM call
			payload.messages.push(
				{
					role: "assistant",
					tool_calls: [
						{
							id: lastOutputItem.id,
							type: "function",
							function: {
								name: lastOutputItem.name,
								arguments: lastOutputItem.arguments,
							},
						},
					],
				},
				{
					role: "tool",
					tool_call_id: lastOutputItem.id,
					content: toolMessageContent,
				}
			);
			return;
		}

		case "mcp_approval_request":
		case "mcp_list_tools": {
			yield {
				type: "response.output_item.done",
				output_index: responseObject.output.length - 1,
				item: lastOutputItem,
				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
			};
			return;
		}

		default:
			throw new StreamingError(
				`Not implemented: expected message, function_call, or mcp_call, got ${lastOutputItem?.type}`
			);
	}
}
src/schemas.ts ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { z } from "zod";
2
+
3
+ /**
4
+ * https://platform.openai.com/docs/api-reference/responses/create
5
+ * commented out properties are not supported by the server
6
+ */
7
+
8
// A plain text part of an input message.
const inputTextPartSchema = z.object({
	type: z.literal("input_text"),
	text: z.string(),
});

// An image part of an input message. The commented-out properties are not supported by the server.
const inputImagePartSchema = z.object({
	type: z.literal("input_image"),
	// file_id: z.string().nullable().default(null),
	image_url: z.string(),
	// detail: z.enum(["auto", "low", "high"]).default("auto"),
});

// File parts are not supported by the server; kept here for reference only.
// const inputFilePartSchema = z.object({
// 	type: z.literal("input_file"),
// 	file_data: z.string().nullable().default(null),
// 	file_id: z.string().nullable().default(null),
// 	filename: z.string().nullable().default(null),
// });

/**
 * Structured content of an input message: an array whose elements are each
 * either an `input_text` part or an `input_image` part.
 */
const inputContentSchema = z.array(z.union([inputTextPartSchema, inputImagePartSchema]));
28
+
29
// List of tool names; shared by the allow-list and the approval filters below.
const mcpToolNamesSchema = z.array(z.string());

// Optional approval filter of the form `{ tool_names?: string[] }`.
const mcpApprovalToolFilterSchema = z.object({ tool_names: mcpToolNamesSchema.optional() }).optional();

/**
 * Tool entry of type "mcp", used as one member of the `tools` array of the create-response request.
 *
 * - `allowed_tools`: either a bare array of tool names or `{ tool_names }`; defaults to `null`.
 * - `headers`: string-to-string record; defaults to `null`.
 * - `require_approval`: "always" | "never", or an object with optional `always` / `never`
 *   tool-name filters; defaults to "always".
 */
const mcpServerParamsSchema = z.object({
	server_label: z.string(),
	server_url: z.string(),
	type: z.literal("mcp"),
	allowed_tools: z
		.union([mcpToolNamesSchema, z.object({ tool_names: mcpToolNamesSchema })])
		.nullable()
		.default(null),
	headers: z.record(z.string()).nullable().default(null),
	require_approval: z
		.union([
			z.enum(["always", "never"]),
			z.object({
				always: mcpApprovalToolFilterSchema,
				never: mcpApprovalToolFilterSchema,
			}),
		])
		.default("always"),
});
53
+
54
// Fields of an `mcp_approval_request` input item; `arguments` is carried as a string.
const mcpApprovalRequestShape = {
	type: z.literal("mcp_approval_request"),
	id: z.string(),
	server_label: z.string(),
	name: z.string(),
	arguments: z.string(),
};

/** Input item of type "mcp_approval_request"; every field is required. */
const mcpApprovalRequestParamsSchema = z.object(mcpApprovalRequestShape);
61
// Fields of an `mcp_approval_response` input item. `id` and `reason` may be omitted or null
// (both default to null); `approval_request_id` and `approve` are required.
const mcpApprovalResponseShape = {
	type: z.literal("mcp_approval_response"),
	id: z.string().nullable().default(null),
	approval_request_id: z.string(),
	approve: z.boolean(),
	reason: z.string().nullable().default(null),
};

/** Input item of type "mcp_approval_response". */
const mcpApprovalResponseParamsSchema = z.object(mcpApprovalResponseShape);
68
// Fields of an `mcp_call` input item; `arguments` is carried as a string.
const mcpCallShape = {
	type: z.literal("mcp_call"),
	id: z.string(),
	name: z.string(),
	server_label: z.string(),
	arguments: z.string(),
};

/** Input item of type "mcp_call"; every field is required. */
const mcpCallParamsSchema = z.object(mcpCallShape);
75
+
76
// Literal value sets for the `truncation`, `service_tier` and `prompt_cache_retention` request fields.
const TRUNCATION_VALUES = ["auto", "disabled"] as const;
const SERVICE_TIER_VALUES = ["auto", "default", "flex", "priority"] as const;
const PROMPT_CACHE_RETENTION_VALUES = ["in_memory", "24h"] as const;

const truncationEnum = z.enum(TRUNCATION_VALUES);
const serviceTierEnum = z.enum(SERVICE_TIER_VALUES);
const promptCacheRetentionEnum = z.enum(PROMPT_CACHE_RETENTION_VALUES);
79
+
80
// ---- Building blocks for `createResponseParamsSchema` ----

// Values allowed in the `status` field of input items.
const inputItemStatusEnum = z.enum(["in_progress", "completed", "incomplete"]);

// `type` field of message items; resolves to "message" when the caller omits it.
const messageTypeSchema = z.enum(["message"]).default("message");

// Free-form object: string keys mapped to values of any type.
const anyRecordSchema = z.record(z.any());

// Message whose content is either a plain string or an array of input parts.
const easyInputMessageSchema = z.object({
	content: z.union([z.string(), inputContentSchema]),
	role: z.enum(["user", "assistant", "system", "developer"]),
	type: messageTypeSchema,
});

// Non-assistant message with structured content and a nullable `status` (defaults to null).
// NOTE(review): every value accepted here also seems to satisfy `easyInputMessageSchema`, which is
// tried first in the union below — confirm whether this variant is ever selected.
const inputMessageItemSchema = z.object({
	role: z.enum(["user", "system", "developer"]),
	status: inputItemStatusEnum.nullable().default(null),
	content: inputContentSchema,
	type: messageTypeSchema,
});

// Text part of an assistant message.
const outputTextPartSchema = z.object({
	type: z.literal("output_text"),
	text: z.string(),
	annotations: z.array(anyRecordSchema).nullable().optional(), // TODO: incomplete
	logprobs: z.array(anyRecordSchema).nullable().optional(), // TODO: incomplete
});

// Refusal part of an assistant message.
const refusalPartSchema = z.object({
	type: z.literal("refusal"),
	refusal: z.string(),
});

// Assistant message made of `output_text` / `refusal` parts.
const outputMessageItemSchema = z.object({
	id: z.string().optional(),
	role: z.enum(["assistant"]),
	status: inputItemStatusEnum.optional(),
	type: messageTypeSchema,
	content: z.array(
		z.union([
			outputTextPartSchema,
			refusalPartSchema,
			// TODO: much more objects: File search tool call, Computer tool call, Computer tool call output, Web search tool call, Function tool call, Function tool call output, Reasoning, Image generation call, Code interpreter tool call, Local shell call, Local shell call output, MCP list tools, MCP approval request, MCP approval response, MCP tool call
		])
	),
});

// Function call item; `arguments` is carried as a string.
const functionCallItemSchema = z.object({
	type: z.literal("function_call"),
	id: z.string().optional(),
	call_id: z.string(),
	name: z.string(),
	arguments: z.string(),
	status: inputItemStatusEnum.optional(),
});

// Output of a function call, linked to the call through `call_id`.
const functionCallOutputItemSchema = z.object({
	call_id: z.string(),
	output: z.string(),
	type: z.literal("function_call_output"),
	id: z.string().optional(),
	status: inputItemStatusEnum.optional(),
});

// One tool entry inside an `mcp_list_tools` item.
const mcpListedToolSchema = z.object({
	name: z.string(),
	input_schema: anyRecordSchema,
	description: z.string().nullable().optional(),
	// NOTE(review): an empty z.object drops unrecognized keys by default, so annotation contents
	// are presumably not preserved after parsing — confirm this is intended.
	annotations: z.object({}).optional(),
});

// `mcp_list_tools` item: the tools listed for one server label, plus an optional error string.
const mcpListToolsItemSchema = z.object({
	type: z.literal("mcp_list_tools"),
	id: z.string(),
	server_label: z.string(),
	tools: z.array(mcpListedToolSchema),
	error: z.string().nullable().optional(),
});

// Any single element of the `input` array. Members are tried in the order listed.
const inputItemSchema = z.union([
	easyInputMessageSchema,
	inputMessageItemSchema,
	outputMessageItemSchema,
	functionCallItemSchema,
	functionCallOutputItemSchema,
	mcpListToolsItemSchema,
	mcpApprovalRequestParamsSchema,
	mcpApprovalResponseParamsSchema,
	mcpCallParamsSchema,
]);

// `text.format` variant that carries a named JSON schema.
const jsonSchemaFormatSchema = z.object({
	type: z.literal("json_schema"),
	name: z
		.string()
		.max(64, "Must be at most 64 characters")
		.regex(/^[a-zA-Z0-9_-]+$/, "Only letters, numbers, underscores, and dashes are allowed"),
	description: z.string().optional(),
	schema: anyRecordSchema,
	strict: z.boolean().default(false),
});

// Accepted shapes of `text.format`: plain text, JSON object, or JSON schema.
const textFormatSchema = z.union([
	z.object({ type: z.literal("text") }),
	z.object({ type: z.literal("json_object") }),
	jsonSchemaFormatSchema,
]);

// Function tool definition; `parameters` is a free-form record.
const functionToolSchema = z.object({
	name: z.string(),
	parameters: anyRecordSchema,
	strict: z.boolean().default(false),
	type: z.literal("function"),
	description: z.string().optional(),
});

/**
 * Request body schema for creating a response.
 *
 * `input` is either a single string or an array of input items (messages, function calls and
 * their outputs, and MCP items). Many nullable fields default to `null`, and several scalar
 * fields carry explicit defaults (`background`, `store`, `stream`, `temperature`, `top_p`).
 * Fields left commented out are not supported by the server.
 */
export const createResponseParamsSchema = z.object({
	background: z.boolean().default(false),
	// include:
	input: z.union([z.string(), z.array(inputItemSchema)]),
	instructions: z.string().nullable().default(null),
	max_output_tokens: z.number().int().min(0).nullable().default(null),
	max_tool_calls: z.number().int().min(1).nullable().default(null),
	// Keys up to 64 chars, values up to 512 chars, at most 16 entries in total.
	metadata: z
		.record(z.string().max(64), z.string().max(512))
		.refine((entries) => Object.keys(entries).length <= 16, {
			message: "Must have at most 16 items",
		})
		.nullable()
		.default(null),
	model: z.string(),
	previous_response_id: z.string().nullable().default(null),
	provider: z.string().min(1).optional(),
	parallel_tool_calls: z.boolean().nullable().default(null),
	reasoning: z
		.object({
			effort: z.enum(["low", "medium", "high"]).default("medium"),
			summary: z.enum(["auto", "concise", "detailed"]).nullable().default(null),
		})
		.optional(),
	store: z.boolean().default(false),
	service_tier: serviceTierEnum.nullable().default(null),
	safety_identifier: z.string().max(64).nullable().default(null),
	prompt_cache_key: z.string().max(64).nullable().default(null),
	prompt_cache_retention: promptCacheRetentionEnum.nullable().default(null),
	stream: z.boolean().default(false),
	temperature: z.number().min(0).max(2).default(1),
	text: z.object({ format: textFormatSchema }).optional(),
	tool_choice: z
		.union([
			z.enum(["auto", "none", "required"]),
			z.object({
				type: z.literal("function"),
				name: z.string(),
			}),
			// TODO: also hosted tool and MCP tool
		])
		.optional(),
	tools: z.array(z.union([functionToolSchema, mcpServerParamsSchema])).optional(),
	top_logprobs: z.number().int().min(0).max(20).nullable().default(null),
	top_p: z.number().min(0).max(1).default(1),
	presence_penalty: z.number().min(-2).max(2).nullable().default(null),
	frequency_penalty: z.number().min(-2).max(2).nullable().default(null),
	truncation: truncationEnum.nullable().default(null),
	user: z.string().max(64).nullable().default(null),
});
235
+
236
// Static types derived from the schemas above. These are the parsed (output) shapes,
// i.e. what a successful parse returns once defaults have been applied.

/** Parsed body of a create-response request. */
export type CreateResponseParams = z.output<typeof createResponseParamsSchema>;

/** Parsed "mcp" entry of the `tools` array. */
export type McpServerParams = z.output<typeof mcpServerParamsSchema>;

/** Parsed `mcp_approval_request` input item. */
export type McpApprovalRequestParams = z.output<typeof mcpApprovalRequestParamsSchema>;

/** Parsed `mcp_approval_response` input item. */
export type McpApprovalResponseParams = z.output<typeof mcpApprovalResponseParamsSchema>;

/** Parsed `mcp_call` input item. */
export type McpCallParams = z.output<typeof mcpCallParamsSchema>;
src/server.ts ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import express, { type Express } from "express";
2
+ import { createResponseParamsSchema } from "./schemas.js";
3
+ import { validateBody } from "./middleware/validation.js";
4
+ import { requestLogger } from "./middleware/logging.js";
5
+ import { getLandingPageHtml, postCreateResponse, getHealth } from "./routes/index.js";
6
+
7
/**
 * Builds a fresh Express application with the middleware and routes of this server.
 * The app is only configured here; nothing in this function starts listening.
 *
 * @returns the configured Express application
 */
export function createApp(): Express {
	const server: Express = express();

	// Global middleware: the request logger is registered ahead of the JSON body parser.
	server.use(requestLogger());
	server.use(express.json());

	// GET endpoints: landing page and health check.
	server.get("/", getLandingPageHtml);
	server.get("/health", getHealth);

	// POST /v1/responses goes through validateBody(createResponseParamsSchema) before the handler.
	server.post("/v1/responses", validateBody(createResponseParamsSchema), postCreateResponse);

	return server;
}
tsconfig.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "allowSyntheticDefaultImports": true,
4
+ "lib": ["ES2022", "DOM"],
5
+ "module": "CommonJS",
6
+ "moduleResolution": "node",
7
+ "target": "ES2022",
8
+ "forceConsistentCasingInFileNames": true,
9
+ "strict": true,
10
+ "noImplicitAny": true,
11
+ "strictNullChecks": true,
12
+ "skipLibCheck": true,
13
+ "noImplicitOverride": true,
14
+ "outDir": "./dist",
15
+ "declaration": true,
16
+ "declarationMap": true,
17
+ "resolveJsonModule": true
18
+ },
19
+ "include": ["src", "test"],
20
+ "exclude": ["dist"]
21
+ }