Oxygen Developer committed on
Commit
0eedb5a
·
1 Parent(s): a9a72e1
duckai.ts ADDED
@@ -0,0 +1,475 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import UserAgent from "user-agents";
2
+ import { JSDOM } from "jsdom";
3
+ import { RateLimitStore } from "./rate-limit-store";
4
+ import { SharedRateLimitMonitor } from "./shared-rate-limit-monitor";
5
+ import type {
6
+ ChatCompletionMessage,
7
+ VQDResponse,
8
+ DuckAIRequest,
9
+ } from "./types";
10
+ import { createHash } from "node:crypto";
11
+ import { Buffer } from "node:buffer";
12
+
13
// Rate limiting tracking with sliding window
interface RateLimitInfo {
  requestTimestamps: number[]; // Array of request timestamps for sliding window
  lastRequestTime: number; // Epoch ms of the most recent request sent
  isLimited: boolean; // True when a limit has been recorded in the store
  retryAfter?: number; // Upstream retry hint — units not set in this file; presumably ms, confirm against the store's writer
}
20
+
21
+ export class DuckAI {
22
+ private rateLimitInfo: RateLimitInfo = {
23
+ requestTimestamps: [],
24
+ lastRequestTime: 0,
25
+ isLimited: false,
26
+ };
27
+ private rateLimitStore: RateLimitStore;
28
+ private rateLimitMonitor: SharedRateLimitMonitor;
29
+
30
+ // Conservative rate limiting - adjust based on observed limits
31
+ private readonly MAX_REQUESTS_PER_MINUTE = 20;
32
+ private readonly WINDOW_SIZE_MS = 60 * 1000; // 1 minute
33
+ private readonly MIN_REQUEST_INTERVAL_MS = 1000; // 1 second between requests
34
+
35
+ constructor() {
36
+ this.rateLimitStore = new RateLimitStore();
37
+ this.rateLimitMonitor = new SharedRateLimitMonitor();
38
+ this.loadRateLimitFromStore();
39
+ }
40
+
41
+ /**
42
+ * Clean old timestamps outside the sliding window
43
+ */
44
+ private cleanOldTimestamps(): void {
45
+ const now = Date.now();
46
+ const cutoff = now - this.WINDOW_SIZE_MS;
47
+ this.rateLimitInfo.requestTimestamps =
48
+ this.rateLimitInfo.requestTimestamps.filter(
49
+ (timestamp) => timestamp > cutoff
50
+ );
51
+ }
52
+
53
+ /**
54
+ * Get current request count in sliding window
55
+ */
56
+ private getCurrentRequestCount(): number {
57
+ this.cleanOldTimestamps();
58
+ return this.rateLimitInfo.requestTimestamps.length;
59
+ }
60
+
61
+ /**
62
+ * Load rate limit data from shared store
63
+ */
64
+ private loadRateLimitFromStore(): void {
65
+ const stored = this.rateLimitStore.read();
66
+ if (stored) {
67
+ // Convert old format to new sliding window format if needed
68
+ const storedAny = stored as any;
69
+ if ("requestCount" in storedAny && "windowStart" in storedAny) {
70
+ // Old format - convert to new format (start fresh)
71
+ this.rateLimitInfo = {
72
+ requestTimestamps: [],
73
+ lastRequestTime: storedAny.lastRequestTime || 0,
74
+ isLimited: storedAny.isLimited || false,
75
+ retryAfter: storedAny.retryAfter,
76
+ };
77
+ } else {
78
+ // New format
79
+ this.rateLimitInfo = {
80
+ requestTimestamps: storedAny.requestTimestamps || [],
81
+ lastRequestTime: storedAny.lastRequestTime || 0,
82
+ isLimited: storedAny.isLimited || false,
83
+ retryAfter: storedAny.retryAfter,
84
+ };
85
+ }
86
+ // Clean old timestamps after loading
87
+ this.cleanOldTimestamps();
88
+ }
89
+ }
90
+
91
+ /**
92
+ * Save rate limit data to shared store
93
+ */
94
+ private saveRateLimitToStore(): void {
95
+ this.cleanOldTimestamps();
96
+ this.rateLimitStore.write({
97
+ requestTimestamps: this.rateLimitInfo.requestTimestamps,
98
+ lastRequestTime: this.rateLimitInfo.lastRequestTime,
99
+ isLimited: this.rateLimitInfo.isLimited,
100
+ retryAfter: this.rateLimitInfo.retryAfter,
101
+ } as any);
102
+ }
103
+
104
+ /**
105
+ * Get current rate limit status
106
+ */
107
+ getRateLimitStatus(): {
108
+ requestsInCurrentWindow: number;
109
+ maxRequestsPerMinute: number;
110
+ timeUntilWindowReset: number;
111
+ isCurrentlyLimited: boolean;
112
+ recommendedWaitTime: number;
113
+ } {
114
+ // Load latest data from store first
115
+ this.loadRateLimitFromStore();
116
+
117
+ const now = Date.now();
118
+ const currentRequestCount = this.getCurrentRequestCount();
119
+
120
+ // For sliding window, there's no fixed reset time
121
+ // The "reset" happens continuously as old requests fall out of the window
122
+ const oldestTimestamp = this.rateLimitInfo.requestTimestamps[0];
123
+ const timeUntilReset = oldestTimestamp
124
+ ? Math.max(0, oldestTimestamp + this.WINDOW_SIZE_MS - now)
125
+ : 0;
126
+
127
+ const timeSinceLastRequest = now - this.rateLimitInfo.lastRequestTime;
128
+ const recommendedWait = Math.max(
129
+ 0,
130
+ this.MIN_REQUEST_INTERVAL_MS - timeSinceLastRequest
131
+ );
132
+
133
+ return {
134
+ requestsInCurrentWindow: currentRequestCount,
135
+ maxRequestsPerMinute: this.MAX_REQUESTS_PER_MINUTE,
136
+ timeUntilWindowReset: timeUntilReset,
137
+ isCurrentlyLimited: this.rateLimitInfo.isLimited,
138
+ recommendedWaitTime: recommendedWait,
139
+ };
140
+ }
141
+
142
+ /**
143
+ * Check if we should wait before making a request
144
+ */
145
+ private shouldWaitBeforeRequest(): { shouldWait: boolean; waitTime: number } {
146
+ // Load latest data from store first
147
+ this.loadRateLimitFromStore();
148
+
149
+ const now = Date.now();
150
+ const currentRequestCount = this.getCurrentRequestCount();
151
+
152
+ // Check if we're hitting the rate limit
153
+ if (currentRequestCount >= this.MAX_REQUESTS_PER_MINUTE) {
154
+ // Find the oldest request timestamp
155
+ const oldestTimestamp = this.rateLimitInfo.requestTimestamps[0];
156
+ if (oldestTimestamp) {
157
+ // Wait until the oldest request falls out of the window
158
+ const waitTime = oldestTimestamp + this.WINDOW_SIZE_MS - now + 100; // +100ms buffer
159
+ return { shouldWait: true, waitTime: Math.max(0, waitTime) };
160
+ }
161
+ }
162
+
163
+ // Check minimum interval between requests
164
+ const timeSinceLastRequest = now - this.rateLimitInfo.lastRequestTime;
165
+ if (timeSinceLastRequest < this.MIN_REQUEST_INTERVAL_MS) {
166
+ const waitTime = this.MIN_REQUEST_INTERVAL_MS - timeSinceLastRequest;
167
+ return { shouldWait: true, waitTime };
168
+ }
169
+
170
+ return { shouldWait: false, waitTime: 0 };
171
+ }
172
+
173
+ /**
174
+ * Wait if necessary before making a request
175
+ */
176
+ private async waitIfNeeded(): Promise<void> {
177
+ const { shouldWait, waitTime } = this.shouldWaitBeforeRequest();
178
+
179
+ if (shouldWait) {
180
+ console.log(`Rate limiting: waiting ${waitTime}ms before next request`);
181
+ await new Promise((resolve) => setTimeout(resolve, waitTime));
182
+ }
183
+ }
184
+
185
+ private async getEncodedVqdHash(vqdHash: string): Promise<string> {
186
+ const jsScript = Buffer.from(vqdHash, 'base64').toString('utf-8');
187
+
188
+ const dom = new JSDOM(
189
+ `<iframe id="jsa" sandbox="allow-scripts allow-same-origin" srcdoc="<!DOCTYPE html>
190
+ <html>
191
+ <head>
192
+ <meta http-equiv="Content-Security-Policy"; content="default-src 'none'; script-src 'unsafe-inline'">
193
+ </head>
194
+ <body></body>
195
+ </html>" style="position: absolute; left: -9999px; top: -9999px;"></iframe>`,
196
+ { runScripts: 'dangerously' }
197
+ );
198
+ dom.window.top.__DDG_BE_VERSION__ = 1;
199
+ dom.window.top.__DDG_FE_CHAT_HASH__ = 1;
200
+ const jsa = dom.window.top.document.querySelector('#jsa') as HTMLIFrameElement;
201
+ const contentDoc = jsa.contentDocument || jsa.contentWindow!.document;
202
+
203
+ const meta = contentDoc.createElement('meta');
204
+ meta.setAttribute('http-equiv', 'Content-Security-Policy');
205
+ meta.setAttribute('content', "default-src 'none'; script-src 'unsafe-inline';");
206
+ contentDoc.head.appendChild(meta);
207
+ const result = await dom.window.eval(jsScript) as {
208
+ client_hashes: string[];
209
+ [key: string]: any;
210
+ };
211
+
212
+ result.client_hashes[0] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36';
213
+ result.client_hashes = result.client_hashes.map((t) => {
214
+ const hash = createHash('sha256');
215
+ hash.update(t);
216
+
217
+ return hash.digest('base64');
218
+ });
219
+
220
+ return btoa(JSON.stringify(result));
221
+ }
222
+
223
+ private async getVQD(userAgent: string): Promise<VQDResponse> {
224
+ const response = await fetch("https://duckduckgo.com/duckchat/v1/status", {
225
+ headers: {
226
+ accept: "*/*",
227
+ "accept-language": "en-US,en;q=0.9,fa;q=0.8",
228
+ "cache-control": "no-store",
229
+ pragma: "no-cache",
230
+ priority: "u=1, i",
231
+ "sec-fetch-dest": "empty",
232
+ "sec-fetch-mode": "cors",
233
+ "sec-fetch-site": "same-origin",
234
+ "x-vqd-accept": "1",
235
+ "User-Agent": userAgent,
236
+ },
237
+ referrer: "https://duckduckgo.com/",
238
+ referrerPolicy: "origin",
239
+ method: "GET",
240
+ mode: "cors",
241
+ credentials: "include",
242
+ });
243
+
244
+ if (!response.ok) {
245
+ throw new Error(
246
+ `Failed to get VQD: ${response.status} ${response.statusText}`
247
+ );
248
+ }
249
+
250
+ const hashHeader = response.headers.get("x-Vqd-hash-1");
251
+
252
+ if (!hashHeader) {
253
+ throw new Error(
254
+ `Missing VQD headers: hash=${!!hashHeader}`
255
+ );
256
+ }
257
+
258
+ const encodedHash = await this.getEncodedVqdHash(hashHeader);
259
+
260
+ return { hash: encodedHash };
261
+ }
262
+
263
+ private async hashClientHashes(clientHashes: string[]): Promise<string[]> {
264
+ return Promise.all(
265
+ clientHashes.map(async (hash) => {
266
+ const encoder = new TextEncoder();
267
+ const data = encoder.encode(hash);
268
+ const hashBuffer = await crypto.subtle.digest("SHA-256", data);
269
+ const hashArray = new Uint8Array(hashBuffer);
270
+ return btoa(
271
+ hashArray.reduce((str, byte) => str + String.fromCharCode(byte), "")
272
+ );
273
+ })
274
+ );
275
+ }
276
+
277
+ async chat(request: DuckAIRequest): Promise<string> {
278
+ // Wait if rate limiting is needed
279
+ await this.waitIfNeeded();
280
+
281
+ const userAgent = new UserAgent().toString();
282
+ const vqd = await this.getVQD(userAgent);
283
+
284
+ // Update rate limit tracking BEFORE making the request
285
+ const now = Date.now();
286
+ this.rateLimitInfo.requestTimestamps.push(now);
287
+ this.rateLimitInfo.lastRequestTime = now;
288
+ this.saveRateLimitToStore();
289
+
290
+ // Show compact rate limit status in server console
291
+ this.rateLimitMonitor.printCompactStatus();
292
+
293
+ const response = await fetch("https://duckduckgo.com/duckchat/v1/chat", {
294
+ headers: {
295
+ accept: "text/event-stream",
296
+ "accept-language": "en-US,en;q=0.9,fa;q=0.8",
297
+ "cache-control": "no-cache",
298
+ "content-type": "application/json",
299
+ pragma: "no-cache",
300
+ priority: "u=1, i",
301
+ "sec-fetch-dest": "empty",
302
+ "sec-fetch-mode": "cors",
303
+ "sec-fetch-site": "same-origin",
304
+ "x-fe-version": "serp_20250401_100419_ET-19d438eb199b2bf7c300",
305
+ "User-Agent": userAgent,
306
+ "x-vqd-hash-1": vqd.hash,
307
+ },
308
+ referrer: "https://duckduckgo.com/",
309
+ referrerPolicy: "origin",
310
+ body: JSON.stringify(request),
311
+ method: "POST",
312
+ mode: "cors",
313
+ credentials: "include",
314
+ });
315
+
316
+ // Handle rate limiting
317
+ if (response.status === 429) {
318
+ const retryAfter = response.headers.get("retry-after");
319
+ const waitTime = retryAfter ? parseInt(retryAfter) * 1000 : 60000; // Default 1 minute
320
+ throw new Error(
321
+ `Rate limited. Retry after ${waitTime}ms. Status: ${response.status}`
322
+ );
323
+ }
324
+
325
+ if (!response.ok) {
326
+ throw new Error(
327
+ `DuckAI API error: ${response.status} ${response.statusText}`
328
+ );
329
+ }
330
+
331
+ const text = await response.text();
332
+
333
+ // Check for errors
334
+ try {
335
+ const parsed = JSON.parse(text);
336
+ if (parsed.action === "error") {
337
+ throw new Error(`Duck.ai error: ${JSON.stringify(parsed)}`);
338
+ }
339
+ } catch (e) {
340
+ // Not JSON, continue processing
341
+ }
342
+
343
+ // Extract the LLM response from the streamed response
344
+ let llmResponse = "";
345
+ const lines = text.split("\n");
346
+ for (const line of lines) {
347
+ if (line.startsWith("data: ")) {
348
+ try {
349
+ const json = JSON.parse(line.slice(6));
350
+ if (json.message) {
351
+ llmResponse += json.message;
352
+ }
353
+ } catch (e) {
354
+ // Skip invalid JSON lines
355
+ }
356
+ }
357
+ }
358
+
359
+ const finalResponse = llmResponse.trim();
360
+
361
+ // If response is empty, provide a fallback
362
+ if (!finalResponse) {
363
+ console.warn("Duck.ai returned empty response, using fallback");
364
+ return "I apologize, but I'm unable to provide a response at the moment. Please try again.";
365
+ }
366
+
367
+ return finalResponse;
368
+ }
369
+
370
+ async chatStream(request: DuckAIRequest): Promise<ReadableStream<string>> {
371
+ // Wait if rate limiting is needed
372
+ await this.waitIfNeeded();
373
+
374
+ const userAgent = new UserAgent().toString();
375
+ const vqd = await this.getVQD(userAgent);
376
+
377
+ // Update rate limit tracking BEFORE making the request
378
+ const now = Date.now();
379
+ this.rateLimitInfo.requestTimestamps.push(now);
380
+ this.rateLimitInfo.lastRequestTime = now;
381
+ this.saveRateLimitToStore();
382
+
383
+ // Show compact rate limit status in server console
384
+ this.rateLimitMonitor.printCompactStatus();
385
+
386
+ const response = await fetch("https://duckduckgo.com/duckchat/v1/chat", {
387
+ headers: {
388
+ accept: "text/event-stream",
389
+ "accept-language": "en-US,en;q=0.9,fa;q=0.8",
390
+ "cache-control": "no-cache",
391
+ "content-type": "application/json",
392
+ pragma: "no-cache",
393
+ priority: "u=1, i",
394
+ "sec-fetch-dest": "empty",
395
+ "sec-fetch-mode": "cors",
396
+ "sec-fetch-site": "same-origin",
397
+ "x-fe-version": "serp_20250401_100419_ET-19d438eb199b2bf7c300",
398
+ "User-Agent": userAgent,
399
+ "x-vqd-hash-1": vqd.hash,
400
+ },
401
+ referrer: "https://duckduckgo.com/",
402
+ referrerPolicy: "origin",
403
+ body: JSON.stringify(request),
404
+ method: "POST",
405
+ mode: "cors",
406
+ credentials: "include",
407
+ });
408
+
409
+ // Handle rate limiting
410
+ if (response.status === 429) {
411
+ const retryAfter = response.headers.get("retry-after");
412
+ const waitTime = retryAfter ? parseInt(retryAfter) * 1000 : 60000; // Default 1 minute
413
+ throw new Error(
414
+ `Rate limited. Retry after ${waitTime}ms. Status: ${response.status}`
415
+ );
416
+ }
417
+
418
+ if (!response.ok) {
419
+ throw new Error(
420
+ `DuckAI API error: ${response.status} ${response.statusText}`
421
+ );
422
+ }
423
+
424
+ if (!response.body) {
425
+ throw new Error("No response body");
426
+ }
427
+
428
+ return new ReadableStream({
429
+ start(controller) {
430
+ const reader = response.body!.getReader();
431
+ const decoder = new TextDecoder();
432
+
433
+ function pump(): Promise<void> {
434
+ return reader.read().then(({ done, value }) => {
435
+ if (done) {
436
+ controller.close();
437
+ return;
438
+ }
439
+
440
+ const chunk = decoder.decode(value, { stream: true });
441
+ const lines = chunk.split("\n");
442
+
443
+ for (const line of lines) {
444
+ if (line.startsWith("data: ")) {
445
+ try {
446
+ const json = JSON.parse(line.slice(6));
447
+ if (json.message) {
448
+ controller.enqueue(json.message);
449
+ }
450
+ } catch (e) {
451
+ // Skip invalid JSON
452
+ }
453
+ }
454
+ }
455
+
456
+ return pump();
457
+ });
458
+ }
459
+
460
+ return pump();
461
+ },
462
+ });
463
+ }
464
+
465
+ getAvailableModels(): string[] {
466
+ return [
467
+ "gpt-4o-mini",
468
+ "gpt-5-mini",
469
+ "claude-3-5-haiku-latest",
470
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct",
471
+ "mistralai/Mistral-Small-24B-Instruct-2501",
472
+ "openai/gpt-oss-120b"
473
+ ];
474
+ }
475
+ }
openai-service.ts ADDED
@@ -0,0 +1,637 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { DuckAI } from "./duckai";
2
+ import { ToolService } from "./tool-service";
3
+ import type {
4
+ ChatCompletionRequest,
5
+ ChatCompletionResponse,
6
+ ChatCompletionStreamResponse,
7
+ ChatCompletionMessage,
8
+ ModelsResponse,
9
+ Model,
10
+ DuckAIRequest,
11
+ ToolDefinition,
12
+ ToolCall,
13
+ } from "./types";
14
+
15
+ export class OpenAIService {
16
  // DuckAI transport, tool-prompt helper, and the registry of runnable
  // functions (built-ins plus any registered later via registerFunction()).
  private duckAI: DuckAI;
  private toolService: ToolService;
  private availableFunctions: Record<string, Function>;

  constructor() {
    this.duckAI = new DuckAI();
    this.toolService = new ToolService();
    this.availableFunctions = this.initializeBuiltInFunctions();
  }
25
+
26
+ private initializeBuiltInFunctions(): Record<string, Function> {
27
+ return {
28
+ // Example built-in functions - users can extend this
29
+ get_current_time: () => new Date().toISOString(),
30
+ calculate: (args: { expression: string }) => {
31
+ try {
32
+ // Simple calculator - in production, use a proper math parser
33
+ const result = Function(
34
+ `"use strict"; return (${args.expression})`
35
+ )();
36
+ return { result };
37
+ } catch (error) {
38
+ return { error: "Invalid expression" };
39
+ }
40
+ },
41
+ get_weather: (args: { location: string }) => {
42
+ // Mock weather function
43
+ return {
44
+ location: args.location,
45
+ temperature: Math.floor(Math.random() * 30) + 10,
46
+ condition: ["sunny", "cloudy", "rainy"][
47
+ Math.floor(Math.random() * 3)
48
+ ],
49
+ note: "This is a mock weather function for demonstration",
50
+ };
51
+ },
52
+ };
53
+ }
54
+
55
  /**
   * Register (or replace) a host-provided function for tool execution.
   * NOTE(review): availableFunctions is not read anywhere in this chunk —
   * execution presumably happens elsewhere; confirm against callers.
   */
  registerFunction(name: string, func: Function): void {
    this.availableFunctions[name] = func;
  }
58
+
59
+ private generateId(): string {
60
+ return `chatcmpl-${Math.random().toString(36).substring(2, 15)}`;
61
+ }
62
+
63
+ private getCurrentTimestamp(): number {
64
+ return Math.floor(Date.now() / 1000);
65
+ }
66
+
67
+ private estimateTokens(text: string): number {
68
+ // Rough estimation: ~4 characters per token
69
+ return Math.ceil(text.length / 4);
70
+ }
71
+
72
+ private transformToDuckAIRequest(
73
+ request: ChatCompletionRequest
74
+ ): DuckAIRequest {
75
+ // Use the model from request, fallback to default
76
+ const model = request.model || "mistralai/Mistral-Small-24B-Instruct-2501";
77
+
78
+ return {
79
+ model,
80
+ messages: request.messages,
81
+ };
82
+ }
83
+
84
+ async createChatCompletion(
85
+ request: ChatCompletionRequest
86
+ ): Promise<ChatCompletionResponse> {
87
+ // Check if this request involves function calling
88
+ if (
89
+ this.toolService.shouldUseFunctionCalling(
90
+ request.tools,
91
+ request.tool_choice
92
+ )
93
+ ) {
94
+ return this.createChatCompletionWithTools(request);
95
+ }
96
+
97
+ const duckAIRequest = this.transformToDuckAIRequest(request);
98
+ const response = await this.duckAI.chat(duckAIRequest);
99
+
100
+ const id = this.generateId();
101
+ const created = this.getCurrentTimestamp();
102
+
103
+ // Calculate token usage
104
+ const promptText = request.messages.map((m) => m.content || "").join(" ");
105
+ const promptTokens = this.estimateTokens(promptText);
106
+ const completionTokens = this.estimateTokens(response);
107
+
108
+ return {
109
+ id,
110
+ object: "chat.completion",
111
+ created,
112
+ model: request.model,
113
+ choices: [
114
+ {
115
+ index: 0,
116
+ message: {
117
+ role: "assistant",
118
+ content: response,
119
+ },
120
+ finish_reason: "stop",
121
+ },
122
+ ],
123
+ usage: {
124
+ prompt_tokens: promptTokens,
125
+ completion_tokens: completionTokens,
126
+ total_tokens: promptTokens + completionTokens,
127
+ },
128
+ };
129
+ }
130
+
131
+ private async createChatCompletionWithTools(
132
+ request: ChatCompletionRequest
133
+ ): Promise<ChatCompletionResponse> {
134
+ const id = this.generateId();
135
+ const created = this.getCurrentTimestamp();
136
+
137
+ // Validate tools
138
+ if (request.tools) {
139
+ const validation = this.toolService.validateTools(request.tools);
140
+ if (!validation.valid) {
141
+ throw new Error(`Invalid tools: ${validation.errors.join(", ")}`);
142
+ }
143
+ }
144
+
145
+ // Create a modified request with tool instructions
146
+ const modifiedMessages = [...request.messages];
147
+
148
+ // Add tool instructions as user message (DuckAI doesn't support system messages)
149
+ if (request.tools && request.tools.length > 0) {
150
+ const toolPrompt = this.toolService.generateToolSystemPrompt(
151
+ request.tools,
152
+ request.tool_choice
153
+ );
154
+ modifiedMessages.unshift({
155
+ role: "user",
156
+ content: `[SYSTEM INSTRUCTIONS] ${toolPrompt}
157
+
158
+ Please follow these instructions when responding to the following user message.`,
159
+ });
160
+ }
161
+
162
+ const duckAIRequest = this.transformToDuckAIRequest({
163
+ ...request,
164
+ messages: modifiedMessages,
165
+ });
166
+
167
+ const response = await this.duckAI.chat(duckAIRequest);
168
+
169
+ // Check if the response contains function calls
170
+ if (this.toolService.detectFunctionCalls(response)) {
171
+ const toolCalls = this.toolService.extractFunctionCalls(response);
172
+
173
+ if (toolCalls.length > 0) {
174
+ // Calculate token usage
175
+ const promptText = modifiedMessages
176
+ .map((m) => m.content || "")
177
+ .join(" ");
178
+ const promptTokens = this.estimateTokens(promptText);
179
+ const completionTokens = this.estimateTokens(response);
180
+
181
+ return {
182
+ id,
183
+ object: "chat.completion",
184
+ created,
185
+ model: request.model,
186
+ choices: [
187
+ {
188
+ index: 0,
189
+ message: {
190
+ role: "assistant",
191
+ content: null,
192
+ tool_calls: toolCalls,
193
+ },
194
+ finish_reason: "tool_calls",
195
+ },
196
+ ],
197
+ usage: {
198
+ prompt_tokens: promptTokens,
199
+ completion_tokens: completionTokens,
200
+ total_tokens: promptTokens + completionTokens,
201
+ },
202
+ };
203
+ }
204
+ }
205
+
206
+ // No function calls detected
207
+ // If tool_choice is "required" or specific function, we need to force a function call
208
+ if (
209
+ (request.tool_choice === "required" ||
210
+ (typeof request.tool_choice === "object" &&
211
+ request.tool_choice.type === "function")) &&
212
+ request.tools &&
213
+ request.tools.length > 0
214
+ ) {
215
+ // Get user message for argument extraction
216
+ const userMessage = request.messages[request.messages.length - 1];
217
+ const userContent = userMessage.content || "";
218
+
219
+ // Determine which function to call
220
+ let functionToCall: string;
221
+
222
+ // If specific function is requested, use that
223
+ if (
224
+ typeof request.tool_choice === "object" &&
225
+ request.tool_choice.type === "function"
226
+ ) {
227
+ functionToCall = request.tool_choice.function.name;
228
+ } else {
229
+ // Try to infer which function to call based on the user's request
230
+ // Simple heuristics to choose appropriate function
231
+ functionToCall = request.tools[0].function.name; // Default to first function
232
+
233
+ if (userContent.toLowerCase().includes("time")) {
234
+ const timeFunction = request.tools.find(
235
+ (t) => t.function.name === "get_current_time"
236
+ );
237
+ if (timeFunction) functionToCall = timeFunction.function.name;
238
+ } else if (
239
+ userContent.toLowerCase().includes("calculate") ||
240
+ /\d+\s*[+\-*/]\s*\d+/.test(userContent)
241
+ ) {
242
+ const calcFunction = request.tools.find(
243
+ (t) => t.function.name === "calculate"
244
+ );
245
+ if (calcFunction) functionToCall = calcFunction.function.name;
246
+ } else if (userContent.toLowerCase().includes("weather")) {
247
+ const weatherFunction = request.tools.find(
248
+ (t) => t.function.name === "get_weather"
249
+ );
250
+ if (weatherFunction) functionToCall = weatherFunction.function.name;
251
+ }
252
+ }
253
+
254
+ // Generate appropriate arguments based on function
255
+ let args = "{}";
256
+ if (functionToCall === "calculate") {
257
+ const mathMatch = userContent.match(/(\d+\s*[+\-*/]\s*\d+)/);
258
+ if (mathMatch) {
259
+ args = JSON.stringify({ expression: mathMatch[1] });
260
+ }
261
+ } else if (functionToCall === "get_weather") {
262
+ // Try to extract location from user message
263
+ const locationMatch = userContent.match(
264
+ /(?:in|for|at)\s+([A-Za-z\s,]+)/i
265
+ );
266
+ if (locationMatch) {
267
+ args = JSON.stringify({ location: locationMatch[1].trim() });
268
+ }
269
+ }
270
+
271
+ const forcedToolCall: ToolCall = {
272
+ id: `call_${Date.now()}`,
273
+ type: "function",
274
+ function: {
275
+ name: functionToCall,
276
+ arguments: args,
277
+ },
278
+ };
279
+
280
+ const promptText = modifiedMessages.map((m) => m.content || "").join(" ");
281
+ const promptTokens = this.estimateTokens(promptText);
282
+ const completionTokens = this.estimateTokens(
283
+ JSON.stringify(forcedToolCall)
284
+ );
285
+
286
+ return {
287
+ id,
288
+ object: "chat.completion",
289
+ created,
290
+ model: request.model,
291
+ choices: [
292
+ {
293
+ index: 0,
294
+ message: {
295
+ role: "assistant",
296
+ content: null,
297
+ tool_calls: [forcedToolCall],
298
+ },
299
+ finish_reason: "tool_calls",
300
+ },
301
+ ],
302
+ usage: {
303
+ prompt_tokens: promptTokens,
304
+ completion_tokens: completionTokens,
305
+ total_tokens: promptTokens + completionTokens,
306
+ },
307
+ };
308
+ }
309
+
310
+ // No function calls detected, return normal response
311
+ const promptText = modifiedMessages.map((m) => m.content || "").join(" ");
312
+ const promptTokens = this.estimateTokens(promptText);
313
+ const completionTokens = this.estimateTokens(response);
314
+
315
+ return {
316
+ id,
317
+ object: "chat.completion",
318
+ created,
319
+ model: request.model,
320
+ choices: [
321
+ {
322
+ index: 0,
323
+ message: {
324
+ role: "assistant",
325
+ content: response,
326
+ },
327
+ finish_reason: "stop",
328
+ },
329
+ ],
330
+ usage: {
331
+ prompt_tokens: promptTokens,
332
+ completion_tokens: completionTokens,
333
+ total_tokens: promptTokens + completionTokens,
334
+ },
335
+ };
336
+ }
337
+
338
+ async createChatCompletionStream(
339
+ request: ChatCompletionRequest
340
+ ): Promise<ReadableStream<Uint8Array>> {
341
+ // Check if this request involves function calling
342
+ if (
343
+ this.toolService.shouldUseFunctionCalling(
344
+ request.tools,
345
+ request.tool_choice
346
+ )
347
+ ) {
348
+ return this.createChatCompletionStreamWithTools(request);
349
+ }
350
+
351
+ const duckAIRequest = this.transformToDuckAIRequest(request);
352
+ const duckStream = await this.duckAI.chatStream(duckAIRequest);
353
+
354
+ const id = this.generateId();
355
+ const created = this.getCurrentTimestamp();
356
+
357
+ return new ReadableStream({
358
+ start(controller) {
359
+ const reader = duckStream.getReader();
360
+ let isFirst = true;
361
+
362
+ function pump(): Promise<void> {
363
+ return reader.read().then(({ done, value }) => {
364
+ if (done) {
365
+ // Send final chunk
366
+ const finalChunk: ChatCompletionStreamResponse = {
367
+ id,
368
+ object: "chat.completion.chunk",
369
+ created,
370
+ model: request.model,
371
+ choices: [
372
+ {
373
+ index: 0,
374
+ delta: {},
375
+ finish_reason: "stop",
376
+ },
377
+ ],
378
+ };
379
+
380
+ const finalData = `data: ${JSON.stringify(finalChunk)}\n\n`;
381
+ const finalDone = `data: [DONE]\n\n`;
382
+
383
+ controller.enqueue(new TextEncoder().encode(finalData));
384
+ controller.enqueue(new TextEncoder().encode(finalDone));
385
+ controller.close();
386
+ return;
387
+ }
388
+
389
+ const chunk: ChatCompletionStreamResponse = {
390
+ id,
391
+ object: "chat.completion.chunk",
392
+ created,
393
+ model: request.model,
394
+ choices: [
395
+ {
396
+ index: 0,
397
+ delta: isFirst
398
+ ? { role: "assistant", content: value }
399
+ : { content: value },
400
+ finish_reason: null,
401
+ },
402
+ ],
403
+ };
404
+
405
+ isFirst = false;
406
+ const data = `data: ${JSON.stringify(chunk)}\n\n`;
407
+ controller.enqueue(new TextEncoder().encode(data));
408
+
409
+ return pump();
410
+ });
411
+ }
412
+
413
+ return pump();
414
+ },
415
+ });
416
+ }
417
+
418
+ private async createChatCompletionStreamWithTools(
419
+ request: ChatCompletionRequest
420
+ ): Promise<ReadableStream<Uint8Array>> {
421
+ // For tools, we need to collect the full response first to parse function calls
422
+ // This is a limitation of the "trick" approach - streaming with tools is complex
423
+ const completion = await this.createChatCompletionWithTools(request);
424
+
425
+ const id = completion.id;
426
+ const created = completion.created;
427
+
428
+ return new ReadableStream({
429
+ start(controller) {
430
+ const choice = completion.choices[0];
431
+
432
+ if (choice.message.tool_calls) {
433
+ // Stream tool calls
434
+ const toolCallsChunk: ChatCompletionStreamResponse = {
435
+ id,
436
+ object: "chat.completion.chunk",
437
+ created,
438
+ model: request.model,
439
+ choices: [
440
+ {
441
+ index: 0,
442
+ delta: {
443
+ role: "assistant",
444
+ tool_calls: choice.message.tool_calls,
445
+ },
446
+ finish_reason: null,
447
+ },
448
+ ],
449
+ };
450
+
451
+ const toolCallsData = `data: ${JSON.stringify(toolCallsChunk)}\n\n`;
452
+ controller.enqueue(new TextEncoder().encode(toolCallsData));
453
+
454
+ // Send final chunk
455
+ const finalChunk: ChatCompletionStreamResponse = {
456
+ id,
457
+ object: "chat.completion.chunk",
458
+ created,
459
+ model: request.model,
460
+ choices: [
461
+ {
462
+ index: 0,
463
+ delta: {},
464
+ finish_reason: "tool_calls",
465
+ },
466
+ ],
467
+ };
468
+
469
+ const finalData = `data: ${JSON.stringify(finalChunk)}\n\n`;
470
+ const finalDone = `data: [DONE]\n\n`;
471
+
472
+ controller.enqueue(new TextEncoder().encode(finalData));
473
+ controller.enqueue(new TextEncoder().encode(finalDone));
474
+ } else {
475
+ // Stream regular content
476
+ const content = choice.message.content || "";
477
+
478
+ // Send role first
479
+ const roleChunk: ChatCompletionStreamResponse = {
480
+ id,
481
+ object: "chat.completion.chunk",
482
+ created,
483
+ model: request.model,
484
+ choices: [
485
+ {
486
+ index: 0,
487
+ delta: { role: "assistant" },
488
+ finish_reason: null,
489
+ },
490
+ ],
491
+ };
492
+
493
+ const roleData = `data: ${JSON.stringify(roleChunk)}\n\n`;
494
+ controller.enqueue(new TextEncoder().encode(roleData));
495
+
496
+ // Stream content in chunks
497
+ const chunkSize = 10;
498
+ for (let i = 0; i < content.length; i += chunkSize) {
499
+ const contentChunk = content.slice(i, i + chunkSize);
500
+
501
+ const chunk: ChatCompletionStreamResponse = {
502
+ id,
503
+ object: "chat.completion.chunk",
504
+ created,
505
+ model: request.model,
506
+ choices: [
507
+ {
508
+ index: 0,
509
+ delta: { content: contentChunk },
510
+ finish_reason: null,
511
+ },
512
+ ],
513
+ };
514
+
515
+ const data = `data: ${JSON.stringify(chunk)}\n\n`;
516
+ controller.enqueue(new TextEncoder().encode(data));
517
+ }
518
+
519
+ // Send final chunk
520
+ const finalChunk: ChatCompletionStreamResponse = {
521
+ id,
522
+ object: "chat.completion.chunk",
523
+ created,
524
+ model: request.model,
525
+ choices: [
526
+ {
527
+ index: 0,
528
+ delta: {},
529
+ finish_reason: "stop",
530
+ },
531
+ ],
532
+ };
533
+
534
+ const finalData = `data: ${JSON.stringify(finalChunk)}\n\n`;
535
+ const finalDone = `data: [DONE]\n\n`;
536
+
537
+ controller.enqueue(new TextEncoder().encode(finalData));
538
+ controller.enqueue(new TextEncoder().encode(finalDone));
539
+ }
540
+
541
+ controller.close();
542
+ },
543
+ });
544
+ }
545
+
546
+ getModels(): ModelsResponse {
547
+ const models = this.duckAI.getAvailableModels();
548
+ const created = this.getCurrentTimestamp();
549
+
550
+ const modelData: Model[] = models.map((modelId) => ({
551
+ id: modelId,
552
+ object: "model",
553
+ created,
554
+ owned_by: "duckai",
555
+ }));
556
+
557
+ return {
558
+ object: "list",
559
+ data: modelData,
560
+ };
561
+ }
562
+
563
+ validateRequest(request: any): ChatCompletionRequest {
564
+ if (!request.messages || !Array.isArray(request.messages)) {
565
+ throw new Error("messages field is required and must be an array");
566
+ }
567
+
568
+ if (request.messages.length === 0) {
569
+ throw new Error("messages array cannot be empty");
570
+ }
571
+
572
+ for (const message of request.messages) {
573
+ if (
574
+ !message.role ||
575
+ !["system", "user", "assistant", "tool"].includes(message.role)
576
+ ) {
577
+ throw new Error(
578
+ "Each message must have a valid role (system, user, assistant, or tool)"
579
+ );
580
+ }
581
+
582
+ // Tool messages have different validation rules
583
+ if (message.role === "tool") {
584
+ if (!message.tool_call_id) {
585
+ throw new Error("Tool messages must have a tool_call_id");
586
+ }
587
+ if (typeof message.content !== "string") {
588
+ throw new Error("Tool messages must have content as a string");
589
+ }
590
+ } else {
591
+ // For non-tool messages, content can be null if there are tool_calls
592
+ if (
593
+ message.content === undefined ||
594
+ (message.content !== null && typeof message.content !== "string")
595
+ ) {
596
+ throw new Error("Each message must have content as a string or null");
597
+ }
598
+ }
599
+ }
600
+
601
+ // Validate tools if provided
602
+ if (request.tools) {
603
+ const validation = this.toolService.validateTools(request.tools);
604
+ if (!validation.valid) {
605
+ throw new Error(`Invalid tools: ${validation.errors.join(", ")}`);
606
+ }
607
+ }
608
+
609
+ return {
610
+ model: request.model || "mistralai/Mistral-Small-24B-Instruct-2501",
611
+ messages: request.messages,
612
+ temperature: request.temperature,
613
+ max_tokens: request.max_tokens,
614
+ stream: request.stream || false,
615
+ top_p: request.top_p,
616
+ frequency_penalty: request.frequency_penalty,
617
+ presence_penalty: request.presence_penalty,
618
+ stop: request.stop,
619
+ tools: request.tools,
620
+ tool_choice: request.tool_choice,
621
+ };
622
+ }
623
+
624
+ async executeToolCall(toolCall: ToolCall): Promise<string> {
625
+ return this.toolService.executeFunctionCall(
626
+ toolCall,
627
+ this.availableFunctions
628
+ );
629
+ }
630
+
631
  /**
   * Get current rate limit status from DuckAI.
   *
   * Thin pass-through: the DuckAI client owns the rate-limit counters, so
   * this simply exposes its snapshot to callers of this service.
   */
  getRateLimitStatus() {
    return this.duckAI.getRateLimitStatus();
  }
637
+ }
rate-limit-store.ts ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import {
  existsSync,
  mkdirSync,
  readFileSync,
  unlinkSync,
  writeFileSync,
} from "fs";
import { join } from "path";
import { tmpdir } from "os";
4
+
5
+ interface RateLimitData {
6
+ // Support both old and new formats for backward compatibility
7
+ requestCount?: number; // Old format
8
+ windowStart?: number; // Old format
9
+ requestTimestamps?: number[]; // New sliding window format
10
+ lastRequestTime: number;
11
+ isLimited: boolean;
12
+ retryAfter?: number;
13
+ processId: string;
14
+ lastUpdated: number;
15
+ }
16
+
17
+ export class RateLimitStore {
18
+ private readonly storeDir: string;
19
+ private readonly storeFile: string;
20
+ private readonly processId: string;
21
+
22
+ constructor() {
23
+ this.storeDir = join(tmpdir(), "duckai");
24
+ this.storeFile = join(this.storeDir, "rate-limit.json");
25
+ this.processId = `${process.pid}-${Date.now()}`;
26
+
27
+ // Ensure directory exists
28
+ if (!existsSync(this.storeDir)) {
29
+ mkdirSync(this.storeDir, { recursive: true });
30
+ }
31
+ }
32
+
33
+ /**
34
+ * Read rate limit data from shared store
35
+ */
36
+ read(): RateLimitData | null {
37
+ try {
38
+ if (!existsSync(this.storeFile)) {
39
+ return null;
40
+ }
41
+
42
+ const data = readFileSync(this.storeFile, "utf8");
43
+
44
+ // Handle empty file
45
+ if (!data.trim()) {
46
+ return null;
47
+ }
48
+
49
+ const parsed: RateLimitData = JSON.parse(data);
50
+
51
+ // Check if data is stale (older than 5 minutes)
52
+ const now = Date.now();
53
+ if (now - parsed.lastUpdated > 5 * 60 * 1000) {
54
+ return null;
55
+ }
56
+
57
+ return parsed;
58
+ } catch (error) {
59
+ // Don't log warnings for expected cases like empty files
60
+ return null;
61
+ }
62
+ }
63
+
64
+ /**
65
+ * Write rate limit data to shared store
66
+ */
67
+ write(data: Omit<RateLimitData, "processId" | "lastUpdated">): void {
68
+ try {
69
+ const storeData: RateLimitData = {
70
+ ...data,
71
+ processId: this.processId,
72
+ lastUpdated: Date.now(),
73
+ };
74
+
75
+ writeFileSync(this.storeFile, JSON.stringify(storeData, null, 2));
76
+ } catch (error) {
77
+ console.warn("Failed to write rate limit store:", error);
78
+ }
79
+ }
80
+
81
+ /**
82
+ * Update rate limit data atomically
83
+ */
84
+ update(updater: (current: RateLimitData | null) => RateLimitData): void {
85
+ const current = this.read();
86
+ const updated = updater(current);
87
+ this.write(updated);
88
+ }
89
+
90
+ /**
91
+ * Clear the store
92
+ */
93
+ clear(): void {
94
+ try {
95
+ if (existsSync(this.storeFile)) {
96
+ const fs = require("fs");
97
+ fs.unlinkSync(this.storeFile);
98
+ }
99
+ } catch (error) {
100
+ console.warn("Failed to clear rate limit store:", error);
101
+ }
102
+ }
103
+
104
+ /**
105
+ * Get store file path for debugging
106
+ */
107
+ getStorePath(): string {
108
+ return this.storeFile;
109
+ }
110
+ }
server.ts ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { OpenAIService } from "./openai-service";
2
+
3
+ const openAIService = new OpenAIService();
4
+
5
+ const server = Bun.serve({
6
+ port: process.env.PORT || 3000,
7
+ async fetch(req) {
8
+ const url = new URL(req.url);
9
+
10
+ // CORS headers
11
+ const corsHeaders = {
12
+ "Access-Control-Allow-Origin": "*",
13
+ "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
14
+ "Access-Control-Allow-Headers": "Content-Type, Authorization",
15
+ };
16
+
17
+ // Handle preflight requests
18
+ if (req.method === "OPTIONS") {
19
+ return new Response(null, { headers: corsHeaders });
20
+ }
21
+
22
+ try {
23
+ // Health check endpoint
24
+ if (url.pathname === "/health" && req.method === "GET") {
25
+ return new Response(JSON.stringify({ status: "ok" }), {
26
+ headers: { "Content-Type": "application/json", ...corsHeaders },
27
+ });
28
+ }
29
+
30
+ // Models endpoint
31
+ if (url.pathname === "/v1/models" && req.method === "GET") {
32
+ const models = openAIService.getModels();
33
+ return new Response(JSON.stringify(models), {
34
+ headers: { "Content-Type": "application/json", ...corsHeaders },
35
+ });
36
+ }
37
+
38
+ // Chat completions endpoint
39
+ if (url.pathname === "/v1/chat/completions" && req.method === "POST") {
40
+ const body = await req.json();
41
+ const validatedRequest = openAIService.validateRequest(body);
42
+
43
+ // Handle streaming
44
+ if (validatedRequest.stream) {
45
+ const stream =
46
+ await openAIService.createChatCompletionStream(validatedRequest);
47
+ return new Response(stream, {
48
+ headers: {
49
+ "Content-Type": "text/event-stream",
50
+ "Cache-Control": "no-cache",
51
+ Connection: "keep-alive",
52
+ ...corsHeaders,
53
+ },
54
+ });
55
+ }
56
+
57
+ // Handle non-streaming
58
+ const completion =
59
+ await openAIService.createChatCompletion(validatedRequest);
60
+ return new Response(JSON.stringify(completion), {
61
+ headers: { "Content-Type": "application/json", ...corsHeaders },
62
+ });
63
+ }
64
+
65
+ // 404 for unknown endpoints
66
+ return new Response(
67
+ JSON.stringify({
68
+ error: {
69
+ message: "Not found",
70
+ type: "invalid_request_error",
71
+ },
72
+ }),
73
+ {
74
+ status: 404,
75
+ headers: { "Content-Type": "application/json", ...corsHeaders },
76
+ }
77
+ );
78
+ } catch (error) {
79
+ console.error("Server error:", error);
80
+
81
+ const errorMessage =
82
+ error instanceof Error ? error.message : "Internal server error";
83
+ const statusCode =
84
+ errorMessage.includes("required") || errorMessage.includes("must")
85
+ ? 400
86
+ : 500;
87
+
88
+ return new Response(
89
+ JSON.stringify({
90
+ error: {
91
+ message: errorMessage,
92
+ type:
93
+ statusCode === 400
94
+ ? "invalid_request_error"
95
+ : "internal_server_error",
96
+ },
97
+ }),
98
+ {
99
+ status: statusCode,
100
+ headers: { "Content-Type": "application/json", ...corsHeaders },
101
+ }
102
+ );
103
+ }
104
+ },
105
+ });
106
+
107
+ console.log(
108
+ `🚀 OpenAI-compatible server running on http://localhost:${server.port}`
109
+ );
110
+ console.log(`📚 Available endpoints:`);
111
+ console.log(` GET /health - Health check`);
112
+ console.log(` GET /v1/models - List available models`);
113
+ console.log(
114
+ ` POST /v1/chat/completions - Chat completions (streaming & non-streaming)`
115
+ );
116
+ console.log(`\n🔧 Example usage:`);
117
+ console.log(
118
+ `curl -X POST http://localhost:${server.port}/v1/chat/completions \\`
119
+ );
120
+ console.log(` -H "Content-Type: application/json" \\`);
121
+ console.log(
122
+ ` -d '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"Hello!"}]}'`
123
+ );
shared-rate-limit-monitor.ts ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { RateLimitStore } from "./rate-limit-store";
2
+
3
+ /**
4
+ * Shared Rate Limit Monitor
5
+ *
6
+ * This monitor reads rate limit data from a shared store,
7
+ * allowing it to display real-time rate limit information
8
+ * across all DuckAI processes.
9
+ */
10
+ export class SharedRateLimitMonitor {
11
+ private rateLimitStore: RateLimitStore;
12
+ private monitoringInterval?: NodeJS.Timeout;
13
+
14
+ // Rate limit constants (should match DuckAI class)
15
+ private readonly MAX_REQUESTS_PER_MINUTE = 20;
16
+ private readonly WINDOW_SIZE_MS = 60 * 1000; // 1 minute
17
+ private readonly MIN_REQUEST_INTERVAL_MS = 1000; // 1 second
18
+
19
+ constructor() {
20
+ this.rateLimitStore = new RateLimitStore();
21
+ }
22
+
23
+ /**
24
+ * Clean old timestamps outside the sliding window
25
+ */
26
+ private cleanOldTimestamps(timestamps: number[]): number[] {
27
+ const now = Date.now();
28
+ const cutoff = now - this.WINDOW_SIZE_MS;
29
+ return timestamps.filter((timestamp) => timestamp > cutoff);
30
+ }
31
+
32
+ /**
33
+ * Get current rate limit status from shared store
34
+ */
35
+ getCurrentStatus() {
36
+ const stored = this.rateLimitStore.read();
37
+
38
+ if (!stored) {
39
+ // No data available, return default state
40
+ return {
41
+ requestsInCurrentWindow: 0,
42
+ maxRequestsPerMinute: this.MAX_REQUESTS_PER_MINUTE,
43
+ timeUntilWindowReset: this.WINDOW_SIZE_MS,
44
+ isCurrentlyLimited: false,
45
+ recommendedWaitTime: 0,
46
+ utilizationPercentage: 0,
47
+ timeUntilWindowResetMinutes: 1,
48
+ recommendedWaitTimeSeconds: 0,
49
+ dataSource: "default" as const,
50
+ lastUpdated: null,
51
+ };
52
+ }
53
+
54
+ const now = Date.now();
55
+ let requestsInWindow: number;
56
+ let timeUntilReset: number;
57
+
58
+ // Handle both old and new formats
59
+ if (stored.requestTimestamps) {
60
+ // New sliding window format
61
+ const cleanTimestamps = this.cleanOldTimestamps(stored.requestTimestamps);
62
+ requestsInWindow = cleanTimestamps.length;
63
+
64
+ // For sliding window, calculate when the oldest request will expire
65
+ const oldestTimestamp = cleanTimestamps[0];
66
+ timeUntilReset = oldestTimestamp
67
+ ? Math.max(0, oldestTimestamp + this.WINDOW_SIZE_MS - now)
68
+ : 0;
69
+ } else {
70
+ // Old fixed window format (backward compatibility)
71
+ const windowElapsed = now - (stored.windowStart || 0);
72
+ requestsInWindow = stored.requestCount || 0;
73
+ timeUntilReset = this.WINDOW_SIZE_MS - windowElapsed;
74
+
75
+ if (windowElapsed >= this.WINDOW_SIZE_MS) {
76
+ requestsInWindow = 0;
77
+ timeUntilReset = this.WINDOW_SIZE_MS;
78
+ }
79
+ }
80
+
81
+ // Calculate recommended wait time
82
+ const timeSinceLastRequest = now - stored.lastRequestTime;
83
+ const recommendedWait = Math.max(
84
+ 0,
85
+ this.MIN_REQUEST_INTERVAL_MS - timeSinceLastRequest
86
+ );
87
+
88
+ const utilizationPercentage =
89
+ (requestsInWindow / this.MAX_REQUESTS_PER_MINUTE) * 100;
90
+
91
+ return {
92
+ requestsInCurrentWindow: requestsInWindow,
93
+ maxRequestsPerMinute: this.MAX_REQUESTS_PER_MINUTE,
94
+ timeUntilWindowReset: Math.max(0, timeUntilReset),
95
+ isCurrentlyLimited: stored.isLimited,
96
+ recommendedWaitTime: recommendedWait,
97
+ utilizationPercentage,
98
+ timeUntilWindowResetMinutes: Math.ceil(
99
+ Math.max(0, timeUntilReset) / 60000
100
+ ),
101
+ recommendedWaitTimeSeconds: Math.ceil(recommendedWait / 1000),
102
+ dataSource: "shared" as const,
103
+ lastUpdated: new Date(stored.lastUpdated).toISOString(),
104
+ processId: stored.processId,
105
+ windowType: stored.requestTimestamps ? "sliding" : "fixed",
106
+ };
107
+ }
108
+
109
+ /**
110
+ * Print current rate limit status to console
111
+ */
112
+ printStatus(clearConsole: boolean = false) {
113
+ if (clearConsole) {
114
+ // Clear console for cleaner monitoring display
115
+ console.clear();
116
+ }
117
+
118
+ const status = this.getCurrentStatus();
119
+
120
+ const windowTypeIcon =
121
+ (status as any).windowType === "sliding" ? "🔄" : "⏰";
122
+ const windowTypeText =
123
+ (status as any).windowType === "sliding"
124
+ ? "Sliding Window"
125
+ : "Fixed Window";
126
+
127
+ console.log(`\n🔍 DuckAI Rate Limit Status (${windowTypeText}):`);
128
+ console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
129
+ console.log(
130
+ `📊 Requests in current window: ${status.requestsInCurrentWindow}/${status.maxRequestsPerMinute}`
131
+ );
132
+ console.log(`📈 Utilization: ${status.utilizationPercentage.toFixed(1)}%`);
133
+
134
+ if ((status as any).windowType === "sliding") {
135
+ console.log(
136
+ `${windowTypeIcon} Next request expires in: ${status.timeUntilWindowResetMinutes} minutes`
137
+ );
138
+ } else {
139
+ console.log(
140
+ `${windowTypeIcon} Window resets in: ${status.timeUntilWindowResetMinutes} minutes`
141
+ );
142
+ }
143
+
144
+ console.log(
145
+ `🚦 Currently limited: ${status.isCurrentlyLimited ? "❌ Yes" : "✅ No"}`
146
+ );
147
+
148
+ if (status.recommendedWaitTimeSeconds > 0) {
149
+ console.log(
150
+ `⏳ Recommended wait: ${status.recommendedWaitTimeSeconds} seconds`
151
+ );
152
+ }
153
+
154
+ // Data source info
155
+ if (status.dataSource === "shared" && status.lastUpdated) {
156
+ const updateTime = new Date(status.lastUpdated).toLocaleTimeString();
157
+ console.log(`📡 Data from: Process ${status.processId} at ${updateTime}`);
158
+ } else {
159
+ console.log(`📡 Data source: ${status.dataSource} (no active processes)`);
160
+ }
161
+
162
+ // Visual progress bar
163
+ const barLength = 20;
164
+ const filledLength = Math.round(
165
+ (status.utilizationPercentage / 100) * barLength
166
+ );
167
+ const bar = "█".repeat(filledLength) + "░".repeat(barLength - filledLength);
168
+ console.log(
169
+ `📊 Usage: [${bar}] ${status.utilizationPercentage.toFixed(1)}%`
170
+ );
171
+ console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n");
172
+ }
173
+
174
+ /**
175
+ * Print compact rate limit status for server console
176
+ */
177
+ printCompactStatus() {
178
+ const status = this.getCurrentStatus();
179
+ const windowType = (status as any).windowType === "sliding" ? "🔄" : "⏰";
180
+ const limitIcon = status.isCurrentlyLimited ? "❌" : "✅";
181
+
182
+ console.log(
183
+ `${windowType} Rate Limit: ${status.requestsInCurrentWindow}/${status.maxRequestsPerMinute} (${status.utilizationPercentage.toFixed(1)}%) ${limitIcon}`
184
+ );
185
+ }
186
+
187
+ /**
188
+ * Start continuous monitoring (prints status every interval)
189
+ */
190
+ startMonitoring(intervalSeconds: number = 30) {
191
+ console.log(
192
+ `🔄 Starting shared rate limit monitoring (every ${intervalSeconds}s)...`
193
+ );
194
+ console.log(`📁 Store location: ${this.rateLimitStore.getStorePath()}`);
195
+ this.printStatus();
196
+
197
+ this.monitoringInterval = setInterval(() => {
198
+ this.printStatus(true); // Clear console for each update
199
+ }, intervalSeconds * 1000);
200
+ }
201
+
202
+ /**
203
+ * Stop continuous monitoring
204
+ */
205
+ stopMonitoring() {
206
+ if (this.monitoringInterval) {
207
+ clearInterval(this.monitoringInterval);
208
+ this.monitoringInterval = undefined;
209
+ console.log("⏹️ Shared rate limit monitoring stopped.");
210
+ }
211
+ }
212
+
213
+ /**
214
+ * Get recommendations for optimal usage
215
+ */
216
+ getRecommendations() {
217
+ const status = this.getCurrentStatus();
218
+ const recommendations: string[] = [];
219
+
220
+ if (status.dataSource === "default") {
221
+ recommendations.push(
222
+ "ℹ️ No active DuckAI processes detected. Start making API calls to see real data."
223
+ );
224
+ }
225
+
226
+ if (status.utilizationPercentage > 80) {
227
+ recommendations.push(
228
+ "⚠️ High utilization detected. Consider implementing request queuing."
229
+ );
230
+ }
231
+
232
+ if (status.recommendedWaitTimeSeconds > 0) {
233
+ recommendations.push(
234
+ `⏳ Wait ${status.recommendedWaitTimeSeconds}s before next request.`
235
+ );
236
+ }
237
+
238
+ if (status.isCurrentlyLimited) {
239
+ recommendations.push(
240
+ "🚫 Currently rate limited. Wait for window reset or implement exponential backoff."
241
+ );
242
+ }
243
+
244
+ if (status.utilizationPercentage < 50 && status.dataSource === "shared") {
245
+ recommendations.push(
246
+ "✅ Good utilization level. You can safely increase request frequency."
247
+ );
248
+ }
249
+
250
+ recommendations.push(
251
+ "💡 Consider implementing request batching for better efficiency."
252
+ );
253
+ recommendations.push("🔄 Use exponential backoff for retry logic.");
254
+ recommendations.push("📊 Monitor rate limits continuously in production.");
255
+
256
+ return recommendations;
257
+ }
258
+
259
+ /**
260
+ * Print recommendations
261
+ */
262
+ printRecommendations() {
263
+ const recommendations = this.getRecommendations();
264
+
265
+ console.log("\n💡 Rate Limit Recommendations:");
266
+ console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
267
+ recommendations.forEach((rec) => console.log(rec));
268
+ console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n");
269
+ }
270
+
271
+ /**
272
+ * Clear the shared rate limit store
273
+ */
274
+ clearStore() {
275
+ this.rateLimitStore.clear();
276
+ console.log("🗑️ Shared rate limit store cleared.");
277
+ }
278
+
279
+ /**
280
+ * Get store information
281
+ */
282
+ getStoreInfo() {
283
+ const stored = this.rateLimitStore.read();
284
+ return {
285
+ storePath: this.rateLimitStore.getStorePath(),
286
+ hasData: !!stored,
287
+ data: stored,
288
+ };
289
+ }
290
+ }
291
+
292
+ // CLI usage for shared monitoring
293
+ if (require.main === module) {
294
+ const monitor = new SharedRateLimitMonitor();
295
+
296
+ // Parse command line arguments
297
+ const args = process.argv.slice(2);
298
+ const command = args[0];
299
+
300
+ switch (command) {
301
+ case "status":
302
+ monitor.printStatus();
303
+ monitor.printRecommendations();
304
+ break;
305
+
306
+ case "monitor":
307
+ const interval = parseInt(args[1]) || 30;
308
+ monitor.startMonitoring(interval);
309
+
310
+ // Stop monitoring on Ctrl+C
311
+ process.on("SIGINT", () => {
312
+ monitor.stopMonitoring();
313
+ process.exit(0);
314
+ });
315
+ break;
316
+
317
+ case "clear":
318
+ monitor.clearStore();
319
+ break;
320
+
321
+ case "info":
322
+ const info = monitor.getStoreInfo();
323
+ console.log("📁 Store Information:");
324
+ console.log(` Path: ${info.storePath}`);
325
+ console.log(` Has Data: ${info.hasData}`);
326
+ if (info.data) {
327
+ console.log(
328
+ ` Last Updated: ${new Date(info.data.lastUpdated).toLocaleString()}`
329
+ );
330
+ console.log(` Process ID: ${info.data.processId}`);
331
+ console.log(` Requests: ${info.data.requestCount}`);
332
+ }
333
+ break;
334
+
335
+ default:
336
+ console.log("🔍 DuckAI Shared Rate Limit Monitor");
337
+ console.log("");
338
+ console.log("This monitor reads rate limit data from a shared store,");
339
+ console.log("showing real-time information across all DuckAI processes.");
340
+ console.log("");
341
+ console.log("Usage:");
342
+ console.log(
343
+ " bun run src/shared-rate-limit-monitor.ts status # Show current status"
344
+ );
345
+ console.log(
346
+ " bun run src/shared-rate-limit-monitor.ts monitor [interval] # Start monitoring (default: 30s)"
347
+ );
348
+ console.log(
349
+ " bun run src/shared-rate-limit-monitor.ts clear # Clear stored data"
350
+ );
351
+ console.log(
352
+ " bun run src/shared-rate-limit-monitor.ts info # Show store info"
353
+ );
354
+ console.log("");
355
+ console.log("Examples:");
356
+ console.log(" bun run src/shared-rate-limit-monitor.ts status");
357
+ console.log(" bun run src/shared-rate-limit-monitor.ts monitor 10");
358
+ console.log(" bun run src/shared-rate-limit-monitor.ts clear");
359
+ break;
360
+ }
361
+ }
shared-rate-limit-tester.ts ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { DuckAI } from "./duckai";
2
+
3
+ /**
4
+ * Shared Rate Limit Tester
5
+ *
6
+ * This utility tests rate limits using the DuckAI class which writes to the shared store,
7
+ * allowing cross-process monitoring to work correctly.
8
+ */
9
+ export class SharedRateLimitTester {
10
+ private duckAI: DuckAI;
11
+
12
+ constructor() {
13
+ this.duckAI = new DuckAI();
14
+ }
15
+
16
+ /**
17
+ * Get current rate limit status
18
+ */
19
+ getCurrentStatus() {
20
+ const status = this.duckAI.getRateLimitStatus();
21
+ return {
22
+ ...status,
23
+ utilizationPercentage:
24
+ (status.requestsInCurrentWindow / status.maxRequestsPerMinute) * 100,
25
+ timeUntilWindowResetMinutes: Math.ceil(
26
+ status.timeUntilWindowReset / 60000
27
+ ),
28
+ recommendedWaitTimeSeconds: Math.ceil(status.recommendedWaitTime / 1000),
29
+ };
30
+ }
31
+
32
+ /**
33
+ * Print current rate limit status to console
34
+ */
35
+ printStatus() {
36
+ const status = this.getCurrentStatus();
37
+
38
+ console.log("\n🔍 DuckAI Rate Limit Status (Shared Tester):");
39
+ console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
40
+ console.log(
41
+ `📊 Requests in current window: ${status.requestsInCurrentWindow}/${status.maxRequestsPerMinute}`
42
+ );
43
+ console.log(`📈 Utilization: ${status.utilizationPercentage.toFixed(1)}%`);
44
+ console.log(
45
+ `⏰ Window resets in: ${status.timeUntilWindowResetMinutes} minutes`
46
+ );
47
+ console.log(
48
+ `🚦 Currently limited: ${status.isCurrentlyLimited ? "❌ Yes" : "✅ No"}`
49
+ );
50
+
51
+ if (status.recommendedWaitTimeSeconds > 0) {
52
+ console.log(
53
+ `⏳ Recommended wait: ${status.recommendedWaitTimeSeconds} seconds`
54
+ );
55
+ }
56
+
57
+ // Visual progress bar
58
+ const barLength = 20;
59
+ const filledLength = Math.round(
60
+ (status.utilizationPercentage / 100) * barLength
61
+ );
62
+ const bar = "█".repeat(filledLength) + "░".repeat(barLength - filledLength);
63
+ console.log(
64
+ `📊 Usage: [${bar}] ${status.utilizationPercentage.toFixed(1)}%`
65
+ );
66
+ console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n");
67
+ }
68
+
69
+ /**
70
+ * Test rate limits by making a series of requests using DuckAI (writes to shared store)
71
+ */
72
+ async testRateLimits(
73
+ numberOfRequests: number = 5,
74
+ delayBetweenRequests: number = 1000
75
+ ) {
76
+ console.log(
77
+ `🧪 Testing rate limits with ${numberOfRequests} requests (${delayBetweenRequests}ms delay)...`
78
+ );
79
+ console.log(
80
+ "📡 Using DuckAI class - data will be shared across processes!"
81
+ );
82
+
83
+ for (let i = 1; i <= numberOfRequests; i++) {
84
+ console.log(`\n📤 Making request ${i}/${numberOfRequests}...`);
85
+
86
+ try {
87
+ const startTime = Date.now();
88
+
89
+ const response = await this.duckAI.chat({
90
+ model: "gpt-4o-mini",
91
+ messages: [{ role: "user", content: `Shared test request ${i}` }],
92
+ });
93
+
94
+ const endTime = Date.now();
95
+ const responseTime = endTime - startTime;
96
+
97
+ console.log(`✅ Request ${i} successful (${responseTime}ms)`);
98
+ this.printStatus();
99
+
100
+ if (i < numberOfRequests) {
101
+ console.log(
102
+ `⏳ Waiting ${delayBetweenRequests}ms before next request...`
103
+ );
104
+ await new Promise((resolve) =>
105
+ setTimeout(resolve, delayBetweenRequests)
106
+ );
107
+ }
108
+ } catch (error) {
109
+ const errorMessage =
110
+ error instanceof Error ? error.message : String(error);
111
+ console.log(`❌ Request ${i} failed:`, errorMessage);
112
+ this.printStatus();
113
+
114
+ // If rate limited, wait longer
115
+ if (errorMessage.includes("Rate limited")) {
116
+ const waitTime =
117
+ this.getCurrentStatus().recommendedWaitTimeSeconds * 1000;
118
+ console.log(`⏳ Rate limited! Waiting ${waitTime}ms...`);
119
+ await new Promise((resolve) => setTimeout(resolve, waitTime));
120
+ }
121
+ }
122
+ }
123
+
124
+ console.log("\n🏁 Shared rate limit test completed!");
125
+ console.log(
126
+ "📡 Data has been written to shared store for cross-process monitoring!"
127
+ );
128
+ }
129
+
130
+ /**
131
+ * Get recommendations for optimal usage
132
+ */
133
+ getRecommendations() {
134
+ const status = this.getCurrentStatus();
135
+ const recommendations: string[] = [];
136
+
137
+ if (status.utilizationPercentage > 80) {
138
+ recommendations.push(
139
+ "⚠️ High utilization detected. Consider implementing request queuing."
140
+ );
141
+ }
142
+
143
+ if (status.recommendedWaitTimeSeconds > 0) {
144
+ recommendations.push(
145
+ `⏳ Wait ${status.recommendedWaitTimeSeconds}s before next request.`
146
+ );
147
+ }
148
+
149
+ if (status.isCurrentlyLimited) {
150
+ recommendations.push(
151
+ "🚫 Currently rate limited. Wait for window reset or implement exponential backoff."
152
+ );
153
+ }
154
+
155
+ if (status.utilizationPercentage < 50) {
156
+ recommendations.push(
157
+ "✅ Good utilization level. You can safely increase request frequency."
158
+ );
159
+ }
160
+
161
+ recommendations.push(
162
+ "💡 Consider implementing request batching for better efficiency."
163
+ );
164
+ recommendations.push("🔄 Use exponential backoff for retry logic.");
165
+ recommendations.push("📊 Monitor rate limits continuously in production.");
166
+ recommendations.push(
167
+ "📡 Use shared monitoring for cross-process visibility."
168
+ );
169
+
170
+ return recommendations;
171
+ }
172
+
173
+ /**
174
+ * Print recommendations
175
+ */
176
+ printRecommendations() {
177
+ const recommendations = this.getRecommendations();
178
+
179
+ console.log("\n💡 Rate Limit Recommendations:");
180
+ console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
181
+ recommendations.forEach((rec) => console.log(rec));
182
+ console.log("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n");
183
+ }
184
+ }
185
+
186
+ // CLI usage
187
+ if (require.main === module) {
188
+ const tester = new SharedRateLimitTester();
189
+
190
+ // Parse command line arguments
191
+ const args = process.argv.slice(2);
192
+ const command = args[0];
193
+
194
+ switch (command) {
195
+ case "status":
196
+ tester.printStatus();
197
+ tester.printRecommendations();
198
+ break;
199
+
200
+ case "test":
201
+ const requests = parseInt(args[1]) || 5;
202
+ const delay = parseInt(args[2]) || 1000;
203
+ tester.testRateLimits(requests, delay).then(() => {
204
+ tester.printRecommendations();
205
+ process.exit(0);
206
+ });
207
+ break;
208
+
209
+ default:
210
+ console.log("🔍 DuckAI Shared Rate Limit Tester");
211
+ console.log("📡 Uses DuckAI class - data is shared across processes!");
212
+ console.log("");
213
+ console.log("Usage:");
214
+ console.log(
215
+ " bun run src/shared-rate-limit-tester.ts status # Show current status"
216
+ );
217
+ console.log(
218
+ " bun run src/shared-rate-limit-tester.ts test [requests] [delay] # Test rate limits (shared)"
219
+ );
220
+ console.log("");
221
+ console.log("Examples:");
222
+ console.log(" bun run src/shared-rate-limit-tester.ts status");
223
+ console.log(" bun run src/shared-rate-limit-tester.ts test 10 2000");
224
+ console.log("");
225
+ console.log("💡 For cross-process monitoring, run this in one terminal:");
226
+ console.log(" bun run src/shared-rate-limit-tester.ts test 20 3000");
227
+ console.log("");
228
+ console.log("And this in another terminal:");
229
+ console.log(" bun run src/shared-rate-limit-monitor.ts monitor 2");
230
+ break;
231
+ }
232
+ }
start_duckai.bat ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
@echo off
REM DuckAI Server Starter
REM Starts the DuckAI OpenAI-compatible server on port 3265

echo Starting DuckAI Server...
echo.

REM Check if Node.js is installed
REM ("where" sets a nonzero ERRORLEVEL when node is not found on PATH)
where node >nul 2>nul
if %ERRORLEVEL% neq 0 (
echo Node.js is not installed or not in PATH.
echo Please install Node.js from https://nodejs.org/
pause
exit /b 1
)

REM Change to DuckAI directory
REM (%~dp0 expands to this script's drive+path; /d also switches drives)
cd /d "%~dp0"

REM Start DuckAI server
echo Starting DuckAI OpenAI server on port 3265...
echo Server will be available at: http://localhost:3265/v1
echo.

REM Run the server with port 3265
REM (the PORT environment variable is read by the npm start process)
set PORT=3265
npm start

pause
tool-service.ts ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {
2
+ ToolDefinition,
3
+ ToolCall,
4
+ ToolChoice,
5
+ ChatCompletionMessage,
6
+ FunctionDefinition,
7
+ } from "./types";
8
+
9
+ export class ToolService {
10
+ /**
11
+ * Generates a system prompt that instructs the AI how to use the provided tools
12
+ */
13
+ generateToolSystemPrompt(
14
+ tools: ToolDefinition[],
15
+ toolChoice: ToolChoice = "auto"
16
+ ): string {
17
+ const toolDescriptions = tools
18
+ .map((tool) => {
19
+ const func = tool.function;
20
+ let description = `${func.name}`;
21
+
22
+ if (func.description) {
23
+ description += `: ${func.description}`;
24
+ }
25
+
26
+ if (func.parameters) {
27
+ const params = func.parameters.properties || {};
28
+ const required = func.parameters.required || [];
29
+
30
+ const paramDescriptions = Object.entries(params)
31
+ .map(([name, schema]: [string, any]) => {
32
+ const isRequired = required.includes(name);
33
+ const type = schema.type || "any";
34
+ const desc = schema.description || "";
35
+ return ` - ${name} (${type}${isRequired ? ", required" : ", optional"}): ${desc}`;
36
+ })
37
+ .join("\n");
38
+
39
+ if (paramDescriptions) {
40
+ description += `\nParameters:\n${paramDescriptions}`;
41
+ }
42
+ }
43
+
44
+ return description;
45
+ })
46
+ .join("\n\n");
47
+
48
+ let prompt = `You are an AI assistant with access to the following functions. When you need to call a function, respond with a JSON object in this exact format:
49
+
50
+ {
51
+ "tool_calls": [
52
+ {
53
+ "id": "call_<unique_id>",
54
+ "type": "function",
55
+ "function": {
56
+ "name": "<function_name>",
57
+ "arguments": "<json_string_of_arguments>"
58
+ }
59
+ }
60
+ ]
61
+ }
62
+
63
+ Available functions:
64
+ ${toolDescriptions}
65
+
66
+ Important rules:
67
+ 1. Only call functions when necessary to answer the user's question
68
+ 2. Use the exact function names provided
69
+ 3. Provide arguments as a JSON string
70
+ 4. Generate unique IDs for each tool call (e.g., call_1, call_2, etc.)
71
+ 5. If you don't need to call any functions, respond normally without the tool_calls format`;
72
+
73
+ if (toolChoice === "required") {
74
+ prompt +=
75
+ "\n6. You MUST call at least one function to answer this request";
76
+ } else if (toolChoice === "none") {
77
+ prompt += "\n6. Do NOT call any functions, respond normally";
78
+ } else if (
79
+ typeof toolChoice === "object" &&
80
+ toolChoice.type === "function"
81
+ ) {
82
+ prompt += `\n6. You MUST call the function "${toolChoice.function.name}"`;
83
+ }
84
+
85
+ return prompt;
86
+ }
87
+
88
+ /**
89
+ * Detects if a response contains function calls
90
+ */
91
+ detectFunctionCalls(content: string): boolean {
92
+ try {
93
+ const parsed = JSON.parse(content.trim());
94
+ return (
95
+ parsed.tool_calls &&
96
+ Array.isArray(parsed.tool_calls) &&
97
+ parsed.tool_calls.length > 0
98
+ );
99
+ } catch {
100
+ // Try to find tool_calls pattern in the text
101
+ return /["']?tool_calls["']?\s*:\s*\[/.test(content);
102
+ }
103
+ }
104
+
105
+ /**
106
+ * Extracts function calls from AI response
107
+ */
108
+ extractFunctionCalls(content: string): ToolCall[] {
109
+ try {
110
+ // First try to parse as complete JSON
111
+ const parsed = JSON.parse(content.trim());
112
+ if (parsed.tool_calls && Array.isArray(parsed.tool_calls)) {
113
+ return parsed.tool_calls.map((call: any, index: number) => ({
114
+ id: call.id || `call_${Date.now()}_${index}`,
115
+ type: "function",
116
+ function: {
117
+ name: call.function.name,
118
+ arguments:
119
+ typeof call.function.arguments === "string"
120
+ ? call.function.arguments
121
+ : JSON.stringify(call.function.arguments),
122
+ },
123
+ }));
124
+ }
125
+ } catch {
126
+ // Try to extract from partial or malformed JSON
127
+ const toolCallsMatch = content.match(
128
+ /["']?tool_calls["']?\s*:\s*\[(.*?)\]/s
129
+ );
130
+ if (toolCallsMatch) {
131
+ try {
132
+ const toolCallsStr = `[${toolCallsMatch[1]}]`;
133
+ const toolCalls = JSON.parse(toolCallsStr);
134
+ return toolCalls.map((call: any, index: number) => ({
135
+ id: call.id || `call_${Date.now()}_${index}`,
136
+ type: "function",
137
+ function: {
138
+ name: call.function.name,
139
+ arguments:
140
+ typeof call.function.arguments === "string"
141
+ ? call.function.arguments
142
+ : JSON.stringify(call.function.arguments),
143
+ },
144
+ }));
145
+ } catch {
146
+ // Fallback: try to extract individual function calls
147
+ return this.extractFunctionCallsFromText(content);
148
+ }
149
+ }
150
+ }
151
+
152
+ return [];
153
+ }
154
+
155
+ /**
156
+ * Fallback method to extract function calls from text
157
+ */
158
+ private extractFunctionCallsFromText(content: string): ToolCall[] {
159
+ const calls: ToolCall[] = [];
160
+
161
+ // Look for function call patterns
162
+ const functionPattern =
163
+ /["']?function["']?\s*:\s*\{[^}]*["']?name["']?\s*:\s*["']([^"']+)["'][^}]*["']?arguments["']?\s*:\s*["']([^"']*)["']/g;
164
+ let match;
165
+ let index = 0;
166
+
167
+ while ((match = functionPattern.exec(content)) !== null) {
168
+ calls.push({
169
+ id: `call_${Date.now()}_${index}`,
170
+ type: "function",
171
+ function: {
172
+ name: match[1],
173
+ arguments: match[2],
174
+ },
175
+ });
176
+ index++;
177
+ }
178
+
179
+ return calls;
180
+ }
181
+
182
+ /**
183
+ * Executes a function call (mock implementation - in real use, this would call actual functions)
184
+ */
185
+ async executeFunctionCall(
186
+ toolCall: ToolCall,
187
+ availableFunctions: Record<string, Function>
188
+ ): Promise<string> {
189
+ const functionName = toolCall.function.name;
190
+ const functionToCall = availableFunctions[functionName];
191
+
192
+ if (!functionToCall) {
193
+ return JSON.stringify({
194
+ error: `Function '${functionName}' not found`,
195
+ available_functions: Object.keys(availableFunctions),
196
+ });
197
+ }
198
+
199
+ try {
200
+ const args = JSON.parse(toolCall.function.arguments);
201
+ const result = await functionToCall(args);
202
+ return typeof result === "string" ? result : JSON.stringify(result);
203
+ } catch (error) {
204
+ return JSON.stringify({
205
+ error: `Error executing function '${functionName}': ${error instanceof Error ? error.message : "Unknown error"}`,
206
+ arguments_received: toolCall.function.arguments,
207
+ });
208
+ }
209
+ }
210
+
211
+ /**
212
+ * Creates a tool result message
213
+ */
214
+ createToolResultMessage(
215
+ toolCallId: string,
216
+ result: string
217
+ ): ChatCompletionMessage {
218
+ return {
219
+ role: "tool",
220
+ content: result,
221
+ tool_call_id: toolCallId,
222
+ };
223
+ }
224
+
225
+ /**
226
+ * Validates tool definitions
227
+ */
228
+ validateTools(tools: ToolDefinition[]): { valid: boolean; errors: string[] } {
229
+ const errors: string[] = [];
230
+
231
+ if (!Array.isArray(tools)) {
232
+ errors.push("Tools must be an array");
233
+ return { valid: false, errors };
234
+ }
235
+
236
+ tools.forEach((tool, index) => {
237
+ if (!tool.type || tool.type !== "function") {
238
+ errors.push(`Tool at index ${index}: type must be "function"`);
239
+ }
240
+
241
+ if (!tool.function) {
242
+ errors.push(`Tool at index ${index}: function definition is required`);
243
+ return;
244
+ }
245
+
246
+ if (!tool.function.name || typeof tool.function.name !== "string") {
247
+ errors.push(
248
+ `Tool at index ${index}: function name is required and must be a string`
249
+ );
250
+ }
251
+
252
+ if (tool.function.parameters) {
253
+ if (tool.function.parameters.type !== "object") {
254
+ errors.push(
255
+ `Tool at index ${index}: function parameters type must be "object"`
256
+ );
257
+ }
258
+ }
259
+ });
260
+
261
+ return { valid: errors.length === 0, errors };
262
+ }
263
+
264
+ /**
265
+ * Checks if the request requires function calling
266
+ */
267
+ shouldUseFunctionCalling(
268
+ tools?: ToolDefinition[],
269
+ toolChoice?: ToolChoice
270
+ ): boolean {
271
+ if (!tools || tools.length === 0) {
272
+ return false;
273
+ }
274
+
275
+ if (toolChoice === "none") {
276
+ return false;
277
+ }
278
+
279
+ return true;
280
+ }
281
+
282
+ /**
283
+ * Generates a unique ID for tool calls
284
+ */
285
+ generateToolCallId(): string {
286
+ return `call_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
287
+ }
288
+ }
types.ts ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// OpenAI API Types

/**
 * A single message in a chat conversation (OpenAI-compatible shape).
 * `content` may be null, e.g. for assistant messages that carry only
 * tool calls.
 */
export interface ChatCompletionMessage {
  role: "system" | "user" | "assistant" | "tool";
  content: string | null;
  name?: string;
  // Tool calls requested by an assistant message.
  tool_calls?: ToolCall[];
  // For role "tool": the id of the ToolCall this message responds to.
  tool_call_id?: string;
}

/**
 * JSON-schema-style description of a callable function exposed to the
 * model.
 */
export interface FunctionDefinition {
  name: string;
  description?: string;
  parameters?: {
    type: "object";
    properties: Record<string, any>;
    required?: string[];
  };
}

/** A tool offered to the model; only function tools are modeled here. */
export interface ToolDefinition {
  type: "function";
  function: FunctionDefinition;
}

/** A concrete function invocation requested by the model. */
export interface ToolCall {
  id: string;
  type: "function";
  function: {
    name: string;
    // JSON-encoded argument object, kept as a string.
    arguments: string;
  };
}

/**
 * Controls whether/which tools the model may call: "none", "auto",
 * "required", or a specific function selected by name.
 */
export type ToolChoice =
  | "none"
  | "auto"
  | "required"
  | { type: "function"; function: { name: string } };
39
+
40
/**
 * Request body for the chat-completions endpoint (OpenAI-compatible).
 * Sampling fields (temperature, top_p, penalties, stop) are optional
 * pass-throughs.
 */
export interface ChatCompletionRequest {
  model: string;
  messages: ChatCompletionMessage[];
  temperature?: number;
  max_tokens?: number;
  // When true, the response is streamed as chat.completion.chunk events.
  stream?: boolean;
  top_p?: number;
  frequency_penalty?: number;
  presence_penalty?: number;
  stop?: string | string[];
  tools?: ToolDefinition[];
  tool_choice?: ToolChoice;
}

/** One generated alternative within a (non-streaming) completion. */
export interface ChatCompletionChoice {
  index: number;
  message: ChatCompletionMessage;
  finish_reason: "stop" | "length" | "content_filter" | "tool_calls" | null;
}

/**
 * Non-streaming chat-completion response, including token-usage
 * accounting.
 */
export interface ChatCompletionResponse {
  id: string;
  object: "chat.completion";
  // Unix timestamp (seconds) of creation.
  created: number;
  model: string;
  choices: ChatCompletionChoice[];
  usage: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}
72
+
73
/**
 * One choice within a streamed chunk; `delta` carries only the fields
 * that changed since the previous chunk.
 */
export interface ChatCompletionStreamChoice {
  index: number;
  delta: {
    role?: "assistant";
    content?: string;
    tool_calls?: ToolCall[];
  };
  finish_reason: "stop" | "length" | "content_filter" | "tool_calls" | null;
}

/** A single streamed chunk of a chat completion. */
export interface ChatCompletionStreamResponse {
  id: string;
  object: "chat.completion.chunk";
  // Unix timestamp (seconds) of creation.
  created: number;
  model: string;
  choices: ChatCompletionStreamChoice[];
}

/** A model entry as listed by the models endpoint. */
export interface Model {
  id: string;
  object: "model";
  created: number;
  owned_by: string;
}

/** Response shape of the model-listing endpoint. */
export interface ModelsResponse {
  object: "list";
  data: Model[];
}
102
+
103
// Duck.ai specific types

/**
 * Token pair used when talking to Duck.ai.
 * NOTE(review): semantics of `vqd`/`hash` are not visible here —
 * presumably anti-bot/request-signing tokens obtained before chatting;
 * confirm against the DuckAI client code that consumes this type.
 */
export interface VQDResponse {
  vqd: string;
  hash: string;
}

/** Payload sent to the Duck.ai chat backend: model plus full history. */
export interface DuckAIRequest {
  model: string;
  messages: ChatCompletionMessage[];
}