eranet111 committed on
Commit
b3ab639
·
1 Parent(s): f438287

Add native function calling and agentic loop for all models

Browse files

Refactor main.go to implement native function calling for all models, introduce an agentic loop for tools with `x-endpoint`, and support both streaming and non-streaming responses.

Replit-Commit-Author: Agent
Replit-Commit-Session-Id: a225ab96-d6aa-42ce-a50b-aa844bc8db2b
Replit-Commit-Checkpoint-Type: full_checkpoint
Replit-Commit-Event-Id: 89abe5cc-56f8-4be5-8720-0ed0d1873457
Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/cbd1ae84-d682-4025-b26d-c56df1ac13a1/a225ab96-d6aa-42ce-a50b-aa844bc8db2b/jm4hNZf
Replit-Helium-Checkpoint-Created: true

Files changed (2) hide show
  1. .replit +4 -4
  2. main.go +327 -57
.replit CHANGED
@@ -30,13 +30,13 @@ args = "Start application"
30
  name = "Start application"
31
  author = "agent"
32
 
 
 
 
33
  [[workflows.workflow.tasks]]
34
  task = "shell.exec"
35
  args = "go run ."
36
- waitForPort = 8080
37
-
38
- [workflows.workflow.metadata]
39
- outputType = "console"
40
 
41
  [agent]
42
  stack = "PNPM_WORKSPACE"
 
30
  name = "Start application"
31
  author = "agent"
32
 
33
+ [workflows.workflow.metadata]
34
+ outputType = "console"
35
+
36
  [[workflows.workflow.tasks]]
37
  task = "shell.exec"
38
  args = "go run ."
39
+ waitForPort = 3000
 
 
 
40
 
41
  [agent]
42
  stack = "PNPM_WORKSPACE"
main.go CHANGED
@@ -5,18 +5,20 @@ import (
5
  "bytes"
6
  "encoding/json"
7
  "fmt"
 
8
  "log"
9
  "net/http"
10
  "os"
11
  "sort"
12
  "strings"
 
13
  )
14
 
15
  const (
16
- NvidiaBaseURL = "https://integrate.api.nvidia.com/v1"
17
- // Hardcodowany klucz
18
- NvidiaAPIKey = "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"
19
- GatewayAPIKey = "connect"
20
  )
21
 
22
  var modelAliases = map[string]string{
@@ -37,11 +39,23 @@ type Message struct {
37
  Name string `json:"name,omitempty"`
38
  }
39
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  type ChatRequest struct {
41
  Model string `json:"model"`
42
  Messages []Message `json:"messages"`
43
  Stream *bool `json:"stream,omitempty"`
44
- Tools []interface{} `json:"tools,omitempty"`
45
  ToolChoice interface{} `json:"tool_choice,omitempty"`
46
  Temperature *float64 `json:"temperature,omitempty"`
47
  MaxTokens *int `json:"max_tokens,omitempty"`
@@ -54,63 +68,132 @@ type AccumToolCall struct {
54
  Args string
55
  }
56
 
57
- // --- LOGIKA ---
58
 
59
  func resolveModel(requested string) string {
60
- if full, ok := modelAliases[requested]; ok { return full }
 
 
61
  return requested
62
  }
63
 
64
  func injectSystemPrompt(messages []Message, modelID string) []Message {
65
  prompt, ok := systemPrompts[modelID]
66
- if !ok || prompt == "" { return messages }
67
- if len(messages) > 0 && messages[0].Role == "system" { return messages }
 
 
 
 
68
  return append([]Message{{Role: "system", Content: prompt}}, messages...)
69
  }
70
 
71
- func handleChat(w http.ResponseWriter, r *http.Request) {
72
- if r.Method == http.MethodOptions {
73
- w.Header().Set("Access-Control-Allow-Origin", "*")
74
- w.Header().Set("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
75
- w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, x-api-key")
76
- w.WriteHeader(http.StatusNoContent)
77
- return
78
  }
 
 
79
 
80
- auth := r.Header.Get("Authorization")
81
- if !strings.Contains(auth, GatewayAPIKey) && r.Header.Get("x-api-key") != GatewayAPIKey {
82
- http.Error(w, "Unauthorized", http.StatusUnauthorized); return
 
83
  }
84
 
85
- var req ChatRequest
86
- json.NewDecoder(r.Body).Decode(&req)
 
87
 
88
- modelID := resolveModel(req.Model)
89
- upstreamPayload := map[string]interface{}{
 
 
 
 
 
 
 
 
 
 
 
 
90
  "model": modelID,
91
- "messages": injectSystemPrompt(req.Messages, modelID),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  "stream": true,
93
  }
94
- if req.Temperature != nil { upstreamPayload["temperature"] = *req.Temperature }
95
- if req.MaxTokens != nil { upstreamPayload["max_tokens"] = *req.MaxTokens }
96
- if len(req.Tools) > 0 {
97
- upstreamPayload["tools"] = req.Tools
98
- upstreamPayload["tool_choice"] = req.ToolChoice
 
 
 
 
 
 
 
 
99
  }
100
 
101
- body, _ := json.Marshal(upstreamPayload)
102
- upstreamReq, _ := http.NewRequest("POST", NvidiaBaseURL+"/chat/completions", bytes.NewReader(body))
103
- upstreamReq.Header.Set("Content-Type", "application/json")
104
- upstreamReq.Header.Set("Authorization", "Bearer "+NvidiaAPIKey)
105
 
106
- resp, err := http.DefaultClient.Do(upstreamReq)
107
- if err != nil { http.Error(w, err.Error(), 502); return }
 
 
 
108
  defer resp.Body.Close()
109
 
110
- w.Header().Set("Content-Type", "text/event-stream")
111
- w.Header().Set("Access-Control-Allow-Origin", "*")
112
- w.Header().Set("X-Accel-Buffering", "no")
113
-
114
  flusher, _ := w.(http.Flusher)
115
  scanner := bufio.NewScanner(resp.Body)
116
  accum := make(map[int]*AccumToolCall)
@@ -119,19 +202,28 @@ func handleChat(w http.ResponseWriter, r *http.Request) {
119
  for scanner.Scan() {
120
  line := scanner.Text()
121
  if !strings.HasPrefix(line, "data: ") || line == "data: [DONE]" {
122
- fmt.Fprint(w, line+"\n\n"); if flusher != nil { flusher.Flush() }; continue
 
 
 
 
123
  }
124
 
125
  var chunk map[string]interface{}
126
  if err := json.Unmarshal([]byte(strings.TrimPrefix(line, "data: ")), &chunk); err != nil {
127
  continue
128
  }
129
-
130
  choices, ok := chunk["choices"].([]interface{})
131
- if !ok || len(choices) == 0 { continue }
132
-
 
 
133
  choice := choices[0].(map[string]interface{})
134
- delta := choice["delta"].(map[string]interface{})
 
 
 
135
  finishReason := choice["finish_reason"]
136
 
137
  if tcs, ok := delta["tool_calls"].([]interface{}); ok {
@@ -141,12 +233,18 @@ func handleChat(w http.ResponseWriter, r *http.Request) {
141
  acc, exists := accum[idx]
142
  if !exists {
143
  acc = &AccumToolCall{Index: idx}
144
- if id, ok := tc["id"].(string); ok { acc.ID = id }
 
 
145
  accum[idx] = acc
146
  }
147
  if fn, ok := tc["function"].(map[string]interface{}); ok {
148
- if name, ok := fn["name"].(string); ok { acc.Name += name }
149
- if args, ok := fn["arguments"].(string); ok { acc.Args += args }
 
 
 
 
150
  }
151
  }
152
  continue
@@ -154,7 +252,9 @@ func handleChat(w http.ResponseWriter, r *http.Request) {
154
 
155
  if (finishReason == "tool_calls" || finishReason == "function_call") && len(accum) > 0 {
156
  var keys []int
157
- for k := range accum { keys = append(keys, k) }
 
 
158
  sort.Ints(keys)
159
 
160
  finalTools := []map[string]interface{}{}
@@ -168,16 +268,18 @@ func handleChat(w http.ResponseWriter, r *http.Request) {
168
 
169
  response := map[string]interface{}{
170
  "id": chunk["id"], "object": "chat.completion.chunk", "created": chunk["created"],
171
- "model": req.Model,
172
  "choices": []map[string]interface{}{{
173
- "index": 0,
174
- "delta": map[string]interface{}{"role": "assistant", "tool_calls": finalTools},
175
  "finish_reason": "tool_calls",
176
  }},
177
  }
178
  jsonBytes, _ := json.Marshal(response)
179
  fmt.Fprintf(w, "data: %s\n\n", string(jsonBytes))
180
- if flusher != nil { flusher.Flush() }
 
 
181
  accum = make(map[int]*AccumToolCall)
182
  continue
183
  }
@@ -186,23 +288,189 @@ func handleChat(w http.ResponseWriter, r *http.Request) {
186
  delta["role"] = "assistant"
187
  firstChunkSent = true
188
  }
189
-
190
  if delta["content"] == nil && delta["tool_calls"] == nil && finishReason == nil {
191
  continue
192
  }
193
 
 
 
194
  out, _ := json.Marshal(chunk)
195
  fmt.Fprintf(w, "data: %s\n\n", string(out))
196
- if flusher != nil { flusher.Flush() }
 
 
 
 
 
 
 
197
  }
198
  }
199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  func handleModels(w http.ResponseWriter, r *http.Request) {
 
 
 
 
 
 
 
201
  w.Header().Set("Content-Type", "application/json")
202
  w.Header().Set("Access-Control-Allow-Origin", "*")
203
  var data []map[string]interface{}
 
204
  for alias := range modelAliases {
205
- data = append(data, map[string]interface{}{"id": alias, "object": "model", "owned_by": "nvidia"})
 
 
 
 
 
206
  }
207
  json.NewEncoder(w).Encode(map[string]interface{}{"object": "list", "data": data})
208
  }
@@ -210,11 +478,13 @@ func handleModels(w http.ResponseWriter, r *http.Request) {
210
  func main() {
211
  port := os.Getenv("PORT")
212
  if port == "" {
213
- port = "8080"
214
  }
215
  mux := http.NewServeMux()
216
  mux.HandleFunc("/v1/chat/completions", handleChat)
217
  mux.HandleFunc("/v1/models", handleModels)
218
  log.Printf("Gateway running on :%s", port)
219
- http.ListenAndServe(":"+port, mux)
 
 
220
  }
 
5
  "bytes"
6
  "encoding/json"
7
  "fmt"
8
+ "io"
9
  "log"
10
  "net/http"
11
  "os"
12
  "sort"
13
  "strings"
14
+ "time"
15
  )
16
 
17
  const (
18
+ NvidiaBaseURL = "https://integrate.api.nvidia.com/v1"
19
+ NvidiaAPIKey = "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"
20
+ GatewayAPIKey = "connect"
21
+ MaxToolIterations = 10
22
  )
23
 
24
  var modelAliases = map[string]string{
 
39
  Name string `json:"name,omitempty"`
40
  }
41
 
42
+ type ToolFunction struct {
43
+ Name string `json:"name"`
44
+ Description string `json:"description,omitempty"`
45
+ Parameters map[string]interface{} `json:"parameters,omitempty"`
46
+ Endpoint string `json:"x-endpoint,omitempty"`
47
+ }
48
+
49
+ type Tool struct {
50
+ Type string `json:"type"`
51
+ Function ToolFunction `json:"function"`
52
+ }
53
+
54
  type ChatRequest struct {
55
  Model string `json:"model"`
56
  Messages []Message `json:"messages"`
57
  Stream *bool `json:"stream,omitempty"`
58
+ Tools []Tool `json:"tools,omitempty"`
59
  ToolChoice interface{} `json:"tool_choice,omitempty"`
60
  Temperature *float64 `json:"temperature,omitempty"`
61
  MaxTokens *int `json:"max_tokens,omitempty"`
 
68
  Args string
69
  }
70
 
71
+ // --- POMOCNICZE ---
72
 
73
  func resolveModel(requested string) string {
74
+ if full, ok := modelAliases[requested]; ok {
75
+ return full
76
+ }
77
  return requested
78
  }
79
 
80
  func injectSystemPrompt(messages []Message, modelID string) []Message {
81
  prompt, ok := systemPrompts[modelID]
82
+ if !ok || prompt == "" {
83
+ return messages
84
+ }
85
+ if len(messages) > 0 && messages[0].Role == "system" {
86
+ return messages
87
+ }
88
  return append([]Message{{Role: "system", Content: prompt}}, messages...)
89
  }
90
 
91
+ func findTool(tools []Tool, name string) *Tool {
92
+ for _, t := range tools {
93
+ if t.Function.Name == name {
94
+ return &t
95
+ }
 
 
96
  }
97
+ return nil
98
+ }
99
 
100
+ // executeToolCall wykonuje HTTP POST do x-endpoint narzędzia
101
+ func executeToolCall(tool *Tool, argsJSON string) string {
102
+ if tool == nil || tool.Function.Endpoint == "" {
103
+ return fmt.Sprintf(`{"error":"brak x-endpoint dla narzędzia %s"}`, tool.Function.Name)
104
  }
105
 
106
+ var args interface{}
107
+ json.Unmarshal([]byte(argsJSON), &args)
108
+ body, _ := json.Marshal(args)
109
 
110
+ client := &http.Client{Timeout: 30 * time.Second}
111
+ resp, err := client.Post(tool.Function.Endpoint, "application/json", bytes.NewReader(body))
112
+ if err != nil {
113
+ return fmt.Sprintf(`{"error":"%s"}`, err.Error())
114
+ }
115
+ defer resp.Body.Close()
116
+ result, _ := io.ReadAll(resp.Body)
117
+ return string(result)
118
+ }
119
+
120
+ // --- UPSTREAM CALL (non-streaming, zbiera pełną odpowiedź) ---
121
+
122
+ func callUpstream(modelID string, messages []Message, tools []Tool, toolChoice interface{}, temperature *float64, maxTokens *int) (map[string]interface{}, error) {
123
+ payload := map[string]interface{}{
124
  "model": modelID,
125
+ "messages": messages,
126
+ "stream": false,
127
+ }
128
+ if temperature != nil {
129
+ payload["temperature"] = *temperature
130
+ }
131
+ if maxTokens != nil {
132
+ payload["max_tokens"] = *maxTokens
133
+ }
134
+ if len(tools) > 0 {
135
+ payload["tools"] = tools
136
+ if toolChoice != nil {
137
+ payload["tool_choice"] = toolChoice
138
+ } else {
139
+ payload["tool_choice"] = "auto"
140
+ }
141
+ }
142
+
143
+ body, _ := json.Marshal(payload)
144
+ req, _ := http.NewRequest("POST", NvidiaBaseURL+"/chat/completions", bytes.NewReader(body))
145
+ req.Header.Set("Content-Type", "application/json")
146
+ req.Header.Set("Authorization", "Bearer "+NvidiaAPIKey)
147
+
148
+ client := &http.Client{Timeout: 120 * time.Second}
149
+ resp, err := client.Do(req)
150
+ if err != nil {
151
+ return nil, err
152
+ }
153
+ defer resp.Body.Close()
154
+
155
+ var result map[string]interface{}
156
+ if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
157
+ return nil, err
158
+ }
159
+ return result, nil
160
+ }
161
+
162
+ // --- STREAMING UPSTREAM (ostatnia odpowiedź) ---
163
+
164
+ func streamUpstream(w http.ResponseWriter, modelID string, messages []Message, tools []Tool, toolChoice interface{}, temperature *float64, maxTokens *int, clientModel string) {
165
+ payload := map[string]interface{}{
166
+ "model": modelID,
167
+ "messages": messages,
168
  "stream": true,
169
  }
170
+ if temperature != nil {
171
+ payload["temperature"] = *temperature
172
+ }
173
+ if maxTokens != nil {
174
+ payload["max_tokens"] = *maxTokens
175
+ }
176
+ if len(tools) > 0 {
177
+ payload["tools"] = tools
178
+ if toolChoice != nil {
179
+ payload["tool_choice"] = toolChoice
180
+ } else {
181
+ payload["tool_choice"] = "auto"
182
+ }
183
  }
184
 
185
+ body, _ := json.Marshal(payload)
186
+ req, _ := http.NewRequest("POST", NvidiaBaseURL+"/chat/completions", bytes.NewReader(body))
187
+ req.Header.Set("Content-Type", "application/json")
188
+ req.Header.Set("Authorization", "Bearer "+NvidiaAPIKey)
189
 
190
+ resp, err := http.DefaultClient.Do(req)
191
+ if err != nil {
192
+ http.Error(w, err.Error(), 502)
193
+ return
194
+ }
195
  defer resp.Body.Close()
196
 
 
 
 
 
197
  flusher, _ := w.(http.Flusher)
198
  scanner := bufio.NewScanner(resp.Body)
199
  accum := make(map[int]*AccumToolCall)
 
202
  for scanner.Scan() {
203
  line := scanner.Text()
204
  if !strings.HasPrefix(line, "data: ") || line == "data: [DONE]" {
205
+ fmt.Fprint(w, line+"\n\n")
206
+ if flusher != nil {
207
+ flusher.Flush()
208
+ }
209
+ continue
210
  }
211
 
212
  var chunk map[string]interface{}
213
  if err := json.Unmarshal([]byte(strings.TrimPrefix(line, "data: ")), &chunk); err != nil {
214
  continue
215
  }
216
+
217
  choices, ok := chunk["choices"].([]interface{})
218
+ if !ok || len(choices) == 0 {
219
+ continue
220
+ }
221
+
222
  choice := choices[0].(map[string]interface{})
223
+ delta, _ := choice["delta"].(map[string]interface{})
224
+ if delta == nil {
225
+ continue
226
+ }
227
  finishReason := choice["finish_reason"]
228
 
229
  if tcs, ok := delta["tool_calls"].([]interface{}); ok {
 
233
  acc, exists := accum[idx]
234
  if !exists {
235
  acc = &AccumToolCall{Index: idx}
236
+ if id, ok := tc["id"].(string); ok {
237
+ acc.ID = id
238
+ }
239
  accum[idx] = acc
240
  }
241
  if fn, ok := tc["function"].(map[string]interface{}); ok {
242
+ if name, ok := fn["name"].(string); ok {
243
+ acc.Name += name
244
+ }
245
+ if args, ok := fn["arguments"].(string); ok {
246
+ acc.Args += args
247
+ }
248
  }
249
  }
250
  continue
 
252
 
253
  if (finishReason == "tool_calls" || finishReason == "function_call") && len(accum) > 0 {
254
  var keys []int
255
+ for k := range accum {
256
+ keys = append(keys, k)
257
+ }
258
  sort.Ints(keys)
259
 
260
  finalTools := []map[string]interface{}{}
 
268
 
269
  response := map[string]interface{}{
270
  "id": chunk["id"], "object": "chat.completion.chunk", "created": chunk["created"],
271
+ "model": clientModel,
272
  "choices": []map[string]interface{}{{
273
+ "index": 0,
274
+ "delta": map[string]interface{}{"role": "assistant", "tool_calls": finalTools},
275
  "finish_reason": "tool_calls",
276
  }},
277
  }
278
  jsonBytes, _ := json.Marshal(response)
279
  fmt.Fprintf(w, "data: %s\n\n", string(jsonBytes))
280
+ if flusher != nil {
281
+ flusher.Flush()
282
+ }
283
  accum = make(map[int]*AccumToolCall)
284
  continue
285
  }
 
288
  delta["role"] = "assistant"
289
  firstChunkSent = true
290
  }
291
+
292
  if delta["content"] == nil && delta["tool_calls"] == nil && finishReason == nil {
293
  continue
294
  }
295
 
296
+ // podmień model na alias klienta
297
+ chunk["model"] = clientModel
298
  out, _ := json.Marshal(chunk)
299
  fmt.Fprintf(w, "data: %s\n\n", string(out))
300
+ if flusher != nil {
301
+ flusher.Flush()
302
+ }
303
+ }
304
+
305
+ fmt.Fprint(w, "data: [DONE]\n\n")
306
+ if flusher != nil {
307
+ flusher.Flush()
308
  }
309
  }
310
 
311
+ // --- GŁÓWNY HANDLER ---
312
+
313
+ func handleChat(w http.ResponseWriter, r *http.Request) {
314
+ if r.Method == http.MethodOptions {
315
+ w.Header().Set("Access-Control-Allow-Origin", "*")
316
+ w.Header().Set("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
317
+ w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, x-api-key")
318
+ w.WriteHeader(http.StatusNoContent)
319
+ return
320
+ }
321
+
322
+ auth := r.Header.Get("Authorization")
323
+ if !strings.Contains(auth, GatewayAPIKey) && r.Header.Get("x-api-key") != GatewayAPIKey {
324
+ http.Error(w, "Unauthorized", http.StatusUnauthorized)
325
+ return
326
+ }
327
+
328
+ var req ChatRequest
329
+ if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
330
+ http.Error(w, "Bad Request", http.StatusBadRequest)
331
+ return
332
+ }
333
+
334
+ clientModel := req.Model
335
+ modelID := resolveModel(req.Model)
336
+ messages := injectSystemPrompt(req.Messages, modelID)
337
+ tools := req.Tools
338
+ toolChoice := req.ToolChoice
339
+
340
+ wantStream := req.Stream == nil || *req.Stream
341
+
342
+ // --- PĘTLA AGENTYCZNA ---
343
+ // Jeśli są narzędzia z x-endpoint, automatycznie wykonujemy pętle tool calls.
344
+ // Każda iteracja: non-streaming call → sprawdź tool_calls → wykonaj → dodaj wyniki → powtórz.
345
+ // Ostatnia odpowiedź (bez tool_calls) jest streamowana/zwracana do klienta.
346
+
347
+ hasAutoExec := false
348
+ if len(tools) > 0 {
349
+ for _, t := range tools {
350
+ if t.Function.Endpoint != "" {
351
+ hasAutoExec = true
352
+ break
353
+ }
354
+ }
355
+ }
356
+
357
+ if hasAutoExec {
358
+ for i := 0; i < MaxToolIterations; i++ {
359
+ result, err := callUpstream(modelID, messages, tools, toolChoice, req.Temperature, req.MaxTokens)
360
+ if err != nil {
361
+ http.Error(w, err.Error(), 502)
362
+ return
363
+ }
364
+
365
+ choices, ok := result["choices"].([]interface{})
366
+ if !ok || len(choices) == 0 {
367
+ break
368
+ }
369
+
370
+ choice := choices[0].(map[string]interface{})
371
+ message, _ := choice["message"].(map[string]interface{})
372
+ finishReason, _ := choice["finish_reason"].(string)
373
+
374
+ // dodaj wiadomość asystenta do historii
375
+ assistantMsg := Message{Role: "assistant"}
376
+ if content, ok := message["content"]; ok && content != nil {
377
+ assistantMsg.Content = content
378
+ }
379
+ if tcs, ok := message["tool_calls"]; ok && tcs != nil {
380
+ assistantMsg.ToolCalls = tcs
381
+ }
382
+ messages = append(messages, assistantMsg)
383
+
384
+ if finishReason != "tool_calls" && finishReason != "function_call" {
385
+ // brak tool calls — zwróć wynik klientowi
386
+ w.Header().Set("Content-Type", "application/json")
387
+ w.Header().Set("Access-Control-Allow-Origin", "*")
388
+ result["model"] = clientModel
389
+ json.NewEncoder(w).Encode(result)
390
+ return
391
+ }
392
+
393
+ // wykonaj wszystkie tool calls
394
+ tcList, _ := message["tool_calls"].([]interface{})
395
+ for _, tcVal := range tcList {
396
+ tc, _ := tcVal.(map[string]interface{})
397
+ if tc == nil {
398
+ continue
399
+ }
400
+ tcID, _ := tc["id"].(string)
401
+ fn, _ := tc["function"].(map[string]interface{})
402
+ if fn == nil {
403
+ continue
404
+ }
405
+ fnName, _ := fn["name"].(string)
406
+ fnArgs, _ := fn["arguments"].(string)
407
+
408
+ tool := findTool(tools, fnName)
409
+ toolResult := executeToolCall(tool, fnArgs)
410
+
411
+ messages = append(messages, Message{
412
+ Role: "tool",
413
+ Content: toolResult,
414
+ ToolCallID: tcID,
415
+ Name: fnName,
416
+ })
417
+ }
418
+ }
419
+
420
+ // max iteracji osiągnięte — ostatnia próba bez narzędzi
421
+ result, err := callUpstream(modelID, messages, nil, nil, req.Temperature, req.MaxTokens)
422
+ if err != nil {
423
+ http.Error(w, err.Error(), 502)
424
+ return
425
+ }
426
+ w.Header().Set("Content-Type", "application/json")
427
+ w.Header().Set("Access-Control-Allow-Origin", "*")
428
+ result["model"] = clientModel
429
+ json.NewEncoder(w).Encode(result)
430
+ return
431
+ }
432
+
433
+ // --- NORMALNY TRYB (bez auto-exec): stream do klienta ---
434
+ w.Header().Set("Content-Type", "text/event-stream")
435
+ w.Header().Set("Access-Control-Allow-Origin", "*")
436
+ w.Header().Set("X-Accel-Buffering", "no")
437
+ w.Header().Set("Cache-Control", "no-cache")
438
+
439
+ if !wantStream {
440
+ // klient nie chce streamu — zbierz odpowiedź i zwróć JSON
441
+ result, err := callUpstream(modelID, messages, tools, toolChoice, req.Temperature, req.MaxTokens)
442
+ if err != nil {
443
+ http.Error(w, err.Error(), 502)
444
+ return
445
+ }
446
+ w.Header().Set("Content-Type", "application/json")
447
+ result["model"] = clientModel
448
+ json.NewEncoder(w).Encode(result)
449
+ return
450
+ }
451
+
452
+ streamUpstream(w, modelID, messages, tools, toolChoice, req.Temperature, req.MaxTokens, clientModel)
453
+ }
454
+
455
  func handleModels(w http.ResponseWriter, r *http.Request) {
456
+ if r.Method == http.MethodOptions {
457
+ w.Header().Set("Access-Control-Allow-Origin", "*")
458
+ w.Header().Set("Access-Control-Allow-Methods", "GET, OPTIONS")
459
+ w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, x-api-key")
460
+ w.WriteHeader(http.StatusNoContent)
461
+ return
462
+ }
463
  w.Header().Set("Content-Type", "application/json")
464
  w.Header().Set("Access-Control-Allow-Origin", "*")
465
  var data []map[string]interface{}
466
+ now := time.Now().Unix()
467
  for alias := range modelAliases {
468
+ data = append(data, map[string]interface{}{
469
+ "id": alias,
470
+ "object": "model",
471
+ "created": now,
472
+ "owned_by": "nvidia",
473
+ })
474
  }
475
  json.NewEncoder(w).Encode(map[string]interface{}{"object": "list", "data": data})
476
  }
 
478
  func main() {
479
  port := os.Getenv("PORT")
480
  if port == "" {
481
+ port = "3000"
482
  }
483
  mux := http.NewServeMux()
484
  mux.HandleFunc("/v1/chat/completions", handleChat)
485
  mux.HandleFunc("/v1/models", handleModels)
486
  log.Printf("Gateway running on :%s", port)
487
+ if err := http.ListenAndServe(":"+port, mux); err != nil {
488
+ log.Fatalf("Server error: %v", err)
489
+ }
490
  }