| import axios, { AxiosInstance, AxiosError } from 'axios'; |
| import { GLMMessage, GLMResponse, GLMStreamResponse, GLM_DEFAULTS } from '@glmpilot/shared'; |
| import { config } from '../config/env.js'; |
| import { logger } from '../utils/logger.js'; |
| import { stripCodeFences } from '../utils/code-parser.js'; |
| import { countTokens } from '../utils/token-counter.js'; |
|
|
/** Delay (ms) used when `GLM_DEFAULTS.RETRY_DELAYS` has no entry for the current attempt. */
const FALLBACK_RETRY_DELAY_MS = 4000;

/** Sentinel payload that marks the end of a streamed completion. */
const SSE_DONE = '[DONE]';

/**
 * Converts a `Retry-After` header value into a delay in milliseconds.
 *
 * Accepts delta-seconds (`"5"`) and HTTP-dates (`"Wed, 21 Oct 2015 07:28:00 GMT"`).
 * Anything missing or unparseable yields `fallbackMs` instead of `NaN`, so callers
 * never hand an invalid delay to `setTimeout`.
 *
 * @param headerValue Raw header value (string, number, or array of either).
 * @param fallbackMs Delay to use when the header cannot be interpreted.
 * @param now Reference time in epoch milliseconds for HTTP-date values.
 * @returns A finite, non-negative delay in milliseconds.
 */
function parseRetryAfterMs(headerValue: unknown, fallbackMs: number, now: number = Date.now()): number {
  const raw: unknown = Array.isArray(headerValue) ? headerValue[0] : headerValue;
  if (typeof raw === 'number') {
    return Number.isFinite(raw) && raw >= 0 ? raw * 1000 : fallbackMs;
  }
  if (typeof raw !== 'string') return fallbackMs;

  const text = raw.trim();
  if (text === '') return fallbackMs;
  if (/^\d+(\.\d+)?$/.test(text)) return Math.round(Number(text) * 1000);

  const retryAt = Date.parse(text);
  return Number.isNaN(retryAt) ? fallbackMs : Math.max(0, retryAt - now);
}

/**
 * Extracts the payload of an SSE `data:` line.
 *
 * @param line One line of the event stream (may carry a trailing `\r`).
 * @returns The payload without the field name and leading whitespace, or `null`
 *          when the line is not a `data` field (blank line, comment, other field).
 */
function extractSseData(line: string): string | null {
  const trimmed = line.trim();
  if (!trimmed.startsWith('data:')) return null;
  return trimmed.slice('data:'.length).trimStart();
}

/** Returns the HTTP response attached to a thrown value, if there is one. */
function getErrorResponse(err: unknown): AxiosError['response'] {
  if (typeof err !== 'object' || err === null) return undefined;
  return (err as AxiosError).response;
}

/**
 * Reads an error response body that may be a string or a byte stream.
 *
 * @returns The body decoded as UTF-8, or `null` when it cannot be read.
 */
async function readErrorBody(data: unknown): Promise<string | null> {
  if (typeof data === 'string') return data;
  if (typeof data !== 'object' || data === null || !(Symbol.asyncIterator in data)) return null;
  try {
    const chunks: Buffer[] = [];
    for await (const chunk of data as AsyncIterable<Uint8Array | string>) {
      chunks.push(typeof chunk === 'string' ? Buffer.from(chunk, 'utf-8') : Buffer.from(chunk));
    }
    return Buffer.concat(chunks).toString('utf-8');
  } catch {
    // Best effort only: the caller falls back to the original error.
    return null;
  }
}

/**
 * HTTP client for the GLM chat-completions API.
 *
 * Limits the number of in-flight requests to `GLM_DEFAULTS.MAX_CONCURRENT_REQUESTS`
 * (extra callers wait in FIFO order) and retries non-streaming requests with backoff.
 */
export class GLMClient {
  private readonly client: AxiosInstance;
  private readonly model: string;
  /** Number of requests currently holding a concurrency slot. */
  private concurrentRequests = 0;
  private readonly maxConcurrent = GLM_DEFAULTS.MAX_CONCURRENT_REQUESTS;
  /** Callbacks of callers waiting for a free slot, oldest first. */
  private readonly waitQueue: Array<() => void> = [];

  /** Builds the axios instance from `config`, falling back to `GLM_DEFAULTS`. */
  constructor() {
    this.model = config.GLM_MODEL || GLM_DEFAULTS.MODEL;
    // Ensure the base URL ends with "/" so relative paths such as
    // "chat/completions" are appended rather than replacing the last segment.
    const baseUrl = (config.GLM_BASE_URL || GLM_DEFAULTS.BASE_URL).trim().replace(/\/?$/, '/');

    this.client = axios.create({
      baseURL: baseUrl,
      timeout: GLM_DEFAULTS.TIMEOUT_MS,
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${config.GLM_API_KEY}`,
      },
    });
  }

  /**
   * Sends a non-streaming chat completion request.
   *
   * @param messages Conversation to send.
   * @param options Sampling overrides; unset fields use `GLM_DEFAULTS`.
   * @returns The response body returned by the API.
   * @throws The last request error once retries are exhausted, or immediately
   *         for 4xx responses other than 429.
   */
  async chat(
    messages: GLMMessage[],
    options: { temperature?: number; max_tokens?: number; top_p?: number } = {}
  ): Promise<GLMResponse> {
    await this.waitForSlot();
    try {
      const body = {
        model: this.model,
        messages,
        temperature: options.temperature ?? GLM_DEFAULTS.TEMPERATURE,
        max_tokens: options.max_tokens ?? GLM_DEFAULTS.MAX_TOKENS,
        top_p: options.top_p ?? GLM_DEFAULTS.TOP_P,
        stream: false,
      };

      const fullUrl = `${this.client.defaults.baseURL}chat/completions`;
      logger.info('GLM request starting', {
        url: fullUrl,
        messageCount: messages.length,
        tokenEstimate: countTokens(messages.map((m) => m.content).join('')),
      });

      const result = await this.retryWithBackoff(async () => {
        const response = await this.client.post<GLMResponse>('chat/completions', body);
        return response.data;
      }, GLM_DEFAULTS.MAX_RETRIES);

      logger.info('GLM response', { status: 'success', usage: result.usage });
      return result;
    } finally {
      this.releaseSlot();
    }
  }

  /**
   * Sends a streaming chat completion request and yields content deltas as they arrive.
   *
   * @param messages Conversation to send.
   * @param options Sampling overrides; unset fields use `GLM_DEFAULTS`.
   * @param signal Optional abort signal; when aborted the generator ends quietly.
   * @yields Each non-empty `choices[0].delta.content` fragment.
   * @throws `Error("GLM API <status>: <body>")` when the API answers with an error
   *         whose body can be read; otherwise the original request error.
   */
  async *chatStream(
    messages: GLMMessage[],
    options: { temperature?: number; max_tokens?: number; top_p?: number } = {},
    signal?: AbortSignal
  ): AsyncGenerator<string> {
    await this.waitForSlot();
    try {
      const body = {
        model: this.model,
        messages,
        temperature: options.temperature ?? GLM_DEFAULTS.TEMPERATURE,
        max_tokens: options.max_tokens ?? GLM_DEFAULTS.MAX_TOKENS,
        top_p: options.top_p ?? GLM_DEFAULTS.TOP_P,
        stream: true,
      };

      const response = await this.client.post('chat/completions', body, {
        responseType: 'stream',
        signal,
      });

      // A streaming decoder keeps the bytes of a multi-byte character that is
      // split across two chunks instead of emitting replacement characters.
      const decoder = new TextDecoder('utf-8');
      let buffer = '';
      for await (const chunk of response.data as AsyncIterable<Uint8Array | string>) {
        buffer += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
        const lines = buffer.split('\n');
        // The last element is an incomplete line; keep it for the next chunk.
        buffer = lines.pop() ?? '';

        const finished = yield* this.contentsOf(lines);
        if (finished) return;
      }

      // Flush bytes still held by the decoder and a final line without "\n".
      buffer += decoder.decode();
      if (buffer) yield* this.contentsOf([buffer]);
    } catch (err) {
      if (signal?.aborted) return;

      const response = getErrorResponse(err);
      if (response) {
        // With responseType "stream" the error body is a stream as well.
        const detail = await readErrorBody(response.data);
        if (detail !== null) throw new Error(`GLM API ${response.status}: ${detail}`);
      }
      throw err;
    } finally {
      this.releaseSlot();
    }
  }

  /**
   * Asks the model to act on a piece of code.
   *
   * @param code Source code, sent inside a fenced block.
   * @param instruction What to do with the code.
   * @param systemPrompt System message for the conversation.
   * @returns The first choice's message content, or `''` when there is none.
   */
  async analyzeCode(code: string, instruction: string, systemPrompt: string): Promise<string> {
    const messages: GLMMessage[] = [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: `${instruction}\n\n\`\`\`\n${code}\n\`\`\`` },
    ];

    const response = await this.chat(messages, {
      temperature: 0.2,
      max_tokens: GLM_DEFAULTS.MAX_TOKENS,
    });

    return response.choices?.[0]?.message?.content || '';
  }

  /**
   * Requests the code that belongs at the cursor between `prefix` and `suffix`.
   *
   * @param prefix Text before the cursor.
   * @param suffix Text after the cursor.
   * @param language Language name inserted into the system prompt.
   * @param fileContext Optional extra context placed before the current file.
   * @returns The completion with surrounding code fences stripped.
   */
  async completeCode(
    prefix: string,
    suffix: string,
    language: string,
    fileContext?: string
  ): Promise<string> {
    const systemPrompt = `You are an expert ${language} code completion engine. You will be given code with a cursor position marked as <CURSOR>. Output ONLY the code that should be inserted at the cursor position. Do not include any explanation, markdown formatting, or the surrounding code. Output raw code only.`;

    const userContent = fileContext
      ? `${fileContext}\n\n// Current file:\n${prefix}<CURSOR>${suffix}`
      : `${prefix}<CURSOR>${suffix}`;

    const messages: GLMMessage[] = [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: userContent },
    ];

    const response = await this.chat(messages, {
      temperature: 0.3,
      max_tokens: 256,
    });

    const content = response.choices?.[0]?.message?.content || '';
    return stripCodeFences(content);
  }

  /**
   * Yields the content of each `data:` line.
   *
   * @returns `true` when the `[DONE]` sentinel was reached, otherwise `false`.
   */
  private *contentsOf(lines: readonly string[]): Generator<string, boolean> {
    for (const line of lines) {
      const data = extractSseData(line);
      if (data === null || data === '') continue;
      if (data === SSE_DONE) return true;

      const content = this.parseDeltaContent(data);
      if (content) yield content;
    }
    return false;
  }

  /** Parses one stream payload; malformed payloads are logged and skipped. */
  private parseDeltaContent(data: string): string | undefined {
    try {
      const parsed: GLMStreamResponse = JSON.parse(data);
      return parsed.choices?.[0]?.delta?.content ?? undefined;
    } catch {
      logger.warn('GLM stream: skipping malformed SSE payload', { length: data.length });
      return undefined;
    }
  }

  /** Takes a concurrency slot, waiting in FIFO order when all slots are in use. */
  private async waitForSlot(): Promise<void> {
    if (this.concurrentRequests < this.maxConcurrent) {
      this.concurrentRequests++;
      return;
    }
    return new Promise((resolve) => {
      this.waitQueue.push(() => {
        this.concurrentRequests++;
        resolve();
      });
    });
  }

  /** Frees a slot and hands it straight to the oldest waiter, if any. */
  private releaseSlot(): void {
    this.concurrentRequests--;
    const next = this.waitQueue.shift();
    if (next) next();
  }

  /**
   * Runs `fn`, retrying on failure up to `maxRetries` additional times.
   *
   * 4xx responses other than 429 are not retried. Delays come from
   * `GLM_DEFAULTS.RETRY_DELAYS`; on 429 a usable `Retry-After` header wins.
   *
   * @param fn Operation to attempt.
   * @param maxRetries Number of retries after the first attempt.
   * @returns The value `fn` resolves with.
   * @throws The error of the last attempt, or of the first non-retryable one.
   */
  private async retryWithBackoff<T>(fn: () => Promise<T>, maxRetries: number): Promise<T> {
    const delays = GLM_DEFAULTS.RETRY_DELAYS;
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        return await fn();
      } catch (error) {
        const response = getErrorResponse(error);
        const status = response?.status;

        if (response) {
          logger.error('GLM API Error Response:', {
            status: response.status,
            data: response.data,
          });
        }

        const isClientError = status !== undefined && status >= 400 && status < 500 && status !== 429;
        if (isClientError || attempt === maxRetries) throw error;

        const backoff = delays[attempt] ?? FALLBACK_RETRY_DELAY_MS;
        const delay =
          status === 429 ? parseRetryAfterMs(response?.headers?.['retry-after'], backoff) : backoff;

        logger.warn(`GLM request failed (attempt ${attempt + 1}/${maxRetries + 1}), retrying in ${delay}ms`, {
          status,
          message: error instanceof Error ? error.message : String(error),
        });

        await new Promise((r) => setTimeout(r, delay));
      }
    }
    throw new Error('Retry exhausted');
  }
}
|
|
/**
 * Module-level `GLMClient` instance, constructed when this module is first
 * imported. Importers that use this export share its connection settings and
 * its concurrency limit.
 */
export const glm: GLMClient = new GLMClient();
|
|