File size: 5,107 Bytes
cbff270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
"""
GLM API Client for AI Writer.
Handles communication with the GLM-5 API endpoint on Modal.
"""

import os
import requests
import json
from typing import Optional, Dict, Any


# Chat-completions endpoint used when the caller does not supply one.
DEFAULT_API_URL = "https://api.us-west-2.modal.direct/v1/chat/completions"
# Model identifier sent in the "model" field of each request by default.
DEFAULT_MODEL = "zai-org/GLM-5.1-FP8"
# Bearer token for the API, taken from the GLM_API_TOKEN environment variable.
# SECURITY: a credential must never be embedded in source control, so there is
# deliberately no literal fallback here; when the variable is unset the token
# is the empty string and the API is expected to reject the request.
# NOTE(review): the token that used to be hard-coded on this line should be
# treated as compromised and rotated.
DEFAULT_TOKEN = os.environ.get("GLM_API_TOKEN", "")


class GLMClient:
    """Client for the GLM-5 chat-completions API.

    Every public request method returns a plain ``dict`` instead of raising:
    ``"success"`` tells the caller whether the call worked, ``"status_code"``
    carries the HTTP status (``None`` when no response was received), and
    failures carry a human-readable ``"message"``.
    """

    def __init__(
        self,
        api_url: str = DEFAULT_API_URL,
        model: str = DEFAULT_MODEL,
        token: str = DEFAULT_TOKEN,
    ):
        """Store the endpoint URL, model name and bearer token.

        Args:
            api_url: Full URL the requests are POSTed to.
            model: Value sent in the ``"model"`` field of each request.
            token: Value sent as ``Authorization: Bearer <token>``.
        """
        self.api_url = api_url
        self.model = model
        self.token = token

    def _headers(self) -> Dict[str, str]:
        """Build the HTTP headers shared by every request."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.token}",
        }

    @staticmethod
    def _extract_content(data: Any) -> Any:
        """Return ``data["choices"][0]["message"]["content"]`` defensively.

        Any missing, empty, null or wrongly-typed level of the response yields
        ``""`` rather than an ``IndexError``/``AttributeError``, so a 200
        response with an unexpected body is not misreported as a transport
        error. A non-null content value is returned unchanged.
        """
        if not isinstance(data, dict):
            return ""
        choices = data.get("choices")
        if not isinstance(choices, list) or not choices:
            return ""
        first_choice = choices[0]
        if not isinstance(first_choice, dict):
            return ""
        message = first_choice.get("message")
        if not isinstance(message, dict):
            return ""
        content = message.get("content")
        return "" if content is None else content

    def test_connection(self) -> Dict[str, Any]:
        """Test the API connection with a simple request.

        Returns:
            A dict with ``"success"`` (bool), ``"message"`` (str) and
            ``"status_code"`` (int, or ``None`` if no response was received).
        """
        payload = {
            "model": self.model,
            "messages": [
                {"role": "user", "content": "Say 'Connection successful' in Russian."}
            ],
            "max_tokens": 50,
        }
        try:
            response = requests.post(
                self.api_url,
                headers=self._headers(),
                json=payload,
                timeout=30,
            )
            if response.status_code != 200:
                return {
                    "success": False,
                    "message": f"API returned status {response.status_code}: {response.text[:500]}",
                    "status_code": response.status_code,
                }
            content = self._extract_content(response.json())
            return {
                "success": True,
                "message": f"API connection successful. Response: {content}",
                "status_code": response.status_code,
            }
        # Timeout is checked before ConnectionError so that a connect timeout
        # (which is both) is reported as a timeout.
        except requests.exceptions.Timeout:
            return {
                "success": False,
                "message": "Connection timed out after 30 seconds.",
                "status_code": None,
            }
        except requests.exceptions.ConnectionError:
            return {
                "success": False,
                "message": "Could not connect to the API endpoint. Check your internet connection.",
                "status_code": None,
            }
        except Exception as e:
            # Boundary handler: callers expect a result dict, never an exception.
            return {
                "success": False,
                "message": f"Error: {str(e)}",
                "status_code": None,
            }

    def generate(
        self,
        system_prompt: str,
        user_prompt: str,
        max_tokens: int = 8000,
        temperature: float = 0.7,
    ) -> Dict[str, Any]:
        """Generate text using the GLM API.

        Args:
            system_prompt: Optional system message; skipped when empty.
            user_prompt: The user message to send.
            max_tokens: Value sent as ``"max_tokens"``.
            temperature: Value sent as ``"temperature"``.

        Returns:
            On success: ``{"success": True, "content", "usage", "status_code"}``.
            On failure: ``{"success": False, "content": "", "message",
            "status_code"}`` where ``"status_code"`` is ``None`` if no
            response was received.
        """
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": user_prompt})

        payload = {
            "model": self.model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
        }

        try:
            response = requests.post(
                self.api_url,
                headers=self._headers(),
                json=payload,
                timeout=300,  # 5 minutes for long generation
            )
            if response.status_code != 200:
                return {
                    "success": False,
                    "content": "",
                    "message": f"API returned status {response.status_code}: {response.text[:500]}",
                    "status_code": response.status_code,
                }
            data = response.json()
            # A null or missing "usage" is normalised to an empty dict.
            usage = (data.get("usage") if isinstance(data, dict) else None) or {}
            return {
                "success": True,
                "content": self._extract_content(data),
                "usage": usage,
                "status_code": response.status_code,
            }
        except requests.exceptions.Timeout:
            return {
                "success": False,
                "content": "",
                "message": "Generation timed out after 5 minutes. Try reducing max tokens or shortening the prompt.",
                "status_code": None,
            }
        except requests.exceptions.ConnectionError:
            return {
                "success": False,
                "content": "",
                "message": "Could not connect to the API endpoint. Check your internet connection.",
                "status_code": None,
            }
        except Exception as e:
            # Boundary handler: callers expect a result dict, never an exception.
            return {
                "success": False,
                "content": "",
                "message": f"Error during generation: {str(e)}",
                "status_code": None,
            }

    def update_token(self, new_token: str):
        """Update the API token."""
        self.token = new_token

    def update_model(self, new_model: str):
        """Update the model name."""
        self.model = new_model