File size: 10,211 Bytes
7feac49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
# Copyright (c) Alibaba, Inc. and its affiliates.
import collections
import os.path
import sys
import time
from datetime import datetime
from typing import Dict, List, Tuple, Type

import gradio as gr
import psutil
from packaging import version

from swift.ui.base import BaseUI
from swift.utils import get_logger
from swift.utils.utils import format_time

logger = get_logger()


class Runtime(BaseUI):
    handlers: Dict[str, Tuple[List, Tuple]] = {}

    group = 'llm_infer'

    cmd = 'deploy'

    log_event = {}

    locale_dict = {
        'runtime_tab': {
            'label': {
                'zh': '运行时',
                'en': 'Runtime'
            },
        },
        'running_cmd': {
            'label': {
                'zh': '运行命令',
                'en': 'Command line'
            },
            'info': {
                'zh': '执行的实际命令',
                'en': 'The actual command'
            }
        },
        'show_log': {
            'value': {
                'zh': '展示部署状态',
                'en': 'Show running status'
            },
        },
        'stop_show_log': {
            'value': {
                'zh': '停止展示',
                'en': 'Stop showing running status'
            },
        },
        'log': {
            'label': {
                'zh': '日志输出',
                'en': 'Logging content'
            },
            'info': {
                'zh': '如果日志无更新请再次点击"展示日志内容"',
                'en': 'Please press "Show log" if the log content is not updating'
            }
        },
        'running_tasks': {
            'label': {
                'zh': '运行中部署',
                'en': 'Running deployments'
            },
            'info': {
                'zh': '所有的swift deploy命令启动的任务',
                'en': 'Started by swift deploy'
            }
        },
        'refresh_tasks': {
            'value': {
                'zh': '找回部署',
                'en': 'Find deployments'
            },
        },
        'kill_task': {
            'value': {
                'zh': '杀死部署',
                'en': 'Kill running task'
            },
        },
    }

    @classmethod
    def do_build_ui(cls, base_tab: Type['BaseUI']):
        with gr.Accordion(elem_id='runtime_tab', open=False, visible=True):
            with gr.Blocks():
                with gr.Row():
                    gr.Dropdown(elem_id='running_tasks', scale=10, allow_custom_value=True)
                    gr.Button(elem_id='refresh_tasks', scale=1, variant='primary')
                    gr.Button(elem_id='show_log', scale=1, variant='primary')
                    gr.Button(elem_id='stop_show_log', scale=1)
                    gr.Button(elem_id='kill_task', scale=1)
                with gr.Row():
                    gr.Textbox(elem_id='log', lines=6, visible=False)

                concurrency_limit = {}
                if version.parse(gr.__version__) >= version.parse('4.0.0'):
                    concurrency_limit = {'concurrency_limit': 5}
                base_tab.element('show_log').click(cls.update_log, [],
                                                   [cls.element('log')]).then(cls.wait,
                                                                              [base_tab.element('running_tasks')],
                                                                              [cls.element('log')], **concurrency_limit)

                base_tab.element('stop_show_log').click(cls.break_log_event, [cls.element('running_tasks')], [])

                base_tab.element('refresh_tasks').click(
                    cls.refresh_tasks,
                    [base_tab.element('running_tasks')],
                    [base_tab.element('running_tasks')],
                )

    @classmethod
    def break_log_event(cls, task):
        if not task:
            return
        pid, all_args = cls.parse_info_from_cmdline(task)
        cls.log_event[all_args['log_file']] = True

    @classmethod
    def update_log(cls):
        return gr.update(visible=True)

    @classmethod
    def wait(cls, task):
        if not task:
            return [None]
        _, args = cls.parse_info_from_cmdline(task)
        log_file = args['log_file']
        cls.log_event[log_file] = False
        offset = 0
        latest_data = ''
        lines = collections.deque(maxlen=int(os.environ.get('MAX_LOG_LINES', 50)))
        try:
            with open(log_file, 'r', encoding='utf-8') as input:
                input.seek(offset)
                fail_cnt = 0
                while True:
                    try:
                        latest_data += input.read()
                    except UnicodeDecodeError:
                        continue
                    if not latest_data:
                        time.sleep(0.5)
                        fail_cnt += 1
                        if fail_cnt > 50:
                            break

                    if cls.log_event.get(log_file, False):
                        cls.log_event[log_file] = False
                        break

                    if '\n' not in latest_data:
                        continue
                    latest_lines = latest_data.split('\n')
                    if latest_data[-1] != '\n':
                        latest_data = latest_lines[-1]
                        latest_lines = latest_lines[:-1]
                    else:
                        latest_data = ''
                    lines.extend(latest_lines)
                    yield '\n'.join(lines)
        except IOError:
            pass

    @classmethod
    def get_all_ports(cls):
        process_name = 'swift'
        cmd_name = cls.cmd
        ports = set()
        for proc in psutil.process_iter():
            try:
                cmdlines = proc.cmdline()
            except (psutil.ZombieProcess, psutil.AccessDenied, psutil.NoSuchProcess):
                cmdlines = []
            if any([process_name in cmdline for cmdline in cmdlines]) and any(  # noqa
                [cmd_name == cmdline for cmdline in cmdlines]):  # noqa
                try:
                    ports.add(int(cls.parse_info_from_cmdline(cls.construct_running_task(proc))[1].get('port', 8000)))
                except IndexError:
                    pass
        return ports

    @classmethod
    def refresh_tasks(cls, running_task=None):
        log_file = running_task if not running_task or 'pid:' not in running_task else None
        process_name = 'swift'
        negative_name = 'swift.exe'
        cmd_name = cls.cmd
        process = []
        selected = None
        for proc in psutil.process_iter():
            try:
                cmdlines = proc.cmdline()
            except (psutil.ZombieProcess, psutil.AccessDenied, psutil.NoSuchProcess):
                cmdlines = []
            if any([process_name in cmdline
                    for cmdline in cmdlines]) and not any([negative_name in cmdline
                                                           for cmdline in cmdlines]) and any(  # noqa
                                                               [cmd_name == cmdline for cmdline in cmdlines]):  # noqa
                process.append(cls.construct_running_task(proc))
                if log_file is not None and any(  # noqa
                    [log_file == cmdline for cmdline in cmdlines]):  # noqa
                    selected = cls.construct_running_task(proc)
        if not selected:
            if running_task and running_task in process:
                selected = running_task
        if not selected and process:
            selected = process[0]
        return gr.update(choices=process, value=selected)

    @staticmethod
    def construct_running_task(proc):
        pid = proc.pid
        ts = time.time()
        create_time = proc.create_time()
        create_time_formatted = datetime.fromtimestamp(create_time).strftime('%Y-%m-%d, %H:%M')

        return f'pid:{pid}/create:{create_time_formatted}' \
               f'/running:{format_time(ts - create_time)}/cmd:{" ".join(proc.cmdline())}'

    @classmethod
    def parse_info_from_cmdline(cls, task):
        pid = None
        for i in range(3):
            slash = task.find('/')
            if i == 0:
                pid = task[:slash].split(':')[1]
            task = task[slash + 1:]
        args = task.split(f'swift {cls.cmd}')[1]
        args = [arg.strip() for arg in args.split('--') if arg.strip()]
        all_args = {}
        for i in range(len(args)):
            space = args[i].find(' ')
            splits = args[i][:space], args[i][space + 1:]
            all_args[splits[0]] = splits[1]
        return pid, all_args

    @classmethod
    def kill_task(cls, task):
        if task:
            pid, all_args = cls.parse_info_from_cmdline(task)
            log_file = all_args['log_file']
            if sys.platform == 'win32':
                os.system(f'taskkill /f /t /pid "{pid}"')
            else:
                os.system(f'pkill -9 -f {log_file}')
            time.sleep(1)
            cls.break_log_event(task)
        return [cls.refresh_tasks()] + [gr.update(value=None)]

    @classmethod
    def task_changed(cls, task, base_tab):
        if task:
            _, all_args = cls.parse_info_from_cmdline(task)
        else:
            all_args = {}
        elements = list(base_tab.valid_elements().values())
        ret = []
        is_custom_path = 'ckpt_dir' in all_args
        for e in elements:
            if e.elem_id in all_args:
                if isinstance(e, gr.Dropdown) and e.multiselect:
                    arg = all_args[e.elem_id].split(' ')
                else:
                    if e.elem_id == 'model':
                        if is_custom_path:
                            arg = all_args['ckpt_dir']
                        else:
                            arg = all_args[e.elem_id]
                    else:
                        arg = all_args[e.elem_id]
                ret.append(gr.update(value=arg))
            else:
                ret.append(gr.update())
        cls.break_log_event(task)
        return ret + [gr.update(value=None)]