Rfym21 committed on
Commit
af92211
·
verified ·
1 Parent(s): 55d5d77

Delete chatgpt/chatFormat.py

Browse files
Files changed (1) hide show
  1. chatgpt/chatFormat.py +0 -436
chatgpt/chatFormat.py DELETED
@@ -1,436 +0,0 @@
1
- import asyncio
2
- import json
3
- import random
4
- import re
5
- import string
6
- import time
7
- import uuid
8
-
9
- import pybase64
10
- import websockets
11
- from fastapi import HTTPException
12
-
13
- from api.files import get_file_content
14
- from api.models import model_system_fingerprint
15
- from api.tokens import split_tokens_from_content, calculate_image_tokens, num_tokens_from_messages
16
- from utils.Logger import logger
17
-
18
# Canned assistant reply that stream_response emits (with finish_reason "stop")
# when an upstream chunk carries no message and is typed "moderation".
moderation_message = "I'm sorry, I cannot provide or engage in any content related to pornography, violence, or any unethical material. If you have any other questions or need assistance, please feel free to let me know. I'll do my best to provide support and assistance."
19
-
20
-
21
async def format_not_stream_response(response, prompt_tokens, max_tokens, model):
    """Collapse a streamed completion into one ``chat.completion`` payload.

    ``response`` is iterated asynchronously. Lines beginning with ``"data: "``
    are JSON-decoded and the ``choices[0].delta.content`` text of each is
    concatenated, until a ``"data: [DONE]"`` line is seen. Any line that fails
    to process is logged and skipped.

    The collected text is handed to ``split_tokens_from_content`` together
    with ``max_tokens`` and ``model``; its three results are used as the
    message content, the completion token count and the finish reason.

    Raises:
        HTTPException: status 403 when the resulting content is empty.
    """
    alphabet = string.ascii_letters + string.digits
    id_suffix = ''.join(random.choice(alphabet) for _ in range(29))
    chat_id = f"chatcmpl-{id_suffix}"
    fingerprints = model_system_fingerprint.get(model, None)
    system_fingerprint = random.choice(fingerprints) if fingerprints else None
    created_time = int(time.time())

    all_text = ""
    async for line in response:
        try:
            if line.startswith("data: [DONE]"):
                break
            if not line.startswith("data: "):
                continue
            # Rebind on purpose: the error log below then shows the decoded
            # payload when the failure happens after parsing.
            line = json.loads(line[6:])
            first_choice = line["choices"][0]
            if not first_choice.get("delta"):
                continue
            all_text += first_choice["delta"]["content"]
        except Exception as e:
            logger.error(f"Error: {line}, error: {str(e)}")
            continue

    content, completion_tokens, finish_reason = await split_tokens_from_content(all_text, max_tokens, model)
    if not content:
        raise HTTPException(status_code=403, detail="No content in the message.")

    data = {
        "id": chat_id,
        "object": "chat.completion",
        "created": created_time,
        "model": model,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": content,
                },
                "logprobs": None,
                "finish_reason": finish_reason
            }
        ],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens
        }
    }
    # The fingerprint key is only present when one is configured for the model.
    if system_fingerprint:
        data["system_fingerprint"] = system_fingerprint
    return data
72
-
73
-
74
async def wss_stream_response(websocket, conversation_id):
    """Yield the base64-decoded bodies of websocket frames for one conversation.

    Each received frame is parsed as JSON. Frames without a truthy
    ``sequenceId`` or whose ``data.conversation_id`` differs from
    ``conversation_id`` are ignored. Every 80th sequence id is acknowledged
    with a ``sequenceAck`` message before the frame body is yielded.

    The generator stops when:
      * no frame arrives within 10 seconds,
      * the connection is closed (``b"data: [DONE]\\n\\n"`` is yielded first
        when the close code is 1000), or
      * ``websocket.closed`` becomes true.

    Any other error while handling a frame is logged and the loop continues.
    """
    while not websocket.closed:
        try:
            message = await asyncio.wait_for(websocket.recv(), timeout=10)
            if not message:
                # Logged through the shared logger like every other message here.
                logger.info("No message received within the specified time.")
                continue

            result = json.loads(message)
            sequence_id = result.get("sequenceId")
            if not sequence_id:
                continue
            data = result.get("data", {})
            if conversation_id != data.get("conversation_id", ""):
                continue
            if sequence_id % 80 == 0:
                await websocket.send(
                    json.dumps(
                        {"type": "sequenceAck", "sequenceId": sequence_id}
                    )
                )
            yield pybase64.b64decode(data.get("body", None))
        except asyncio.TimeoutError:
            logger.error("Timeout! No message received within the specified time.")
            break
        except websockets.ConnectionClosed as e:
            if e.code == 1000:
                logger.error("WebSocket closed normally with code 1000 (OK)")
                yield b"data: [DONE]\n\n"
            else:
                logger.error(f"WebSocket closed with error code {e.code}")
            # Leave explicitly instead of relying on `websocket.closed` having
            # flipped; otherwise the loop could spin on a dead connection.
            break
        except Exception as e:
            logger.error(f"Error: {str(e)}")
            continue
109
-
110
-
111
async def head_process_response(response):
    """Consume the head of ``response`` to see whether generation has started.

    Chunks are decoded as UTF-8; only those starting with ``"data: {"`` are
    inspected. Chunks whose message author role is ``user`` or ``system`` are
    skipped.

    Returns:
        ``(response, True)`` as soon as a message with status
        ``"in_progress"`` is seen.
        ``(response, False)`` when a chunk has no message but contains an
        ``"error"`` key, or when the stream ends without an in-progress
        message.

    Note that the chunks read here have been consumed from ``response``.
    """
    async for chunk in response:
        chunk = chunk.decode("utf-8")
        if not chunk.startswith("data: {"):
            continue
        chunk_old_data = json.loads(chunk[6:])
        # `dict.get(key, {})` still returns None for an explicit JSON null,
        # so normalise with `or {}` before calling `.get` on the result.
        message = chunk_old_data.get("message") or {}
        if not message and "error" in chunk_old_data:
            return response, False
        role = (message.get("author") or {}).get("role")
        if role in ("user", "system"):
            continue
        if message.get("status") == "in_progress":
            return response, True
    return response, False
127
-
128
-
129
async def stream_response(service, response, model, max_tokens):
    """Translate an upstream event stream into ``chat.completion.chunk`` lines.

    An initial chunk with an empty assistant delta is yielded first. Then each
    UTF-8 decoded chunk of ``response`` that starts with ``"data: {"`` is
    parsed and converted into a delta:

      * a message-less chunk of type ``"moderation"`` yields the canned
        ``moderation_message`` and ends the stream;
      * ``"in_progress"`` messages yield only the text added since the
        previous chunk, with Markdown fences inserted around code and
        execution output and a quote line for tool output;
      * ``"finished_successfully"`` messages yield image links (resolved via
        ``service.get_download_url``), sandbox file links (resolved via
        ``service.get_response_file_url``), the remaining text, or the
        metadata ``finished_text``.

    Messages authored by ``user`` or ``system`` are skipped. Once the stream
    is marked finished, ``"data: [DONE]"`` is emitted when the next upstream
    chunk arrives. Errors while handling a chunk are logged and the chunk is
    skipped, unless the chunk itself carries an ``"error"`` field, in which
    case the stream is terminated with ``"data: [DONE]"``.

    Yields:
        str: server-sent-event lines of the form ``"data: ...\\n\\n"``.
    """
    chat_id = f"chatcmpl-{''.join(random.choice(string.ascii_letters + string.digits) for _ in range(29))}"
    system_fingerprint_list = model_system_fingerprint.get(model, None)
    system_fingerprint = random.choice(system_fingerprint_list) if system_fingerprint_list else None
    created_time = int(time.time())
    # Counts emitted chunks (incremented once per yield), not real tokens.
    completion_tokens = 0
    # Upstream sends cumulative text; these remember how much was already sent.
    len_last_content = 0
    len_last_citation = 0
    last_message_id = None
    last_role = None
    last_content_type = None
    model_slug = None
    end = False

    # One dict is reused and mutated for every emitted chunk.
    chunk_new_data = {
        "id": chat_id,
        "object": "chat.completion.chunk",
        "created": created_time,
        "model": model,
        "choices": [
            {
                "index": 0,
                "delta": {"role": "assistant", "content": ""},
                "logprobs": None,
                "finish_reason": None
            }
        ]
    }
    if system_fingerprint:
        chunk_new_data["system_fingerprint"] = system_fingerprint
    yield f"data: {json.dumps(chunk_new_data)}\n\n"

    async for chunk in response:
        chunk = chunk.decode("utf-8")
        if end:
            logger.info(f"Response Model: {model_slug}")
            yield "data: [DONE]\n\n"
            break
        try:
            if chunk.startswith("data: {"):
                chunk_old_data = json.loads(chunk[6:])
                finish_reason = None
                # An explicit JSON null would make the `.get` calls below
                # raise, which also made the moderation branch unreachable.
                message = chunk_old_data.get("message") or {}
                conversation_id = chunk_old_data.get("conversation_id")
                role = message.get('author', {}).get('role')
                if role == 'user' or role == 'system':
                    continue

                status = message.get("status")
                message_id = message.get("id")
                content = message.get("content", {})
                recipient = message.get("recipient", "")
                meta_data = message.get("metadata", {})
                initial_text = meta_data.get("initial_text", "")
                model_slug = meta_data.get("model_slug", model_slug)

                if not message and chunk_old_data.get("type") == "moderation":
                    delta = {"role": "assistant", "content": moderation_message}
                    finish_reason = "stop"
                    end = True
                elif status == "in_progress":
                    outer_content_type = content.get("content_type")
                    if outer_content_type == "text":
                        part = content.get("parts", [])[0]
                        if not part:
                            # Empty part: only emit a separator when the speaker changes.
                            if role == 'assistant' and last_role != 'assistant':
                                if last_role is None:
                                    new_text = ""
                                else:
                                    new_text = "\n"
                            elif role == 'tool' and last_role != 'tool':
                                new_text = f">{initial_text}\n"
                            else:
                                new_text = ""
                        else:
                            if last_message_id and last_message_id != message_id:
                                continue
                            citation = message.get("metadata", {}).get("citations", [])
                            if len(citation) > len_last_citation:
                                inside_metadata = citation[-1].get("metadata", {})
                                citation_title = inside_metadata.get("title", "")
                                citation_url = inside_metadata.get("url", "")
                                new_text = f' **[[""]]({citation_url} "{citation_title}")** '
                                len_last_citation = len(citation)
                            else:
                                if role == 'assistant' and last_role != 'assistant':
                                    if recipient == 'dalle.text2im':
                                        new_text = f"\n```{recipient}\n{part[len_last_content:]}"
                                    elif last_role is None:
                                        new_text = part[len_last_content:]
                                    else:
                                        new_text = f"\n\n{part[len_last_content:]}"
                                elif role == 'tool' and last_role != 'tool':
                                    new_text = f">{initial_text}\n{part[len_last_content:]}"
                                elif role == 'tool':
                                    new_text = part[len_last_content:].replace("\n\n", "\n")
                                else:
                                    new_text = part[len_last_content:]
                                # NOTE(review): placed in the non-citation branch so text
                                # arriving together with a new citation is not dropped;
                                # confirm against the original indentation.
                                len_last_content = len(part)
                    else:
                        text = content.get("text", "")
                        if outer_content_type == "code" and last_content_type != "code":
                            language = content.get("language", "")
                            if not language or language == "unknown":
                                language = recipient
                            new_text = "\n```" + language + "\n" + text[len_last_content:]
                        elif outer_content_type == "execution_output" and last_content_type != "execution_output":
                            new_text = "\n```" + "Output" + "\n" + text[len_last_content:]
                        else:
                            new_text = text[len_last_content:]
                        len_last_content = len(text)
                    # Close the fence opened for the previous code / output block.
                    if last_content_type == "code" and outer_content_type != "code":
                        new_text = "\n```\n" + new_text
                    elif last_content_type == "execution_output" and outer_content_type != "execution_output":
                        new_text = "\n```\n" + new_text

                    delta = {"content": new_text}
                    last_content_type = outer_content_type
                    if completion_tokens >= max_tokens:
                        delta = {}
                        finish_reason = "length"
                        end = True
                elif status == "finished_successfully":
                    if content.get("content_type") == "multimodal_text":
                        parts = content.get("parts", [])
                        delta = {}
                        for part in parts:
                            if isinstance(part, str):
                                continue
                            inner_content_type = part.get('content_type')
                            if inner_content_type == "image_asset_pointer":
                                last_content_type = "image_asset_pointer"
                                file_id = part.get('asset_pointer').replace('file-service://', '')
                                logger.debug(f"file_id: {file_id}")
                                image_download_url = await service.get_download_url(file_id)
                                logger.debug(f"image_download_url: {image_download_url}")
                                if image_download_url:
                                    delta = {"content": f"\n```\n![image]({image_download_url})\n"}
                                else:
                                    delta = {"content": "\n```\nFailed to load the image.\n"}
                    elif message.get("end_turn"):
                        part = content.get("parts", [])[0]
                        new_text = part[len_last_content:]
                        if not new_text:
                            # Nothing new to send: surface sandbox file links instead.
                            matches = re.findall(r'\(sandbox:(.*?)\)', part)
                            if matches:
                                file_url_content = ""
                                for i, sandbox_path in enumerate(matches):
                                    file_download_url = await service.get_response_file_url(conversation_id, message_id, sandbox_path)
                                    if file_download_url:
                                        file_url_content += f"\n```\n\n![File {i+1}]({file_download_url})\n"
                                delta = {"content": file_url_content}
                            else:
                                delta = {}
                        else:
                            delta = {"content": new_text}
                        finish_reason = "stop"
                        end = True
                    else:
                        # Message finished but the turn continues: restart offsets.
                        len_last_content = 0
                        if meta_data.get("finished_text"):
                            delta = {"content": f"\n{meta_data.get('finished_text')}\n"}
                        else:
                            continue
                else:
                    continue
                last_message_id = message_id
                last_role = role
                if not end and not delta.get("content"):
                    delta = {"role": "assistant", "content": ""}
                chunk_new_data["choices"][0]["delta"] = delta
                chunk_new_data["choices"][0]["finish_reason"] = finish_reason
                if not service.history_disabled:
                    chunk_new_data.update({
                        "message_id": message_id,
                        "conversation_id": conversation_id,
                    })
                completion_tokens += 1
                yield f"data: {json.dumps(chunk_new_data)}\n\n"
            elif chunk.startswith("data: [DONE]"):
                logger.info(f"Response Model: {model_slug}")
                yield "data: [DONE]\n\n"
            else:
                continue
        except Exception as e:
            if chunk.startswith("data: "):
                # The chunk may be what failed to parse in the first place, so
                # a second decode must not be allowed to escape this handler.
                try:
                    chunk_data = json.loads(chunk[6:])
                except ValueError:
                    chunk_data = None
                if isinstance(chunk_data, dict) and chunk_data.get("error"):
                    logger.error(f"Error: {chunk_data.get('error')}")
                    yield "data: [DONE]\n\n"
                    break
            logger.error(f"Error: {chunk}, details: {str(e)}")
            continue
322
-
323
-
324
# Compiled once at import time; the pattern text is unchanged.
_URL_PATTERN = re.compile(
    r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))'
)


def get_url_from_content(content):
    """Split a leading URL off ``content``.

    Only strings that start with ``"http"`` are examined. The first
    space-delimited token is matched against the URL pattern; on success the
    matched URL and the rest of the text (whitespace-stripped) are returned.

    Only the leading occurrence is removed, so a later URL or text that
    happens to contain the same characters is left intact.

    Returns:
        ``(url, remaining_content)`` when a URL is found, otherwise
        ``(None, content)`` with ``content`` returned unchanged (this includes
        non-string input).
    """
    if not (isinstance(content, str) and content.startswith('http')):
        return None, content
    match = _URL_PATTERN.match(content.split(' ')[0])
    if match is None:
        return None, content
    url = match[0]
    # The match is anchored at the start of the first token, hence at the
    # start of `content`, so slicing removes exactly that one occurrence.
    return url, content[len(url):].strip()
335
-
336
-
337
def format_messages_with_url(content):
    """Turn leading URLs in ``content`` into ``image_url`` content parts.

    ``get_url_from_content`` is applied repeatedly until it stops returning a
    URL; every URL found is logged. When none is found the (unchanged)
    content is returned as-is. Otherwise a list is returned holding one
    ``text`` part with the remaining content followed by one ``image_url``
    part per URL, in the order they were found.
    """
    found_urls = []
    url, content = get_url_from_content(content)
    while url:
        found_urls.append(url)
        logger.info(f"Found a file_url from messages: {url}")
        url, content = get_url_from_content(content)

    if len(found_urls) == 0:
        return content

    text_part = {"type": "text", "text": content}
    image_parts = [
        {"type": "image_url", "image_url": {"url": link}}
        for link in found_urls
    ]
    return [text_part, *image_parts]
362
-
363
-
364
async def api_messages_to_chat(service, api_messages, upload_by_url=False):
    """Convert API-style messages into chat messages and count prompt tokens.

    For each message, string content becomes a single-part ``text`` message.
    With ``upload_by_url`` set, string content is first passed through
    ``format_messages_with_url``, which may turn it into a list.

    List content becomes a ``multimodal_text`` message. ``text`` items are
    appended as parts. ``image_url`` items are fetched with
    ``get_file_content`` and uploaded through ``service.upload_file``; items
    whose upload returns nothing are dropped. Uploaded images add an
    ``image_asset_pointer`` part plus an attachment; other files add an
    attachment only, and ``service.check_upload`` is awaited for them unless
    the upload's ``use_case`` is ``"ace_upload"``.

    Returns:
        ``(chat_messages, prompt_tokens)`` where ``prompt_tokens`` is the
        result of ``num_tokens_from_messages`` plus the tokens attributed to
        uploaded files.
    """
    file_tokens = 0
    chat_messages = []
    for api_message in api_messages:
        role = api_message.get('role')
        content = api_message.get('content')
        if upload_by_url and isinstance(content, str):
            content = format_messages_with_url(content)

        if not isinstance(content, list):
            content_type = "text"
            parts = [content]
            metadata = {}
        else:
            content_type = "multimodal_text"
            parts = []
            attachments = []
            for item in content:
                item_type = item.get("type")
                if item_type == "text":
                    parts.append(item.get("text"))
                    continue
                if item_type != "image_url":
                    continue

                image_url = item.get("image_url")
                url = image_url.get("url")
                detail = image_url.get("detail", "auto")
                file_content, mime_type = await get_file_content(url)
                file_meta = await service.upload_file(file_content, mime_type)
                if not file_meta:
                    continue

                file_id = file_meta["file_id"]
                file_size = file_meta["size_bytes"]
                file_name = file_meta["file_name"]
                # From here on the mime type reported by the upload is used.
                mime_type = file_meta["mime_type"]
                use_case = file_meta["use_case"]
                attachment = {
                    "id": file_id,
                    "size": file_size,
                    "name": file_name,
                    "mime_type": mime_type,
                }
                if mime_type.startswith("image/"):
                    width, height = file_meta["width"], file_meta["height"]
                    file_tokens += await calculate_image_tokens(width, height, detail)
                    parts.append({
                        "content_type": "image_asset_pointer",
                        "asset_pointer": f"file-service://{file_id}",
                        "size_bytes": file_size,
                        "width": width,
                        "height": height
                    })
                    attachment["width"] = width
                    attachment["height"] = height
                else:
                    if use_case != "ace_upload":
                        await service.check_upload(file_id)
                    # NOTE(review): counted for every non-image file regardless of
                    # use_case; confirm against the original indentation.
                    file_tokens += file_size // 1000
                attachments.append(attachment)
            metadata = {"attachments": attachments}

        chat_messages.append({
            "id": str(uuid.uuid4()),
            "author": {"role": role},
            "content": {"content_type": content_type, "parts": parts},
            "metadata": metadata
        })

    text_tokens = await num_tokens_from_messages(api_messages, service.resp_model)
    return chat_messages, text_tokens + file_tokens