Spaces:
Sleeping
Sleeping
| import base64 | |
| import numpy as np | |
| from typing import Dict | |
| import random | |
| import asyncio | |
| import logging | |
| import os, json | |
| from typing import Any | |
| from aiohttp import ClientSession | |
| from tqdm.asyncio import tqdm_asyncio | |
| import random | |
| from time import sleep | |
| import aiolimiter | |
| import openai | |
| from openai import AsyncOpenAI, OpenAIError | |
| from anthropic import AsyncAnthropic | |
async def _throttled_openai_chat_completion_acreate(
    client: AsyncOpenAI,
    model: str,
    messages,
    temperature: float,
    max_tokens: int,
    top_p: float,
    limiter: aiolimiter.AsyncLimiter,
    json_format: bool = False,
    n: int = 1,
):
    """Send one chat-completion request under *limiter*, retrying on API errors.

    Args:
        client: Object exposing ``chat.completions.create`` as a coroutine.
        model: Model name forwarded to the API.
        messages: Chat messages forwarded unchanged as ``messages=``.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.
        limiter: Async context manager entered once before the first attempt.
        json_format: When true, ``response_format={"type": "json_object"}``
            is added to the request.
        n: Number of completions requested.

    Returns:
        The object returned by ``client.chat.completions.create``, or ``None``
        when the request is rejected with ``BadRequestError`` or when all
        10 attempts fail.
    """
    request_kwargs = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "top_p": top_p,
        "n": n,
    }
    if json_format:
        request_kwargs["response_format"] = {"type": "json_object"}

    async with limiter:
        for _ in range(10):
            try:
                return await client.chat.completions.create(**request_kwargs)
            except openai.RateLimitError:
                print("Rate limit exceeded, retrying...")
                # Longer back-off for rate limits. This must be asyncio.sleep:
                # a blocking time.sleep would freeze the event loop and stall
                # every other request that is running concurrently.
                await asyncio.sleep(random.randint(10, 20))
            except openai.BadRequestError as e:
                # A malformed request will not succeed on retry.
                print(e)
                return None
            except OpenAIError as e:
                print(e)
                await asyncio.sleep(random.randint(5, 10))
        return None
def _blank_question_payload():
    """Return a fresh placeholder dict used when a JSON completion is unusable."""
    # Built anew on every call so callers can mutate one placeholder without
    # affecting the placeholders stored for other failed requests.
    return {
        "question": "",
        "options": {
            "A": "",
            "B": "",
            "C": "",
            "D": "",
        },
        "distractors": {
            "E": "",
            "F": "",
            "G": "",
        },
        "correct_answer": "",
    }


def _openai_choice_text(response, index):
    """Return the message text of choice *index*, or "" when it is missing."""
    if not response:
        return ""
    choices = response.choices or []
    # The API may return fewer choices than were requested.
    if index >= len(choices):
        return ""
    choice = choices[index]
    message = choice.message if choice else None
    content = message.content if message else None
    return content or ""


def _decode_openai_choice(response, index, json_format):
    """Convert choice *index* to its output form (text, or parsed JSON)."""
    text = _openai_choice_text(response, index)
    if not json_format:
        return text
    if not text:
        return _blank_question_payload()
    try:
        return json.loads(text)
    except json.JSONDecodeError as e:
        # One malformed completion must not discard the whole batch.
        print(f"Failed to parse JSON response: {e}")
        return _blank_question_payload()


async def generate_from_openai_chat_completion(
    client,
    messages,
    engine_name: str,
    temperature: float = 1.0,
    max_tokens: int = 512,
    top_p: float = 1.0,
    requests_per_minute: int = 100,
    json_format: bool = False,
    n: int = 1,
):
    """Run one chat-completion request per entry of *messages* and collect results.

    Args:
        client: Client handed to ``_throttled_openai_chat_completion_acreate``.
        messages: Iterable whose items are each sent as the ``messages``
            argument of one request.
        engine_name: Model name for every request.
        temperature: Sampling temperature for every request.
        max_tokens: Completion token limit for every request.
        top_p: Nucleus-sampling value for every request.
        requests_per_minute: Used to size the rate limiter
            (``AsyncLimiter(1, 60 / requests_per_minute)``).
        json_format: When true, each completion is parsed with ``json.loads``.
        n: Number of completions requested per entry.

    Returns:
        A list with one element per entry of *messages*, in order. With
        ``n == 1`` each element is the completion text (or parsed JSON);
        otherwise each element is a list of ``n`` such values. Missing,
        empty or unparsable completions become ``""`` (text mode) or a
        blank question dict (JSON mode).
    """
    # https://chat.openai.com/share/09154613-5f66-4c74-828b-7bd9384c2168
    delay = 60.0 / requests_per_minute
    limiter = aiolimiter.AsyncLimiter(1, delay)
    pending = [
        _throttled_openai_chat_completion_acreate(
            client,
            model=engine_name,
            messages=message,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            limiter=limiter,
            json_format=json_format,
            n=n,
        )
        for message in messages
    ]
    responses = await tqdm_asyncio.gather(*pending)

    if n == 1:
        return [
            _decode_openai_choice(response, 0, json_format)
            for response in responses
        ]
    return [
        [_decode_openai_choice(response, i, json_format) for i in range(n)]
        for response in responses
    ]
async def _throttled_claude_chat_completion_acreate(
    client: AsyncAnthropic,
    model: str,
    messages,
    temperature: float,
    max_tokens: int,
    top_p: float,
    limiter: aiolimiter.AsyncLimiter,
):
    """Send a single ``messages.create`` request under *limiter*.

    Args:
        client: Object exposing ``messages.create`` as a coroutine.
        model: Model name forwarded to the API.
        messages: Chat messages forwarded unchanged as ``messages=``.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.
        limiter: Async context manager entered before the request is sent.

    Returns:
        The object returned by ``client.messages.create``, or ``None`` when
        the call raises an ``Exception``. The request is attempted once.
    """
    async with limiter:
        try:
            return await client.messages.create(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
            )
        except Exception as e:
            # Deliberately not a bare ``except``: task cancellation,
            # KeyboardInterrupt and SystemExit must propagate. The error is
            # printed so failed requests are not silently invisible.
            print(e)
            return None
async def generate_from_claude_chat_completion(
    client,
    messages,
    engine_name: str,
    temperature: float = 1.0,
    max_tokens: int = 512,
    top_p: float = 1.0,
    requests_per_minute: int = 100,
    n: int = 1,
):
    """Send every entry of *messages* ``n`` times and collect the reply texts.

    Each entry is submitted as ``n`` independent requests through
    ``_throttled_claude_chat_completion_acreate``, all sharing one
    ``AsyncLimiter(1, 60 / requests_per_minute)``.

    Returns:
        With ``n == 1``, a flat list holding one text per entry. Otherwise a
        list of lists, where consecutive runs of ``n`` texts are grouped
        together in request order. A reply that is missing, has no content,
        or has empty text contributes ``""``.
    """

    def first_text(reply):
        # Same guard chain as before: any falsy link yields the empty string.
        if reply and reply.content and reply.content[0] and reply.content[0].text:
            return reply.content[0].text
        return ""

    # https://chat.openai.com/share/09154613-5f66-4c74-828b-7bd9384c2168
    limiter = aiolimiter.AsyncLimiter(1, 60.0 / requests_per_minute)

    # Repeat each prompt n times, keeping the copies adjacent.
    repeated = [message for message in messages for _ in range(n)]
    pending = [
        _throttled_claude_chat_completion_acreate(
            client,
            model=engine_name,
            messages=prompt,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            limiter=limiter,
        )
        for prompt in repeated
    ]
    replies = await tqdm_asyncio.gather(*pending)
    texts = [first_text(reply) for reply in replies]

    if n == 1:
        return texts

    # For n <= 0 no requests were made, so texts is empty; the step only has
    # to be a valid non-zero value for range().
    group_size = n if n > 0 else 1
    return [
        texts[start:start + group_size]
        for start in range(0, len(texts), group_size)
    ]