| | import os |
| | import time |
| | import logging |
| | import requests |
| | import json |
| | import concurrent.futures |
| | import threading |
| | from datetime import datetime, timedelta |
| | from apscheduler.schedulers.background import BackgroundScheduler |
| | from flask import Flask, request, jsonify, Response, stream_with_context |
| |
|
# Run the process in China Standard Time so log timestamps and the daily
# billing window align with the DeepSeek account's timezone.
# NOTE(review): time.tzset() is Unix-only — this module will not start on
# Windows; confirm deployment target.
os.environ['TZ'] = 'Asia/Shanghai'
time.tzset()

# Root-logger setup; all modules log through this format.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Upstream DeepSeek endpoints: balance query, chat completions, model list.
API_ENDPOINT = "https://api.deepseek.com/user/balance"
TEST_MODEL_ENDPOINT = "https://api.deepseek.com/v1/chat/completions"
MODELS_ENDPOINT = "https://api.deepseek.com/models"

app = Flask(__name__)

# Names of the text models this proxy exposes; populated by refresh_models().
text_models = []

# Key pools refreshed by load_keys(): keys with no usable balance vs. keys
# that can serve requests.
invalid_keys_global = []
valid_keys_global = []

# NOTE(review): this module-level executor appears unused — load_keys() and
# get_billing_info() create their own pools. Candidate for removal; verify
# no external importer relies on it.
executor = concurrent.futures.ThreadPoolExecutor(max_workers=10000)
# Per-model round-robin cursor into valid_keys_global (see select_key()).
model_key_indices = {}

# Sliding one-minute window used by the "/" endpoint to report RPM/TPM.
# Both lists are appended/trimmed in lockstep under data_lock.
request_timestamps = []
token_counts = []
data_lock = threading.Lock()
|
def get_credit_summary(api_key):
    """Query DeepSeek for the remaining balance of *api_key*.

    Returns {"total_balance": float} expressed in CNY — any USD balance is
    converted with the live exchange rate, falling back to a fixed 7.2
    rate when the FX service is unavailable — or None when the key is
    unusable or the balance request fails.
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    try:
        # BUG FIX: the original had no timeout, so a stalled balance query
        # could hang key loading indefinitely.
        response = requests.get(API_ENDPOINT, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
        if not data.get("is_available", False):
            logging.warning(f"API Key: {api_key} is not available.")
            return None

        total_balance_cny = 0.0
        usd_balance = 0.0
        for balance_info in data.get("balance_infos", []):
            currency = balance_info.get("currency")
            total_balance = float(balance_info.get("total_balance", 0))
            if currency == "CNY":
                total_balance_cny += total_balance
            elif currency == "USD":
                usd_balance = total_balance

        fallback_rate = 7.2  # used when the FX rate cannot be fetched
        try:
            exchange_rate = get_usd_to_cny_rate()
            if exchange_rate is not None:
                total_balance_cny += usd_balance * exchange_rate
                logging.info(f"获取美元兑人民币汇率成功,API Key:{api_key},当前总额度(CNY): {total_balance_cny}")
            else:
                logging.warning(f"获取美元兑人民币汇率失败,无法转换美元余额,API Key:{api_key}")
                total_balance_cny += usd_balance * fallback_rate
        except Exception as e:
            logging.error(f"获取美元兑人民币汇率失败,API Key:{api_key},错误信息:{e}")
            total_balance_cny += usd_balance * fallback_rate

        return {"total_balance": float(total_balance_cny)}
    except requests.exceptions.RequestException as e:
        logging.error(f"获取额度信息失败,API Key:{api_key},错误信息:{e}")
        return None
    except Exception as e:
        logging.error(f"处理额度信息失败,API Key:{api_key},错误信息:{e}")
        return None
| |
|
def get_usd_to_cny_rate():
    """Fetch the current USD→CNY exchange rate.

    Returns the rate as a float, or None when the FX service request
    fails or the response lacks a CNY entry.
    """
    try:
        # BUG FIX: added a timeout so a hanging FX service cannot stall
        # the balance computation.
        response = requests.get(
            "https://api.exchangerate-api.com/v4/latest/USD", timeout=10
        )
        response.raise_for_status()
        data = response.json()
        return data.get("rates", {}).get("CNY")
    except requests.exceptions.RequestException as e:
        logging.error(f"获取美元兑人民币汇率失败,错误信息:{e}")
        return None
| |
|
def refresh_models():
    """Populate the module-level list of supported text models."""
    # BUG FIX: without the global declaration the assignment only bound a
    # local variable and the module-level text_models stayed empty.
    global text_models
    text_models = ["deepseek-chat", "deepseek-reasoner"]
    logging.info(f"所有文本模型列表:{text_models}")
| |
|
def load_keys():
    """Reload API keys from the KEYS env var and classify them.

    Deduplicates the comma-separated list, writes the deduplicated list
    back to the environment, validates every key concurrently via
    process_key(), and publishes the results to valid_keys_global /
    invalid_keys_global.
    """
    # BUG FIX: the original called .split() on the raw env value and
    # crashed with AttributeError when KEYS was unset; it also kept empty
    # entries produced by stray commas.
    keys_str = os.environ.get("KEYS", "")
    keys = [key.strip() for key in keys_str.split(',') if key.strip()]
    unique_keys = list(set(keys))
    os.environ["KEYS"] = ','.join(unique_keys)

    logging.info(f"加载的 keys:{unique_keys}")

    invalid_keys = []
    valid_keys = []
    # One worker per key is plenty (the original asked for 10000 threads).
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=max(len(unique_keys), 1)
    ) as pool:
        future_to_key = {
            pool.submit(process_key, key): key for key in unique_keys
        }
        for future in concurrent.futures.as_completed(future_to_key):
            key = future_to_key[future]
            try:
                key_type = future.result()
                if key_type == "invalid":
                    invalid_keys.append(key)
                elif key_type == "valid":
                    valid_keys.append(key)
            except Exception as exc:
                logging.error(f"处理 KEY {key} 生成异常: {exc}")

    logging.info(f"无效 KEY:{invalid_keys}")
    logging.info(f"有效 KEY:{valid_keys}")

    global invalid_keys_global, valid_keys_global
    invalid_keys_global = invalid_keys
    valid_keys_global = valid_keys
| |
|
def process_key(key):
    """Classify *key* as "valid" or "invalid" by its remaining balance.

    A key is valid only when its balance summary can be fetched and the
    total balance is strictly positive.
    """
    summary = get_credit_summary(key)
    if summary is None:
        return "invalid"
    return "valid" if summary.get("total_balance", 0) > 0 else "invalid"
| |
|
def select_key(model_name):
    """Pick the next valid API key for *model_name*, round-robin.

    Returns the key at the model's rotation cursor and advances the
    cursor; returns None (and resets the cursor) when no valid keys are
    loaded.
    """
    # BUG FIX (readability): the original wrapped this in a for-loop that
    # unconditionally returned on its first iteration — dead scaffolding
    # removed; observable behavior is unchanged.
    available_keys = valid_keys_global
    if not available_keys:
        model_key_indices[model_name] = 0
        return None

    current_index = model_key_indices.get(model_name, 0)
    key = available_keys[current_index % len(available_keys)]
    # NOTE(review): the cursor update is not lock-protected; concurrent
    # requests may occasionally reuse an index, which is harmless here.
    model_key_indices[model_name] = current_index + 1
    return key
| |
|
def check_authorization(request):
    """Validate the request's Bearer token against AUTHORIZATION_KEY.

    Returns True only when the env var is configured and the request's
    Authorization header equals "Bearer <AUTHORIZATION_KEY>"; logs the
    reason and returns False otherwise.
    """
    expected_key = os.environ.get("AUTHORIZATION_KEY")
    if not expected_key:
        logging.warning("环境变量 AUTHORIZATION_KEY 未设置,请设置后重试。")
        return False

    supplied = request.headers.get('Authorization')
    if not supplied:
        logging.warning("请求头中缺少 Authorization 字段。")
        return False

    if supplied != f"Bearer {expected_key}":
        logging.warning(f"无效的 Authorization 密钥:{supplied}")
        return False

    return True
| |
|
# Background scheduler: reloads the key pool every hour so newly added or
# newly exhausted keys are picked up without a restart. Started in the
# __main__ block below.
scheduler = BackgroundScheduler()
scheduler.add_job(load_keys, 'interval', hours=1)
# BUG FIX: the original called scheduler.remove_all_jobs() right here,
# which cancelled the job registered on the previous line and silently
# disabled the hourly key refresh.
| |
|
@app.route('/')
def index():
    """Report requests-per-minute and tokens-per-minute over the last 60 s."""
    cutoff = time.time() - 60

    with data_lock:
        # Trim samples older than one minute; the two lists advance in
        # lockstep, so popping both keeps them aligned.
        while request_timestamps and request_timestamps[0] < cutoff:
            request_timestamps.pop(0)
            token_counts.pop(0)

    rpm = len(request_timestamps)
    tpm = sum(token_counts)

    return jsonify({"rpm": rpm, "tpm": tpm})
| | |
@app.route('/handsome/v1/models', methods=['GET'])
def list_models():
    """Return the fixed catalogue of supported models in OpenAI format."""
    if not check_authorization(request):
        return jsonify({"error": "Unauthorized"}), 401

    # (id, created) pairs expanded into full OpenAI-style model records.
    catalogue = [
        ("deepseek-chat", 1678888888),
        ("deepseek-reasoner", 1678888889),
    ]
    detailed_models = [
        {
            "id": model_id,
            "object": "model",
            "created": created,
            "owned_by": "openai",
            "root": model_id,
            "parent": None,
        }
        for model_id, created in catalogue
    ]

    return jsonify({
        "success": True,
        "data": detailed_models
    })
| |
|
def get_billing_info():
    """Sum the remaining balance (CNY) across all currently valid keys.

    Fetches each key's balance concurrently; keys whose lookup fails
    contribute nothing to the total.
    """
    keys = valid_keys_global
    total_balance = 0

    # One worker per key suffices (the original requested 10000 threads);
    # also avoids shadowing the module-level `executor`.
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=max(len(keys), 1)
    ) as pool:
        futures = [pool.submit(get_credit_summary, key) for key in keys]
        for future in concurrent.futures.as_completed(futures):
            try:
                credit_summary = future.result()
                if credit_summary:
                    total_balance += credit_summary.get("total_balance", 0)
            except Exception as exc:
                logging.error(f"获取额度信息生成异常: {exc}")

    return total_balance
| |
|
@app.route('/handsome/v1/dashboard/billing/usage', methods=['GET'])
def billing_usage():
    """Return a 30-day usage report (always zero; the proxy does not meter)."""
    if not check_authorization(request):
        return jsonify({"error": "Unauthorized"}), 401

    end_date = datetime.now()
    start_date = end_date - timedelta(days=30)

    # One entry per day, inclusive of both endpoints (31 entries), matching
    # the original day-stepping loop.
    span_days = (end_date - start_date).days
    daily_usage = [
        {
            "timestamp": int((start_date + timedelta(days=offset)).timestamp()),
            "daily_usage": 0,
        }
        for offset in range(span_days + 1)
    ]

    return jsonify({
        "object": "list",
        "data": daily_usage,
        "total_usage": 0
    })
| |
|
@app.route('/handsome/v1/dashboard/billing/subscription', methods=['GET'])
def billing_subscription():
    """Expose the pooled key balance as an OpenAI-style subscription object."""
    if not check_authorization(request):
        return jsonify({"error": "Unauthorized"}), 401

    total_balance = get_billing_info()
    # Effectively "never expires".
    never_expires = int(datetime(9999, 12, 31).timestamp())

    payload = {
        "object": "billing_subscription",
        "has_payment_method": False,
        "canceled": False,
        "canceled_at": None,
        "delinquent": None,
        "access_until": never_expires,
        "soft_limit": 0,
        "hard_limit": total_balance,
        "system_hard_limit": total_balance,
        "soft_limit_usd": 0,
        "hard_limit_usd": total_balance,
        "system_hard_limit_usd": total_balance,
        "plan": {
            "name": "SiliconFlow API",
            "id": "siliconflow-api"
        },
        "account_name": "SiliconFlow User",
        "po_number": None,
        "billing_email": None,
        "tax_ids": [],
        "billing_address": None,
        "business_address": None,
    }
    return jsonify(payload)
| |
|
def _extract_user_content(messages):
    """Concatenate the text of every user message (plain string or
    multi-part content list) into one space-separated string."""
    user_content = ""
    for message in messages:
        if message["role"] != "user":
            continue
        content = message["content"]
        if isinstance(content, str):
            user_content += content + " "
        elif isinstance(content, list):
            for item in content:
                if isinstance(item, dict) and item.get("type") == "text":
                    user_content += item.get("text", "") + " "
    return user_content.strip()


def _sse_content_chunk(text):
    """Build one OpenAI-style SSE record carrying *text* as a content delta.

    BUG FIX: the original built this inline with double quotes and a
    backslash inside f-string expressions, which is a SyntaxError on
    Python < 3.12.
    """
    payload = {'choices': [{'delta': {'content': text}, 'index': 0}]}
    return f"data: {json.dumps(payload)}\n\n"


def _record_request(tokens):
    """Append one request sample to the shared RPM/TPM sliding window."""
    with data_lock:
        request_timestamps.append(time.time())
        token_counts.append(tokens)


@app.route('/handsome/v1/chat/completions', methods=['POST'])
def handsome_chat_completions():
    """Proxy an OpenAI-style chat completion to DeepSeek.

    Picks a key round-robin, strips sampling params for deepseek-reasoner,
    forwards the request, and returns either a re-formatted JSON response
    or an SSE stream in which reasoning_content is rendered as a quoted
    ("> ") block ahead of the answer. Usage is logged and folded into the
    RPM/TPM window.
    """
    if not check_authorization(request):
        return jsonify({"error": "Unauthorized"}), 401

    data = request.get_json()
    if not data or 'model' not in data:
        return jsonify({"error": "Invalid request data"}), 400

    model_name = data['model']
    api_key = select_key(model_name)
    if not api_key:
        return jsonify(
            {
                "error": (
                    "No available API key for this "
                    "request type or all keys have "
                    "reached their limits"
                )
            }
        ), 429

    # deepseek-reasoner rejects sampling parameters; drop them silently.
    if model_name == "deepseek-reasoner":
        for param in ["temperature", "top_p", "presence_penalty",
                      "frequency_penalty", "logprobs", "top_logprobs"]:
            data.pop(param, None)

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    try:
        start_time = time.time()
        response = requests.post(
            TEST_MODEL_ENDPOINT,
            headers=headers,
            json=data,
            stream=data.get("stream", False),
            timeout=60
        )

        # Pass upstream rate-limit responses through unchanged.
        if response.status_code == 429:
            return jsonify(response.json()), 429

        if data.get("stream", False):
            def generate():
                first_chunk_time = None
                full_response_content = ""
                reasoning_accumulated = ""
                content_accumulated = ""
                first_reasoning_chunk = True

                # BUG FIX: iter_lines() reassembles SSE records split across
                # TCP chunks; the original iter_content(chunk_size=1e10)
                # could hand us half a JSON line and lose that delta.
                for raw_line in response.iter_lines():
                    if not raw_line:
                        continue
                    if first_chunk_time is None:
                        first_chunk_time = time.time()
                    line = raw_line.decode("utf-8")
                    full_response_content += line + "\n"

                    if not line.startswith("data:"):
                        continue
                    # BUG FIX: slice off the prefix instead of the original
                    # lstrip("data: "), which strips a *character set* and
                    # can eat leading payload characters.
                    payload_str = line[len("data:"):].strip()
                    if payload_str == "[DONE]":
                        continue
                    try:
                        chunk_json = json.loads(payload_str)
                    except (ValueError, json.JSONDecodeError) as e:
                        logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {line}")
                        continue

                    choices = chunk_json.get("choices")
                    if not choices:
                        continue
                    delta = choices[0].get("delta", {})

                    reasoning_chunk = delta.get("reasoning_content")
                    if reasoning_chunk is not None:
                        # BUG FIX: the original never accumulated this, so
                        # the end-of-stream log always showed empty output.
                        reasoning_accumulated += reasoning_chunk
                        quoted = reasoning_chunk.replace('\n', '\n> ')
                        if first_reasoning_chunk:
                            quoted = "> " + quoted
                            first_reasoning_chunk = False
                        yield _sse_content_chunk(quoted)

                    content_chunk = delta.get("content")
                    if content_chunk is not None:
                        if not first_reasoning_chunk:
                            # Blank line terminates the quoted reasoning block.
                            yield _sse_content_chunk("\n\n")
                            first_reasoning_chunk = True
                        content_accumulated += content_chunk
                        yield _sse_content_chunk(content_chunk)

                end_time = time.time()
                first_token_time = (
                    first_chunk_time - start_time
                    if first_chunk_time else 0
                )
                total_time = end_time - start_time

                # Second pass over the captured stream: pull token usage
                # out of whichever records carried a "usage" object.
                prompt_tokens = 0
                completion_tokens = 0
                for line in full_response_content.splitlines():
                    if not line.startswith("data:"):
                        continue
                    line = line[5:].strip()
                    if line == "[DONE]":
                        continue
                    try:
                        response_json = json.loads(line)
                        usage = response_json.get("usage") or {}
                        if "completion_tokens" in usage:
                            completion_tokens += usage["completion_tokens"]
                        if "prompt_tokens" in usage:
                            prompt_tokens = usage["prompt_tokens"]
                    except (KeyError, ValueError, IndexError) as e:
                        logging.error(
                            f"解析流式响应单行 JSON 失败: {e}, "
                            f"行内容: {line}"
                        )

                user_content_replaced = _extract_user_content(
                    data.get("messages", [])
                ).replace('\n', '\\n').replace('\r', '\\n')
                response_content_replaced = (
                    (f"```Thinking\n{reasoning_accumulated}\n```\n"
                     if reasoning_accumulated else "") + content_accumulated
                ).replace('\n', '\\n').replace('\r', '\\n')

                logging.info(
                    f"使用的key: {api_key}, "
                    f"提示token: {prompt_tokens}, "
                    f"输出token: {completion_tokens}, "
                    f"首字用时: {first_token_time:.4f}秒, "
                    f"总共用时: {total_time:.4f}秒, "
                    f"使用的模型: {model_name}, "
                    f"用户的内容: {user_content_replaced}, "
                    f"输出的内容: {response_content_replaced}"
                )

                _record_request(prompt_tokens + completion_tokens)

                yield "data: [DONE]\n\n"

            return Response(
                stream_with_context(generate()),
                content_type="text/event-stream"
            )
        else:
            response.raise_for_status()
            end_time = time.time()
            response_json = response.json()
            total_time = end_time - start_time

            try:
                prompt_tokens = response_json["usage"]["prompt_tokens"]
                completion_tokens = response_json["usage"]["completion_tokens"]
                response_content = ""
                choices = response_json.get("choices") or []

                if model_name == "deepseek-reasoner" and choices:
                    choice = choices[0]
                    if "message" in choice:
                        message = choice["message"]
                        if "reasoning_content" in message:
                            # Render the reasoning as a "> " quoted block.
                            reasoning = "> " + message[
                                "reasoning_content"
                            ].replace('\n', '\n> ')
                            response_content += f"{reasoning}\n" + "\n"
                        if "content" in message:
                            response_content += message["content"]
                elif choices:
                    response_content = choices[0]["message"]["content"]
            except (KeyError, ValueError, IndexError) as e:
                logging.error(
                    f"解析非流式响应 JSON 失败: {e}, "
                    f"完整内容: {response_json}"
                )
                prompt_tokens = 0
                completion_tokens = 0
                response_content = ""

            user_content_replaced = _extract_user_content(
                data.get("messages", [])
            ).replace('\n', '\\n').replace('\r', '\\n')
            response_content_replaced = response_content.replace(
                '\n', '\\n'
            ).replace('\r', '\\n')

            logging.info(
                f"使用的key: {api_key}, "
                f"提示token: {prompt_tokens}, "
                f"输出token: {completion_tokens}, "
                f"首字用时: 0, "
                f"总共用时: {total_time:.4f}秒, "
                f"使用的模型: {model_name}, "
                f"用户的内容: {user_content_replaced}, "
                f"输出的内容: {response_content_replaced}"
            )
            _record_request(prompt_tokens + completion_tokens)

            formatted_response = {
                "id": response_json.get("id", ""),
                "object": "chat.completion",
                "created": response_json.get("created", int(time.time())),
                "model": model_name,
                "choices": [
                    {
                        "index": 0,
                        "message": {
                            "role": "assistant",
                            "content": response_content
                        },
                        "finish_reason": "stop"
                    }
                ],
                "usage": {
                    "prompt_tokens": prompt_tokens,
                    "completion_tokens": completion_tokens,
                    "total_tokens": prompt_tokens + completion_tokens
                }
            }

            return jsonify(formatted_response)

    except requests.exceptions.RequestException as e:
        logging.error(f"请求转发异常: {e}")
        return jsonify({"error": str(e)}), 500
| |
|
if __name__ == '__main__':
    # SECURITY FIX: the original logged os.environ wholesale, which wrote
    # the KEYS and AUTHORIZATION_KEY secrets into the log stream. Log a
    # neutral startup message instead.
    logging.info("服务启动中,环境变量已加载(出于安全考虑不打印内容)")

    # Reset the key pools, then perform the first (blocking) key load so
    # the service starts with a classified key set.
    invalid_keys_global = []
    valid_keys_global = []

    load_keys()
    logging.info("程序启动时首次加载 keys 已执行")

    # Start the hourly background reload (job registered above).
    scheduler.start()

    logging.info("首次加载 keys 已手动触发执行")

    refresh_models()
    logging.info("首次刷新模型列表已手动触发执行")

    app.run(
        debug=False,
        host='0.0.0.0',
        port=int(os.environ.get('PORT', 7860))
    )