Spaces:
Paused
Paused
| import random | |
| import json | |
| import aiohttp | |
| import asyncio | |
| from aiohttp import web | |
| from datetime import datetime | |
# Debug mode switch: when True, request/response details (payloads and
# headers) are printed in addition to the basic log lines.
DEBUG_MODE = False

# Define fixed model information.
# DEFAULT_MODEL is used unless the client explicitly asks for ALTERNATE_MODEL.
DEFAULT_MODEL = "llama3.1-8b"
ALTERNATE_MODEL = "llama3.1-70b"
# Upstream endpoint that every request is posted to.
FIXED_URL = "https://api.cerebras.ai/v1/chat/completions"
# Generation settings placed in every upstream payload.
FIXED_TEMPERATURE = 0.2
FIXED_TOP_P = 1
FIXED_MAX_TOKENS = 4096
# Log function for basic information
def log_basic_info(message):
    """Print ``message`` to stdout, prefixed with the current local time.

    The emitted line has the form ``[YYYY-MM-DD HH:MM:SS] message``.
    """
    # datetime supports strftime directives directly in a format spec.
    stamp = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
    print(f"[{stamp}] {message}")
# Asynchronous function to send request and print debug information
async def send_request(auth_tokens, data):
    """Post a chat-completion request to ``FIXED_URL`` and return the body.

    Args:
        auth_tokens: Sequence of bearer tokens. Only the first entry is used
            to authenticate the upstream call.
        data: Decoded client request body. Only ``messages`` and ``model``
            are read; temperature, top_p and max_tokens always come from the
            module-level fixed values.

    Returns:
        The raw upstream response body as text, whatever the upstream status
        code was.

    Raises:
        Exception: Any failure (empty token list, network error, ...) is
            logged and re-raised, so the caller sees the real cause instead
            of silently receiving ``None``.
    """
    try:
        headers = {
            "accept": "application/json",
            "authorization": f"Bearer {auth_tokens[0]}",
            "content-type": "application/json",
        }
        requested_model = data.get("model", DEFAULT_MODEL)
        # Only ALTERNATE_MODEL may be selected explicitly; every other value
        # (including unknown model names) falls back to DEFAULT_MODEL.
        model_to_use = ALTERNATE_MODEL if requested_model == ALTERNATE_MODEL else DEFAULT_MODEL
        log_basic_info(f"Requested model: {requested_model}, Using model: {model_to_use}")
        payload = {
            "messages": data.get("messages", []),
            "model": model_to_use,
            "temperature": FIXED_TEMPERATURE,
            "top_p": FIXED_TOP_P,
            "max_tokens": FIXED_MAX_TOKENS,
        }
        if DEBUG_MODE:
            # NOTE: this prints the bearer token; keep DEBUG_MODE off in
            # environments where stdout is shared.
            print("Request Payload:", json.dumps(payload, indent=4))
            print("Request Headers:", headers)

        async with aiohttp.ClientSession() as session:
            async with session.post(FIXED_URL, headers=headers, json=payload) as resp:
                response_text = await resp.text()
                status = resp.status

        # The parsed body is used for logging only, so a non-JSON or
        # unexpectedly shaped body must not abort the request.
        try:
            response_json = json.loads(response_text)
        except ValueError:
            response_json = None
        if not isinstance(response_json, dict):
            response_json = {}

        usage = response_json.get('usage')
        time_info = response_json.get('time_info')
        total_tokens = usage.get('total_tokens', 'N/A') if isinstance(usage, dict) else 'N/A'
        total_time = time_info.get('total_time') if isinstance(time_info, dict) else None
        # Apply the float format only to real numbers: formatting the 'N/A'
        # placeholder with ':.3f' raises ValueError.
        if isinstance(total_time, (int, float)):
            time_text = f"{total_time:.3f}"
        else:
            time_text = "N/A"
        log_basic_info(f"Path: {FIXED_URL}, Status Code: {status}, Total Tokens Used: {total_tokens}, Total Time: {time_text} seconds")
        return response_text
    except Exception as e:
        log_basic_info(f"Exception occurred: {str(e)}")
        # Re-raise instead of implicitly returning None, which the caller
        # would otherwise try to parse as JSON.
        raise
# Main handler function
async def handle_request(request):
    """Relay a client chat-completion request through ``send_request``.

    The ``Authorization`` header may carry several comma-separated bearer
    tokens; one of them is picked at random for each request and used for
    the upstream call.

    Args:
        request: The incoming aiohttp request. Its body must be JSON.

    Returns:
        A JSON response: the parsed upstream body on success, 400 when the
        body is not valid JSON or no usable token was supplied, 502 when the
        upstream call produced no body, and 500 for any other failure.
        The upstream HTTP status is not propagated because ``send_request``
        returns only the body text.
    """
    try:
        try:
            request_data = await request.json()
        except json.JSONDecodeError:
            # A malformed body is a client error, not a server error.
            return web.json_response({"error": "Request body must be valid JSON"}, status=400)

        # Look the header up on request.headers directly: it is
        # case-insensitive, whereas a plain dict copy is not.
        authorization_header = request.headers.get('Authorization', '')
        candidates = authorization_header.replace('Bearer ', '').split(',')
        # Drop empty entries: ''.split(',') yields [''], which would
        # otherwise pass the emptiness check below.
        auth_tokens = [token for token in (part.strip() for part in candidates) if token]
        if not auth_tokens:
            return web.json_response({"error": "Missing Authorization token"}, status=400)

        auth_token = random.choice(auth_tokens)
        log_basic_info(f"Received request for path: {request.path}")
        if DEBUG_MODE:
            headers = dict(request.headers)
            headers['Authorization'] = f"Bearer {auth_token}"
            print("Received Request Data:", json.dumps(request_data, indent=4))
            print("Received Headers:", headers)

        # Pass only the selected token so the random choice takes effect;
        # send_request authenticates with the first entry it receives.
        response_text = await send_request([auth_token], request_data)
        if response_text is None:
            return web.json_response({"error": "Upstream request failed"}, status=502)
        return web.json_response(json.loads(response_text))
    except Exception as e:
        log_basic_info(f"Exception occurred in handling request: {str(e)}")
        return web.json_response({"error": str(e)}, status=500)
# Set up routes: a single POST endpoint served by handle_request.
app = web.Application()
app.add_routes([web.post('/hf/v1/chat/completions', handle_request)])

# Run the server on all interfaces when executed as a script.
if __name__ == '__main__':
    web.run_app(app, host='0.0.0.0', port=7860)