|
|
import json
import os
import random
import time

import requests
from flask import Flask, request, Response, jsonify

from helper import create_jwt, github_username_zed_userid_list
|
|
|
|
|
# WSGI application object; the route(s) defined below attach to this instance.
app = Flask(__name__)
|
|
|
|
|
@app.route('/hf/v1/chat/completions', methods=['POST'])
def chat():
    """Proxy an OpenAI-style chat-completions request to llm.zed.dev.

    Rewrites the incoming OpenAI-shaped JSON body into Zed's Anthropic
    ``provider_request`` format, authenticates with a JWT minted for a
    randomly chosen (github_username, zed_user_id) pair, and returns either
    a Server-Sent-Events stream (when the client asked for ``stream``) or a
    single OpenAI-shaped ``chat.completion`` JSON object.

    Returns:
        A streaming ``text/event-stream`` Response, a JSON completion
        object, or a 400 error for a missing/invalid request body.
    """
    # Parse leniently so a missing or malformed body yields a clean 400
    # instead of an unhandled 500 (request.json raises on bad input).
    payload = request.get_json(silent=True)
    if not payload or 'messages' not in payload:
        return jsonify({"error": "request body must be JSON with a 'messages' field"}), 400

    model = payload.get('model', 'claude-3-5-sonnet-20241022')

    url = "https://llm.zed.dev/completion?"

    # Zed wraps the Anthropic-style request in a provider envelope.
    llm_payload = {
        "provider": "anthropic",
        "model": model,
        "provider_request": {
            "model": model,
            "max_tokens": payload.get('max_tokens', 8192),
            "temperature": payload.get('temperature', 0),
            "top_p": payload.get('top_p', 0.7),
            "messages": payload['messages'],
            "stream": payload.get('stream', True),
            "system": payload.get('system', "")
        }
    }

    # Pick a random identity each request so load is spread across accounts.
    github_username, zed_user_id = random.choice(github_username_zed_userid_list)
    jwt = create_jwt(github_username, zed_user_id)

    headers = {
        'Host': 'llm.zed.dev',
        'accept': '*/*',
        'content-type': 'application/json',
        'authorization': f'Bearer {jwt}',
        # Impersonates the Zed desktop client.
        'user-agent': 'Zed/0.149.3 (macos; aarch64)'
    }

    # Honour an optional outbound proxy for both schemes.
    proxy = os.environ.get('HTTP_PROXY', None)
    proxies = {'http': proxy, 'https': proxy} if proxy else None

    def generate():
        """Yield OpenAI-style SSE 'data:' frames translated from Zed's stream.

        NOTE(review): each 1024-byte chunk is decoded as a standalone JSON
        document; an upstream JSON object split across chunk boundaries is
        silently dropped by the JSONDecodeError handler — confirm upstream
        framing guarantees one document per chunk.
        """
        with requests.post(url, headers=headers, json=llm_payload, stream=True, proxies=proxies) as response:
            for chunk in response.iter_content(chunk_size=1024):
                if not chunk:
                    continue
                try:
                    data = json.loads(chunk.decode('utf-8'))
                except json.JSONDecodeError:
                    # Skip partial/non-JSON chunks rather than abort the stream.
                    continue
                content = data.get('completion', '')
                yield f"data: {json.dumps({'choices': [{'delta': {'content': content}}]})}\n\n"
        # OpenAI-compatible stream terminator.
        yield "data: [DONE]\n\n"

    if payload.get('stream', False):
        return Response(generate(), content_type='text/event-stream')

    # Non-streaming path: one upstream round trip, repackaged as an
    # OpenAI chat.completion object.
    with requests.post(url, headers=headers, json=llm_payload, proxies=proxies) as response:
        data = response.json()
    return jsonify({
        "id": "chatcmpl-" + os.urandom(12).hex(),
        "object": "chat.completion",
        # BUG FIX: `time` was referenced here without ever being imported,
        # so every non-streaming request died with a NameError.
        "created": int(time.time()),
        "model": model,
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": data.get('completion', '')
            },
            "finish_reason": "stop"
        }],
        # Upstream does not report token counts; -1 marks them as unknown.
        "usage": {
            "prompt_tokens": -1,
            "completion_tokens": -1,
            "total_tokens": -1
        }
    })
|
|
|
|
|
if __name__ == '__main__':
    # NOTE(review): debug=True enables the Werkzeug reloader and interactive
    # debugger — fine for local development, unsafe for production exposure.
    app.run(debug=True)