kevin committed on
Commit
6820770
·
1 Parent(s): a69534e

github 模型

Browse files
Files changed (5) hide show
  1. .gitignore +1 -0
  2. Dockerfile +13 -0
  3. README.md +1 -0
  4. app.py +215 -0
  5. requirements.txt +4 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .idea
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.13-slim
2
+
3
+ WORKDIR /app
4
+
5
+ COPY requirements.txt /app/
6
+
7
+ RUN pip install --no-cache-dir -r requirements.txt
8
+
9
+ COPY . /app/
10
+
11
+ EXPOSE 8080
12
+
13
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"]
README.md CHANGED
@@ -5,6 +5,7 @@ colorFrom: indigo
5
  colorTo: gray
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
5
  colorTo: gray
6
  sdk: docker
7
  pinned: false
8
+ app_port: 8080
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, Request
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.responses import Response
4
+ import logging
5
+ import httpx
6
+ import random
7
+ import uvicorn
8
+ import json
9
+
10
+ logging.basicConfig(
11
+ level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
12
+ )
13
+ logger = logging.getLogger(__name__)
14
+
15
+ app = FastAPI()
16
+
17
+ app.add_middleware(
18
+ CORSMiddleware,
19
+ allow_origins=["*"],
20
+ allow_credentials=True,
21
+ allow_methods=["*"],
22
+ allow_headers=["*"],
23
+ )
24
+
25
+ client = httpx.AsyncClient()
26
+
27
+ BASE_URL_CHAT = "https://models.inference.ai.azure.com/chat/completions"
28
+ BASE_URL_EMBEDDINGS = "https://models.inference.ai.azure.com/embeddings"
29
+
30
+ async def process_request_body(body: bytes) -> bytes:
31
+ try:
32
+ data = json.loads(body)
33
+ if isinstance(data, dict) and "store" in data:
34
+ del data["store"]
35
+ return json.dumps(data).encode()
36
+ except json.JSONDecodeError:
37
+ return body
38
+
39
+ async def make_request(method, url, headers, body, api_keys=None, retry_count=0):
40
+ try:
41
+ if api_keys and len(api_keys) > 1:
42
+ remaining_keys = api_keys.copy()
43
+ while remaining_keys and retry_count < 3:
44
+ selected_key = random.choice(remaining_keys)
45
+ remaining_keys.remove(selected_key)
46
+ headers = {**headers, "Authorization": f"Bearer {selected_key}"}
47
+ logger.info(f"Attempting request with API key: {selected_key}")
48
+
49
+ try:
50
+ r = await client.request(
51
+ method,
52
+ url,
53
+ headers=headers,
54
+ content=body,
55
+ timeout=600
56
+ )
57
+ if r.status_code < 400:
58
+ return r
59
+ logger.error(f"Request failed with key {selected_key}, status code: {r.status_code}")
60
+ except Exception as e:
61
+ logger.error(f"Request failed with key {selected_key}: {str(e)}")
62
+ retry_count += 1
63
+
64
+ raise HTTPException(status_code=500, detail="All API keys failed")
65
+ else:
66
+ while retry_count < 3:
67
+ single_key = api_keys[0] if api_keys else headers.get("authorization", "").replace("Bearer ", "").strip()
68
+ headers = {**headers, "Authorization": f"Bearer {single_key}"}
69
+ logger.info(f"Attempting request with API key: {single_key}")
70
+
71
+ try:
72
+ r = await client.request(
73
+ method,
74
+ url,
75
+ headers=headers,
76
+ content=body,
77
+ timeout=600
78
+ )
79
+ if r.status_code < 400:
80
+ return r
81
+ logger.error(f"Request failed with status code: {r.status_code}")
82
+ except Exception as e:
83
+ logger.error(f"Request failed: {str(e)}")
84
+ retry_count += 1
85
+
86
+ raise HTTPException(status_code=500, detail="Request failed after 3 retries")
87
+
88
+ except Exception as e:
89
+ logger.error(f"Request failed: {str(e)}")
90
+ raise HTTPException(status_code=500, detail=str(e))
91
+
92
+ @app.api_route(
93
+ "/v1/chat/completions",
94
+ methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "PATCH"],
95
+ )
96
+ @app.api_route(
97
+ "/hf/v1/chat/completions",
98
+ methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "PATCH"],
99
+ )
100
+ async def chat_completions(request: Request):
101
+ target_url = BASE_URL_CHAT
102
+
103
+ try:
104
+ headers = dict(request.headers)
105
+ if "content-length" in headers:
106
+ del headers["content-length"]
107
+ if "host" in headers:
108
+ del headers["host"]
109
+ headers["Host"] = "models.inference.ai.azure.com"
110
+
111
+ api_keys = None
112
+ auth_header = headers.get("authorization", "")
113
+ if auth_header and auth_header.startswith("Bearer "):
114
+ raw_keys = auth_header.replace("Bearer ", "").strip()
115
+ api_keys = [k.strip() for k in raw_keys.split(',') if k.strip()]
116
+ if "authorization" in headers:
117
+ del headers["authorization"]
118
+
119
+ request_body = await request.body()
120
+ processed_body = await process_request_body(request_body)
121
+
122
+ r = await make_request(request.method, target_url, headers, processed_body, api_keys)
123
+ return Response(content=r.content, status_code=r.status_code, headers=r.headers)
124
+
125
+ except Exception as e:
126
+ logger.error(f"Forwarding request failed: {e}")
127
+ raise HTTPException(status_code=500, detail=str(e))
128
+
129
+ @app.api_route("/v1/embeddings", methods=["POST", "OPTIONS"])
130
+ @app.api_route("/hf/v1/embeddings", methods=["POST", "OPTIONS"])
131
+ async def embeddings(request: Request):
132
+ target_url = BASE_URL_EMBEDDINGS
133
+
134
+ try:
135
+ headers = dict(request.headers)
136
+ if "content-length" in headers:
137
+ del headers["content-length"]
138
+ if "host" in headers:
139
+ del headers["host"]
140
+ headers["Host"] = "models.inference.ai.azure.com"
141
+
142
+ api_keys = None
143
+ auth_header = headers.get("authorization", "")
144
+ if auth_header and auth_header.startswith("Bearer "):
145
+ raw_keys = auth_header.replace("Bearer ", "").strip()
146
+ api_keys = [k.strip() for k in raw_keys.split(',') if k.strip()]
147
+ if "authorization" in headers:
148
+ del headers["authorization"]
149
+
150
+ request_body = await request.body()
151
+ processed_body = await process_request_body(request_body)
152
+
153
+ r = await make_request(request.method, target_url, headers, processed_body, api_keys)
154
+ return Response(content=r.content, status_code=r.status_code, headers=r.headers)
155
+
156
+ except Exception as e:
157
+ logger.error(f"Forwarding request failed: {e}")
158
+ raise HTTPException(status_code=500, detail=str(e))
159
+
160
+ @app.get("/v1/models")
161
+ @app.get("/hf/v1/models")
162
+ async def list_models():
163
+ models_data = {
164
+ "object": "list",
165
+ "data": [
166
+ {"id": "AI21-Jamba-1.5-Large", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
167
+ {"id": "AI21-Jamba-1.5-Mini", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
168
+ {"id": "Cohere-command-r", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
169
+ {"id": "Cohere-command-r-08-2024", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
170
+ {"id": "Cohere-command-r-plus", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
171
+ {"id": "Cohere-command-r-plus-08-2024", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
172
+ {"id": "Cohere-embed-v3-english", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
173
+ {"id": "Cohere-embed-v3-multilingual", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
174
+ {"id": "Llama-3.2-90B-Vision-Instruct", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
175
+ {"id": "Llama-3.2-11B-Vision-Instruct", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
176
+ {"id": "Meta-Llama-3.1-405B-Instruct", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
177
+ {"id": "Meta-Llama-3.1-70B-Instruct", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
178
+ {"id": "Meta-Llama-3.1-8B-Instruct", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
179
+ {"id": "Meta-Llama-3-70B-Instruct", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
180
+ {"id": "Meta-Llama-3-8B-Instruct", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
181
+ {"id": "Mistral-large", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
182
+ {"id": "Mistral-large-2407", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
183
+ {"id": "Mistral-Nemo", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
184
+ {"id": "Mistral-small", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
185
+ {"id": "Ministral-3B", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
186
+ {"id": "gpt-4o", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
187
+ {"id": "gpt-4o-mini", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
188
+ {"id": "o1-preview", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
189
+ {"id": "o1-mini", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
190
+ {"id": "text-embedding-3-large", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
191
+ {"id": "text-embedding-3-small", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
192
+ {"id": "Phi-3.5-MoE-instruct", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
193
+ {"id": "Phi-3.5-vision-instruct", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
194
+ {"id": "Phi-3.5-mini-instruct", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
195
+ {"id": "Phi-3-medium-128k-instruct", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
196
+ {"id": "Phi-3-medium-4k-instruct", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
197
+ {"id": "Phi-3-mini-128k-instruct", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
198
+ {"id": "Phi-3-mini-4k-instruct", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
199
+ {"id": "Phi-3-small-128k-instruct", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
200
+ {"id": "Phi-3-small-8k-instruct", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
201
+ {"id": "jais-30b-chat", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
202
+ {"id": "Llama-3.3-70B-Instruct", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
203
+ {"id": "Mistral-large-2411", "object": "model", "created": 1709266800, "owned_by": "f-droid"},
204
+ ]
205
+ }
206
+ return models_data
207
+
208
+ @app.get("/health")
209
+ @app.get("/")
210
+ async def health_check():
211
+ logger.info("Health check endpoint called")
212
+ return {"status": "healthy"}
213
+
214
+ if __name__ == "__main__":
215
+ uvicorn.run(app, host="0.0.0.0", port=8080)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ httpx
4
+ python-multipart