overwrite69 committed on
Commit
a933329
·
verified ·
1 Parent(s): d07e9e5

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +292 -191
app.py CHANGED
@@ -1,218 +1,319 @@
1
- import os
2
- import json
 
 
 
 
 
 
3
  import asyncio
 
4
  import re
5
- import urllib.parse
6
- from typing import AsyncGenerator, Optional, List, Dict, Any
7
- from contextlib import asynccontextmanager
8
 
9
  import httpx
10
  from fastapi import FastAPI, HTTPException, Request
11
  from fastapi.responses import StreamingResponse, JSONResponse
12
- from fastapi.middleware.cors import CORSMiddleware
13
  from pydantic import BaseModel
14
- from sse_starlette.sse import EventSourceResponse
15
- from dotenv import load_dotenv
16
 
17
- load_dotenv()
18
 
19
- API_ENDPOINT = os.getenv("API_ENDPOINT", "")
20
- PROXY_URL = os.getenv("PROXY_URL", "")
 
 
 
 
 
 
 
 
21
 
22
- DEFAULT_MODELS = [
23
- {"id": "openai/gpt-4o-mini", "slug": "chatgpt", "name": "ChatGPT"},
24
- {"id": "anthropic/claude-haiku-4-5", "slug": "claude", "name": "Claude"},
25
- {"id": "google/gemini-2.0-flash-001", "slug": "gemini", "name": "Gemini"},
26
- {"id": "x-ai/grok-3-mini-beta", "slug": "grok", "name": "Grok"},
27
- {"id": "deepseek/deepseek-chat-v3-0324", "slug": "deepseek", "name": "DeepSeek"},
28
- {"id": "qwen/qwen-2.5-72b-instruct", "slug": "qwen", "name": "Qwen"},
29
- {"id": "moonshotai/moonlight-16k", "slug": "kimi", "name": "Kimi"},
30
- {"id": "perplexity/sonar", "slug": "perplexity", "name": "Perplexity"},
31
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- class Message(BaseModel):
34
- role: str
35
- content: str
36
 
37
- class ChatRequest(BaseModel):
38
- model: str
39
- messages: List[Message]
40
- stream: bool = True
41
- max_tokens: Optional[int] = None
42
- temperature: Optional[float] = None
43
 
44
- class AnthropicMessage(BaseModel):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  role: str
46
  content: str
47
 
48
- class AnthropicRequest(BaseModel):
49
- model: str
50
- messages: List[AnthropicMessage]
51
- max_tokens: int = 1024
52
- stream: bool = False
53
-
54
- def get_proxies():
55
- if PROXY_URL:
56
- return PROXY_URL
57
- return None
58
-
59
- async def get_csrf_token(client: httpx.AsyncClient) -> str:
60
- resp = await client.get(f"{API_ENDPOINT}/claude/chat", follow_redirects=True)
61
- xsrf_cookie = resp.cookies.get("XSRF-TOKEN")
62
- if xsrf_cookie:
63
- return urllib.parse.unquote(xsrf_cookie)
64
- match = re.search(r'XSRF-TOKEN=([^;]+)', resp.headers.get("set-cookie", ""))
65
- if match:
66
- return urllib.parse.unquote(match.group(1))
67
- raise HTTPException(500, "Auth failed")
68
-
69
- async def stream_chat(client: httpx.AsyncClient, model: str, messages: List[Dict], csrf_token: str) -> AsyncGenerator[str, None]:
70
- payload = {"model": model, "messages": [m.model_dump() for m in messages] if hasattr(messages[0], 'model_dump') else messages}
71
- headers = {
72
  "Content-Type": "application/json",
73
- "X-CSRF-TOKEN": csrf_token,
74
- "Accept": "text/event-stream",
75
- "Origin": API_ENDPOINT,
76
- "Referer": f"{API_ENDPOINT}/claude/chat",
 
 
 
 
77
  }
78
- async with client.stream("POST", f"{API_ENDPOINT}/api/chat", json=payload, headers=headers, timeout=120.0) as resp:
79
- if resp.status_code != 200:
80
- error_body = await resp.aread()
81
- raise HTTPException(resp.status_code, error_body.decode())
82
- async for line in resp.aiter_lines():
83
- if line.startswith("data: "):
84
- data = line[6:]
85
- if data == "[DONE]":
86
- yield f"data: [DONE]\n\n"
87
- break
88
- try:
89
- parsed = json.loads(data)
90
- yield f"data: {json.dumps(parsed)}\n\n"
91
- except json.JSONDecodeError:
92
- continue
93
-
94
- async def do_chat(model: str, messages: List[Dict], stream: bool = False):
95
- proxy = get_proxies()
96
- async with httpx.AsyncClient(proxy=proxy, cookies={}, http2=True, follow_redirects=True, timeout=120.0) as client:
97
- csrf_token = await get_csrf_token(client)
98
-
99
- if stream:
100
- return EventSourceResponse(stream_chat(client, model, messages, csrf_token))
101
- else:
102
- full_content = ""
103
- usage = None
104
- current_messages = [m.model_dump() if hasattr(m, 'model_dump') else m for m in messages]
105
- max_continuations = 5
106
- continuations = 0
107
-
108
- while continuations <= max_continuations:
109
- if full_content:
110
- current_messages.append({"role": "assistant", "content": full_content})
111
- current_messages.append({"role": "user", "content": "Continue and complete from where you left off. Do not repeat, just continue."})
112
-
113
- chunk_count = 0
114
- last_finish_reason = None
115
-
116
- async for chunk in stream_chat(client, model, current_messages, csrf_token):
117
- if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
118
- try:
119
- data = json.loads(chunk[6:-2])
120
- if "choices" in data and data["choices"]:
121
- delta = data["choices"][0].get("delta", {})
122
- content = delta.get("content", "")
123
- if content:
124
- full_content += content
125
- chunk_count += 1
126
- finish = data["choices"][0].get("finish_reason")
127
- if finish:
128
- last_finish_reason = finish
129
- if "usage" in data:
130
- usage = data["usage"]
131
- except:
132
- pass
133
-
134
- if last_finish_reason == "stop" or chunk_count < 50:
135
- break
136
-
137
- continuations += 1
138
-
139
- return full_content, usage
140
-
141
- @asynccontextmanager
142
- async def lifespan(app: FastAPI):
143
- yield
144
-
145
- app = FastAPI(title="Haiku API", description="Fast AI chat completions API", lifespan=lifespan)
146
-
147
- app.add_middleware(
148
- CORSMiddleware,
149
- allow_origins=["*"],
150
- allow_credentials=True,
151
- allow_methods=["*"],
152
- allow_headers=["*"],
153
- )
154
 
155
- @app.get("/")
156
- async def root():
157
- return {"status": "ok", "message": "Haiku API"}
158
 
159
- @app.get("/v1/models")
160
- async def list_models():
161
- return {"object": "list", "data": [{"id": m["id"], "object": "model", "owned_by": m["slug"]} for m in DEFAULT_MODELS]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
- @app.get("/v1/models/{model_id}")
164
- async def get_model(model_id: str):
165
- for m in DEFAULT_MODELS:
166
- if m["id"] == model_id:
167
- return {"id": m["id"], "object": "model", "owned_by": m["slug"]}
168
- raise HTTPException(404, "Model not found")
169
 
170
- # OpenAI format
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  @app.post("/v1/chat/completions")
172
- async def chat_completions(request: ChatRequest):
173
  try:
174
- result = await do_chat(request.model, request.messages, request.stream)
175
- if request.stream:
176
- return result
177
- full_content, usage = result
178
- return {
179
- "id": f"chatcmpl-{hash(full_content) % 1000000}",
180
- "object": "chat.completion",
181
- "model": request.model,
182
- "choices": [{"index": 0, "message": {"role": "assistant", "content": full_content}, "finish_reason": "stop"}],
183
- "usage": usage
184
- }
185
- except Exception as e:
186
- raise HTTPException(500, str(e))
187
-
188
- # Anthropic format
189
- @app.post("/v1/messages")
190
- async def anthropic_messages(request: AnthropicRequest):
191
- try:
192
- # Map model name
193
- model_map = {
194
- "claude-sonnet-4.5": "anthropic/claude-sonnet-4-5",
195
- "claude-haiku-4.5": "anthropic/claude-haiku-4-5",
196
- "claude-sonnet-4-5": "anthropic/claude-sonnet-4-5",
197
- "claude-haiku-4-5": "anthropic/claude-haiku-4-5",
198
- }
199
- model = model_map.get(request.model, request.model)
200
-
201
- result = await do_chat(model, request.messages, request.stream)
202
- if request.stream:
203
- return result
204
- full_content, usage = result
205
- return {
206
- "id": f"msg_{hash(full_content) % 1000000}",
207
- "type": "message",
208
- "role": "assistant",
209
- "model": request.model,
210
- "content": [{"type": "text", "text": full_content}],
211
- "stop_reason": "end_turn",
212
- "usage": {"input_tokens": usage.get("prompt_tokens", 0) if usage else 0, "output_tokens": usage.get("completion_tokens", 0) if usage else 0}
213
- }
214
- except Exception as e:
215
- raise HTTPException(500, str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
  if __name__ == "__main__":
218
  import uvicorn
 
1
+ """
2
+ Haiku API - OpenAI-compatible proxy for chatgpt.org/claude/chat
3
+ Deploy to Hugging Face Spaces (Docker SDK)
4
+
5
+ The chatgpt.org backend proxies through OpenRouter and returns
6
+ OpenAI-compatible SSE chunks. We relay them directly.
7
+ """
8
+
9
  import asyncio
10
+ import json
11
  import re
12
+ import time
13
+ from typing import Optional
14
+ from urllib.parse import unquote
15
 
16
  import httpx
17
  from fastapi import FastAPI, HTTPException, Request
18
  from fastapi.responses import StreamingResponse, JSONResponse
 
19
  from pydantic import BaseModel
 
 
20
 
21
# ASGI application object; the route and lifecycle decorators below attach to it.
app = FastAPI(
    title="Haiku API",
    version="1.1.0",
)
22
 
23
+ # ── Session State ────────────────────────────────────────────────
24
class SessionState:
    """Cache the cookies and CSRF tokens obtained from chatgpt.org.

    Attributes are plain and public because other parts of the module read
    them directly and reset ``last_refresh`` to 0 to force a new refresh.

    Args:
        refresh_interval: Seconds a successfully fetched session is reused
            before ``refresh`` contacts the site again (default 600 = 10 min).
    """

    # Matches <meta name="csrf-token" content="...">, accepting either quote
    # style and any letter case; group 3 is the token value.
    _CSRF_META_RE = re.compile(
        r'<meta\s+name=(["\'])csrf-token\1\s+content=(["\'])(.+?)\2',
        re.IGNORECASE,
    )

    def __init__(self, refresh_interval: float = 600):
        self.xsrf_token: Optional[str] = None
        self.csrf_token: Optional[str] = None
        self.cookies: Optional["httpx.Cookies"] = None
        self.last_refresh: float = 0
        self.refresh_interval: float = refresh_interval
        self._lock = asyncio.Lock()

    async def refresh(self, client: "httpx.AsyncClient") -> None:
        """Visit chatgpt.org to obtain fresh session cookies + CSRF token.

        Does nothing while cookies are present and younger than
        ``refresh_interval``. Failures are best-effort: a non-200 answer or
        any exception is printed and the previous state is kept.

        Args:
            client: The async HTTP client used for the GET request.
        """
        # The lock serialises refreshes so concurrent callers do not all hit
        # the site at once; late arrivals see the fresh state and return.
        async with self._lock:
            now = time.time()
            if self.cookies and (now - self.last_refresh) < self.refresh_interval:
                return

            try:
                resp = await client.get(
                    "https://chatgpt.org/claude/chat",
                    follow_redirects=True,
                    headers={
                        "User-Agent": (
                            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                            "AppleWebKit/537.36 (KHTML, like Gecko) "
                            "Chrome/148.0.0.0 Safari/537.36"
                        ),
                        "Accept": (
                            "text/html,application/xhtml+xml,"
                            "application/xml;q=0.9,*/*;q=0.8"
                        ),
                    },
                    timeout=30.0,
                )

                if resp.status_code != 200:
                    print(f"[Session] GET returned {resp.status_code}")
                    return

                # Collect cookies: first the parsed jar, then the raw
                # Set-Cookie headers (name=value part only) as a fallback.
                new_cookies = httpx.Cookies()
                for name, value in resp.cookies.items():
                    new_cookies.set(name, value, domain="chatgpt.org")
                for header in resp.headers.get_list("set-cookie"):
                    pair = header.split(";")[0]
                    if "=" in pair:
                        k, v = pair.split("=", 1)
                        new_cookies.set(k.strip(), v.strip(), domain="chatgpt.org")

                # XSRF token comes from the cookie and is URL-encoded there.
                xsrf = new_cookies.get("XSRF-TOKEN", domain="chatgpt.org")
                if xsrf:
                    xsrf = unquote(xsrf)

                # CSRF token comes from the HTML meta tag, when present.
                match = self._CSRF_META_RE.search(resp.text)
                csrf = match.group(3) if match else None

                self.cookies = new_cookies
                self.xsrf_token = xsrf
                self.csrf_token = csrf
                self.last_refresh = now

                print(
                    f"[Session] OK — CSRF:{bool(csrf)} XSRF:{bool(xsrf)} "
                    f"Cookies:{list(new_cookies.keys())}"
                )

            except Exception as e:
                # Deliberate best-effort boundary: a failed refresh must not
                # crash startup or the request that triggered it.
                print(f"[Session] Refresh error: {e}")


# Single shared session used by every request handler in this module.
session = SessionState()
 
 
 
 
 
101
 
102
+ # ── HTTP Client ──────────────────────────────────────────────────
103
# Shared async HTTP client; created on startup and closed on shutdown.
http_client: Optional[httpx.AsyncClient] = None


@app.on_event("startup")
async def startup():
    """Create the shared HTTP client and prime the upstream session."""
    global http_client
    # TLS certificate verification is left at httpx's default (enabled):
    # this client carries session cookies and CSRF tokens, so it must not
    # talk to an unauthenticated peer.
    http_client = httpx.AsyncClient(
        timeout=httpx.Timeout(120.0, connect=10.0),
    )
    await session.refresh(http_client)


@app.on_event("shutdown")
async def shutdown():
    """Close the shared HTTP client, if one was created."""
    global http_client
    if http_client is not None:
        await http_client.aclose()
        # Drop the reference so a closed client is never reused.
        http_client = None
119
+
120
+ # ── Models ───────────────────────────────────────────────────────
121
# One chat message: who is speaking (`role`) and what was said (`content`).
# Documented with comments rather than a docstring so the generated model
# schema stays exactly as before.
class Message(BaseModel):
    role: str
    content: str
124
 
125
+ # ── Helpers ──────────────────────────────────────────────────────
126
def _headers() -> dict:
    """Return the headers sent with every chatgpt.org API request.

    The ``X-CSRF-TOKEN`` header is included only when the shared session
    holds a token; the meta-tag CSRF token wins over the cookie XSRF token.
    """
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/148.0.0.0 Safari/537.36"
    )
    request_headers = dict(
        [
            ("Accept", "*/*"),
            ("Content-Type", "application/json"),
            ("Origin", "https://chatgpt.org"),
            ("Referer", "https://chatgpt.org/claude/chat"),
            ("X-Requested-With", "XMLHttpRequest"),
            ("User-Agent", user_agent),
        ]
    )

    token = session.csrf_token if session.csrf_token else session.xsrf_token
    if not token:
        return request_headers

    request_headers["X-CSRF-TOKEN"] = token
    return request_headers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
 
 
 
144
 
145
async def _chat(messages: list[dict], model: str):
    """POST to chatgpt.org/api/chat; returns raw httpx Response.

    Args:
        messages: Chat messages forwarded unchanged as the ``messages`` field.
        model: Model identifier forwarded as the ``model`` field.

    Returns:
        The upstream response, guaranteed to have status 200.

    Raises:
        HTTPException: 503 when the shared HTTP client has not been created,
            429 when upstream rate-limits, otherwise the upstream status code
            for any other non-200 answer.
    """
    if http_client is None:
        # Startup has not run (or shutdown already did); fail clearly instead
        # of with an AttributeError on None.
        raise HTTPException(503, "HTTP client is not initialised")

    await session.refresh(http_client)

    payload = {"model": model, "messages": messages}

    async def _post():
        # Headers and cookies are re-read on every call so a retry picks up
        # whatever the session refresh just stored.
        return await http_client.post(
            "https://chatgpt.org/api/chat",
            json=payload,
            headers=_headers(),
            cookies=session.cookies,
        )

    resp = await _post()

    # CSRF mismatch → refresh once
    if resp.status_code == 419:
        print("[Chat] 419 → refreshing session...")
        session.last_refresh = 0  # forces refresh() past its freshness check
        await session.refresh(http_client)
        resp = await _post()

    if resp.status_code == 429:
        raise HTTPException(429, "Rate limited by upstream")
    if resp.status_code != 200:
        # Invalidate the cached session so the next request starts fresh.
        session.last_refresh = 0
        raise HTTPException(resp.status_code, f"Upstream {resp.status_code}: {resp.text[:300]}")

    return resp
176
+
177
+
178
+ # ── SSE relay ────────────────────────────────────────────────────
179
+ async def _relay_sse(resp):
180
+ """Relay the upstream SSE stream, filtering OPENROUTER PROCESSING lines."""
181
+ content_so_far = ""
182
+ async for raw_line in resp.aiter_lines():
183
+ line = raw_line.strip()
184
 
185
+ # Skip keep-alive / processing comments
186
+ if not line or line.startswith(":"):
187
+ continue
 
 
 
188
 
189
+ if not line.startswith("data: "):
190
+ continue
191
+
192
+ payload = line[6:] # strip "data: "
193
+
194
+ if payload.strip() == "[DONE]":
195
+ yield "data: [DONE]\n\n"
196
+ break
197
+
198
+ try:
199
+ chunk = json.loads(payload)
200
+ except json.JSONDecodeError:
201
+ continue
202
+
203
+ # The upstream already returns OpenAI-format chunks
204
+ # Just relay them as-is
205
+ # Collect content for non-streaming fallback
206
+ for choice in chunk.get("choices", []):
207
+ delta = choice.get("delta", {})
208
+ c = delta.get("content", "")
209
+ if c:
210
+ content_so_far += c
211
+
212
+ yield f"data: {payload}\n\n"
213
+
214
+ # If we got no content via streaming, send it as a single chunk
215
+ if content_so_far and not any(
216
+ c.get("finish_reason") == "stop"
217
+ for chunk_raw in [payload]
218
+ for c in json.loads(payload).get("choices", [])
219
+ if payload
220
+ ):
221
+ pass # Content already streamed
222
+
223
+
224
+ async def _collect_sse(resp):
225
+ """Collect all SSE chunks into a single text string."""
226
+ content = ""
227
+ async for raw_line in resp.aiter_lines():
228
+ line = raw_line.strip()
229
+ if not line or line.startswith(":"):
230
+ continue
231
+ if not line.startswith("data: "):
232
+ continue
233
+ payload = line[6:]
234
+ if payload.strip() == "[DONE]":
235
+ break
236
+ try:
237
+ chunk = json.loads(payload)
238
+ for choice in chunk.get("choices", []):
239
+ delta = choice.get("delta", {})
240
+ c = delta.get("content", "")
241
+ if c:
242
+ content += c
243
+ except json.JSONDecodeError:
244
+ continue
245
+ return content
246
+
247
+
248
+ # ── OpenAI-compatible endpoint ──────────────────────────────────
249
  @app.post("/v1/chat/completions")
250
async def chat_completions(request: Request):
    """OpenAI-compatible chat completions endpoint.

    Reads ``model`` (default ``anthropic/claude-haiku-4-5``), ``messages``
    and ``stream`` (default false) from the JSON body and forwards the chat
    upstream. With ``stream`` set the upstream SSE is relayed; otherwise the
    streamed text is collected into one ``chat.completion`` object whose
    usage counters are always zero.

    Raises:
        HTTPException: 400 for a body that is not a JSON object, or for
            missing/empty/non-list ``messages``; upstream failures propagate
            from ``_chat``.
    """
    import uuid  # local import: used only to build the response id

    try:
        body = await request.json()
    except ValueError as exc:  # JSON and text-decoding errors are ValueErrors
        raise HTTPException(400, "Invalid JSON") from exc

    if not isinstance(body, dict):
        # Valid JSON that is not an object (list, string, ...) has no fields.
        raise HTTPException(400, "Request body must be a JSON object")

    model = body.get("model", "anthropic/claude-haiku-4-5")
    messages = body.get("messages", [])
    stream = body.get("stream", False)

    if not messages:
        raise HTTPException(400, "messages is empty")
    if not isinstance(messages, list):
        raise HTTPException(400, "messages must be a list")

    resp = await _chat(messages, model)

    if stream:
        return StreamingResponse(
            _relay_sse(resp), media_type="text/event-stream"
        )

    # Non-streaming: collect the full response
    text = await _collect_sse(resp)
    return JSONResponse({
        # Random id: a timestamp alone repeats for requests in the same second.
        "id": f"chatcmpl-{uuid.uuid4().hex}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": model,
        "choices": [{
            "index": 0,
            "message": {"role": "assistant", "content": text},
            "finish_reason": "stop",
        }],
        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
    })
284
+
285
+
286
+ # ── Models / Health ─────────────────────────────────────────────
287
@app.get("/v1/models")
async def list_models():
    # OpenAI-style model listing; exactly one model is advertised.
    haiku = {
        "id": "anthropic/claude-haiku-4-5",
        "object": "model",
        "owned_by": "anthropic",
    }
    listing = {"object": "list", "data": [haiku]}
    return JSONResponse(listing)
295
+
296
+
297
@app.get("/")
async def root():
    # Landing response: a status flag plus the routes a client can call.
    available = ["/v1/chat/completions", "/v1/models"]
    return {"status": "ok", "endpoints": available}
300
+
301
+
302
@app.get("/health")
async def health():
    # Liveness probe; also reports whether the shared session holds cookies.
    has_session = bool(session.cookies)
    return {"status": "ok", "session_active": has_session}
305
+
306
+
307
@app.get("/debug/session")
async def debug_session():
    # Summarises the shared session without exposing cookie or token values.
    jar = session.cookies
    if jar:
        cookie_names = list(jar.keys())
    else:
        cookie_names = []

    # A last_refresh of 0 means "never refreshed / invalidated": report None.
    if session.last_refresh:
        age = int(time.time() - session.last_refresh)
    else:
        age = None

    return {
        "has_cookies": bool(jar),
        "cookie_names": cookie_names,
        "has_csrf": bool(session.csrf_token),
        "has_xsrf": bool(session.xsrf_token),
        "last_refresh_ago": age,
    }
316
+
317
 
318
  if __name__ == "__main__":
319
  import uvicorn