Spaces:

overwrite69
/

haiku-api

Sleeping

App Files Files Community

overwrite69 committed on 6 days ago

Commit

0ebb9b8

verified ·

1 Parent(s): 49412a5

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +175 -83

app.py CHANGED Viewed

@@ -3,12 +3,13 @@ Haiku API - OpenAI-compatible proxy for chatgpt.org/claude/chat
 Deploy to Hugging Face Spaces (Docker SDK)
 Auto-continues when upstream hits the ~1K token output limit.
 Sends SSE keep-alive comments during continuation gaps.
-Handles content arrays, CORS, extra params, etc.
 """
 import asyncio
 import json
 import re
 import time
 import uuid
@@ -20,7 +21,7 @@ from fastapi import FastAPI, HTTPException, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse, JSONResponse
-app = FastAPI(title="Haiku API", version="3.1.0")
 # ── CORS ─────────────────────────────────────────────────────────
 app.add_middleware(
@@ -31,6 +32,28 @@ app.add_middleware(
     allow_headers=["*"],
 )
 # ── Session State ────────────────────────────────────────────────
 class SessionState:
     def __init__(self):
@@ -46,46 +69,68 @@ class SessionState:
             now = time.time()
             if self.cookies and (now - self.last_refresh) < self.refresh_interval:
                 return
-            try:
-                resp = await client.get(
-                    "https://chatgpt.org/claude/chat",
-                    follow_redirects=True,
-                    headers={
-                        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36",
-                        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
-                    },
-                    timeout=30.0,
-                )
-                if resp.status_code != 200:
-                    print(f"[Session] GET returned {resp.status_code}")
-                    return
-                new_cookies = httpx.Cookies()
-                for name, value in resp.cookies.items():
-                    new_cookies.set(name, value, domain="chatgpt.org")
-                for header in resp.headers.get_list("set-cookie"):
-                    parts = header.split(";")[0]
-                    if "=" in parts:
-                        k, v = parts.split("=", 1)
-                        new_cookies.set(k.strip(), v.strip(), domain="chatgpt.org")
-                xsrf = new_cookies.get("XSRF-TOKEN", domain="chatgpt.org")
-                if xsrf:
-                    xsrf = unquote(xsrf)
-                csrf = None
-                m = re.search(r'<meta\s+name="csrf-token"\s+content="([^"]+)"', resp.text)
-                if m:
-                    csrf = m.group(1)
-                self.cookies = new_cookies
-                self.xsrf_token = xsrf
-                self.csrf_token = csrf
-                self.last_refresh = now
-                print(f"[Session] OK — CSRF:{bool(csrf)} XSRF:{bool(xsrf)} Cookies:{list(new_cookies.keys())}")
-            except Exception as e:
-                print(f"[Session] Refresh error: {e}")
 session = SessionState()
@@ -96,7 +141,7 @@ http_client: Optional[httpx.AsyncClient] = None
 @app.on_event("startup")
 async def startup():
     global http_client
-    http_client = httpx.AsyncClient(verify=False, timeout=httpx.Timeout(120.0, connect=10.0))
     await session.refresh(http_client)
 @app.on_event("shutdown")
@@ -104,6 +149,7 @@ async def shutdown():
     if http_client:
         await http_client.aclose()
 # ── Message normalization ────────────────────────────────────────
 def normalize_messages(messages: list[dict]) -> list[dict]:
     """Normalize messages: content arrays → plain text, strip extra fields."""
@@ -150,18 +196,71 @@ def _headers() -> dict:
     return h
-async def _raw_call(messages: list[dict], model: str, retry_on_429: bool = True) -> httpx.Response:
-    """Make a single POST to chatgpt.org/api/chat, returns raw streaming Response.
-    Retries with backoff on 429 rate limits."""
     await session.refresh(http_client)
     payload = {"model": model, "messages": messages}
-    # CSRF retry
-    for attempt in range(2):
-        # 429 retry with backoff
-        for rate_attempt in range(3):  # up to 3 attempts on 429
-            resp = await http_client.post(
                 "https://chatgpt.org/api/chat",
                 json=payload,
                 headers=_headers(),
@@ -172,14 +271,14 @@ async def _raw_call(messages: list[dict], model: str, retry_on_429: bool = True)
                 print("[Chat] 419 → refreshing session...")
                 session.last_refresh = 0
                 await session.refresh(http_client)
-                break  # break inner loop, retry CSRF
             if resp.status_code == 429:
-                wait_time = (rate_attempt + 1) * 10  # 10s, 20s, 30s
                 print(f"[Chat] 429 rate limited, waiting {wait_time}s (attempt {rate_attempt+1}/3)...")
                 session.last_refresh = 0
                 await session.refresh(http_client)
-                if retry_on_429 and rate_attempt < 2:
                     await asyncio.sleep(wait_time)
                     continue
                 raise HTTPException(429, f"Rate limited by upstream after {rate_attempt+1} retries")
@@ -194,12 +293,7 @@ async def _raw_call(messages: list[dict], model: str, retry_on_429: bool = True)
 async def _stream_one_response(resp):
-    """
-    Stream a single upstream SSE response in real-time.
-    Yields (content_chunk, finish_reason) tuples.
-    finish_reason is None while streaming, "stop" or "length" at the end.
-    """
-    content = ""
     finish_reason = None
     async for raw_line in resp.aiter_lines():
@@ -222,8 +316,6 @@ async def _stream_one_response(resp):
             delta = choice.get("delta", {})
             c = delta.get("content", "")
             if c:
-                content += c
-                # Yield each piece of content immediately for real-time streaming
                 yield c, None
             fr = choice.get("finish_reason")
@@ -233,22 +325,23 @@ async def _stream_one_response(resp):
                 elif fr in ("length", "max_tokens"):
                     finish_reason = "length"
-    # Yield the final finish reason
     yield "", finish_reason
-# ── Main streaming endpoint with auto-continue ──────────────────
 MAX_CONTINUATIONS = 20
 async def _raw_call_streaming(messages: list[dict], model: str):
-    """Like _raw_call but yields SSE keep-alive comments during 429 retries.
-    For use in streaming mode so the client connection stays alive."""
     await session.refresh(http_client)
     payload = {"model": model, "messages": messages}
     for attempt in range(2):  # CSRF retry
         for rate_attempt in range(3):  # 429 retry
-            resp = await http_client.post(
                 "https://chatgpt.org/api/chat",
                 json=payload,
                 headers=_headers(),
@@ -267,7 +360,6 @@ async def _raw_call_streaming(messages: list[dict], model: str):
                 session.last_refresh = 0
                 await session.refresh(http_client)
                 if rate_attempt < 2:
-                    # Send keep-alive pings while waiting
                     for _ in range(wait_time):
                         yield ": retrying...\n\n"
                         await asyncio.sleep(1)
@@ -285,25 +377,18 @@ async def _raw_call_streaming(messages: list[dict], model: str):
 async def _stream_with_auto_continue(messages: list[dict], model: str):
-    """
-    Stream the response in real-time. If it gets cut off (length limit),
-    auto-continue by appending to the conversation and making another call.
-    Sends SSE keep-alive comments during continuation gaps.
-    """
     chunk_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
     created = int(time.time())
     conversation = list(messages)
     total_content = ""
     for cont_num in range(MAX_CONTINUATIONS):
-        # Send keep-alive before making the call
         yield ": thinking...\n\n"
-        # Make the upstream call (with keep-alive during 429 retries)
         resp = None
         async for result in _raw_call_streaming(conversation, model):
             if isinstance(result, str):
-                # This is a keep-alive comment
                 yield result
             else:
                 resp = result
@@ -311,7 +396,6 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
         if resp is None:
             raise HTTPException(500, "No response from upstream")
-        # Stream it in real-time
         finish_reason = "stop"
         chunk_content = ""
@@ -354,7 +438,6 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
             yield "data: [DONE]\n\n"
             return
-        # Auto-continue — send keep-alive
         yield ": continuing...\n\n"
         conversation.append({"role": "assistant", "content": chunk_content})
@@ -402,7 +485,6 @@ async def _collect_with_auto_continue(messages: list[dict], model: str) -> str:
         if finish_reason == "stop":
             return full_content
-        # Auto-continue
         conversation.append({"role": "assistant", "content": content})
         conversation.append({"role": "user", "content": "Continue exactly from where you left off. Do not repeat any text you already wrote."})
@@ -440,7 +522,7 @@ async def chat_completions(request: Request):
             headers={
                 "Cache-Control": "no-cache",
                 "Connection": "keep-alive",
-                "X-Accel-Buffering": "no",  # Disable nginx buffering
             },
         )
     else:
@@ -473,23 +555,32 @@ async def list_models():
 @app.get("/")
 async def root():
-    return {"status": "ok", "version": "3.1.0", "endpoints": ["/v1/chat/completions", "/v1/models"]}
 @app.get("/health")
 async def health():
-    return {"status": "ok", "session_active": bool(session.cookies)}
 @app.get("/debug/refresh")
 async def force_refresh():
-    """Force refresh the session cookies."""
     session.last_refresh = 0
     await session.refresh(http_client)
     return {
         "refreshed": True,
         "has_cookies": bool(session.cookies),
         "has_csrf": bool(session.csrf_token),
     }
@@ -501,6 +592,7 @@ async def debug_session():
         "has_csrf": bool(session.csrf_token),
         "has_xsrf": bool(session.xsrf_token),
         "last_refresh_ago": int(time.time() - session.last_refresh) if session.last_refresh else None,
     }

 Deploy to Hugging Face Spaces (Docker SDK)
 Auto-continues when upstream hits the ~1K token output limit.
+Uses rotating proxy with aggressive retries for unstable IPs.
 Sends SSE keep-alive comments during continuation gaps.
 """
 import asyncio
 import json
+import os
 import re
 import time
 import uuid
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse, JSONResponse
+app = FastAPI(title="Haiku API", version="4.0.0")
 # ── CORS ─────────────────────────────────────────────────────────
 app.add_middleware(
     allow_headers=["*"],
 )
+# ── Proxy Config ─────────────────────────────────────────────────
+PROXY_URL = os.environ.get("PROXY_URL", "")
+# e.g. http://user:pass@proxy.op.wtf:32424
+PROXY_MAX_RETRIES = 6  # rotating proxy: try many IPs since ~half are dead
+PROXY_RETRY_DELAY = 1  # seconds between proxy retries
+def _make_client() -> httpx.AsyncClient:
+    """Build an httpx.AsyncClient, routed through PROXY_URL when one is set.
+
+    The client is created with TLS verification disabled (verify=False), a
+    120 s default timeout and a 15 s connect timeout. One log line reports
+    whether a proxy is in use; only the part of PROXY_URL after the last "@"
+    is printed.
+    """
+    if not PROXY_URL:
+        print("[Proxy] No proxy configured, direct connection")
+        return httpx.AsyncClient(
+            verify=False,
+            timeout=httpx.Timeout(120.0, connect=15.0),
+        )
+    # Print the host portion only: anything before "@" may be credentials.
+    print(f"[Proxy] Using rotating proxy: {PROXY_URL.split('@')[-1]}")
+    return httpx.AsyncClient(
+        verify=False,
+        timeout=httpx.Timeout(120.0, connect=15.0),
+        proxy=PROXY_URL,
+    )
 # ── Session State ────────────────────────────────────────────────
 class SessionState:
     def __init__(self):
             now = time.time()
             if self.cookies and (now - self.last_refresh) < self.refresh_interval:
                 return
+            # Try multiple times with proxy rotation (new IP each request)
+            for attempt in range(PROXY_MAX_RETRIES):
+                try:
+                    # Create fresh client for each attempt (gets new proxy IP)
+                    if PROXY_URL and attempt > 0:
+                        try:
+                            await client.aclose()
+                        except:
+                            pass
+                        client = _make_client()
+                    resp = await client.get(
+                        "https://chatgpt.org/claude/chat",
+                        follow_redirects=True,
+                        headers={
+                            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36",
+                            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+                        },
+                        timeout=30.0,
+                    )
+                    if resp.status_code != 200:
+                        print(f"[Session] GET returned {resp.status_code}, retry #{attempt+1}")
+                        await asyncio.sleep(PROXY_RETRY_DELAY)
+                        continue
+                    new_cookies = httpx.Cookies()
+                    for name, value in resp.cookies.items():
+                        new_cookies.set(name, value, domain="chatgpt.org")
+                    for header in resp.headers.get_list("set-cookie"):
+                        parts = header.split(";")[0]
+                        if "=" in parts:
+                            k, v = parts.split("=", 1)
+                            new_cookies.set(k.strip(), v.strip(), domain="chatgpt.org")
+                    xsrf = new_cookies.get("XSRF-TOKEN", domain="chatgpt.org")
+                    if xsrf:
+                        xsrf = unquote(xsrf)
+                    csrf = None
+                    m = re.search(r'<meta\s+name="csrf-token"\s+content="([^"]+)"', resp.text)
+                    if m:
+                        csrf = m.group(1)
+                    self.cookies = new_cookies
+                    self.xsrf_token = xsrf
+                    self.csrf_token = csrf
+                    self.last_refresh = now
+                    print(f"[Session] OK — CSRF:{bool(csrf)} XSRF:{bool(xsrf)} Cookies:{list(new_cookies.keys())} (attempt {attempt+1})")
+                    return  # Success!
+                except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
+                    print(f"[Session] Proxy error attempt #{attempt+1}: {type(e).__name__}: {e}")
+                    await asyncio.sleep(PROXY_RETRY_DELAY)
+                    continue
+                except Exception as e:
+                    print(f"[Session] Error attempt #{attempt+1}: {e}")
+                    await asyncio.sleep(PROXY_RETRY_DELAY)
+                    continue
+            print("[Session] WARNING: All refresh attempts failed")
 session = SessionState()
 @app.on_event("startup")
 async def startup():
     global http_client
+    http_client = _make_client()
     await session.refresh(http_client)
 @app.on_event("shutdown")
     if http_client:
         await http_client.aclose()
 # ── Message normalization ────────────────────────────────────────
 def normalize_messages(messages: list[dict]) -> list[dict]:
     """Normalize messages: content arrays → plain text, strip extra fields."""
     return h
+# ── Proxy-aware request with retry ──────────────────────────────
+async def _proxy_post(url: str, **kwargs) -> httpx.Response:
+    """POST to *url* via the shared client, retrying connection-level failures.
+
+    Up to PROXY_MAX_RETRIES guarded attempts are made. A connect, proxy or
+    timeout error triggers a retry; when PROXY_URL is set the global client
+    is closed and rebuilt first (presumably so the rotating proxy hands out
+    a different exit IP). Any response that arrives is returned as-is,
+    whatever its status code. Once the guarded attempts are used up, one
+    final unguarded attempt is made so the underlying error reaches the
+    caller.
+
+    Args:
+        url: Target URL.
+        **kwargs: Forwarded unchanged to ``http_client.post``.
+
+    Returns:
+        The first httpx.Response received.
+
+    Raises:
+        httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException: if the
+            final attempt fails as well.
+    """
+    global http_client
+    for attempt in range(PROXY_MAX_RETRIES):
+        try:
+            # Any response at all (even 4xx/5xx) means the connection worked,
+            # so hand it back and let the caller interpret the status code.
+            return await http_client.post(url, **kwargs)
+        except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
+            print(f"[Proxy] Connection error #{attempt+1}: {type(e).__name__}")
+            if not PROXY_URL:
+                # Direct connection: nothing to rotate, just back off briefly.
+                await asyncio.sleep(2)
+                continue
+            try:
+                await http_client.aclose()
+            except Exception as close_err:
+                # Best-effort close. Catching Exception instead of using a
+                # bare except lets asyncio.CancelledError propagate, so a
+                # cancelled request is not forced to keep retrying.
+                print(f"[Proxy] Ignoring close error: {type(close_err).__name__}")
+            # NOTE: the replacement client carries no state of its own;
+            # cookies/headers reach the retry only through **kwargs.
+            http_client = _make_client()
+            await asyncio.sleep(PROXY_RETRY_DELAY)
+    # Guarded retries exhausted: final unguarded attempt so errors propagate.
+    return await http_client.post(url, **kwargs)
+async def _proxy_get(url: str, **kwargs) -> httpx.Response:
+    """GET *url* via the shared client, retrying connection-level failures.
+
+    Up to PROXY_MAX_RETRIES guarded attempts are made. On a connect, proxy
+    or timeout error the global client is closed and rebuilt when PROXY_URL
+    is set; otherwise the call just waits 2 s before trying again. Any
+    response that arrives is returned as-is, whatever its status code. Once
+    the guarded attempts are used up, one final unguarded attempt is made so
+    the underlying error reaches the caller.
+
+    Args:
+        url: Target URL.
+        **kwargs: Forwarded unchanged to ``http_client.get``.
+
+    Returns:
+        The first httpx.Response received.
+
+    Raises:
+        httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException: if the
+            final attempt fails as well.
+    """
+    global http_client
+    for attempt in range(PROXY_MAX_RETRIES):
+        try:
+            return await http_client.get(url, **kwargs)
+        except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
+            print(f"[Proxy] GET error #{attempt+1}: {type(e).__name__}")
+            if not PROXY_URL:
+                # Direct connection: nothing to rotate, just back off briefly.
+                await asyncio.sleep(2)
+                continue
+            try:
+                await http_client.aclose()
+            except Exception as close_err:
+                # Best-effort close. Catching Exception instead of using a
+                # bare except lets asyncio.CancelledError propagate, so a
+                # cancelled request is not forced to keep retrying.
+                print(f"[Proxy] Ignoring close error: {type(close_err).__name__}")
+            http_client = _make_client()
+            await asyncio.sleep(PROXY_RETRY_DELAY)
+    # Guarded retries exhausted: final unguarded attempt so errors propagate.
+    return await http_client.get(url, **kwargs)
+# ── Raw call with retries ───────────────────────────────────────
+async def _raw_call(messages: list[dict], model: str) -> httpx.Response:
+    """Make a single POST to chatgpt.org/api/chat with full retry logic."""
     await session.refresh(http_client)
     payload = {"model": model, "messages": messages}
+    for attempt in range(2):  # CSRF retry
+        for rate_attempt in range(3):  # 429 retry
+            resp = await _proxy_post(
                 "https://chatgpt.org/api/chat",
                 json=payload,
                 headers=_headers(),
                 print("[Chat] 419 → refreshing session...")
                 session.last_refresh = 0
                 await session.refresh(http_client)
+                break
             if resp.status_code == 429:
+                wait_time = (rate_attempt + 1) * 10
                 print(f"[Chat] 429 rate limited, waiting {wait_time}s (attempt {rate_attempt+1}/3)...")
                 session.last_refresh = 0
                 await session.refresh(http_client)
+                if rate_attempt < 2:
                     await asyncio.sleep(wait_time)
                     continue
                 raise HTTPException(429, f"Rate limited by upstream after {rate_attempt+1} retries")
 async def _stream_one_response(resp):
+    """Stream a single upstream SSE response in real-time."""
     finish_reason = None
     async for raw_line in resp.aiter_lines():
             delta = choice.get("delta", {})
             c = delta.get("content", "")
             if c:
                 yield c, None
             fr = choice.get("finish_reason")
                 elif fr in ("length", "max_tokens"):
                     finish_reason = "length"
     yield "", finish_reason
+# ── Streaming with auto-continue ────────────────────────────────
 MAX_CONTINUATIONS = 20
 async def _raw_call_streaming(messages: list[dict], model: str):
+    """Like _raw_call but yields SSE keep-alive comments during retries."""
     await session.refresh(http_client)
     payload = {"model": model, "messages": messages}
     for attempt in range(2):  # CSRF retry
         for rate_attempt in range(3):  # 429 retry
+            # Keep-alive before request
+            yield ": thinking...\n\n"
+            resp = await _proxy_post(
                 "https://chatgpt.org/api/chat",
                 json=payload,
                 headers=_headers(),
                 session.last_refresh = 0
                 await session.refresh(http_client)
                 if rate_attempt < 2:
                     for _ in range(wait_time):
                         yield ": retrying...\n\n"
                         await asyncio.sleep(1)
 async def _stream_with_auto_continue(messages: list[dict], model: str):
+    """Stream with real-time output, auto-continue, and keep-alive pings."""
     chunk_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
     created = int(time.time())
     conversation = list(messages)
     total_content = ""
     for cont_num in range(MAX_CONTINUATIONS):
         yield ": thinking...\n\n"
         resp = None
         async for result in _raw_call_streaming(conversation, model):
             if isinstance(result, str):
                 yield result
             else:
                 resp = result
         if resp is None:
             raise HTTPException(500, "No response from upstream")
         finish_reason = "stop"
         chunk_content = ""
             yield "data: [DONE]\n\n"
             return
         yield ": continuing...\n\n"
         conversation.append({"role": "assistant", "content": chunk_content})
         if finish_reason == "stop":
             return full_content
         conversation.append({"role": "assistant", "content": content})
         conversation.append({"role": "user", "content": "Continue exactly from where you left off. Do not repeat any text you already wrote."})
             headers={
                 "Cache-Control": "no-cache",
                 "Connection": "keep-alive",
+                "X-Accel-Buffering": "no",
             },
         )
     else:
 @app.get("/")
 async def root():
+    return {
+        "status": "ok",
+        "version": "4.0.0",
+        "proxy": bool(PROXY_URL),
+        "endpoints": ["/v1/chat/completions", "/v1/models"],
+    }
 @app.get("/health")
 async def health():
+    return {
+        "status": "ok",
+        "session_active": bool(session.cookies),
+        "proxy": bool(PROXY_URL),
+    }
 @app.get("/debug/refresh")
 async def force_refresh():
     session.last_refresh = 0
     await session.refresh(http_client)
     return {
         "refreshed": True,
         "has_cookies": bool(session.cookies),
         "has_csrf": bool(session.csrf_token),
+        "proxy": bool(PROXY_URL),
     }
         "has_csrf": bool(session.csrf_token),
         "has_xsrf": bool(session.xsrf_token),
         "last_refresh_ago": int(time.time() - session.last_refresh) if session.last_refresh else None,
+        "proxy": bool(PROXY_URL),
     }