cheekeong2025 committed on
Commit
87c4e84
·
verified ·
1 Parent(s): 35bbbf1

Update loader.py

Browse files
Files changed (1) hide show
  1. loader.py +75 -54
loader.py CHANGED
@@ -1,11 +1,11 @@
1
- # loader.py — robust public proxy for PRIVATE Static Space using huggingface.co/resolve
2
  import os
3
  import mimetypes
4
  from urllib.parse import urljoin
5
- from typing import Optional
6
 
7
  from fastapi import FastAPI, Request
8
- from fastapi.responses import Response, HTMLResponse, PlainTextResponse
9
  import httpx
10
 
11
  app = FastAPI()
@@ -13,7 +13,7 @@ app = FastAPI()
13
  # ==== Config from Secrets ====
14
  SPACE_ID = os.getenv("PRIVATE_SPACE_ID") # e.g. "cheekeong2025/iip-grading"
15
  HF_TOKEN = os.getenv("HF_TOKEN") # must have read access
16
- REVISION = os.getenv("REVISION", "main") # optional: pin branch/tag/commit
17
  # =============================
18
 
19
  if not SPACE_ID:
@@ -21,22 +21,20 @@ if not SPACE_ID:
21
  "PRIVATE_SPACE_ID is not set. Add it in Settings → Repository secrets (e.g. 'cheekeong2025/iip-grading')."
22
  )
23
  if not HF_TOKEN:
24
- raise RuntimeError(
25
- "HF_TOKEN is not set. Add a token with READ access to the private Space."
26
- )
27
 
28
- BASE_RESOLVE = f"https://huggingface.co/spaces/{SPACE_ID}/resolve/{REVISION}/"
29
  HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
 
 
30
 
31
- def build(url_base: str, path: str) -> str:
32
- return urljoin(url_base, path.lstrip("/"))
33
 
34
  def with_query(url: str, request: Request) -> str:
35
  q = str(request.url.query or "")
36
  return f"{url}?{q}" if q else url
37
 
38
  def looks_like_file(path: str) -> bool:
39
- # crude but effective: treat last segment with a dot as a file
40
  return "." in path.split("/")[-1]
41
 
42
  def guess_mime(path: str, default: str = "application/octet-stream") -> str:
@@ -45,65 +43,87 @@ def guess_mime(path: str, default: str = "application/octet-stream") -> str:
45
  mime, _ = mimetypes.guess_type(path)
46
  return mime or default
47
 
48
- async def fetch(client: httpx.AsyncClient, path: str, request: Request) -> httpx.Response:
 
 
 
 
 
49
  """
50
- Try multiple resolve locations:
51
- 1) path
52
- 2) static/path
53
- 3) if directory: path/index.html
54
- 4) if directory: static/path/index.html
 
 
 
 
 
55
  """
56
- # 1) as-is
57
- url = with_query(build(BASE_RESOLVE, path), request)
58
- r = await client.get(url, headers=HEADERS)
59
- if r.status_code == 200:
60
- return r
61
-
62
- # 2) static/<path>
63
- url_static = with_query(build(BASE_RESOLVE, f"static/{path.lstrip('/')}"), request)
64
- r2 = await client.get(url_static, headers=HEADERS)
65
- if r2.status_code == 200:
66
- return r2
67
-
68
- # If it doesn't look like a file, try directory index.html in both places
69
- if not looks_like_file(path):
70
- # 3) path/index.html
71
- url_dir = with_query(build(BASE_RESOLVE, path.rstrip("/") + "/index.html"), request)
72
- r3 = await client.get(url_dir, headers=HEADERS)
73
- if r3.status_code == 200:
74
- return r3
75
-
76
- # 4) static/path/index.html
77
- url_dir_static = with_query(build(BASE_RESOLVE, f"static/{path.rstrip('/')}/index.html"), request)
78
- r4 = await client.get(url_dir_static, headers=HEADERS)
79
- if r4.status_code == 200:
80
- return r4
81
-
82
- # Return the "best" (most recent) response even if 404, so caller can show message
83
- return r2 if r2.status_code != 404 else r
 
84
 
85
  @app.get("/health")
86
  async def health():
87
- return {"status": "ok", "space": SPACE_ID, "revision": REVISION, "source": "huggingface.co/resolve"}
 
 
 
 
 
 
 
 
 
 
88
 
89
  @app.get("/")
90
  async def root(request: Request):
91
  """
92
  Serve index.html robustly:
93
- - Try /index.html
94
- - Try /static/index.html
95
- - As a last resort, try repo root (may 404)
96
  """
97
  try:
98
  async with httpx.AsyncClient(timeout=None, follow_redirects=True) as client:
99
- r = await fetch(client, "index.html", request)
100
  if r.status_code == 404:
101
- r = await fetch(client, "", request) # fallback
 
102
  except Exception as e:
103
  return PlainTextResponse(f"Error fetching root: {e}", status_code=500)
104
 
105
  ctype = r.headers.get("content-type", "text/html; charset=utf-8")
106
- # If HTML, return text; else raw bytes
 
107
  if "text/html" in ctype:
108
  return HTMLResponse(r.text, status_code=r.status_code, media_type=ctype)
109
  return Response(content=r.content, status_code=r.status_code, media_type=ctype)
@@ -112,13 +132,14 @@ async def root(request: Request):
112
  async def proxy(path: str, request: Request):
113
  """
114
  Proxy all assets (CSS, JS, images, fonts, SPA routes).
115
- Tries root, then static/, and directory index.html fallbacks.
116
  """
117
  try:
118
  async with httpx.AsyncClient(timeout=None, follow_redirects=True) as client:
119
- r = await fetch(client, path, request)
120
  except Exception as e:
121
  return PlainTextResponse(f"Error fetching {path}: {e}", status_code=500)
122
 
123
  ctype = r.headers.get("content-type") or guess_mime(path)
 
124
  return Response(content=r.content, media_type=ctype, status_code=r.status_code)
 
1
+ # loader.py — robust public proxy for PRIVATE Static Space (resolve + raw, root + static, SPA dirs)
2
  import os
3
  import mimetypes
4
  from urllib.parse import urljoin
5
+ from typing import Optional, Tuple
6
 
7
  from fastapi import FastAPI, Request
8
+ from fastapi.responses import Response, HTMLResponse, PlainTextResponse, JSONResponse
9
  import httpx
10
 
11
  app = FastAPI()
 
13
  # ==== Config from Secrets ====
14
  SPACE_ID = os.getenv("PRIVATE_SPACE_ID") # e.g. "cheekeong2025/iip-grading"
15
  HF_TOKEN = os.getenv("HF_TOKEN") # must have read access
16
+ REVISION = os.getenv("REVISION", "main") # branch/tag/commit (defaults to main)
17
  # =============================
18
 
19
  if not SPACE_ID:
 
21
  "PRIVATE_SPACE_ID is not set. Add it in Settings → Repository secrets (e.g. 'cheekeong2025/iip-grading')."
22
  )
23
  if not HF_TOKEN:
24
+ raise RuntimeError("HF_TOKEN is not set. Add a token with READ access to the private Space.")
 
 
25
 
 
26
  HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
27
+ BASE_RESOLVE = f"https://huggingface.co/spaces/{SPACE_ID}/resolve/{REVISION}/"
28
+ BASE_RAW = f"https://huggingface.co/spaces/{SPACE_ID}/raw/{REVISION}/"
29
 
30
def join(base: str, path: str) -> str:
    """Return the URL for repo-relative ``path`` underneath ``base``.

    ``path`` originates from the incoming request, and every URL built here
    is later fetched with the bearer token in ``HEADERS``. Passing it straight
    to ``urljoin`` is unsafe: a path that carries its own scheme
    (``http://host/x``) replaces ``base`` entirely, and ``..`` segments climb
    out of the ``/<endpoint>/<revision>/`` prefix. To keep the result under
    ``base``:

    * empty and ``.`` segments are dropped (this also strips leading slashes);
    * ``..`` removes the previous segment but never goes above ``base``;
    * each remaining segment is percent-quoted, so ``:``, ``?`` and ``#`` are
      sent as literal file-name characters rather than URL syntax.

    Args:
        base: Base URL; expected to end with ``/``.
        path: Repo-relative path, with or without leading slashes.

    Returns:
        The joined URL. An empty (or fully cancelled-out) path yields the
        same result as ``urljoin(base, "")``.
    """
    # Local import: the module header only pulls in ``urljoin``.
    from urllib.parse import quote

    raw_segments = path.split("/")
    kept = []
    for segment in raw_segments:
        if segment in ("", "."):
            continue
        if segment == "..":
            # Clamp at the base directory instead of escaping it.
            if kept:
                kept.pop()
            continue
        kept.append(quote(segment, safe=""))

    if not kept:
        return urljoin(base, "")

    relative = "/".join(kept)
    # Keep directory-style references (``docs/``, ``a/b/..``) ending in "/".
    if raw_segments[-1] in ("", ".", ".."):
        relative += "/"
    # The "./" prefix guarantees urljoin parses this as a relative reference.
    return urljoin(base, "./" + relative)
32
 
33
def with_query(url: str, request: Request) -> str:
    """Append the incoming request's query string to ``url``.

    Returns ``url`` unchanged when the request carries no query string;
    otherwise returns ``url`` followed by ``?`` and the raw query.
    """
    query = str(request.url.query or "")
    if not query:
        return url
    return f"{url}?{query}"
36
 
37
def looks_like_file(path: str) -> bool:
    """Report whether the final ``/``-separated segment of ``path`` contains a dot.

    This is a heuristic: a dot in the last segment is taken to mean a file
    name with an extension; anything else is treated as a directory/route.
    """
    _, _, last_segment = path.rpartition("/")
    return "." in last_segment
39
 
40
  def guess_mime(path: str, default: str = "application/octet-stream") -> str:
 
43
  mime, _ = mimetypes.guess_type(path)
44
  return mime or default
45
 
46
async def try_get(client: httpx.AsyncClient, url: str) -> httpx.Response:
    """Issue one authenticated GET for ``url`` and log the resulting status.

    The request is sent with the module-level ``HEADERS`` (bearer token).
    The response is returned as-is, whatever its status code.
    """
    response = await client.get(url, headers=HEADERS)
    print(f"[proxy] GET {url} -> {response.status_code}")
    return response
50
+
51
async def fetch_variants(client: httpx.AsyncClient, path: str, request: Request) -> Tuple[httpx.Response, str]:
    """Fetch ``path`` from the private Space, trying several locations in turn.

    Candidate repo paths, in order:
        path
        static/path
        path/index.html          (only when path does not look like a file)
        static/path/index.html   (only when path does not look like a file)

    The whole candidate list is tried against the ``resolve`` endpoint first
    and then against the ``raw`` endpoint. The incoming query string is
    forwarded on every attempt. Attempts stop at the first 200.

    Returns:
        ``(response, tried)`` where ``response`` is the first 200 response,
        or the response of the final attempt if none returned 200, and
        ``tried`` is a ``" -> "``-joined list of ``kind:url`` entries for
        every attempt made (URLs shown without the query string).
    """
    # Repo-relative candidates; the same list is used for both endpoints.
    candidates = [path, f"static/{path.lstrip('/')}"]
    if not looks_like_file(path):
        candidates.append(path.rstrip("/") + "/index.html")
        candidates.append(f"static/{path.rstrip('/')}/index.html")

    attempts = [
        (kind, join(base, candidate))
        for kind, base in (("resolve", BASE_RESOLVE), ("raw", BASE_RAW))
        for candidate in candidates
    ]

    trail = []
    response = None
    for kind, target in attempts:
        response = await try_get(client, with_query(target, request))
        trail.append(f"{kind}:{target}")
        if response.status_code == 200:
            break

    # Either the first 200, or the last attempt when nothing succeeded.
    return response, " -> ".join(trail)
93
 
94
@app.get("/health")
async def health():
    """Liveness endpoint: report the configured Space, revision and upstream kinds."""
    report = dict(
        status="ok",
        space=SPACE_ID,
        revision=REVISION,
        source="resolve/raw",
    )
    return report
97
+
98
@app.get("/_debug/fetch/{path:path}")
async def debug_fetch(path: str, request: Request):
    """Manual debugging endpoint: show the chain of URLs tried and the final status.

    Responds with JSON containing the final upstream ``status``, its
    ``content_type`` header and the ``tried`` chain reported by
    ``fetch_variants``. Any exception raised while fetching is reported as
    ``{"error": ...}`` with HTTP 500.
    """
    # Finite timeouts: ``timeout=None`` disables them entirely, so a stalled
    # upstream connection would keep this handler waiting indefinitely.
    timeout = httpx.Timeout(30.0, connect=10.0)
    try:
        async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
            r, tried = await fetch_variants(client, path, request)
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)

    return JSONResponse({"status": r.status_code, "content_type": r.headers.get("content-type"), "tried": tried})
107
 
108
@app.get("/")
async def root(request: Request):
    """Serve the Space's landing page.

    - Try index.html (resolve/raw, root/static) via ``fetch_variants``.
    - If that ends in a 404, try the repo root as a last resort.

    The upstream body is forwarded as bytes together with the upstream
    status code and ``content-type`` (``text/html; charset=utf-8`` when the
    header is absent). Any exception raised while fetching is reported as a
    plain-text HTTP 500.
    """
    # Finite timeouts: ``timeout=None`` disables them entirely, so a stalled
    # upstream connection would keep this handler waiting indefinitely.
    timeout = httpx.Timeout(30.0, connect=10.0)
    try:
        async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
            # The "tried" chain is not needed here; try_get logs each attempt.
            r, _ = await fetch_variants(client, "index.html", request)
            if r.status_code == 404:
                r, _ = await fetch_variants(client, "", request)
    except Exception as e:
        return PlainTextResponse(f"Error fetching root: {e}", status_code=500)

    ctype = r.headers.get("content-type", "text/html; charset=utf-8")
    print(f"[root] final status={r.status_code}, ctype={ctype}")
    # Pass the bytes through untouched. Decoding to text and re-encoding would
    # emit UTF-8 while still advertising whatever charset upstream declared.
    return Response(content=r.content, status_code=r.status_code, media_type=ctype)
 
132
async def proxy(path: str, request: Request):
    """
    Proxy all assets (CSS, JS, images, fonts, SPA routes).
    Tries resolve + raw, root + static, and directory index.html fallbacks.

    The upstream body and status code are forwarded as-is. The content type
    is the upstream ``content-type`` header, or a guess from the path's
    extension when that header is missing. Any exception raised while
    fetching is reported as a plain-text HTTP 500.
    """
    # Finite timeouts: ``timeout=None`` disables them entirely, so a stalled
    # upstream connection would keep this handler waiting indefinitely.
    timeout = httpx.Timeout(30.0, connect=10.0)
    try:
        async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
            # The "tried" chain is not needed here; try_get logs each attempt.
            r, _ = await fetch_variants(client, path, request)
    except Exception as e:
        return PlainTextResponse(f"Error fetching {path}: {e}", status_code=500)

    ctype = r.headers.get("content-type") or guess_mime(path)
    print(f"[proxy] path={path} final status={r.status_code}, ctype={ctype}")
    return Response(content=r.content, media_type=ctype, status_code=r.status_code)