ZHIWEI666 committed on
Commit
22d80b0
·
verified ·
1 Parent(s): 71ce6d2

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -7
app.py CHANGED
@@ -1,8 +1,11 @@
1
  # ⚙️ 后端逻辑/核心服务端.py (Hugging Face Spaces app.py)
2
  from fastapi import FastAPI, File, UploadFile, Form
3
  from fastapi.middleware.cors import CORSMiddleware
 
4
  import hashlib
5
- import urllib.parse # 【核心新增】:用于处理中文文件名的 URL 编码
 
 
6
  import 数据库连接 as db
7
 
8
  # 引入拆分后的四大业务模块
@@ -37,9 +40,6 @@ async def upload_file(file: UploadFile = File(...), file_type: str = Form(...)):
37
  file_hash = hashlib.md5(content).hexdigest()[:10]
38
 
39
  new_filename = f"{file_hash}_{file.filename}"
40
-
41
- # 【核心修复】:由于标准的 HTTP URL 不支持直接包含中文,
42
- # 我们对文件名进行 URL 安全转码 (如把 '测试' 转为 '%E6%B5%8B%E8%AF%95')
43
  safe_filename = urllib.parse.quote(file.filename)
44
  safe_url_filename = f"{file_hash}_{safe_filename}"
45
 
@@ -47,9 +47,35 @@ async def upload_file(file: UploadFile = File(...), file_type: str = Form(...)):
47
  target_dir = dir_mapping.get(file_type, "others")
48
  full_path_in_repo = f"{target_dir}/{new_filename}"
49
 
50
- # 保存文件时使用原始名称(文件系统支持中文)
51
  db.save_file(full_path_in_repo, content)
52
 
53
- # 返回给前端的下载链接,必须使用 URL Encode 编码后的名称,防止客户端 ascii 报错
54
  url = f"https://huggingface.co/datasets/{db.DATASET_REPO_ID}/resolve/main/{target_dir}/{safe_url_filename}"
55
- return {"status": "success", "url": url, "display_name": file.filename, "hashed_name": new_filename}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # ⚙️ 后端逻辑/核心服务端.py (Hugging Face Spaces app.py)
2
  from fastapi import FastAPI, File, UploadFile, Form
3
  from fastapi.middleware.cors import CORSMiddleware
4
+ from fastapi.responses import Response, JSONResponse # 新增 Response 用于返回文件流
5
  import hashlib
6
+ import urllib.parse
7
+ import urllib.request # 新增用于代理请求
8
+ import os # 新增用于读取环境变量
9
  import 数据库连接 as db
10
 
11
  # 引入拆分后的四大业务模块
 
40
  file_hash = hashlib.md5(content).hexdigest()[:10]
41
 
42
  new_filename = f"{file_hash}_{file.filename}"
 
 
 
43
  safe_filename = urllib.parse.quote(file.filename)
44
  safe_url_filename = f"{file_hash}_{safe_filename}"
45
 
 
47
  target_dir = dir_mapping.get(file_type, "others")
48
  full_path_in_repo = f"{target_dir}/{new_filename}"
49
 
 
50
  db.save_file(full_path_in_repo, content)
51
 
 
52
  url = f"https://huggingface.co/datasets/{db.DATASET_REPO_ID}/resolve/main/{target_dir}/{safe_url_filename}"
53
+ return {"status": "success", "url": url, "display_name": file.filename, "hashed_name": new_filename}
54
+
55
+ # =========================================================
56
+ # 【核心新增】:代理下载私有数据集文件的接口
57
+ # =========================================================
58
# Only this host (and its subdomains) may be fetched with the server's HF token.
_PROXY_ALLOWED_HOST = "huggingface.co"


def _is_allowed_proxy_url(target_url):
    """Return True only for an https URL whose host is huggingface.co or a subdomain.

    The host is taken from the parsed URL rather than from a substring search,
    so inputs such as ``https://evil.example/?x=huggingface.co``,
    ``https://huggingface.co.evil.example/`` or
    ``https://huggingface.co@evil.example/`` are rejected.
    """
    if not isinstance(target_url, str):
        return False
    try:
        parsed = urllib.parse.urlparse(target_url)
        host = (parsed.hostname or "").lower()
    except ValueError:
        # urlparse raises ValueError on malformed netlocs (e.g. bad IPv6 brackets).
        return False
    if parsed.scheme != "https":
        # The request may carry a bearer token, so plaintext http is refused.
        return False
    return host == _PROXY_ALLOWED_HOST or host.endswith("." + _PROXY_ALLOWED_HOST)


@app.post("/api/proxy_download")
async def proxy_download(req_data: dict):
    """Proxy-download a file from huggingface.co on behalf of the frontend.

    Args:
        req_data: JSON request body; its ``"url"`` entry is the file to fetch.

    Returns:
        On success, a ``Response`` with the downloaded bytes and the upstream
        Content-Type (``application/octet-stream`` when upstream sends none).
        A ``JSONResponse`` with status 400 when the URL is missing or is not
        an https huggingface.co URL, and with status 500 when the upstream
        fetch fails.
    """
    import asyncio

    target_url = req_data.get("url")

    # Security check: validate the parsed host, not a substring of the URL,
    # because the HF token below must never be sent to another site.
    if not _is_allowed_proxy_url(target_url):
        return JSONResponse(content={"error": "无效的下载链接或拒绝访问非 HF 域名"}, status_code=400)

    hf_token = os.environ.get("HF_TOKEN")

    # Build the upstream request for the (possibly private) repository file.
    req = urllib.request.Request(target_url, headers={'User-Agent': 'Mozilla/5.0'})

    # When a token is configured, attach it so private files can be read.
    # NOTE(review): urlopen follows redirects by default; confirm the
    # Authorization header is not forwarded to non-HF redirect targets.
    if hf_token:
        req.add_header("Authorization", f"Bearer {hf_token}")

    def _fetch():
        # The timeout bounds each blocking socket operation, not the whole transfer.
        with urllib.request.urlopen(req, timeout=30) as response:
            return response.read(), response.headers.get("Content-Type")

    try:
        # urlopen is blocking; run it in the default executor so the event
        # loop keeps serving other requests during the download.
        loop = asyncio.get_running_loop()
        content, upstream_type = await loop.run_in_executor(None, _fetch)
    except Exception as e:
        # Request-handler boundary: report any upstream failure as a JSON error.
        return JSONResponse(content={"error": f"云端代理下载失败: {str(e)}"}, status_code=500)

    # Return the raw bytes with the upstream media type instead of always
    # labelling them as JSON.
    return Response(content=content, media_type=upstream_type or "application/octet-stream")