Spaces:
Running
Running
Upload app.py
Browse files
app.py
CHANGED
|
@@ -1,8 +1,11 @@
|
|
| 1 |
# ⚙️ 后端逻辑/核心服务端.py (Hugging Face Spaces app.py)
|
| 2 |
from fastapi import FastAPI, File, UploadFile, Form
|
| 3 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
| 4 |
import hashlib
|
| 5 |
-
import urllib.parse
|
|
|
|
|
|
|
| 6 |
import 数据库连接 as db
|
| 7 |
|
| 8 |
# 引入拆分后的四大业务模块
|
|
@@ -37,9 +40,6 @@ async def upload_file(file: UploadFile = File(...), file_type: str = Form(...)):
|
|
| 37 |
file_hash = hashlib.md5(content).hexdigest()[:10]
|
| 38 |
|
| 39 |
new_filename = f"{file_hash}_{file.filename}"
|
| 40 |
-
|
| 41 |
-
# 【核心修复】:由于标准的 HTTP URL 不支持直接包含中文,
|
| 42 |
-
# 我们对文件名进行 URL 安全转码 (如把 '测试' 转为 '%E6%B5%8B%E8%AF%95')
|
| 43 |
safe_filename = urllib.parse.quote(file.filename)
|
| 44 |
safe_url_filename = f"{file_hash}_{safe_filename}"
|
| 45 |
|
|
@@ -47,9 +47,35 @@ async def upload_file(file: UploadFile = File(...), file_type: str = Form(...)):
|
|
| 47 |
target_dir = dir_mapping.get(file_type, "others")
|
| 48 |
full_path_in_repo = f"{target_dir}/{new_filename}"
|
| 49 |
|
| 50 |
-
# 保存文件时使用原始名称(文件系统支持中文)
|
| 51 |
db.save_file(full_path_in_repo, content)
|
| 52 |
|
| 53 |
-
# 返回给前端的下载链接,必须使用 URL Encode 编码后的名称,防止客户端 ascii 报错
|
| 54 |
url = f"https://huggingface.co/datasets/{db.DATASET_REPO_ID}/resolve/main/{target_dir}/{safe_url_filename}"
|
| 55 |
-
return {"status": "success", "url": url, "display_name": file.filename, "hashed_name": new_filename}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# ⚙️ 后端逻辑/核心服务端.py (Hugging Face Spaces app.py)
|
| 2 |
from fastapi import FastAPI, File, UploadFile, Form
|
| 3 |
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
+
from fastapi.responses import Response, JSONResponse # 新增 Response 用于返回文件流
|
| 5 |
import hashlib
|
| 6 |
+
import urllib.parse
|
| 7 |
+
import urllib.request # 新增用于代理请求
|
| 8 |
+
import os # 新增用于读取环境变量
|
| 9 |
import 数据库连接 as db
|
| 10 |
|
| 11 |
# 引入拆分后的四大业务模块
|
|
|
|
| 40 |
file_hash = hashlib.md5(content).hexdigest()[:10]
|
| 41 |
|
| 42 |
new_filename = f"{file_hash}_{file.filename}"
|
|
|
|
|
|
|
|
|
|
| 43 |
safe_filename = urllib.parse.quote(file.filename)
|
| 44 |
safe_url_filename = f"{file_hash}_{safe_filename}"
|
| 45 |
|
|
|
|
| 47 |
target_dir = dir_mapping.get(file_type, "others")
|
| 48 |
full_path_in_repo = f"{target_dir}/{new_filename}"
|
| 49 |
|
|
|
|
| 50 |
db.save_file(full_path_in_repo, content)
|
| 51 |
|
|
|
|
| 52 |
url = f"https://huggingface.co/datasets/{db.DATASET_REPO_ID}/resolve/main/{target_dir}/{safe_url_filename}"
|
| 53 |
+
return {"status": "success", "url": url, "display_name": file.filename, "hashed_name": new_filename}
|
| 54 |
+
|
| 55 |
+
# =========================================================
|
| 56 |
+
# 【核心新增】:代理下载私有数据集文件的接口
|
| 57 |
+
# =========================================================
|
| 58 |
+
def _is_huggingface_url(url):
    """Return True only for an https URL whose host is exactly huggingface.co.

    The URL is parsed instead of substring-matched, so look-alikes such as
    ``https://evil.example/?x=huggingface.co``,
    ``https://huggingface.co@evil.example/`` or
    ``file:///etc/passwd#huggingface.co`` are rejected.
    """
    if not isinstance(url, str) or not url:
        return False
    try:
        parts = urllib.parse.urlsplit(url)
        host = parts.hostname
    except ValueError:
        # Malformed authority component (e.g. an unbalanced IPv6 bracket).
        return False
    # urlsplit lower-cases the scheme and `hostname` lower-cases the host,
    # so plain equality is sufficient here.
    return parts.scheme == "https" and host == "huggingface.co"


@app.post("/api/proxy_download")
async def proxy_download(req_data: dict):
    """Fetch a Hugging Face file on behalf of the client and return its bytes.

    Args:
        req_data: Request body; the ``"url"`` key holds the file URL to fetch.

    Returns:
        A ``Response`` carrying the downloaded bytes on success, a 400
        ``JSONResponse`` when the URL is missing or is not an https
        huggingface.co URL, or a 500 ``JSONResponse`` when the upstream
        fetch fails.
    """
    target_url = req_data.get("url")

    # Security check: the outgoing request may carry HF_TOKEN, and urlopen also
    # understands non-HTTP schemes such as file:, so scheme and host are
    # validated on the parsed URL rather than with a substring test.
    if not _is_huggingface_url(target_url):
        return JSONResponse(content={"error": "无效的下载链接或拒绝访问非 HF 域名"}, status_code=400)

    hf_token = os.environ.get("HF_TOKEN")

    # Build the upstream request for the file.
    req = urllib.request.Request(target_url, headers={'User-Agent': 'Mozilla/5.0'})

    # When a token is configured, attach it as a Bearer Authorization header.
    if hf_token:
        req.add_header("Authorization", f"Bearer {hf_token}")

    try:
        # The timeout bounds each blocking socket operation so a stalled
        # upstream connection cannot hang this request indefinitely.
        # NOTE(review): urlopen is a blocking call inside an async handler;
        # consider moving it to a worker thread if concurrency matters.
        with urllib.request.urlopen(req, timeout=60) as response:
            content = response.read()
    except Exception as e:
        # Top-level boundary: report any fetch failure to the client as a 500.
        return JSONResponse(content={"error": f"云端代理下载失败: {str(e)}"}, status_code=500)

    # Return the raw bytes that were read from upstream.
    # NOTE(review): the media type is kept as in the original; confirm whether
    # "application/json" is what the frontend expects for arbitrary files.
    return Response(content=content, media_type="application/json")
|