Spaces:

dragg2
/

dragontgbot

Running

App Files Files Community

dragg2 committed 16 days ago

Commit

131251a

verified ·

1 Parent(s): 17362a1

Upload 4 files

Browse files

Files changed (2) hide show

Dockerfile +9 -11
app.py +109 -72

Dockerfile CHANGED Viewed

@@ -1,19 +1,17 @@
-FROM aiogram/telegram-bot-api:latest
-RUN apk add --no-cache python3 py3-pip && \
-    python3 -m venv /opt/venv && \
-    /opt/venv/bin/pip install --no-cache-dir --upgrade pip && \
-    /opt/venv/bin/pip install --no-cache-dir fastapi uvicorn httpx
 ENV PATH="/opt/venv/bin:$PATH"
 WORKDIR /app
 COPY app.py /app/app.py
 COPY start.sh /app/start.sh
 RUN sed -i 's/\r$//' /app/start.sh && chmod +x /app/start.sh
 EXPOSE 7860
-# 关键：基础镜像自带 ENTRYPOINT=/docker-entrypoint.sh，会忽略/吞掉 CMD
-# 这里必须显式覆盖 ENTRYPOINT，才能执行我们自己的启动脚本（先起 telegram-bot-api 内部端口，再起对外 7860 代理）
 ENTRYPOINT ["/app/start.sh"]

+FROM aiogram/telegram-bot-api:latest
+RUN apk add --no-cache python3 py3-pip && \
+    python3 -m venv /opt/venv && \
+    /opt/venv/bin/pip install --no-cache-dir --upgrade pip && \
+    /opt/venv/bin/pip install --no-cache-dir fastapi uvicorn httpx
 ENV PATH="/opt/venv/bin:$PATH"
 WORKDIR /app
 COPY app.py /app/app.py
 COPY start.sh /app/start.sh
 RUN sed -i 's/\r$//' /app/start.sh && chmod +x /app/start.sh
 EXPOSE 7860
 ENTRYPOINT ["/app/start.sh"]

app.py CHANGED Viewed

@@ -1,27 +1,58 @@
 import os
 import json
 import asyncio
 from fastapi import FastAPI, Request
 from fastapi.responses import Response, StreamingResponse, FileResponse
 import httpx
-# 内部真实 Bot API Server（telegram-bot-api）监听端口
-# 注意：外网端口必须是 7860（HF 会检查），但 Bot API 可以只跑在容器内部端口（默认 8081）
 UPSTREAM = f"http://127.0.0.1:{os.environ.get('TELEGRAM_UPSTREAM_PORT', '8081')}"
-app = FastAPI()
 WORK_DIR = os.environ.get("TELEGRAM_WORK_DIR", "/tmp/telegram-bot-api-data")
 DOWNLOAD_WAIT_SECONDS = float(os.environ.get("TELEGRAM_DOWNLOAD_WAIT_SECONDS", "8"))
 DOWNLOAD_POLL_INTERVAL_MS = int(os.environ.get("TELEGRAM_DOWNLOAD_POLL_INTERVAL_MS", "200"))
 def _normalize_bot_api_file_path(raw_fp: str | None, token_enc: str | None) -> str:
     if not raw_fp:
         return ""
     fp = str(raw_fp).replace("\\", "/").lstrip("/")
-    # 有些 Local Bot API Server 会把 token 段也带进 file_path，先把它裁掉
     if token_enc:
         marker = f"/{token_enc}/"
         idx = fp.find(marker)
@@ -50,11 +81,28 @@ def _normalize_bot_api_file_path(raw_fp: str | None, token_enc: str | None) -> s
             return "/".join(parts[i:])
     return fp
 @app.get("/")
 async def root():
-    return {"ok": True, "hint": "use /tg/ prefix, e.g. /tg/bot<TOKEN>/getMe"}
-@app.api_route("/tg/{path:path}", methods=["GET","POST","PUT","PATCH","DELETE","HEAD","OPTIONS"])
 async def proxy(path: str, request: Request):
     # 兜底：如果下游请求把 file_path 带成了“工作目录前缀”，这里直接改写成相对路径再转发给 telegram-bot-api
     path_for_upstream = path.lstrip("/")
@@ -69,9 +117,6 @@ async def proxy(path: str, request: Request):
             if fixed:
                 path_for_upstream = f"file/bot{token_enc}/{fixed}"
-    # 官方/通用做法（更稳）：--local 场景 getFile 可能返回“本地路径”，所以 /file 下载最好不要再依赖 bot-api 自己的 /file 端点
-    # 直接从容器的 WORK_DIR 里读文件返回给浏览器。
-    # 这样就算 bot-api 的 /file 行为有差异，也不会影响 CloudPaste 预览下载。
     if path_for_upstream.startswith("file/"):
         rest = path_for_upstream[len("file/") :]
         token_enc = None
@@ -81,60 +126,56 @@ async def proxy(path: str, request: Request):
             rel = rest.split("/", 1)[1] if "/" in rest else ""
         rel_fixed = _normalize_bot_api_file_path(rel, token_enc)
-        candidates = []
-        if token_enc and rel_fixed:
-            candidates.append(os.path.join(WORK_DIR, token_enc, rel_fixed))
-        if rel_fixed:
-            candidates.append(os.path.join(WORK_DIR, rel_fixed))
-        for p in candidates:
-            try:
-                if p and os.path.isfile(p):
-                    return FileResponse(p)
-            except Exception:
-                pass
-        # 如果本地没有这个文件（HF 不持久化常见），允许 CloudPaste 通过 query 传入 file_id 来触发回源下载：
-        # - 不新增新的路由接口，仍然是 /tg/file/bot<TOKEN>/<file_path>
-        # - 只是在 query 上多带一个 file_id（例如 ?file_id=xxx）
         file_id = request.query_params.get("file_id") or request.query_params.get("fid")
         if token_enc and file_id:
-            async with httpx.AsyncClient(timeout=None, follow_redirects=True) as client:
-                r = await client.get(f"{UPSTREAM}/bot{token_enc}/getFile", params={"file_id": file_id})
-                payload = None
-                try:
-                    payload = r.json()
-                except Exception:
-                    payload = None
-            if r.status_code == 200 and isinstance(payload, dict) and payload.get("ok") is True:
-                fp = None
-                try:
-                    fp = (payload.get("result") or {}).get("file_path")
-                except Exception:
-                    fp = None
-                rel2 = _normalize_bot_api_file_path(fp if isinstance(fp, str) else None, token_enc)
-                if rel2:
-                    candidates2 = [
-                        os.path.join(WORK_DIR, token_enc, rel2),
-                        os.path.join(WORK_DIR, rel2),
-                    ]
-                    waited = 0.0
-                    interval = max(0.05, DOWNLOAD_POLL_INTERVAL_MS / 1000.0)
-                    max_wait = max(0.0, DOWNLOAD_WAIT_SECONDS)
-                    while waited <= max_wait:
-                        for p2 in candidates2:
-                            try:
-                                if p2 and os.path.isfile(p2):
-                                    return FileResponse(p2)
-                            except Exception:
-                                pass
-                        if waited >= max_wait:
-                            break
-                        await asyncio.sleep(interval)
-                        waited += interval
     url = f"{UPSTREAM}/{path_for_upstream}"
@@ -147,9 +188,7 @@ async def proxy(path: str, request: Request):
         async for chunk in request.stream():
             yield chunk
-    # 关键修复：
-    # - Bot API 的 /bot... 响应是小 JSON（sendDocument/getFile/getMe），直接“读全量再返回”，避免流式上下文提前关闭导致空响应。
-    # - Bot API 的 /file/... 响应是大文件，才需要流式透传。
     is_file_download = path_for_upstream.startswith("file/")
     is_get_file = "/getFile" in ("/" + path_for_upstream)
@@ -163,8 +202,6 @@ async def proxy(path: str, request: Request):
         "last-modified",
     }
-    # 重要：不能用 “async with AsyncClient()” 包住 StreamingResponse。
-    # 否则函数 return 后 client 会被关闭，导致 /file/... 的下载流被提前掐断（CloudPaste 就会报“下载分片失败”）。
     client = httpx.AsyncClient(timeout=None, follow_redirects=True)
     if not is_file_download:
         try:
@@ -176,14 +213,9 @@ async def proxy(path: str, request: Request):
                 content=iter_request_body(),
             )
             resp_headers = {k: v for k, v in r.headers.items() if k.lower() in passthrough_allow}
-            # 关键：非流式响应不要透传 Content-Length（我们可能会改写 body，长度会变）
             resp_headers.pop("content-length", None)
             resp_headers.pop("Content-Length", None)
-            # 关键兼容：某些 Local Bot API Server 会在 getFile 的 result.file_path 里返回本机工作目录路径
-            # 例如：var/lib/telegram-bot-api/<token>/documents/xxx
-            # 但 CloudPaste 会把这个 file_path 拼到 /file/bot<TOKEN>/<file_path> 上，导致 404。
-            # 这里把 file_path 规范化回 documents/xxx 这种“相对路径”，让后续下载正常。
             content_type = (r.headers.get("content-type") or "").lower()
             if is_get_file and r.status_code == 200 and "application/json" in content_type:
                 try:
@@ -201,9 +233,7 @@ async def proxy(path: str, request: Request):
                             result["file_path"] = fixed
                             payload["result"] = result
                             body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
-                            # 确保 JSON 的 content-type 不被冲掉
                             resp_headers.setdefault("content-type", "application/json")
-                            # body 被我们改写过，Content-Length 必须删掉让框架自己算
                             resp_headers.pop("content-length", None)
                             resp_headers.pop("Content-Length", None)
                             return Response(content=body, status_code=r.status_code, headers=resp_headers)
@@ -214,7 +244,6 @@ async def proxy(path: str, request: Request):
         finally:
             await client.aclose()
-    # /file/...：流式回传，必须保证上游连接在迭代期间保持打开
     req = client.build_request(
         request.method,
         url,
@@ -236,3 +265,11 @@ async def proxy(path: str, request: Request):
                 await client.aclose()
     return StreamingResponse(iter_response(), status_code=r.status_code, headers=resp_headers)

 import os
 import json
 import asyncio
+import time
 from fastapi import FastAPI, Request
 from fastapi.responses import Response, StreamingResponse, FileResponse
 import httpx
# Base URL of the internal Bot API server; only the listening port is configurable.
UPSTREAM = "http://127.0.0.1:" + os.getenv("TELEGRAM_UPSTREAM_PORT", "8081")
# The interactive docs, ReDoc and OpenAPI schema endpoints are all switched off.
app = FastAPI(openapi_url=None, docs_url=None, redoc_url=None)
# Directory that is searched for files requested through the file/ download path.
WORK_DIR = os.getenv("TELEGRAM_WORK_DIR", "/tmp/telegram-bot-api-data")
# Longest time (seconds) to poll for a file to appear locally after a getFile call.
DOWNLOAD_WAIT_SECONDS = float(os.getenv("TELEGRAM_DOWNLOAD_WAIT_SECONDS", "8"))
# Pause between two polls, in milliseconds.
DOWNLOAD_POLL_INTERVAL_MS = int(os.getenv("TELEGRAM_DOWNLOAD_POLL_INTERVAL_MS", "200"))
+_INFLIGHT_LOCKS: dict[str, asyncio.Lock] = {}
+_INFLIGHT_LAST_SEEN: dict[str, float] = {}
+def _inflight_key(token_enc: str | None, file_id: str | None) -> str:
+    return f"{token_enc or ''}:{file_id or ''}"
+def _get_inflight_lock(key: str) -> asyncio.Lock:
+    lock = _INFLIGHT_LOCKS.get(key)
+    if lock is None:
+        lock = asyncio.Lock()
+        _INFLIGHT_LOCKS[key] = lock
+    _INFLIGHT_LAST_SEEN[key] = time.time()
+    return lock
+def _cleanup_inflight_locks(max_entries: int = 1024, ttl_seconds: float = 300.0) -> None:
+    if len(_INFLIGHT_LOCKS) <= max_entries:
+        return
+    now = time.time()
+    for key, lock in list(_INFLIGHT_LOCKS.items()):
+        last = _INFLIGHT_LAST_SEEN.get(key, 0.0)
+        if (now - last) > ttl_seconds and (not lock.locked()):
+            _INFLIGHT_LOCKS.pop(key, None)
+            _INFLIGHT_LAST_SEEN.pop(key, None)
+def _normalize_proxy_prefix(raw: str | None) -> str:
+    s = str(raw or "").strip()
+    if not s or s == "/":
+        return ""
+    if not s.startswith("/"):
+        s = "/" + s
+    return s.rstrip("/")
 def _normalize_bot_api_file_path(raw_fp: str | None, token_enc: str | None) -> str:
     if not raw_fp:
         return ""
     fp = str(raw_fp).replace("\\", "/").lstrip("/")
     if token_enc:
         marker = f"/{token_enc}/"
         idx = fp.find(marker)
             return "/".join(parts[i:])
     return fp
+def _build_local_candidates(work_dir: str, token_enc: str | None, rel_path: str | None) -> list[str]:
+    candidates = []
+    rp = str(rel_path or "").strip().lstrip("/")
+    if token_enc and rp:
+        candidates.append(os.path.join(work_dir, token_enc, rp))
+    if rp:
+        candidates.append(os.path.join(work_dir, rp))
+    return candidates
def _try_file_response(candidates: list[str]):
    """Serve the first candidate path that is an existing regular file.

    Returns a ``FileResponse`` for that path, or ``None`` when no candidate
    qualifies. Any exception raised while probing or wrapping a candidate
    is ignored and the next candidate is tried.
    """
    for candidate in candidates:
        if not candidate:
            continue
        try:
            exists = os.path.isfile(candidate)
            if exists:
                return FileResponse(candidate)
        except Exception:
            # Best effort: a failing candidate must not abort the search.
            continue
    return None
 @app.get("/")
 async def root():
+    return {"ok": True}
 async def proxy(path: str, request: Request):
     # 兜底：如果下游请求把 file_path 带成了“工作目录前缀”，这里直接改写成相对路径再转发给 telegram-bot-api
     path_for_upstream = path.lstrip("/")
             if fixed:
                 path_for_upstream = f"file/bot{token_enc}/{fixed}"
     if path_for_upstream.startswith("file/"):
         rest = path_for_upstream[len("file/") :]
         token_enc = None
             rel = rest.split("/", 1)[1] if "/" in rest else ""
         rel_fixed = _normalize_bot_api_file_path(rel, token_enc)
+        candidates = _build_local_candidates(WORK_DIR, token_enc, rel_fixed)
+        resp = _try_file_response(candidates)
+        if resp is not None:
+            return resp
+        # 如果本地没有这个文件（HF 不持久化），允许 CloudPaste 通过 query 传入 file_id 来触发回源下载：
+        # query 上多带一个 file_id（例如 ?file_id=xxx）
         file_id = request.query_params.get("file_id") or request.query_params.get("fid")
         if token_enc and file_id:
+            # 防抖：同一个 (token,file_id) 的回源下载只触发一次，避免并发 Range 请求重复打 getFile/重复等待落盘
+            key = _inflight_key(token_enc, str(file_id))
+            lock = _get_inflight_lock(key)
+            try:
+                async with lock:
+                    resp_again = _try_file_response(_build_local_candidates(WORK_DIR, token_enc, rel_fixed))
+                    if resp_again is not None:
+                        return resp_again
+                    async with httpx.AsyncClient(timeout=None, follow_redirects=True) as client:
+                        r = await client.get(f"{UPSTREAM}/bot{token_enc}/getFile", params={"file_id": file_id})
+                        payload = None
+                        try:
+                            payload = r.json()
+                        except Exception:
+                            payload = None
+                    if r.status_code == 200 and isinstance(payload, dict) and payload.get("ok") is True:
+                        fp = None
+                        try:
+                            fp = (payload.get("result") or {}).get("file_path")
+                        except Exception:
+                            fp = None
+                        rel2 = _normalize_bot_api_file_path(fp if isinstance(fp, str) else None, token_enc)
+                        if rel2:
+                            candidates2 = _build_local_candidates(WORK_DIR, token_enc, rel2)
+                            waited = 0.0
+                            interval = max(0.05, DOWNLOAD_POLL_INTERVAL_MS / 1000.0)
+                            max_wait = max(0.0, DOWNLOAD_WAIT_SECONDS)
+                            while waited <= max_wait:
+                                resp2 = _try_file_response(candidates2)
+                                if resp2 is not None:
+                                    return resp2
+                                if waited >= max_wait:
+                                    break
+                                await asyncio.sleep(interval)
+                                waited += interval
+            finally:
+                _cleanup_inflight_locks()
     url = f"{UPSTREAM}/{path_for_upstream}"
         async for chunk in request.stream():
             yield chunk
     is_file_download = path_for_upstream.startswith("file/")
     is_get_file = "/getFile" in ("/" + path_for_upstream)
         "last-modified",
     }
     client = httpx.AsyncClient(timeout=None, follow_redirects=True)
     if not is_file_download:
         try:
                 content=iter_request_body(),
             )
             resp_headers = {k: v for k, v in r.headers.items() if k.lower() in passthrough_allow}
             resp_headers.pop("content-length", None)
             resp_headers.pop("Content-Length", None)
             content_type = (r.headers.get("content-type") or "").lower()
             if is_get_file and r.status_code == 200 and "application/json" in content_type:
                 try:
                             result["file_path"] = fixed
                             payload["result"] = result
                             body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
                             resp_headers.setdefault("content-type", "application/json")
                             resp_headers.pop("content-length", None)
                             resp_headers.pop("Content-Length", None)
                             return Response(content=body, status_code=r.status_code, headers=resp_headers)
         finally:
             await client.aclose()
     req = client.build_request(
         request.method,
         url,
                 await client.aclose()
     return StreamingResponse(iter_response(), status_code=r.status_code, headers=resp_headers)
# Register the proxy handler under the configurable prefix (default "/tg").
_PROXY_PREFIX = _normalize_proxy_prefix(os.environ.get("TELEGRAM_PROXY_PREFIX", "/tg"))
# An empty prefix formats to "/{path:path}", so no separate branch is needed.
_PROXY_ROUTE = _PROXY_PREFIX + "/{path:path}"
app.add_api_route(
    _PROXY_ROUTE,
    proxy,
    methods=[
        "GET",
        "POST",
        "PUT",
        "PATCH",
        "DELETE",
        "HEAD",
        "OPTIONS",
    ],
)