encryptd committed on
Commit
9530a76
·
1 Parent(s): abba137

prog update

Browse files
Files changed (1) hide show
  1. app.py +35 -30
app.py CHANGED
@@ -13,7 +13,7 @@ except ImportError:
13
  import audioop_lts as audioop
14
  sys.modules["audioop"] = audioop
15
 
16
- from fastapi import Request
17
  from fastapi.responses import StreamingResponse, JSONResponse
18
  import uvicorn
19
  import gradio as gr
@@ -47,7 +47,38 @@ def start_vllm():
47
  os.environ["VLLM_PID"] = "running"
48
 
49
  start_vllm()
 
 
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  # --- STEP 2: UI LOGIC ---
52
  def run_ui_test(image, prompt):
53
  if image is None: return "⚠️ Please upload an image."
@@ -97,43 +128,17 @@ with gr.Blocks(title="NuMarkdown API") as demo:
97
  # We enable the queue for long tasks
98
  # 1. FIX ATTRIBUTE ERROR: Patch missing attributes onto the demo object
99
  demo.max_file_size = 100 * 1024 * 1024 # 100MB
 
 
100
  demo.queue()
101
 
102
  # We get the FastAPI instance from Gradio
103
- app = demo.app
104
  # 3. Mount Gradio to FastAPI
105
  # Using path="" and assigning to the app ensures assets are at the root
106
  app = gr.mount_gradio_app(app, demo, path="/")
107
 
108
- # We add the external API proxy directly to this app
109
- @app.api_route("/v1/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
110
- async def gatekeeper_proxy(path: str, request: Request):
111
- target_url = f"http://127.0.0.1:{VLLM_PORT}/v1/{path}"
112
-
113
- # Strip Host and Content-Length to prevent routing loops on HF
114
- headers = {k: v for k, v in request.headers.items() if k.lower() not in ["host", "content-length"]}
115
-
116
- async with httpx.AsyncClient(timeout=300.0) as client:
117
- try:
118
- if path == "chat/completions" and request.method == "POST":
119
- body = await request.json()
120
- if not body.get("stream", False):
121
- resp = await client.post(target_url, headers=headers, json=body)
122
- if resp.status_code == 200:
123
- data = resp.json()
124
- content = data["choices"][0]["message"].get("content", "")
125
- # STRIP THINKING FROM EXTERNAL DOCLING API
126
- if "</think>" in content:
127
- data["choices"][0]["message"]["content"] = content.split("</think>")[-1].strip()
128
- return JSONResponse(content=data)
129
- return JSONResponse(status_code=resp.status_code, content=resp.json())
130
 
131
- # Fallback for models list, etc.
132
- proxy_req = client.build_request(request.method, target_url, headers=headers, content=await request.body())
133
- r = await client.send(proxy_req, stream=True)
134
- return StreamingResponse(r.aiter_raw(), status_code=r.status_code, headers=dict(r.headers))
135
- except Exception as e:
136
- return JSONResponse(status_code=503, content={"error": f"API Proxy Error: {str(e)}"})
137
 
138
  # --- STEP 4: RUN ---
139
  if __name__ == "__main__":
 
13
  import audioop_lts as audioop
14
  sys.modules["audioop"] = audioop
15
 
16
+ from fastapi import Request,FastAPI
17
  from fastapi.responses import StreamingResponse, JSONResponse
18
  import uvicorn
19
  import gradio as gr
 
47
  os.environ["VLLM_PID"] = "running"
48
 
49
  start_vllm()
50
+ # --- STEP 2: FASTAPI PROXY (API) ---
51
+ app = FastAPI()
52
 
53
+ # We add the external API proxy directly to this app
54
+ @app.api_route("/v1/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
55
+ async def gatekeeper_proxy(path: str, request: Request):
56
+ target_url = f"http://127.0.0.1:{VLLM_PORT}/v1/{path}"
57
+
58
+ # Strip Host and Content-Length to prevent routing loops on HF
59
+ headers = {k: v for k, v in request.headers.items() if k.lower() not in ["host", "content-length"]}
60
+
61
+ async with httpx.AsyncClient(timeout=300.0) as client:
62
+ try:
63
+ if path == "chat/completions" and request.method == "POST":
64
+ body = await request.json()
65
+ if not body.get("stream", False):
66
+ resp = await client.post(target_url, headers=headers, json=body)
67
+ if resp.status_code == 200:
68
+ data = resp.json()
69
+ content = data["choices"][0]["message"].get("content", "")
70
+ # STRIP THINKING FROM EXTERNAL DOCLING API
71
+ if "</think>" in content:
72
+ data["choices"][0]["message"]["content"] = content.split("</think>")[-1].strip()
73
+ return JSONResponse(content=data)
74
+ return JSONResponse(status_code=resp.status_code, content=resp.json())
75
+
76
+ # Fallback for models list, etc.
77
+ proxy_req = client.build_request(request.method, target_url, headers=headers, content=await request.body())
78
+ r = await client.send(proxy_req, stream=True)
79
+ return StreamingResponse(r.aiter_raw(), status_code=r.status_code, headers=dict(r.headers))
80
+ except Exception as e:
81
+ return JSONResponse(status_code=503, content={"error": f"API Proxy Error: {str(e)}"})
82
  # --- STEP 2: UI LOGIC ---
83
  def run_ui_test(image, prompt):
84
  if image is None: return "⚠️ Please upload an image."
 
128
  # We enable the queue for long tasks
129
  # 1. FIX ATTRIBUTE ERROR: Patch missing attributes onto the demo object
130
  demo.max_file_size = 100 * 1024 * 1024 # 100MB
131
+ demo.proxy_url = None
132
+ demo.root_path = ""
133
  demo.queue()
134
 
135
  # We get the FastAPI instance from Gradio
136
+ # app = demo.app
137
  # 3. Mount Gradio to FastAPI
138
  # Using path="" and assigning to the app ensures assets are at the root
139
  app = gr.mount_gradio_app(app, demo, path="/")
140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
 
 
 
 
 
 
142
 
143
  # --- STEP 4: RUN ---
144
  if __name__ == "__main__":