Spaces:

encryptd
/

ocr_vlm_thinking

Paused

App Files Files Community

encryptd committed on Apr 2

Commit

9530a76

1 Parent(s): abba137

prog update

Browse files

Files changed (1) hide show

app.py +35 -30

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ except ImportError:
     import audioop_lts as audioop
     sys.modules["audioop"] = audioop
-from fastapi import Request
 from fastapi.responses import StreamingResponse, JSONResponse
 import uvicorn
 import gradio as gr
@@ -47,7 +47,38 @@ def start_vllm():
     os.environ["VLLM_PID"] = "running"
 start_vllm()
 # --- STEP 2: UI LOGIC ---
 def run_ui_test(image, prompt):
     if image is None: return "⚠️ Please upload an image."
@@ -97,43 +128,17 @@ with gr.Blocks(title="NuMarkdown API") as demo:
 # We enable the queue for long tasks
 # 1. FIX ATTRIBUTE ERROR: Patch missing attributes onto the demo object
 demo.max_file_size = 100 * 1024 * 1024 # 100MB
 demo.queue()
 # We get the FastAPI instance from Gradio
-app = demo.app
 # 3. Mount Gradio to FastAPI
 # Using path="/" and assigning the result back to app ensures assets are served from the root
 app = gr.mount_gradio_app(app, demo, path="/")
-# We add the external API proxy directly to this app
-@app.api_route("/v1/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
-async def gatekeeper_proxy(path: str, request: Request):
-    target_url = f"http://127.0.0.1:{VLLM_PORT}/v1/{path}"
-    # Strip Host and Content-Length to prevent routing loops on HF
-    headers = {k: v for k, v in request.headers.items() if k.lower() not in ["host", "content-length"]}
-    async with httpx.AsyncClient(timeout=300.0) as client:
-        try:
-            if path == "chat/completions" and request.method == "POST":
-                body = await request.json()
-                if not body.get("stream", False):
-                    resp = await client.post(target_url, headers=headers, json=body)
-                    if resp.status_code == 200:
-                        data = resp.json()
-                        content = data["choices"][0]["message"].get("content", "")
-                        # STRIP THINKING FROM EXTERNAL DOCLING API
-                        if "</think>" in content:
-                            data["choices"][0]["message"]["content"] = content.split("</think>")[-1].strip()
-                        return JSONResponse(content=data)
-                    return JSONResponse(status_code=resp.status_code, content=resp.json())
-            # Fallback for models list, etc.
-            proxy_req = client.build_request(request.method, target_url, headers=headers, content=await request.body())
-            r = await client.send(proxy_req, stream=True)
-            return StreamingResponse(r.aiter_raw(), status_code=r.status_code, headers=dict(r.headers))
-        except Exception as e:
-            return JSONResponse(status_code=503, content={"error": f"API Proxy Error: {str(e)}"})
 # --- STEP 4: RUN ---
 if __name__ == "__main__":

     import audioop_lts as audioop
     sys.modules["audioop"] = audioop
+from fastapi import Request,FastAPI
 from fastapi.responses import StreamingResponse, JSONResponse
 import uvicorn
 import gradio as gr
     os.environ["VLLM_PID"] = "running"
 start_vllm()
+# --- STEP 2: FASTAPI PROXY (API) ---
+app = FastAPI()
+# We add the external API proxy directly to this app
+@app.api_route("/v1/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
+async def gatekeeper_proxy(path: str, request: Request):
+    target_url = f"http://127.0.0.1:{VLLM_PORT}/v1/{path}"
+    # Strip Host and Content-Length to prevent routing loops on HF
+    headers = {k: v for k, v in request.headers.items() if k.lower() not in ["host", "content-length"]}
+    async with httpx.AsyncClient(timeout=300.0) as client:
+        try:
+            if path == "chat/completions" and request.method == "POST":
+                body = await request.json()
+                if not body.get("stream", False):
+                    resp = await client.post(target_url, headers=headers, json=body)
+                    if resp.status_code == 200:
+                        data = resp.json()
+                        content = data["choices"][0]["message"].get("content", "")
+                        # STRIP THINKING FROM EXTERNAL DOCLING API
+                        if "</think>" in content:
+                            data["choices"][0]["message"]["content"] = content.split("</think>")[-1].strip()
+                        return JSONResponse(content=data)
+                    return JSONResponse(status_code=resp.status_code, content=resp.json())
+            # Fallback for models list, etc.
+            proxy_req = client.build_request(request.method, target_url, headers=headers, content=await request.body())
+            r = await client.send(proxy_req, stream=True)
+            return StreamingResponse(r.aiter_raw(), status_code=r.status_code, headers=dict(r.headers))
+        except Exception as e:
+            return JSONResponse(status_code=503, content={"error": f"API Proxy Error: {str(e)}"})
 # --- STEP 2: UI LOGIC ---
 def run_ui_test(image, prompt):
     if image is None: return "⚠️ Please upload an image."
 # We enable the queue for long tasks
 # 1. FIX ATTRIBUTE ERROR: Patch missing attributes onto the demo object
 demo.max_file_size = 100 * 1024 * 1024 # 100MB
+demo.proxy_url = None
+demo.root_path = ""
 demo.queue()
 # We get the FastAPI instance from Gradio
+# app = demo.app
 # 3. Mount Gradio to FastAPI
 # Using path="/" and assigning the result back to app ensures assets are served from the root
 app = gr.mount_gradio_app(app, demo, path="/")
 # --- STEP 4: RUN ---
 if __name__ == "__main__":