Spaces:
Build error
Build error
Upload folder using huggingface_hub
Browse files
app.py
CHANGED
|
@@ -2,18 +2,30 @@ import os
|
|
| 2 |
import subprocess
|
| 3 |
import time
|
| 4 |
import requests
|
| 5 |
-
from fastapi import FastAPI, HTTPException, Request
|
|
|
|
| 6 |
from pydantic import BaseModel
|
| 7 |
from typing import List, Optional, Any
|
| 8 |
import torch
|
| 9 |
|
| 10 |
-
app = FastAPI(title="
|
| 11 |
|
| 12 |
# Configuration
|
| 13 |
MODEL_ID = "Qwen/Qwen3-VL-8B-Thinking"
|
| 14 |
SGLANG_PORT = 30000
|
| 15 |
SGLANG_HOST = "127.0.0.1"
|
| 16 |
SGLANG_URL = f"http://{SGLANG_HOST}:{SGLANG_PORT}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
# Global process for SGLang server
|
| 19 |
sglang_process = None
|
|
@@ -23,24 +35,20 @@ def start_sglang():
|
|
| 23 |
if sglang_process is None:
|
| 24 |
print(f"Starting SGLang server for {MODEL_ID}...")
|
| 25 |
|
| 26 |
-
# Command to start SGLang server
|
| 27 |
-
# Using --chat-template qwen2-vl — NOTE(review): this comment previously claimed the model was Fara-7B, but MODEL_ID is Qwen/Qwen3-VL-8B-Thinking; confirm the qwen2-vl template is appropriate for it
|
| 28 |
cmd = [
|
| 29 |
"python3", "-m", "sglang.launch_server",
|
| 30 |
"--model-path", MODEL_ID,
|
| 31 |
"--host", SGLANG_HOST,
|
| 32 |
"--port", str(SGLANG_PORT),
|
| 33 |
-
"--chat-template", "qwen2-vl",
|
| 34 |
"--trust-remote-code"
|
| 35 |
]
|
| 36 |
|
| 37 |
-
# Check GPU availability for tensor parallel
|
| 38 |
if torch.cuda.device_count() > 1:
|
| 39 |
cmd.extend(["--tp", str(torch.cuda.device_count())])
|
| 40 |
|
| 41 |
sglang_process = subprocess.Popen(cmd)
|
| 42 |
|
| 43 |
-
# Wait for server to be ready
|
| 44 |
max_retries = 60
|
| 45 |
for i in range(max_retries):
|
| 46 |
try:
|
|
@@ -57,7 +65,6 @@ def start_sglang():
|
|
| 57 |
|
| 58 |
@app.on_event("startup")
async def startup_event():
    """On FastAPI startup, launch the SGLang server without blocking app boot.

    start_sglang blocks while it polls the server for readiness, so it runs in
    a background thread; daemon=True lets the process exit without joining it.
    """
    # Start SGLang in the background
    import threading
    threading.Thread(target=start_sglang, daemon=True).start()
|
| 63 |
|
|
@@ -78,7 +85,7 @@ class MessageRequest(BaseModel):
|
|
| 78 |
|
| 79 |
@app.get("/")
|
| 80 |
async def root():
|
| 81 |
-
return {"message": "
|
| 82 |
|
| 83 |
@app.get("/health")
|
| 84 |
async def health():
|
|
@@ -91,9 +98,8 @@ async def health():
|
|
| 91 |
return {"status": "starting", "backend": "sglang"}
|
| 92 |
|
| 93 |
@app.post("/v1/responses")
|
| 94 |
-
async def generate_response(request: ResponseRequest):
|
| 95 |
try:
|
| 96 |
-
# Map /v1/responses to SGLang's completions or chat completions
|
| 97 |
payload = {
|
| 98 |
"model": MODEL_ID,
|
| 99 |
"prompt": request.prompt,
|
|
@@ -108,9 +114,8 @@ async def generate_response(request: ResponseRequest):
|
|
| 108 |
raise HTTPException(status_code=500, detail=str(e))
|
| 109 |
|
| 110 |
@app.post("/v1/messages")
|
| 111 |
-
async def generate_message(request: MessageRequest):
|
| 112 |
try:
|
| 113 |
-
# Map /v1/messages to SGLang's chat completions
|
| 114 |
payload = {
|
| 115 |
"model": MODEL_ID,
|
| 116 |
"messages": [m.dict() for m in request.messages],
|
|
@@ -124,12 +129,12 @@ async def generate_message(request: MessageRequest):
|
|
| 124 |
except Exception as e:
|
| 125 |
raise HTTPException(status_code=500, detail=str(e))
|
| 126 |
|
| 127 |
-
# Proxy other OpenAI compatible requests to SGLang
|
| 128 |
@app.api_route("/v1/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
|
| 129 |
-
async def proxy_openai(path: str, request: Request):
|
| 130 |
url = f"{SGLANG_URL}/v1/{path}"
|
| 131 |
method = request.method
|
| 132 |
-
headers = {k: v for k, v in request.headers.items() if k.lower()
|
| 133 |
body = await request.body()
|
| 134 |
|
| 135 |
try:
|
|
|
|
| 2 |
import subprocess
|
| 3 |
import time
|
| 4 |
import requests
|
| 5 |
+
from fastapi import FastAPI, HTTPException, Request, Depends
|
| 6 |
+
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
| 7 |
from pydantic import BaseModel
|
| 8 |
from typing import List, Optional, Any
|
| 9 |
import torch
|
| 10 |
|
| 11 |
+
app = FastAPI(title="Qwen3-VL SGLang API with Auth")
|
| 12 |
|
| 13 |
# Configuration
|
| 14 |
MODEL_ID = "Qwen/Qwen3-VL-8B-Thinking"
|
| 15 |
SGLANG_PORT = 30000
|
| 16 |
SGLANG_HOST = "127.0.0.1"
|
| 17 |
SGLANG_URL = f"http://{SGLANG_HOST}:{SGLANG_PORT}"
|
| 18 |
+
import hmac

# Bearer token required by all /v1/ endpoints.
# SECURITY: read from the API_KEY environment variable when set; the hardcoded
# literal is kept only as a backward-compatible fallback and should not ship
# in production (rotate it and configure the env var instead).
API_KEY = os.environ.get("API_KEY", "sk-sheikh545466")

# HTTPBearer parses "Authorization: Bearer <token>" and rejects requests
# that lack the header before verify_token runs.
security = HTTPBearer()

def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """FastAPI dependency that validates the request's bearer token.

    Returns the token string on success; raises HTTP 403 on mismatch.
    Uses hmac.compare_digest for a constant-time comparison so the check
    does not leak key bytes through response timing.
    """
    if not hmac.compare_digest(credentials.credentials, API_KEY):
        raise HTTPException(
            status_code=403,
            detail="Invalid or missing API Key"
        )
    return credentials.credentials
|
| 29 |
|
| 30 |
# Global process for SGLang server
|
| 31 |
sglang_process = None
|
|
|
|
| 35 |
if sglang_process is None:
|
| 36 |
print(f"Starting SGLang server for {MODEL_ID}...")
|
| 37 |
|
|
|
|
|
|
|
| 38 |
cmd = [
|
| 39 |
"python3", "-m", "sglang.launch_server",
|
| 40 |
"--model-path", MODEL_ID,
|
| 41 |
"--host", SGLANG_HOST,
|
| 42 |
"--port", str(SGLANG_PORT),
|
| 43 |
+
"--chat-template", "qwen2-vl",
|
| 44 |
"--trust-remote-code"
|
| 45 |
]
|
| 46 |
|
|
|
|
| 47 |
if torch.cuda.device_count() > 1:
|
| 48 |
cmd.extend(["--tp", str(torch.cuda.device_count())])
|
| 49 |
|
| 50 |
sglang_process = subprocess.Popen(cmd)
|
| 51 |
|
|
|
|
| 52 |
max_retries = 60
|
| 53 |
for i in range(max_retries):
|
| 54 |
try:
|
|
|
|
| 65 |
|
| 66 |
@app.on_event("startup")
async def startup_event():
    """Kick off the SGLang server in a daemon thread so app boot is not blocked."""
    # NOTE(review): @app.on_event is deprecated in recent FastAPI releases —
    # consider migrating to a lifespan handler when this module is next touched.
    from threading import Thread

    Thread(target=start_sglang, daemon=True).start()
|
| 70 |
|
|
|
|
| 85 |
|
| 86 |
@app.get("/")
async def root():
    """Public landing endpoint; reports service status. No auth required."""
    status_message = (
        "Qwen3-VL SGLang API is running. Auth required for /v1/ endpoints."
    )
    return {"message": status_message}
|
| 89 |
|
| 90 |
@app.get("/health")
|
| 91 |
async def health():
|
|
|
|
| 98 |
return {"status": "starting", "backend": "sglang"}
|
| 99 |
|
| 100 |
@app.post("/v1/responses")
|
| 101 |
+
async def generate_response(request: ResponseRequest, token: str = Depends(verify_token)):
|
| 102 |
try:
|
|
|
|
| 103 |
payload = {
|
| 104 |
"model": MODEL_ID,
|
| 105 |
"prompt": request.prompt,
|
|
|
|
| 114 |
raise HTTPException(status_code=500, detail=str(e))
|
| 115 |
|
| 116 |
@app.post("/v1/messages")
|
| 117 |
+
async def generate_message(request: MessageRequest, token: str = Depends(verify_token)):
|
| 118 |
try:
|
|
|
|
| 119 |
payload = {
|
| 120 |
"model": MODEL_ID,
|
| 121 |
"messages": [m.dict() for m in request.messages],
|
|
|
|
| 129 |
except Exception as e:
|
| 130 |
raise HTTPException(status_code=500, detail=str(e))
|
| 131 |
|
| 132 |
+
# Proxy other OpenAI compatible requests to SGLang
|
| 133 |
@app.api_route("/v1/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
|
| 134 |
+
async def proxy_openai(path: str, request: Request, token: str = Depends(verify_token)):
|
| 135 |
url = f"{SGLANG_URL}/v1/{path}"
|
| 136 |
method = request.method
|
| 137 |
+
headers = {k: v for k, v in request.headers.items() if k.lower() not in ["host", "authorization"]}
|
| 138 |
body = await request.body()
|
| 139 |
|
| 140 |
try:
|