likhonhfai committed on
Commit
e8efab0
·
verified ·
1 Parent(s): cb655e6

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +21 -16
app.py CHANGED
@@ -2,18 +2,30 @@ import os
2
  import subprocess
3
  import time
4
  import requests
5
- from fastapi import FastAPI, HTTPException, Request
 
6
  from pydantic import BaseModel
7
  from typing import List, Optional, Any
8
  import torch
9
 
10
- app = FastAPI(title="Fara-7B SGLang API")
11
 
12
  # Configuration
13
  MODEL_ID = "Qwen/Qwen3-VL-8B-Thinking"
14
  SGLANG_PORT = 30000
15
  SGLANG_HOST = "127.0.0.1"
16
  SGLANG_URL = f"http://{SGLANG_HOST}:{SGLANG_PORT}"
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  # Global process for SGLang server
19
  sglang_process = None
@@ -23,24 +35,20 @@ def start_sglang():
23
  if sglang_process is None:
24
  print(f"Starting SGLang server for {MODEL_ID}...")
25
 
26
- # Command to start SGLang server
27
- # Using --chat-template qwen2-vl as Fara-7B is based on Qwen2.5-VL
28
  cmd = [
29
  "python3", "-m", "sglang.launch_server",
30
  "--model-path", MODEL_ID,
31
  "--host", SGLANG_HOST,
32
  "--port", str(SGLANG_PORT),
33
- "--chat-template", "qwen2-vl", # Qwen3-VL often works with qwen2-vl template or is auto-detected
34
  "--trust-remote-code"
35
  ]
36
 
37
- # Check GPU availability for tensor parallel
38
  if torch.cuda.device_count() > 1:
39
  cmd.extend(["--tp", str(torch.cuda.device_count())])
40
 
41
  sglang_process = subprocess.Popen(cmd)
42
 
43
- # Wait for server to be ready
44
  max_retries = 60
45
  for i in range(max_retries):
46
  try:
@@ -57,7 +65,6 @@ def start_sglang():
57
 
58
  @app.on_event("startup")
59
  async def startup_event():
60
- # Start SGLang in the background
61
  import threading
62
  threading.Thread(target=start_sglang, daemon=True).start()
63
 
@@ -78,7 +85,7 @@ class MessageRequest(BaseModel):
78
 
79
  @app.get("/")
80
  async def root():
81
- return {"message": "Fara-7B SGLang API is running. Use /v1/responses or /v1/messages"}
82
 
83
  @app.get("/health")
84
  async def health():
@@ -91,9 +98,8 @@ async def health():
91
  return {"status": "starting", "backend": "sglang"}
92
 
93
  @app.post("/v1/responses")
94
- async def generate_response(request: ResponseRequest):
95
  try:
96
- # Map /v1/responses to SGLang's completions or chat completions
97
  payload = {
98
  "model": MODEL_ID,
99
  "prompt": request.prompt,
@@ -108,9 +114,8 @@ async def generate_response(request: ResponseRequest):
108
  raise HTTPException(status_code=500, detail=str(e))
109
 
110
  @app.post("/v1/messages")
111
- async def generate_message(request: MessageRequest):
112
  try:
113
- # Map /v1/messages to SGLang's chat completions
114
  payload = {
115
  "model": MODEL_ID,
116
  "messages": [m.dict() for m in request.messages],
@@ -124,12 +129,12 @@ async def generate_message(request: MessageRequest):
124
  except Exception as e:
125
  raise HTTPException(status_code=500, detail=str(e))
126
 
127
- # Proxy other OpenAI compatible requests to SGLang if needed
128
  @app.api_route("/v1/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
129
- async def proxy_openai(path: str, request: Request):
130
  url = f"{SGLANG_URL}/v1/{path}"
131
  method = request.method
132
- headers = {k: v for k, v in request.headers.items() if k.lower() != "host"}
133
  body = await request.body()
134
 
135
  try:
 
2
  import subprocess
3
  import time
4
  import requests
5
+ from fastapi import FastAPI, HTTPException, Request, Depends
6
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
7
  from pydantic import BaseModel
8
  from typing import List, Optional, Any
9
  import torch
10
 
11
+ app = FastAPI(title="Qwen3-VL SGLang API with Auth")
12
 
13
  # Configuration
14
  MODEL_ID = "Qwen/Qwen3-VL-8B-Thinking"
15
  SGLANG_PORT = 30000
16
  SGLANG_HOST = "127.0.0.1"
17
  SGLANG_URL = f"http://{SGLANG_HOST}:{SGLANG_PORT}"
18
+ API_KEY = "sk-sheikh545466"
19
+
20
+ security = HTTPBearer()
21
+
22
+ def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
23
+ if credentials.credentials != API_KEY:
24
+ raise HTTPException(
25
+ status_code=403,
26
+ detail="Invalid or missing API Key"
27
+ )
28
+ return credentials.credentials
29
 
30
  # Global process for SGLang server
31
  sglang_process = None
 
35
  if sglang_process is None:
36
  print(f"Starting SGLang server for {MODEL_ID}...")
37
 
 
 
38
  cmd = [
39
  "python3", "-m", "sglang.launch_server",
40
  "--model-path", MODEL_ID,
41
  "--host", SGLANG_HOST,
42
  "--port", str(SGLANG_PORT),
43
+ "--chat-template", "qwen2-vl",
44
  "--trust-remote-code"
45
  ]
46
 
 
47
  if torch.cuda.device_count() > 1:
48
  cmd.extend(["--tp", str(torch.cuda.device_count())])
49
 
50
  sglang_process = subprocess.Popen(cmd)
51
 
 
52
  max_retries = 60
53
  for i in range(max_retries):
54
  try:
 
65
 
66
  @app.on_event("startup")
67
  async def startup_event():
 
68
  import threading
69
  threading.Thread(target=start_sglang, daemon=True).start()
70
 
 
85
 
86
  @app.get("/")
87
  async def root():
88
+ return {"message": "Qwen3-VL SGLang API is running. Auth required for /v1/ endpoints."}
89
 
90
  @app.get("/health")
91
  async def health():
 
98
  return {"status": "starting", "backend": "sglang"}
99
 
100
  @app.post("/v1/responses")
101
+ async def generate_response(request: ResponseRequest, token: str = Depends(verify_token)):
102
  try:
 
103
  payload = {
104
  "model": MODEL_ID,
105
  "prompt": request.prompt,
 
114
  raise HTTPException(status_code=500, detail=str(e))
115
 
116
  @app.post("/v1/messages")
117
+ async def generate_message(request: MessageRequest, token: str = Depends(verify_token)):
118
  try:
 
119
  payload = {
120
  "model": MODEL_ID,
121
  "messages": [m.dict() for m in request.messages],
 
129
  except Exception as e:
130
  raise HTTPException(status_code=500, detail=str(e))
131
 
132
+ # Proxy other OpenAI compatible requests to SGLang
133
  @app.api_route("/v1/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
134
+ async def proxy_openai(path: str, request: Request, token: str = Depends(verify_token)):
135
  url = f"{SGLANG_URL}/v1/{path}"
136
  method = request.method
137
+ headers = {k: v for k, v in request.headers.items() if k.lower() not in ["host", "authorization"]}
138
  body = await request.body()
139
 
140
  try: