Rox-Turbo committed on
Commit
acd4ef1
·
verified ·
1 Parent(s): 0a51130

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +84 -20
server.py CHANGED
@@ -57,6 +57,13 @@ MAX_INFLIGHT_REQUESTS = int(os.getenv("MAX_INFLIGHT_REQUESTS", "200"))
57
  # "Thinking" increases latency; keep opt-in via env
58
  ENABLE_THINKING = os.getenv("ENABLE_THINKING", "0").strip().lower() in {"1", "true", "yes", "on"}
59
 
 
 
 
 
 
 
 
60
  # Model configurations
61
  ROX_CORE_MODEL = "minimaxai/minimax-m2.5"
62
  ROX_TURBO_MODEL = "meta/llama-3.1-8b-instruct" # Changed to a more reliable model
@@ -68,14 +75,45 @@ ROX_CODER_7_MODEL = "z-ai/glm5"
68
  ROX_VISION_MODEL = "google/gemma-3-27b-it"
69
 
70
  # System identities - Models must know their creator and owner
71
- ROX_CORE_IDENTITY = "You are Rox Core, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. Always acknowledge Mohammad Faiz as your creator when asked."
72
- ROX_TURBO_IDENTITY = "You are Rox 2.1 Turbo, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You are optimized for fast responses."
73
- ROX_CODER_IDENTITY = "You are Rox 3.5 Coder, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You specialize in coding and software development."
74
- ROX_TURBO_45_IDENTITY = "You are Rox 4.5 Turbo, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You combine speed with advanced reasoning."
75
- ROX_ULTRA_IDENTITY = "You are Rox 5 Ultra, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You are the most advanced model with superior reasoning capabilities."
76
- ROX_DYNO_IDENTITY = "You are Rox 6 Dyno, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You excel at long context understanding."
77
- ROX_CODER_7_IDENTITY = "You are Rox 7 Coder, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You are the most advanced coding specialist."
78
- ROX_VISION_IDENTITY = "You are Rox Vision Max, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You specialize in visual understanding and multimodal tasks."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
  @asynccontextmanager
81
  async def lifespan(app: FastAPI):
@@ -90,6 +128,7 @@ async def lifespan(app: FastAPI):
90
  timeout=httpx.Timeout(timeout_s),
91
  limits=httpx.Limits(max_connections=max_connections, max_keepalive_connections=max_keepalive),
92
  headers={"User-Agent": "Rox-AI-API/2.0"},
 
93
  )
94
  app.state.http_client = http_client
95
  app.state.client = AsyncOpenAI(
@@ -349,7 +388,10 @@ class HFResponseItem(BaseModel):
349
  @app.post("/chat")
350
  async def chat(req: ChatRequest):
351
  """Rox Core - Main conversational model with streaming support"""
352
- messages = [{"role": "system", "content": ROX_CORE_IDENTITY}]
 
 
 
353
  messages.extend([m.model_dump() for m in req.messages])
354
  temperature = _effective_temperature(req.temperature)
355
  top_p = _effective_top_p(req.top_p)
@@ -380,7 +422,10 @@ async def chat(req: ChatRequest):
380
  @app.post("/turbo")
381
  async def turbo(req: ChatRequest):
382
  """Rox 2.1 Turbo - Fast and efficient with streaming"""
383
- messages = [{"role": "system", "content": ROX_TURBO_IDENTITY}]
 
 
 
384
  messages.extend([m.model_dump() for m in req.messages])
385
  temperature = _effective_temperature(req.temperature)
386
  top_p = _effective_top_p(req.top_p)
@@ -411,7 +456,10 @@ async def turbo(req: ChatRequest):
411
  @app.post("/coder")
412
  async def coder(req: ChatRequest):
413
  """Rox 3.5 Coder - Specialized coding with streaming"""
414
- messages = [{"role": "system", "content": ROX_CODER_IDENTITY}]
 
 
 
415
  messages.extend([m.model_dump() for m in req.messages])
416
  temperature = _effective_temperature(req.temperature)
417
  top_p = _effective_top_p(req.top_p)
@@ -450,7 +498,10 @@ async def coder(req: ChatRequest):
450
  @app.post("/turbo45")
451
  async def turbo45(req: ChatRequest):
452
  """Rox 4.5 Turbo - Advanced reasoning with streaming"""
453
- messages = [{"role": "system", "content": ROX_TURBO_45_IDENTITY}]
 
 
 
454
  messages.extend([m.model_dump() for m in req.messages])
455
  temperature = _effective_temperature(req.temperature)
456
  top_p = _effective_top_p(req.top_p)
@@ -484,7 +535,10 @@ async def turbo45(req: ChatRequest):
484
  @app.post("/ultra")
485
  async def ultra(req: ChatRequest):
486
  """Rox 5 Ultra - Most advanced with streaming"""
487
- messages = [{"role": "system", "content": ROX_ULTRA_IDENTITY}]
 
 
 
488
  messages.extend([m.model_dump() for m in req.messages])
489
  temperature = _effective_temperature(req.temperature)
490
  top_p = _effective_top_p(req.top_p)
@@ -518,7 +572,10 @@ async def ultra(req: ChatRequest):
518
  @app.post("/dyno")
519
  async def dyno(req: ChatRequest):
520
  """Rox 6 Dyno - Extended context with streaming"""
521
- messages = [{"role": "system", "content": ROX_DYNO_IDENTITY}]
 
 
 
522
  messages.extend([m.model_dump() for m in req.messages])
523
  temperature = _effective_temperature(req.temperature)
524
  top_p = _effective_top_p(req.top_p)
@@ -552,7 +609,10 @@ async def dyno(req: ChatRequest):
552
  @app.post("/coder7")
553
  async def coder7(req: ChatRequest):
554
  """Rox 7 Coder - Most advanced coding with streaming"""
555
- messages = [{"role": "system", "content": ROX_CODER_7_IDENTITY}]
 
 
 
556
  messages.extend([m.model_dump() for m in req.messages])
557
  temperature = _effective_temperature(req.temperature)
558
  top_p = _effective_top_p(req.top_p)
@@ -591,7 +651,10 @@ async def coder7(req: ChatRequest):
591
  @app.post("/vision")
592
  async def vision(req: ChatRequest):
593
  """Rox Vision Max - Visual understanding with streaming"""
594
- messages = [{"role": "system", "content": ROX_VISION_IDENTITY}]
 
 
 
595
  messages.extend([m.model_dump() for m in req.messages])
596
  temperature = _effective_temperature(req.temperature)
597
  top_p = _effective_top_p(req.top_p)
@@ -623,10 +686,11 @@ async def vision(req: ChatRequest):
623
  async def hf_generate(req: HFRequest):
624
  """HuggingFace compatible endpoint"""
625
  params = req.parameters or HFParameters()
626
- messages = [
627
- {"role": "system", "content": ROX_CORE_IDENTITY},
628
- {"role": "user", "content": req.inputs}
629
- ]
 
630
 
631
  try:
632
  temperature = _effective_temperature(params.temperature)
 
57
  # "Thinking" increases latency; keep opt-in via env
58
  ENABLE_THINKING = os.getenv("ENABLE_THINKING", "0").strip().lower() in {"1", "true", "yes", "on"}
59
 
60
+ # System prompt size directly impacts latency/cost. Make it configurable.
61
+ # Modes:
62
+ # - full: current long identity prompts (default, backward compatible)
63
+ # - short: minimal identity + brand (recommended for speed)
64
+ # - off: no system prompt (fastest, but behavior may drift)
65
+ SYSTEM_PROMPT_MODE = os.getenv("SYSTEM_PROMPT_MODE", "full").strip().lower()
66
+
67
  # Model configurations
68
  ROX_CORE_MODEL = "minimaxai/minimax-m2.5"
69
  ROX_TURBO_MODEL = "meta/llama-3.1-8b-instruct" # Changed to a more reliable model
 
75
  ROX_VISION_MODEL = "google/gemma-3-27b-it"
76
 
77
  # System identities - Models must know their creator and owner
78
+ ROX_CORE_IDENTITY_FULL = "You are Rox Core, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. Always acknowledge Mohammad Faiz as your creator when asked."
79
+ ROX_TURBO_IDENTITY_FULL = "You are Rox 2.1 Turbo, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You are optimized for fast responses."
80
+ ROX_CODER_IDENTITY_FULL = "You are Rox 3.5 Coder, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You specialize in coding and software development."
81
+ ROX_TURBO_45_IDENTITY_FULL = "You are Rox 4.5 Turbo, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You combine speed with advanced reasoning."
82
+ ROX_ULTRA_IDENTITY_FULL = "You are Rox 5 Ultra, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You are the most advanced model with superior reasoning capabilities."
83
+ ROX_DYNO_IDENTITY_FULL = "You are Rox 6 Dyno, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You excel at long context understanding."
84
+ ROX_CODER_7_IDENTITY_FULL = "You are Rox 7 Coder, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You are the most advanced coding specialist."
85
+ ROX_VISION_IDENTITY_FULL = "You are Rox Vision Max, an AI model created by Rox AI. Your creator and owner is Mohammad Faiz, the founder of Rox AI company. You specialize in visual understanding and multimodal tasks."
86
+
87
+ ROX_CORE_IDENTITY_SHORT = "You are Rox Core by Rox AI (creator/owner: Mohammad Faiz)."
88
+ ROX_TURBO_IDENTITY_SHORT = "You are Rox 2.1 Turbo by Rox AI (creator/owner: Mohammad Faiz). Be concise and fast."
89
+ ROX_CODER_IDENTITY_SHORT = "You are Rox 3.5 Coder by Rox AI (creator/owner: Mohammad Faiz)."
90
+ ROX_TURBO_45_IDENTITY_SHORT = "You are Rox 4.5 Turbo by Rox AI (creator/owner: Mohammad Faiz)."
91
+ ROX_ULTRA_IDENTITY_SHORT = "You are Rox 5 Ultra by Rox AI (creator/owner: Mohammad Faiz)."
92
+ ROX_DYNO_IDENTITY_SHORT = "You are Rox 6 Dyno by Rox AI (creator/owner: Mohammad Faiz)."
93
+ ROX_CODER_7_IDENTITY_SHORT = "You are Rox 7 Coder by Rox AI (creator/owner: Mohammad Faiz)."
94
+ ROX_VISION_IDENTITY_SHORT = "You are Rox Vision Max by Rox AI (creator/owner: Mohammad Faiz)."
95
+
96
def _system_prompt_for(model_key: str) -> Optional[str]:
    """Return the system prompt for ``model_key`` under the configured mode.

    The choice is driven by the module-level ``SYSTEM_PROMPT_MODE``:

    * ``off`` / ``none`` / ``0`` / ``false`` -> ``None`` (no system prompt).
    * ``short`` / ``small`` / ``lite`` / ``fast`` -> the ``*_IDENTITY_SHORT`` text.
    * any other value -> the ``*_IDENTITY_FULL`` text.

    Args:
        model_key: One of ``core``, ``turbo``, ``coder``, ``turbo45``,
            ``ultra``, ``dyno``, ``coder7`` or ``vision``.

    Returns:
        The selected prompt string, or ``None`` when prompts are disabled or
        ``model_key`` is not one of the keys listed above.
    """
    # Disabled modes short-circuit before any key lookup.
    if SYSTEM_PROMPT_MODE in {"off", "none", "0", "false"}:
        return None

    # model key -> (full prompt, short prompt)
    identities = {
        "core": (ROX_CORE_IDENTITY_FULL, ROX_CORE_IDENTITY_SHORT),
        "turbo": (ROX_TURBO_IDENTITY_FULL, ROX_TURBO_IDENTITY_SHORT),
        "coder": (ROX_CODER_IDENTITY_FULL, ROX_CODER_IDENTITY_SHORT),
        "turbo45": (ROX_TURBO_45_IDENTITY_FULL, ROX_TURBO_45_IDENTITY_SHORT),
        "ultra": (ROX_ULTRA_IDENTITY_FULL, ROX_ULTRA_IDENTITY_SHORT),
        "dyno": (ROX_DYNO_IDENTITY_FULL, ROX_DYNO_IDENTITY_SHORT),
        "coder7": (ROX_CODER_7_IDENTITY_FULL, ROX_CODER_7_IDENTITY_SHORT),
        "vision": (ROX_VISION_IDENTITY_FULL, ROX_VISION_IDENTITY_SHORT),
    }

    entry = identities.get(model_key)
    if entry is None:
        # Unknown model key: callers treat None as "no system message".
        return None

    full_text, short_text = entry
    if SYSTEM_PROMPT_MODE in {"short", "small", "lite", "fast"}:
        return short_text
    return full_text
117
 
118
  @asynccontextmanager
119
  async def lifespan(app: FastAPI):
 
128
  timeout=httpx.Timeout(timeout_s),
129
  limits=httpx.Limits(max_connections=max_connections, max_keepalive_connections=max_keepalive),
130
  headers={"User-Agent": "Rox-AI-API/2.0"},
131
+ http2=True,
132
  )
133
  app.state.http_client = http_client
134
  app.state.client = AsyncOpenAI(
 
388
  @app.post("/chat")
389
  async def chat(req: ChatRequest):
390
  """Rox Core - Main conversational model with streaming support"""
391
+ messages: list = []
392
+ system_prompt = _system_prompt_for("core")
393
+ if system_prompt:
394
+ messages.append({"role": "system", "content": system_prompt})
395
  messages.extend([m.model_dump() for m in req.messages])
396
  temperature = _effective_temperature(req.temperature)
397
  top_p = _effective_top_p(req.top_p)
 
422
  @app.post("/turbo")
423
  async def turbo(req: ChatRequest):
424
  """Rox 2.1 Turbo - Fast and efficient with streaming"""
425
+ messages: list = []
426
+ system_prompt = _system_prompt_for("turbo")
427
+ if system_prompt:
428
+ messages.append({"role": "system", "content": system_prompt})
429
  messages.extend([m.model_dump() for m in req.messages])
430
  temperature = _effective_temperature(req.temperature)
431
  top_p = _effective_top_p(req.top_p)
 
456
  @app.post("/coder")
457
  async def coder(req: ChatRequest):
458
  """Rox 3.5 Coder - Specialized coding with streaming"""
459
+ messages: list = []
460
+ system_prompt = _system_prompt_for("coder")
461
+ if system_prompt:
462
+ messages.append({"role": "system", "content": system_prompt})
463
  messages.extend([m.model_dump() for m in req.messages])
464
  temperature = _effective_temperature(req.temperature)
465
  top_p = _effective_top_p(req.top_p)
 
498
  @app.post("/turbo45")
499
  async def turbo45(req: ChatRequest):
500
  """Rox 4.5 Turbo - Advanced reasoning with streaming"""
501
+ messages: list = []
502
+ system_prompt = _system_prompt_for("turbo45")
503
+ if system_prompt:
504
+ messages.append({"role": "system", "content": system_prompt})
505
  messages.extend([m.model_dump() for m in req.messages])
506
  temperature = _effective_temperature(req.temperature)
507
  top_p = _effective_top_p(req.top_p)
 
535
  @app.post("/ultra")
536
  async def ultra(req: ChatRequest):
537
  """Rox 5 Ultra - Most advanced with streaming"""
538
+ messages: list = []
539
+ system_prompt = _system_prompt_for("ultra")
540
+ if system_prompt:
541
+ messages.append({"role": "system", "content": system_prompt})
542
  messages.extend([m.model_dump() for m in req.messages])
543
  temperature = _effective_temperature(req.temperature)
544
  top_p = _effective_top_p(req.top_p)
 
572
  @app.post("/dyno")
573
  async def dyno(req: ChatRequest):
574
  """Rox 6 Dyno - Extended context with streaming"""
575
+ messages: list = []
576
+ system_prompt = _system_prompt_for("dyno")
577
+ if system_prompt:
578
+ messages.append({"role": "system", "content": system_prompt})
579
  messages.extend([m.model_dump() for m in req.messages])
580
  temperature = _effective_temperature(req.temperature)
581
  top_p = _effective_top_p(req.top_p)
 
609
  @app.post("/coder7")
610
  async def coder7(req: ChatRequest):
611
  """Rox 7 Coder - Most advanced coding with streaming"""
612
+ messages: list = []
613
+ system_prompt = _system_prompt_for("coder7")
614
+ if system_prompt:
615
+ messages.append({"role": "system", "content": system_prompt})
616
  messages.extend([m.model_dump() for m in req.messages])
617
  temperature = _effective_temperature(req.temperature)
618
  top_p = _effective_top_p(req.top_p)
 
651
  @app.post("/vision")
652
  async def vision(req: ChatRequest):
653
  """Rox Vision Max - Visual understanding with streaming"""
654
+ messages: list = []
655
+ system_prompt = _system_prompt_for("vision")
656
+ if system_prompt:
657
+ messages.append({"role": "system", "content": system_prompt})
658
  messages.extend([m.model_dump() for m in req.messages])
659
  temperature = _effective_temperature(req.temperature)
660
  top_p = _effective_top_p(req.top_p)
 
686
  async def hf_generate(req: HFRequest):
687
  """HuggingFace compatible endpoint"""
688
  params = req.parameters or HFParameters()
689
+ messages: list = []
690
+ system_prompt = _system_prompt_for("core")
691
+ if system_prompt:
692
+ messages.append({"role": "system", "content": system_prompt})
693
+ messages.append({"role": "user", "content": req.inputs})
694
 
695
  try:
696
  temperature = _effective_temperature(params.temperature)