polats committed on
Commit
22a51b2
·
1 Parent(s): bfdbea8

Add MiniCPM5 text generation option

Browse files
Files changed (2) hide show
  1. app.py +32 -3
  2. web/engineServer.js +1 -0
app.py CHANGED
@@ -264,6 +264,7 @@ _DASHSCOPE_URL = _DASHSCOPE_BASE + "/api/v1/services/audio/tts/customization"
264
  TTS_MODE = os.environ.get("TINY_TTS_MODE", "").strip().lower()
265
  VOXCPM_SPACE = os.environ.get("TINY_VOXCPM_SPACE", "").strip()
266
  TINY_AYA_SPACE = os.environ.get("TINY_AYA_SPACE", "").strip()
 
267
  _local_tts = None # VoiceDesign model
268
  _local_clone = None # Base model (voice clone) — lazy, only if a clone is requested
269
  _local_tts_lock = threading.Lock()
@@ -467,9 +468,22 @@ def _tiny_aya_generate(system, user, max_tokens, temperature):
467
  return str(result or "")
468
 
469
 
470
- def _tiny_aya_stream(system, user, max_tokens, temperature):
471
  from gradio_client import Client
472
- client = Client(TINY_AYA_SPACE, token=HF_TOKEN or None)
 
 
 
 
 
 
 
 
 
 
 
 
 
473
  try:
474
  job = client.submit(
475
  system or "",
@@ -486,11 +500,19 @@ def _tiny_aya_stream(system, user, max_tokens, temperature):
486
  yield text[len(prev):]
487
  prev = text
488
  except Exception:
489
- text = _tiny_aya_generate(system, user, max_tokens, temperature)
490
  if text:
491
  yield text
492
 
493
 
 
 
 
 
 
 
 
 
494
  @fastapi_app.post("/voxcpm-tts")
495
  async def voxcpm_tts(request: Request):
496
  body = await request.json()
@@ -790,6 +812,13 @@ async def text_generate_stream(request: Request):
790
  if stop.is_set():
791
  break
792
  loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
 
 
 
 
 
 
 
793
  else:
794
  for chunk in llm.stream_chat(
795
  system,
 
264
  TTS_MODE = os.environ.get("TINY_TTS_MODE", "").strip().lower()
265
  VOXCPM_SPACE = os.environ.get("TINY_VOXCPM_SPACE", "").strip()
266
  TINY_AYA_SPACE = os.environ.get("TINY_AYA_SPACE", "").strip()
267
+ MINICPM5_SPACE = os.environ.get("TINY_MINICPM5_SPACE", "").strip()
268
  _local_tts = None # VoiceDesign model
269
  _local_clone = None # Base model (voice clone) — lazy, only if a clone is requested
270
  _local_tts_lock = threading.Lock()
 
468
  return str(result or "")
469
 
470
 
471
+ def _space_text_generate(space, system, user, max_tokens, temperature):
472
  from gradio_client import Client
473
+ client = Client(space, token=HF_TOKEN or None)
474
+ result = client.predict(
475
+ system or "",
476
+ user or "",
477
+ int(max_tokens or 400),
478
+ float(temperature if temperature is not None else 0.8),
479
+ api_name="/generate",
480
+ )
481
+ return str(result or "")
482
+
483
+
484
+ def _space_text_stream(space, system, user, max_tokens, temperature):
485
+ from gradio_client import Client
486
+ client = Client(space, token=HF_TOKEN or None)
487
  try:
488
  job = client.submit(
489
  system or "",
 
500
  yield text[len(prev):]
501
  prev = text
502
  except Exception:
503
+ text = _space_text_generate(space, system, user, max_tokens, temperature)
504
  if text:
505
  yield text
506
 
507
 
508
+ def _tiny_aya_stream(system, user, max_tokens, temperature):
509
+ yield from _space_text_stream(TINY_AYA_SPACE, system, user, max_tokens, temperature)
510
+
511
+
512
+ def _minicpm5_stream(system, user, max_tokens, temperature):
513
+ yield from _space_text_stream(MINICPM5_SPACE, system, user, max_tokens, temperature)
514
+
515
+
516
  @fastapi_app.post("/voxcpm-tts")
517
  async def voxcpm_tts(request: Request):
518
  body = await request.json()
 
812
  if stop.is_set():
813
  break
814
  loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
815
+ elif model == "minicpm5-1b-zerogpu":
816
+ if not MINICPM5_SPACE:
817
+ raise llm.LlmUnavailable("TINY_MINICPM5_SPACE not set")
818
+ for chunk in _minicpm5_stream(system, user, max_tokens, temperature):
819
+ if stop.is_set():
820
+ break
821
+ loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
822
  else:
823
  for chunk in llm.stream_chat(
824
  system,
web/engineServer.js CHANGED
@@ -6,6 +6,7 @@ import { statsTracker } from '/web/genStats.js'
6
  const MODELS = [
7
  { id: 'server-local', label: 'Configured server model', params: 'local/remote', note: 'uses TINY_LLM_* on the Space or local app' },
8
  { id: 'tiny-aya-global-zerogpu', label: 'Tiny Aya Global 3.35B', params: '3.35B', note: 'ZeroGPU sidecar; multilingual' },
 
9
  ]
10
  const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]
11
 
 
6
  const MODELS = [
7
  { id: 'server-local', label: 'Configured server model', params: 'local/remote', note: 'uses TINY_LLM_* on the Space or local app' },
8
  { id: 'tiny-aya-global-zerogpu', label: 'Tiny Aya Global 3.35B', params: '3.35B', note: 'ZeroGPU sidecar; multilingual' },
9
+ { id: 'minicpm5-1b-zerogpu', label: 'MiniCPM5 1B', params: '1B', note: 'ZeroGPU sidecar; efficient MiniCPM5 text model' },
10
  ]
11
  const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]
12