Spaces:
Running
Running
Add MiniCPM5 text generation option
Browse files
- app.py +32 -3
- web/engineServer.js +1 -0
app.py
CHANGED
|
@@ -264,6 +264,7 @@ _DASHSCOPE_URL = _DASHSCOPE_BASE + "/api/v1/services/audio/tts/customization"
|
|
| 264 |
TTS_MODE = os.environ.get("TINY_TTS_MODE", "").strip().lower()
|
| 265 |
VOXCPM_SPACE = os.environ.get("TINY_VOXCPM_SPACE", "").strip()
|
| 266 |
TINY_AYA_SPACE = os.environ.get("TINY_AYA_SPACE", "").strip()
|
|
|
|
| 267 |
_local_tts = None # VoiceDesign model
|
| 268 |
_local_clone = None # Base model (voice clone) — lazy, only if a clone is requested
|
| 269 |
_local_tts_lock = threading.Lock()
|
|
@@ -467,9 +468,22 @@ def _tiny_aya_generate(system, user, max_tokens, temperature):
|
|
| 467 |
return str(result or "")
|
| 468 |
|
| 469 |
|
| 470 |
-
def _tiny_aya_stream(system, user, max_tokens, temperature):
|
| 471 |
from gradio_client import Client
|
| 472 |
-
client = Client(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 473 |
try:
|
| 474 |
job = client.submit(
|
| 475 |
system or "",
|
|
@@ -486,11 +500,19 @@ def _tiny_aya_stream(system, user, max_tokens, temperature):
|
|
| 486 |
yield text[len(prev):]
|
| 487 |
prev = text
|
| 488 |
except Exception:
|
| 489 |
-
text =
|
| 490 |
if text:
|
| 491 |
yield text
|
| 492 |
|
| 493 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 494 |
@fastapi_app.post("/voxcpm-tts")
|
| 495 |
async def voxcpm_tts(request: Request):
|
| 496 |
body = await request.json()
|
|
@@ -790,6 +812,13 @@ async def text_generate_stream(request: Request):
|
|
| 790 |
if stop.is_set():
|
| 791 |
break
|
| 792 |
loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 793 |
else:
|
| 794 |
for chunk in llm.stream_chat(
|
| 795 |
system,
|
|
|
|
| 264 |
TTS_MODE = os.environ.get("TINY_TTS_MODE", "").strip().lower()
|
| 265 |
VOXCPM_SPACE = os.environ.get("TINY_VOXCPM_SPACE", "").strip()
|
| 266 |
TINY_AYA_SPACE = os.environ.get("TINY_AYA_SPACE", "").strip()
|
| 267 |
+
MINICPM5_SPACE = os.environ.get("TINY_MINICPM5_SPACE", "").strip()
|
| 268 |
_local_tts = None # VoiceDesign model
|
| 269 |
_local_clone = None # Base model (voice clone) — lazy, only if a clone is requested
|
| 270 |
_local_tts_lock = threading.Lock()
|
|
|
|
| 468 |
return str(result or "")
|
| 469 |
|
| 470 |
|
| 471 |
+
def _space_text_generate(space, system, user, max_tokens, temperature):
|
| 472 |
from gradio_client import Client
|
| 473 |
+
client = Client(space, token=HF_TOKEN or None)
|
| 474 |
+
result = client.predict(
|
| 475 |
+
system or "",
|
| 476 |
+
user or "",
|
| 477 |
+
int(max_tokens or 400),
|
| 478 |
+
float(temperature if temperature is not None else 0.8),
|
| 479 |
+
api_name="/generate",
|
| 480 |
+
)
|
| 481 |
+
return str(result or "")
|
| 482 |
+
|
| 483 |
+
|
| 484 |
+
def _space_text_stream(space, system, user, max_tokens, temperature):
|
| 485 |
+
from gradio_client import Client
|
| 486 |
+
client = Client(space, token=HF_TOKEN or None)
|
| 487 |
try:
|
| 488 |
job = client.submit(
|
| 489 |
system or "",
|
|
|
|
| 500 |
yield text[len(prev):]
|
| 501 |
prev = text
|
| 502 |
except Exception:
|
| 503 |
+
text = _space_text_generate(space, system, user, max_tokens, temperature)
|
| 504 |
if text:
|
| 505 |
yield text
|
| 506 |
|
| 507 |
|
| 508 |
+
def _tiny_aya_stream(system, user, max_tokens, temperature):
|
| 509 |
+
yield from _space_text_stream(TINY_AYA_SPACE, system, user, max_tokens, temperature)
|
| 510 |
+
|
| 511 |
+
|
| 512 |
+
def _minicpm5_stream(system, user, max_tokens, temperature):
|
| 513 |
+
yield from _space_text_stream(MINICPM5_SPACE, system, user, max_tokens, temperature)
|
| 514 |
+
|
| 515 |
+
|
| 516 |
@fastapi_app.post("/voxcpm-tts")
|
| 517 |
async def voxcpm_tts(request: Request):
|
| 518 |
body = await request.json()
|
|
|
|
| 812 |
if stop.is_set():
|
| 813 |
break
|
| 814 |
loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
|
| 815 |
+
elif model == "minicpm5-1b-zerogpu":
|
| 816 |
+
if not MINICPM5_SPACE:
|
| 817 |
+
raise llm.LlmUnavailable("TINY_MINICPM5_SPACE not set")
|
| 818 |
+
for chunk in _minicpm5_stream(system, user, max_tokens, temperature):
|
| 819 |
+
if stop.is_set():
|
| 820 |
+
break
|
| 821 |
+
loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
|
| 822 |
else:
|
| 823 |
for chunk in llm.stream_chat(
|
| 824 |
system,
|
web/engineServer.js
CHANGED
|
@@ -6,6 +6,7 @@ import { statsTracker } from '/web/genStats.js'
|
|
| 6 |
const MODELS = [
|
| 7 |
{ id: 'server-local', label: 'Configured server model', params: 'local/remote', note: 'uses TINY_LLM_* on the Space or local app' },
|
| 8 |
{ id: 'tiny-aya-global-zerogpu', label: 'Tiny Aya Global 3.35B', params: '3.35B', note: 'ZeroGPU sidecar; multilingual' },
|
|
|
|
| 9 |
]
|
| 10 |
const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]
|
| 11 |
|
|
|
|
| 6 |
const MODELS = [
|
| 7 |
{ id: 'server-local', label: 'Configured server model', params: 'local/remote', note: 'uses TINY_LLM_* on the Space or local app' },
|
| 8 |
{ id: 'tiny-aya-global-zerogpu', label: 'Tiny Aya Global 3.35B', params: '3.35B', note: 'ZeroGPU sidecar; multilingual' },
|
| 9 |
+
{ id: 'minicpm5-1b-zerogpu', label: 'MiniCPM5 1B', params: '1B', note: 'ZeroGPU sidecar; efficient MiniCPM5 text model' },
|
| 10 |
]
|
| 11 |
const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]
|
| 12 |
|