Spaces:

build-small-hackathon
/

tiny-army

Running

App Files Files Community

polats committed 3 days ago

Commit

22a51b2

1 Parent(s): bfdbea8

Add MiniCPM5 text generation option

Browse files

Files changed (2) hide show

app.py +32 -3
web/engineServer.js +1 -0

app.py CHANGED Viewed

@@ -264,6 +264,7 @@ _DASHSCOPE_URL = _DASHSCOPE_BASE + "/api/v1/services/audio/tts/customization"
 TTS_MODE = os.environ.get("TINY_TTS_MODE", "").strip().lower()
 VOXCPM_SPACE = os.environ.get("TINY_VOXCPM_SPACE", "").strip()
 TINY_AYA_SPACE = os.environ.get("TINY_AYA_SPACE", "").strip()
 _local_tts = None       # VoiceDesign model
 _local_clone = None     # Base model (voice clone) — lazy, only if a clone is requested
 _local_tts_lock = threading.Lock()
@@ -467,9 +468,22 @@ def _tiny_aya_generate(system, user, max_tokens, temperature):
     return str(result or "")
-def _tiny_aya_stream(system, user, max_tokens, temperature):
     from gradio_client import Client
-    client = Client(TINY_AYA_SPACE, token=HF_TOKEN or None)
     try:
         job = client.submit(
             system or "",
@@ -486,11 +500,19 @@ def _tiny_aya_stream(system, user, max_tokens, temperature):
                 yield text[len(prev):]
             prev = text
     except Exception:
-        text = _tiny_aya_generate(system, user, max_tokens, temperature)
         if text:
             yield text
 @fastapi_app.post("/voxcpm-tts")
 async def voxcpm_tts(request: Request):
     body = await request.json()
@@ -790,6 +812,13 @@ async def text_generate_stream(request: Request):
                         if stop.is_set():
                             break
                         loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
                 else:
                     for chunk in llm.stream_chat(
                         system,

 TTS_MODE = os.environ.get("TINY_TTS_MODE", "").strip().lower()
 VOXCPM_SPACE = os.environ.get("TINY_VOXCPM_SPACE", "").strip()
 TINY_AYA_SPACE = os.environ.get("TINY_AYA_SPACE", "").strip()
+MINICPM5_SPACE = os.environ.get("TINY_MINICPM5_SPACE", "").strip()
 _local_tts = None       # VoiceDesign model
 _local_clone = None     # Base model (voice clone) — lazy, only if a clone is requested
 _local_tts_lock = threading.Lock()
     return str(result or "")
+def _space_text_generate(space, system, user, max_tokens, temperature):
     from gradio_client import Client
+    client = Client(space, token=HF_TOKEN or None)
+    result = client.predict(
+        system or "",
+        user or "",
+        int(max_tokens or 400),
+        float(temperature if temperature is not None else 0.8),
+        api_name="/generate",
+    )
+    return str(result or "")
+def _space_text_stream(space, system, user, max_tokens, temperature):
+    from gradio_client import Client
+    client = Client(space, token=HF_TOKEN or None)
     try:
         job = client.submit(
             system or "",
                 yield text[len(prev):]
             prev = text
     except Exception:
+        text = _space_text_generate(space, system, user, max_tokens, temperature)
         if text:
             yield text
+def _tiny_aya_stream(system, user, max_tokens, temperature):
+    yield from _space_text_stream(TINY_AYA_SPACE, system, user, max_tokens, temperature)
+def _minicpm5_stream(system, user, max_tokens, temperature):
+    yield from _space_text_stream(MINICPM5_SPACE, system, user, max_tokens, temperature)
 @fastapi_app.post("/voxcpm-tts")
 async def voxcpm_tts(request: Request):
     body = await request.json()
                         if stop.is_set():
                             break
                         loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
+                elif model == "minicpm5-1b-zerogpu":
+                    if not MINICPM5_SPACE:
+                        raise llm.LlmUnavailable("TINY_MINICPM5_SPACE not set")
+                    for chunk in _minicpm5_stream(system, user, max_tokens, temperature):
+                        if stop.is_set():
+                            break
+                        loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
                 else:
                     for chunk in llm.stream_chat(
                         system,

web/engineServer.js CHANGED Viewed

@@ -6,6 +6,7 @@ import { statsTracker } from '/web/genStats.js'
 const MODELS = [
   { id: 'server-local', label: 'Configured server model', params: 'local/remote', note: 'uses TINY_LLM_* on the Space or local app' },
   { id: 'tiny-aya-global-zerogpu', label: 'Tiny Aya Global 3.35B', params: '3.35B', note: 'ZeroGPU sidecar; multilingual' },
 ]
 const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]

 const MODELS = [
   { id: 'server-local', label: 'Configured server model', params: 'local/remote', note: 'uses TINY_LLM_* on the Space or local app' },
   { id: 'tiny-aya-global-zerogpu', label: 'Tiny Aya Global 3.35B', params: '3.35B', note: 'ZeroGPU sidecar; multilingual' },
+  { id: 'minicpm5-1b-zerogpu', label: 'MiniCPM5 1B', params: '1B', note: 'ZeroGPU sidecar; efficient MiniCPM5 text model' },
 ]
 const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]