LiquidAI-LFM2.5-1.2B-Instruct

Sleeping

hadadrjt committed on Jan 13

Commit

e2e7b98

1 Parent(s): bb118d3

LFM2.5-1.2B: 2026-01-14.

* Revert "LFM2.5-1.2B: Unlock the context length limit."
This reverts commit bb118d399b262e09c415949a64f8271d0adddf12.

* Migrate to an OpenAI-Compatible API.

* Minor bug fixes.

Files changed (4) hide show

Dockerfile +1 -8
LICENSE +13 -0
app.py → src/app.py +47 -60
src/config.py +28 -0

Dockerfile CHANGED Viewed

@@ -3,17 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 #
-# Use a specific container image for the app
 FROM hadadrjt/playground:public-latest
-# Set the main working directory inside the container
 WORKDIR /app
-# Copy all files into the container
-COPY . .
-# Open the port so the app can be accessed
-EXPOSE 7860
-# Start the app
 CMD ["python", "app.py"]

 # SPDX-License-Identifier: Apache-2.0
 #
 FROM hadadrjt/playground:public-latest
 WORKDIR /app
+COPY src/* .
 CMD ["python", "app.py"]

LICENSE ADDED Viewed

	@@ -0,0 +1,13 @@

+Copyright (c) 2025 Hadad <hadad@linuxmail.org>
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.

app.py → src/app.py RENAMED Viewed

@@ -4,12 +4,15 @@
 #
 import os
-from ollama import AsyncClient
 import gradio as gr
 async def playground(
     message,
     history,
     temperature,
     repeat_penalty,
     top_k,
@@ -19,13 +22,6 @@ async def playground(
         yield []
         return
-    client = AsyncClient(
-        host=os.getenv("OLLAMA_API_BASE_URL"),
-        headers={
-            "Authorization": f"Bearer {os.getenv('OLLAMA_API_KEY')}"
-        }
-    )
     messages = []
     for item in history:
         if isinstance(item, dict) and "role" in item and "content" in item:
@@ -36,59 +32,54 @@ async def playground(
     messages.append({"role": "user", "content": message})
     response = ""
-    async for part in await client.chat(
-        model="hf.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF:Q4_K_M",
         messages=messages,
-        options={
-            "temperature": float(temperature),
             "repeat_penalty": float(repeat_penalty),
-            "top_k": int(top_k),
-            "top_p": float(top_p)
-        },
-        stream=True
-    ):
-        response += part.get("message", {}).get("content", "")
-        yield response
 with gr.Blocks(
     fill_height=True,
-    fill_width=True
 ) as app:
     with gr.Sidebar():
-        gr.HTML(
-            """
-            <h1>Ollama Inference Playground part of the
-            <a href="https://huggingface.co/spaces/hadadxyz/ai" target="_blank">
-            Demo Playground</a>, and the <a href="https://huggingface.co/umint"
-            target="_blank">UltimaX Intelligence</a> project</h1><br />
-            This space run the <b><a href=
-            "https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct"
-            target="_blank">LFM2.5 (1.2B)</a></b> model from
-            <b>LiquidAI</b>, hosted on a server using <b>Ollama</b>
-            and accessed via the <b>Ollama Python SDK</b>.<br><br>
-            Official <b>documentation</b> for using Ollama with the
-            Python SDK can be found
-            <b><a href="https://github.com/ollama/ollama-python"
-            target="_blank">here</a></b>.<br><br>
-            LFM2.5 (1.2B) runs entirely on a <b>dual-core CPU</b>.
-            Thanks to its small size, the model can
-            operate efficiently on minimal hardware.<br><br>
-            The LFM2.5 (1.2B) model can also be viewed or downloaded
-            from the official repository
-            <b><a href="https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF"
-            target="_blank">here</a></b>.<br><br>
-            <b>Like this project? You can support me by buying a
-            <a href="https://ko-fi.com/hadad" target="_blank">
-            coffee</a></b>.
-            """
-        )
         gr.Markdown("---")
         gr.Markdown("## Model Parameters")
         temperature = gr.Slider(
             minimum=0.1,
             maximum=1.0,
@@ -103,7 +94,7 @@ with gr.Blocks(
             maximum=2.0,
             value=1.05,
             step=0.1,
-            label="Repeat Penalty",
             info="Penalty for repeating tokens"
         )
         gr.Markdown("")
@@ -128,17 +119,13 @@ with gr.Blocks(
     gr.ChatInterface(
         fn=playground,
         additional_inputs=[
             temperature,
             repeat_penalty,
             top_k,
             top_p
         ],
-        chatbot=gr.Chatbot(
-            label="Ollama | LFM2.5 (1.2B)",
-            type="messages",
-            show_copy_button=True,
-            scale=1
-        ),
         type="messages",
         examples=[
             ["Please introduce yourself."],
@@ -151,6 +138,6 @@ with gr.Blocks(
     )
 app.launch(
-    server_name="0.0.0.0",
     pwa=True
 )

 #
 import os
+from config import MODEL, INFO, HOST
+from openai import AsyncOpenAI
 import gradio as gr
 async def playground(
     message,
     history,
+    num_ctx,
+    max_tokens,
     temperature,
     repeat_penalty,
     top_k,
         yield []
         return
     messages = []
     for item in history:
         if isinstance(item, dict) and "role" in item and "content" in item:
     messages.append({"role": "user", "content": message})
     response = ""
+    stream = await AsyncOpenAI(
+        base_url=os.getenv("OLLAMA_API_BASE_URL"),
+        api_key=os.getenv("OLLAMA_API_KEY")
+    ).chat.completions.create(
+        model=MODEL,
         messages=messages,
+        max_tokens=int(max_tokens),
+        temperature=float(temperature),
+        top_p=float(top_p),
+        stream=True,
+        extra_body={
+            "num_ctx": int(num_ctx),
             "repeat_penalty": float(repeat_penalty),
+            "top_k": int(top_k)
+        }
+    )
+    async for chunk in stream:
+        if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
+            response += chunk.choices[0].delta.content
+            yield response
 with gr.Blocks(
     fill_height=True,
+    fill_width=False
 ) as app:
     with gr.Sidebar():
+        gr.HTML(INFO)
         gr.Markdown("---")
         gr.Markdown("## Model Parameters")
+        num_ctx = gr.Slider(
+            minimum=512,
+            maximum=8192,
+            value=512,
+            step=128,
+            label="Context Length",
+            info="Maximum context window size (memory)"
+        )
+        gr.Markdown("")
+        max_tokens = gr.Slider(
+            minimum=512,
+            maximum=8192,
+            value=512,
+            step=128,
+            label="Max Tokens",
+            info="Maximum number of tokens to generate"
+        )
+        gr.Markdown("")
         temperature = gr.Slider(
             minimum=0.1,
             maximum=1.0,
             maximum=2.0,
             value=1.05,
             step=0.1,
+            label="Repetition Penalty",
             info="Penalty for repeating tokens"
         )
         gr.Markdown("")
     gr.ChatInterface(
         fn=playground,
         additional_inputs=[
+            num_ctx,
+            max_tokens,
             temperature,
             repeat_penalty,
             top_k,
             top_p
         ],
         type="messages",
         examples=[
             ["Please introduce yourself."],
     )
 app.launch(
+    server_name=HOST,
     pwa=True
 )

src/config.py ADDED Viewed

	@@ -0,0 +1,28 @@

+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+# ---------------------------------------------
+# | OLLAMA_API_BASE_URL | /v1 | ENV or SECRET |
+# |---------------------|-----|---------------|
+# | OLLAMA_API_KEY      |     |    SECRET     |
+# ---------------------------------------------
+MODEL = "hf.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF:Q4_K_M"
+INFO = """
+<h1>Ollama Inference Playground part of the <a href="https://huggingface.co/spaces/hadadxyz/ai" target="_blank">Demo Playground</a>, and the <a href="https://huggingface.co/umint" target="_blank">UltimaX Intelligence</a> project</h1><br>
+This space run the <b><a href="https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct" target="_blank">LFM2.5 (1.2B)</a></b> model from <b>LiquidAI</b>, hosted on a server using <b>Ollama</b> and accessed via the <b>OpenAI Python SDK</b>.<br><br>
+Official <b>documentation</b> for using Ollama with the OpenAI-Compatible API can be found <b><a href="https://docs.ollama.com/api/openai-compatibility" target="_blank">here</a></b>.<br><br>
+LFM2.5 (1.2B) runs entirely on a <b>dual-core CPU</b>. Thanks to its small size, the model can operate efficiently on minimal hardware.<br><br>
+The LFM2.5 (1.2B) model can also be viewed or downloaded from the official repository <b><a href="https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF" target="_blank">here</a></b>.<br><br>
+<b>Like this project? You can support me by buying a <a href="https://ko-fi.com/hadad" target="_blank">coffee</a></b>.
+"""
+HOST = "0.0.0.0"