hadadrjt committed on
Commit
e2e7b98
·
1 Parent(s): bb118d3

LFM2.5-1.2B: 2026-01-14.

Browse files

* Revert "LFM2.5-1.2B: Unlock the context length limit."
This reverts commit bb118d399b262e09c415949a64f8271d0adddf12.

* Migrate to an OpenAI-Compatible API.

* Minor bug fixes.

Files changed (4) hide show
  1. Dockerfile +1 -8
  2. LICENSE +13 -0
  3. app.py → src/app.py +47 -60
  4. src/config.py +28 -0
Dockerfile CHANGED
@@ -3,17 +3,10 @@
3
  # SPDX-License-Identifier: Apache-2.0
4
  #
5
 
6
- # Use a specific container image for the app
7
  FROM hadadrjt/playground:public-latest
8
 
9
- # Set the main working directory inside the container
10
  WORKDIR /app
11
 
12
- # Copy all files into the container
13
- COPY . .
14
 
15
- # Open the port so the app can be accessed
16
- EXPOSE 7860
17
-
18
- # Start the app
19
  CMD ["python", "app.py"]
 
3
  # SPDX-License-Identifier: Apache-2.0
4
  #
5
 
 
6
  FROM hadadrjt/playground:public-latest
7
 
 
8
  WORKDIR /app
9
 
10
+ COPY src/* .
 
11
 
 
 
 
 
12
  CMD ["python", "app.py"]
LICENSE ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) 2025 Hadad <hadad@linuxmail.org>
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
app.py → src/app.py RENAMED
@@ -4,12 +4,15 @@
4
  #
5
 
6
  import os
7
- from ollama import AsyncClient
 
8
  import gradio as gr
9
 
10
  async def playground(
11
  message,
12
  history,
 
 
13
  temperature,
14
  repeat_penalty,
15
  top_k,
@@ -19,13 +22,6 @@ async def playground(
19
  yield []
20
  return
21
 
22
- client = AsyncClient(
23
- host=os.getenv("OLLAMA_API_BASE_URL"),
24
- headers={
25
- "Authorization": f"Bearer {os.getenv('OLLAMA_API_KEY')}"
26
- }
27
- )
28
-
29
  messages = []
30
  for item in history:
31
  if isinstance(item, dict) and "role" in item and "content" in item:
@@ -36,59 +32,54 @@ async def playground(
36
  messages.append({"role": "user", "content": message})
37
 
38
  response = ""
39
- async for part in await client.chat(
40
- model="hf.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF:Q4_K_M",
 
 
 
41
  messages=messages,
42
- options={
43
- "temperature": float(temperature),
 
 
 
 
44
  "repeat_penalty": float(repeat_penalty),
45
- "top_k": int(top_k),
46
- "top_p": float(top_p)
47
- },
48
- stream=True
49
- ):
50
- response += part.get("message", {}).get("content", "")
51
- yield response
 
52
 
53
  with gr.Blocks(
54
  fill_height=True,
55
- fill_width=True
56
  ) as app:
57
  with gr.Sidebar():
58
- gr.HTML(
59
- """
60
- <h1>Ollama Inference Playground part of the
61
- <a href="https://huggingface.co/spaces/hadadxyz/ai" target="_blank">
62
- Demo Playground</a>, and the <a href="https://huggingface.co/umint"
63
- target="_blank">UltimaX Intelligence</a> project</h1><br />
64
-
65
- This space run the <b><a href=
66
- "https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct"
67
- target="_blank">LFM2.5 (1.2B)</a></b> model from
68
- <b>LiquidAI</b>, hosted on a server using <b>Ollama</b>
69
- and accessed via the <b>Ollama Python SDK</b>.<br><br>
70
-
71
- Official <b>documentation</b> for using Ollama with the
72
- Python SDK can be found
73
- <b><a href="https://github.com/ollama/ollama-python"
74
- target="_blank">here</a></b>.<br><br>
75
-
76
- LFM2.5 (1.2B) runs entirely on a <b>dual-core CPU</b>.
77
- Thanks to its small size, the model can
78
- operate efficiently on minimal hardware.<br><br>
79
-
80
- The LFM2.5 (1.2B) model can also be viewed or downloaded
81
- from the official repository
82
- <b><a href="https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF"
83
- target="_blank">here</a></b>.<br><br>
84
-
85
- <b>Like this project? You can support me by buying a
86
- <a href="https://ko-fi.com/hadad" target="_blank">
87
- coffee</a></b>.
88
- """
89
- )
90
  gr.Markdown("---")
91
  gr.Markdown("## Model Parameters")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  temperature = gr.Slider(
93
  minimum=0.1,
94
  maximum=1.0,
@@ -103,7 +94,7 @@ with gr.Blocks(
103
  maximum=2.0,
104
  value=1.05,
105
  step=0.1,
106
- label="Repeat Penalty",
107
  info="Penalty for repeating tokens"
108
  )
109
  gr.Markdown("")
@@ -128,17 +119,13 @@ with gr.Blocks(
128
  gr.ChatInterface(
129
  fn=playground,
130
  additional_inputs=[
 
 
131
  temperature,
132
  repeat_penalty,
133
  top_k,
134
  top_p
135
  ],
136
- chatbot=gr.Chatbot(
137
- label="Ollama | LFM2.5 (1.2B)",
138
- type="messages",
139
- show_copy_button=True,
140
- scale=1
141
- ),
142
  type="messages",
143
  examples=[
144
  ["Please introduce yourself."],
@@ -151,6 +138,6 @@ with gr.Blocks(
151
  )
152
 
153
  app.launch(
154
- server_name="0.0.0.0",
155
  pwa=True
156
  )
 
4
  #
5
 
6
  import os
7
+ from config import MODEL, INFO, HOST
8
+ from openai import AsyncOpenAI
9
  import gradio as gr
10
 
11
  async def playground(
12
  message,
13
  history,
14
+ num_ctx,
15
+ max_tokens,
16
  temperature,
17
  repeat_penalty,
18
  top_k,
 
22
  yield []
23
  return
24
 
 
 
 
 
 
 
 
25
  messages = []
26
  for item in history:
27
  if isinstance(item, dict) and "role" in item and "content" in item:
 
32
  messages.append({"role": "user", "content": message})
33
 
34
  response = ""
35
+ stream = await AsyncOpenAI(
36
+ base_url=os.getenv("OLLAMA_API_BASE_URL"),
37
+ api_key=os.getenv("OLLAMA_API_KEY")
38
+ ).chat.completions.create(
39
+ model=MODEL,
40
  messages=messages,
41
+ max_tokens=int(max_tokens),
42
+ temperature=float(temperature),
43
+ top_p=float(top_p),
44
+ stream=True,
45
+ extra_body={
46
+ "num_ctx": int(num_ctx),
47
  "repeat_penalty": float(repeat_penalty),
48
+ "top_k": int(top_k)
49
+ }
50
+ )
51
+
52
+ async for chunk in stream:
53
+ if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
54
+ response += chunk.choices[0].delta.content
55
+ yield response
56
 
57
  with gr.Blocks(
58
  fill_height=True,
59
+ fill_width=False
60
  ) as app:
61
  with gr.Sidebar():
62
+ gr.HTML(INFO)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  gr.Markdown("---")
64
  gr.Markdown("## Model Parameters")
65
+ num_ctx = gr.Slider(
66
+ minimum=512,
67
+ maximum=8192,
68
+ value=512,
69
+ step=128,
70
+ label="Context Length",
71
+ info="Maximum context window size (memory)"
72
+ )
73
+ gr.Markdown("")
74
+ max_tokens = gr.Slider(
75
+ minimum=512,
76
+ maximum=8192,
77
+ value=512,
78
+ step=128,
79
+ label="Max Tokens",
80
+ info="Maximum number of tokens to generate"
81
+ )
82
+ gr.Markdown("")
83
  temperature = gr.Slider(
84
  minimum=0.1,
85
  maximum=1.0,
 
94
  maximum=2.0,
95
  value=1.05,
96
  step=0.1,
97
+ label="Repetition Penalty",
98
  info="Penalty for repeating tokens"
99
  )
100
  gr.Markdown("")
 
119
  gr.ChatInterface(
120
  fn=playground,
121
  additional_inputs=[
122
+ num_ctx,
123
+ max_tokens,
124
  temperature,
125
  repeat_penalty,
126
  top_k,
127
  top_p
128
  ],
 
 
 
 
 
 
129
  type="messages",
130
  examples=[
131
  ["Please introduce yourself."],
 
138
  )
139
 
140
  app.launch(
141
+ server_name=HOST,
142
  pwa=True
143
  )
src/config.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ # ---------------------------------------------
7
+ # | OLLAMA_API_BASE_URL | /v1 | ENV or SECRET |
8
+ # |---------------------|-----|---------------|
9
+ # | OLLAMA_API_KEY | | SECRET |
10
+ # ---------------------------------------------
11
+
12
+ MODEL = "hf.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF:Q4_K_M"
13
+
14
+ INFO = """
15
+ <h1>Ollama Inference Playground part of the <a href="https://huggingface.co/spaces/hadadxyz/ai" target="_blank">Demo Playground</a>, and the <a href="https://huggingface.co/umint" target="_blank">UltimaX Intelligence</a> project</h1><br>
16
+
17
+ This space run the <b><a href="https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct" target="_blank">LFM2.5 (1.2B)</a></b> model from <b>LiquidAI</b>, hosted on a server using <b>Ollama</b> and accessed via the <b>OpenAI Python SDK</b>.<br><br>
18
+
19
+ Official <b>documentation</b> for using Ollama with the OpenAI-Compatible API can be found <b><a href="https://docs.ollama.com/api/openai-compatibility" target="_blank">here</a></b>.<br><br>
20
+
21
+ LFM2.5 (1.2B) runs entirely on a <b>dual-core CPU</b>. Thanks to its small size, the model can operate efficiently on minimal hardware.<br><br>
22
+
23
+ The LFM2.5 (1.2B) model can also be viewed or downloaded from the official repository <b><a href="https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF" target="_blank">here</a></b>.<br><br>
24
+
25
+ <b>Like this project? You can support me by buying a <a href="https://ko-fi.com/hadad" target="_blank">coffee</a></b>.
26
+ """
27
+
28
+ HOST = "0.0.0.0"