hadadrjt committed on
Commit
48886b7
·
0 Parent(s):

LFM2.5-1.2B: Initial.

Browse files
Files changed (3) hide show
  1. Dockerfile +19 -0
  2. README.md +11 -0
  3. app.py +168 -0
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
# SPDX-License-Identifier: Apache-2.0
#

# Base image for the app — a prebuilt playground image
# (presumably ships the Python runtime and dependencies; not inspectable here)
FROM hadadrjt/playground:public-latest

# Set the main working directory inside the container
WORKDIR /app

# Copy all files into the container
COPY . .

# Open the port so the app can be accessed
# (must match `app_port` declared in README.md)
EXPOSE 7860

# Start the app
CMD ["python", "app.py"]
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: LiquidAI/LFM2.5-1.2B-Instruct
3
+ short_description: LFM2.5 (1.2B) runs on Ollama using only a dual-core CPU
4
+ license: apache-2.0
5
+ emoji: ⚡
6
+ colorFrom: red
7
+ colorTo: red
8
+ sdk: docker
9
+ app_port: 7860
10
+ pinned: false
11
+ ---
app.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import os
7
+ from ollama import AsyncClient
8
+ import gradio as gr
9
+
10
async def playground(
    message,
    history,
    num_ctx,
    temperature,
    repeat_penalty,
    top_k,
    top_p
):
    """Stream a chat completion for the LFM2.5 (1.2B) model via Ollama.

    Parameters
    ----------
    message : str
        The user's new prompt. Blank or non-string input is ignored.
    history : list
        Prior turns in Gradio "messages" format; only well-formed
        ``{"role": ..., "content": ...}`` dicts are forwarded.
    num_ctx, temperature, repeat_penalty, top_k, top_p
        Sampling / context-window options passed through to Ollama.

    Yields
    ------
    str
        The assistant response so far, growing as tokens stream in.
        Yields an empty list once (then returns) for invalid input.
    """
    # Guard clause: reject empty or non-string prompts.
    if not isinstance(message, str) or not message.strip():
        yield []
        return

    # Only attach an Authorization header when an API key is configured.
    # The original code always sent f"Bearer {os.getenv('OLLAMA_API_KEY')}",
    # which becomes the literal string "Bearer None" when the variable is unset.
    api_key = os.getenv("OLLAMA_API_KEY")
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}

    client = AsyncClient(
        host=os.getenv("OLLAMA_API_BASE_URL"),
        headers=headers
    )

    # Rebuild the conversation, keeping only well-formed message dicts.
    messages = [
        {"role": item["role"], "content": item["content"]}
        for item in history
        if isinstance(item, dict) and "role" in item and "content" in item
    ]
    messages.append({"role": "user", "content": message})

    response = ""
    async for part in await client.chat(
        model="hf.co/unsloth/LFM2.5-1.2B-Instruct-GGUF:Q2_K",
        messages=messages,
        options={
            "num_ctx": int(num_ctx),
            "temperature": float(temperature),
            "repeat_penalty": float(repeat_penalty),
            "top_k": int(top_k),
            "top_p": float(top_p)
        },
        stream=True
    ):
        # Each streamed chunk carries an incremental "content" fragment;
        # accumulate and re-yield so the UI shows the growing reply.
        response += part.get("message", {}).get("content", "")
        yield response
54
+
55
# Build the Gradio UI: a sidebar with project info and model-parameter
# sliders, plus a streaming chat interface backed by `playground`.
with gr.Blocks(
    fill_height=True,
    fill_width=True
) as app:
    with gr.Sidebar():
        # Project description shown in the sidebar.
        # (Grammar fixes vs. original: "This space runs", "the Unsloth repository".)
        gr.HTML(
            """
            <h1>Ollama Inference Playground part of the
            <a href="https://huggingface.co/spaces/hadadxyz/ai" target="_blank">
            Demo Playground</a>, and the <a href="https://huggingface.co/umint"
            target="_blank">UltimaX Intelligence</a> project</h1><br />

            This space runs the <b><a href=
            "https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct"
            target="_blank">LFM2.5 (1.2B)</a></b> model from
            <b>LiquidAI</b>, hosted on a server using <b>Ollama</b>
            and accessed via the <b>Ollama Python SDK</b>.<br><br>

            Official <b>documentation</b> for using Ollama with the
            Python SDK can be found
            <b><a href="https://github.com/ollama/ollama-python"
            target="_blank">here</a></b>.<br><br>

            LFM2.5 (1.2B) runs entirely on a <b>dual-core CPU</b>.
            Thanks to its small size, the model can
            operate efficiently on minimal hardware.<br><br>

            The LFM2.5 (1.2B) model can also be viewed or downloaded
            from the Unsloth repository
            <b><a href="https://huggingface.co/unsloth/LFM2.5-1.2B-Instruct-GGUF"
            target="_blank">here</a></b>.<br><br>

            <b>Like this project? You can support me by buying a
            <a href="https://ko-fi.com/hadad" target="_blank">
            coffee</a></b>.
            """
        )
        gr.Markdown("---")
        gr.Markdown("## Model Parameters")
        # Context window is capped low to keep CPU usage manageable.
        num_ctx = gr.Slider(
            minimum=512,
            maximum=1024,
            value=512,
            step=128,
            label="Context Length",
            info="Maximum context window size (limited to CPU usage)"
        )
        gr.Markdown("")
        temperature = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.1,
            step=0.1,
            label="Temperature",
            info="Controls randomness in generation"
        )
        gr.Markdown("")
        repeat_penalty = gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=1.05,
            step=0.1,
            label="Repeat Penalty",
            info="Penalty for repeating tokens"
        )
        gr.Markdown("")
        top_k = gr.Slider(
            minimum=0,
            maximum=100,
            value=50,
            step=1,
            label="Top K",
            info="Number of top tokens to consider"
        )
        gr.Markdown("")
        top_p = gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=0.1,
            step=0.05,
            label="Top P",
            info="Cumulative probability threshold"
        )

    # Chat surface: streams partial responses from `playground`;
    # the sidebar sliders are wired in as additional inputs.
    gr.ChatInterface(
        fn=playground,
        additional_inputs=[
            num_ctx,
            temperature,
            repeat_penalty,
            top_k,
            top_p
        ],
        chatbot=gr.Chatbot(
            label="Ollama | LFM2.5 (1.2B)",
            type="messages",
            show_copy_button=True,
            scale=1
        ),
        type="messages",
        examples=[
            ["Please introduce yourself."],
            ["What caused World War II?"],
            ["Give me a short introduction to large language model."],
            ["Explain about quantum computers."]
        ],
        cache_examples=False,
        show_api=False
    )

# Bind to all interfaces so the app is reachable inside the container
# (the Dockerfile exposes port 7860, Gradio's default).
app.launch(
    server_name="0.0.0.0",
    pwa=True
)