File size: 15,973 Bytes
9a3021b
 
 
 
 
7235193
 
 
 
9a3021b
7235193
9a3021b
7235193
9a3021b
7235193
 
 
 
9a3021b
7235193
9a3021b
7235193
 
 
 
9a3021b
7235193
9a3021b
7235193
 
 
 
 
9a3021b
7235193
 
 
 
 
 
9a3021b
7235193
 
 
 
 
 
 
 
9a3021b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7235193
9a3021b
 
 
 
7235193
 
 
 
9a3021b
 
7235193
 
9a3021b
7235193
9a3021b
 
 
7235193
9a3021b
 
 
7235193
9a3021b
 
7235193
 
 
 
 
 
 
 
 
 
 
9a3021b
 
7235193
9a3021b
 
 
7235193
9a3021b
7235193
 
 
 
9a3021b
 
 
7235193
9a3021b
 
 
 
 
 
7235193
 
 
9a3021b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7235193
9a3021b
 
 
 
7235193
9a3021b
 
 
7235193
9a3021b
 
 
 
7235193
 
9a3021b
 
 
7235193
 
 
 
9a3021b
7235193
9a3021b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7235193
 
 
9a3021b
 
 
 
7235193
 
 
 
 
 
9a3021b
7235193
9a3021b
 
7235193
 
 
9a3021b
7235193
 
9a3021b
 
 
 
 
7235193
 
9a3021b
7235193
 
 
 
 
 
 
 
 
 
9a3021b
7235193
9a3021b
 
 
7235193
 
 
 
9a3021b
7235193
 
 
 
 
9a3021b
7235193
9a3021b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7235193
 
9a3021b
 
 
 
7235193
9a3021b
 
7235193
 
 
 
 
 
 
 
9a3021b
7235193
 
 
 
 
 
 
 
9a3021b
7235193
 
 
9a3021b
7235193
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
# app.py — corrected and consolidated version
import inspect
import threading
from threading import Thread

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# ====== Model configuration ======
MODEL_ID = "LiquidAI/LFM2.5-1.2B-Thinking"
DEFAULT_SYSTEM_PROMPT = """You are LFM2.5, an advanced reasoning model developed by LiquidAI. You excel at breaking down complex problems, thinking step-by-step, and providing clear, well-reasoned answers. Always think through problems systematically before providing your final answer."""

# ====== Global state ======
model = None
tokenizer = None
is_model_loaded = False


def load_model():
    """Lazily load the tokenizer and model into module globals.

    Returns:
        bool: True on success (or if already loaded), False on failure.

    Safe to call repeatedly; the heavy work runs only once thanks to the
    ``is_model_loaded`` flag.
    """
    global model, tokenizer, is_model_loaded
    if is_model_loaded:
        return True
    try:
        print("Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
        print("Loading model...")
        # Choose precision/placement once instead of duplicating the whole
        # from_pretrained call: fp16 on GPU for speed/memory, fp32 on CPU
        # for numeric safety.
        use_gpu = torch.cuda.is_available()
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float16 if use_gpu else torch.float32,
            device_map="auto" if use_gpu else "cpu",
            trust_remote_code=True,
        )
        is_model_loaded = True
        print("Model loaded successfully!")
        return True
    except Exception as e:
        # Broad catch is deliberate: any load failure (network, OOM, bad
        # checkpoint) should degrade to a UI error message, not crash the app.
        print(f"Error loading model: {e}")
        return False


# ====== Format conversion between Gradio and internal history ======
def gradio_history_to_internal(gr_history):
    """Normalize a Gradio Chatbot history into role/content dicts.

    Accepts either the legacy tuple-pair format ``[(user, assistant), ...]``
    or a history that is already ``[{"role": ..., "content": ...}, ...]``
    (returned unchanged). Empty entries are skipped.
    """
    if not gr_history:
        return []
    # Already in the internal dict format — pass through untouched.
    if isinstance(gr_history, list) and len(gr_history) > 0 and isinstance(gr_history[0], dict):
        return gr_history
    converted = []
    for entry in gr_history:
        if not entry:
            continue
        if isinstance(entry, (list, tuple)) and len(entry) >= 2:
            # One pair expands to up to two messages, skipping blank slots.
            for role, text in (("user", entry[0]), ("assistant", entry[1])):
                if text is not None and text != "":
                    converted.append({"role": role, "content": str(text)})
        else:
            # Anything unrecognized is treated as a bare user message.
            converted.append({"role": "user", "content": str(entry)})
    return converted


def internal_history_to_gradio(internal_history):
    """Convert internal role/content dicts to the Gradio Chatbot payload.

    The Chatbot component in this app is created with ``type="messages"``,
    which expects openai-style ``{"role", "content"}`` dicts — NOT the legacy
    ``(user, assistant)`` tuple pairs the previous implementation produced
    (messages-mode chatbots reject tuple data). We therefore pass the dicts
    through, keeping only well-formed user/assistant entries.

    Args:
        internal_history: list of ``{"role": ..., "content": ...}`` dicts.

    Returns:
        list[dict]: messages-format history safe to hand to the Chatbot.
    """
    messages = []
    for msg in internal_history or []:
        role = msg.get("role")
        # Drop anything that is not a displayable chat turn (e.g. system).
        if role in ("user", "assistant"):
            messages.append({"role": role, "content": msg.get("content", "")})
    return messages


# ====== Message formatting for the model ======
def format_chat_history(history, system_prompt):
    """Build the model-facing message list: optional system turn + history.

    Args:
        history: iterable of ``{"role": ..., "content": ...}`` dicts; entries
            missing a truthy role or a "content" key are dropped.
        system_prompt: prepended as a system message when non-empty.

    Returns:
        list[dict]: messages ready for apply_chat_template.
    """
    messages = [{"role": "system", "content": system_prompt}] if system_prompt else []
    messages.extend(
        {"role": msg["role"], "content": msg["content"]}
        for msg in history
        if msg.get("role") and "content" in msg
    )
    return messages


def apply_chat_template(messages):
    """Render a message list into a single prompt string.

    Prefers the tokenizer's built-in chat template; if that is unavailable or
    fails for any reason (including the tokenizer not being loaded yet), falls
    back to simple ``<|role|>`` markers terminated by an assistant header so
    generation starts in the assistant turn.
    """
    try:
        # tokenize=False: the caller tokenizes separately before generation.
        return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    except Exception:
        # Manual fallback: one "<|role|>\ncontent\n" segment per known role.
        markers = {"system": "<|system|>", "user": "<|user|>", "assistant": "<|assistant|>"}
        parts = []
        for msg in messages:
            tag = markers.get(msg["role"])
            if tag is not None:
                parts.append(f"{tag}\n{msg['content']}\n")
        parts.append("<|assistant|>\n")
        return "".join(parts)


# ====== Response generation (supports streaming) ======
def generate_response(message, history, system_prompt, temperature, max_tokens, top_p):
    """Stream a model reply for *message* appended to *history*.

    Generator yielding ``(partial_text, updated_history)`` tuples as tokens
    arrive; the final yield carries the complete reply. *history* is a list
    of ``{"role", "content"}`` dicts; the caller's list is copied, so it is
    never mutated in place. Lazily loads the model on first use; on failure
    it yields a single error string instead of raising.
    """
    global model, tokenizer, is_model_loaded
    # Lazy-load on first request so importing this module stays cheap.
    if not is_model_loaded:
        if not load_model():
            yield "❌ Error: Failed to load model. Please check the logs.", history
            return

    # Work on a copy so the caller's history list is never mutated.
    history = list(history)  # copy
    history.append({"role": "user", "content": message})

    # Render system prompt + turns into a single prompt string.
    messages_for_model = format_chat_history(history, system_prompt)
    prompt = apply_chat_template(messages_for_model)

    # Tokenize, moving tensors to GPU only when one is available.
    inputs = tokenizer(prompt, return_tensors="pt")
    if torch.cuda.is_available():
        inputs = {k: v.cuda() for k, v in inputs.items()}

    # Preferred path: token-by-token streaming via TextIteratorStreamer.
    # Any failure falls through to a single blocking generate() call below.
    try:
        # timeout guards the consuming loop if the producer thread stalls.
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=20.0)
        generation_kwargs = {
            **inputs,
            "streamer": streamer,
            "max_new_tokens": int(max_tokens),
            "temperature": float(temperature),
            "top_p": float(top_p),
            "do_sample": float(temperature) > 0.0,  # greedy decode when temperature is 0
            "pad_token_id": tokenizer.eos_token_id,
        }
        # generate() blocks, so it runs on a worker thread while this
        # generator drains the streamer and yields partial text.
        gen_thread = Thread(target=model.generate, kwargs=generation_kwargs)
        gen_thread.start()

        response = ""
        for new_text in streamer:
            response += new_text
            # Mirror the partial reply into the history: create the assistant
            # entry on the first chunk, then overwrite its content in place
            # on subsequent chunks (the last entry is otherwise the user turn).
            if len(history) == 0 or history[-1].get("role") != "assistant":
                history.append({"role": "assistant", "content": response})
            else:
                history[-1]["content"] = response
            yield response, history
        gen_thread.join()
    except Exception as e:
        # Fallback: synchronous non-streaming generation (less interactive).
        try:
            outputs = model.generate(
                **inputs,
                max_new_tokens=int(max_tokens),
                temperature=float(temperature),
                top_p=float(top_p),
                do_sample=float(temperature) > 0.0,
                pad_token_id=tokenizer.eos_token_id,
            )
            # Slice off the prompt tokens so only the new reply is decoded.
            decoded = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
            # Record the full reply as a single assistant turn.
            history.append({"role": "assistant", "content": decoded})
            yield decoded, history
        except Exception as e2:
            # Surface both failures in the chat rather than crashing the UI.
            err = f"❌ Generation error: {e} | fallback error: {e2}"
            history.append({"role": "assistant", "content": err})
            yield err, history


# ====== Chat wrapper with error handling and format conversion ======
def chat_with_model(message, gr_chat_history, system_prompt, temperature, max_tokens, top_p):
    """Gradio event handler: stream a reply into the Chatbot.

    Receives the textbox value and the Chatbot state, converts the state to
    the internal dict format, streams partial replies from generate_response,
    and yields ``("", updated_history)`` pairs — the empty first element keeps
    the input box cleared while tokens arrive.
    """
    # Blank input: leave the conversation untouched and just clear the box.
    if not message or not str(message).strip():
        yield "", gr_chat_history
        return

    internal_history = gradio_history_to_internal(gr_chat_history)

    try:
        stream = generate_response(
            message, internal_history, system_prompt, temperature, max_tokens, top_p
        )
        for _partial, updated in stream:
            # Re-convert on every chunk so the component always sees its format.
            yield "", internal_history_to_gradio(updated)
    except Exception as exc:
        # Show the failure as an assistant turn instead of breaking the UI.
        internal_history.append({"role": "assistant", "content": f"❌ Error: {exc}"})
        yield "", internal_history_to_gradio(internal_history)


def clear_conversation():
    """Reset the UI state: empty chat history and a cleared input box."""
    return ([], "")


def get_model_info():
    """Return the markdown blurb shown in the "Model Info" accordion."""
    info_lines = (
        " ### 🧠 LFM2.5-1.2B-Thinking",
        f"**Model:** {MODEL_ID}",
        "**Description:** An advanced reasoning model optimized for step-by-step thinking and complex problem-solving.",
        "**Parameters:** ~1.2 Billion",
        "**Capabilities:** - Logical reasoning - Mathematical problem solving - Code generation and analysis - Step-by-step thinking",
        "**Tips:** Use the system prompt to guide the model's behavior and adjust temperature for creativity vs. precision.",
    )
    # Trailing newline matches the original triple-quoted literal.
    return "\n".join(info_lines) + "\n"


# ====== Gradio UI ======
# Build the whole UI inside one Blocks context; `demo` is launched at the
# bottom of the file when run as a script.
with gr.Blocks(title="LFM2.5-1.2B-Thinking Trial", fill_height=True) as demo:
    gr.Markdown(
        """
# 🧠 LFM2.5-1.2B-Thinking
### Advanced Reasoning Model by LiquidAI
"""
    )

    with gr.Row():
        with gr.Column(scale=3):
            # Note: avoid using `show_copy_button` directly (it may not exist in installed Gradio).
            # If you want a copy button in newer Gradio versions, you could use `buttons=["copy"]`.
            # NOTE(review): `bubble_full_width` was deprecated/removed in newer
            # Gradio releases — confirm the installed version still accepts it.
            # NOTE(review): `type="messages"` expects `{"role","content"}` dict
            # entries, while internal_history_to_gradio in this file returns
            # (user, assistant) tuple pairs — confirm this mismatch is intended.
            chatbot = gr.Chatbot(label="Conversation", height=500, bubble_full_width=False, type="messages")

            with gr.Row():
                # Multi-line input; Enter submits, Shift+Enter inserts a newline.
                msg_input = gr.Textbox(
                    label="Your Message",
                    placeholder="Ask me anything... Press Enter to send, Shift+Enter for new line",
                    lines=2,
                    show_label=False,
                    container=False,
                )
                send_btn = gr.Button("🚀 Send", variant="primary")

            with gr.Row():
                clear_btn = gr.Button("🗑️ Clear Conversation", variant="secondary")
                retry_btn = gr.Button("🔄 Retry Last", variant="secondary")

        with gr.Column(scale=1):
            # Generation settings, collapsed by default.
            with gr.Accordion("⚙️ Settings", open=False):
                system_prompt = gr.Textbox(label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, lines=4)
                temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
                max_tokens = gr.Slider(minimum=64, maximum=2048, value=512, step=64, label="Max Tokens")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top P")

            with gr.Accordion("ℹ️ Model Info", open=False):
                model_info = gr.Markdown(get_model_info())

            gr.Markdown("### 💡 Example Prompts")
            # Clicking an example only fills the textbox; it does not auto-submit.
            examples = gr.Examples(
                examples=[
                    "Explain quantum entanglement in simple terms.",
                    "Solve this math problem: If a train travels at 60 mph for 2.5 hours, how far does it go?",
                    "Write a Python function to check if a number is prime.",
                    "What are the steps to debug a React application?",
                    "Explain the difference between supervised and unsupervised learning.",
                ],
                inputs=msg_input,
                label="Click to try:",
            )

    # Events
    # msg_input.submit and send_btn.click both call chat_with_model.
    # NOTE(review): `api_visibility=` exists only in recent Gradio releases
    # (older versions use `api_name`/`show_api`) — confirm before pinning.
    msg_input.submit(
        fn=chat_with_model,
        inputs=[msg_input, chatbot, system_prompt, temperature, max_tokens, top_p],
        outputs=[msg_input, chatbot],
        api_visibility="public",
    )
    send_btn.click(
        fn=chat_with_model,
        inputs=[msg_input, chatbot, system_prompt, temperature, max_tokens, top_p],
        outputs=[msg_input, chatbot],
        api_visibility="public",
    )
    clear_btn.click(fn=clear_conversation, inputs=None, outputs=[chatbot, msg_input], api_visibility="private")

    # Retry last — re-run generation for the most recent user message.
    def retry_last(gr_chat_history, system_prompt, temperature, max_tokens, top_p):
        """Regenerate the answer to the last non-empty user message.

        Truncates the history to everything *before* that user turn —
        generate_response re-appends the user message itself, so keeping it
        (as the original ``internal[:-1]`` slice did when the last entry was
        an assistant reply) would duplicate the turn. Drains the streaming
        generator and returns only the final state. Returns the history
        unchanged when there is no user message to retry.
        """
        internal = gradio_history_to_internal(gr_chat_history)
        # Locate the last non-empty user message and its index.
        last_user = None
        last_user_idx = None
        for idx in range(len(internal) - 1, -1, -1):
            msg = internal[idx]
            if msg.get("role") == "user" and msg.get("content", "").strip():
                last_user = msg["content"]
                last_user_idx = idx
                break
        if last_user is None:
            return "", gr_chat_history
        prior = internal[:last_user_idx]
        # Guard against an empty generator: without this default,
        # `updated_internal` would be unbound after the loop (the original
        # code raised NameError in that case).
        updated_internal = prior
        for _partial, updated_internal in generate_response(
            last_user, prior, system_prompt, temperature, max_tokens, top_p
        ):
            pass  # drain the stream; retry shows only the final result
        return "", internal_history_to_gradio(updated_internal)

    # Wire the retry button; kept out of the public API surface.
    retry_btn.click(
        fn=retry_last,
        inputs=[chatbot, system_prompt, temperature, max_tokens, top_p],
        outputs=[msg_input, chatbot],
        api_visibility="private",
    )

    # load placeholder (avoid heavy work on import; model will lazy-load on first request)
    demo.load(fn=lambda: None)

# Launch
if __name__ == "__main__":
    # You can pin a Gradio version in your environment instead of changing the code.
    # The app below avoids `show_copy_button` to be compatible with multiple Gradio releases.
    # NOTE(review): `theme=` has historically been an argument of the
    # gr.Blocks()/gr.Interface() constructor, not of launch() — confirm the
    # installed Gradio accepts it here; otherwise move it to gr.Blocks(...).
    # NOTE(review): `footer_links=` likewise appears only in recent Gradio
    # releases — verify against the deployed version before shipping.
    demo.launch(
        theme=gr.themes.Soft(
            primary_hue="blue",
            secondary_hue="indigo",
            neutral_hue="slate",
            font=gr.themes.GoogleFont("Inter"),
            text_size="md",
            spacing_size="md",
            radius_size="md",
        ).set(
            button_primary_background_fill="*primary_600",
            button_primary_background_fill_hover="*primary_700",
            block_title_text_weight="600",
        ),
        footer_links=[
            {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
            {"label": "LiquidAI", "url": "https://huggingface.co/LiquidAI"},
            {"label": "Model Card", "url": "https://huggingface.co/LiquidAI/LFM2.5-1.2B-Thinking"},
        ],
        # Bind on all interfaces (required inside containers / HF Spaces).
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )