Spaces:
Runtime error
Runtime error
| # app.py — نسخة مصححة ومتكاملة | |
| import inspect | |
| import threading | |
| from threading import Thread | |
| import gradio as gr | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer | |
# ====== Model configuration ======
MODEL_ID = "LiquidAI/LFM2.5-1.2B-Thinking"
DEFAULT_SYSTEM_PROMPT = """You are LFM2.5, an advanced reasoning model developed by LiquidAI. You excel at breaking down complex problems, thinking step-by-step, and providing clear, well-reasoned answers. Always think through problems systematically before providing your final answer."""
# ====== Global state (populated lazily by load_model) ======
model = None  # transformers model instance, set by load_model()
tokenizer = None  # matching tokenizer instance, set by load_model()
is_model_loaded = False  # guards against re-loading on every request
def load_model():
    """Load the model and tokenizer once, caching them in module globals.

    Returns:
        bool: True when model/tokenizer are ready, False if loading failed
        (the error is printed; callers surface a UI message instead).
    """
    global model, tokenizer, is_model_loaded
    if is_model_loaded:
        return True
    try:
        print("Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
        print("Loading model...")
        # Choose device/dtype once instead of duplicating the whole
        # from_pretrained call for the CUDA and CPU branches.
        use_cuda = torch.cuda.is_available()
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            device_map="auto" if use_cuda else "cpu",
            trust_remote_code=True,
        )
        is_model_loaded = True
        print("Model loaded successfully!")
        return True
    except Exception as e:
        # Best-effort: report and return False so the UI can show an error
        # rather than crashing the whole app at import/request time.
        print(f"Error loading model: {e}")
        return False
| # ====== تحويل الصيغ بين Gradio وداخل التطبيق ====== | |
def gradio_history_to_internal(gr_history):
    """Normalize a Gradio Chatbot history into the internal message format.

    Accepts either the tuple format (list of (user, assistant) pairs), an
    already-internal list of {"role", "content"} dicts (returned as-is), or
    loose strings (treated as user messages).

    Fix: single-element tuples/lists (a user turn with no reply yet)
    previously fell through to the string fallback and were stringified as
    e.g. "('hi',)"; they are now treated as a user message.

    Returns:
        list[dict]: messages as {"role": "user"|"assistant", "content": str}.
    """
    if not gr_history:
        return []
    # Already in internal dict format — pass through untouched.
    if isinstance(gr_history, list) and gr_history and isinstance(gr_history[0], dict):
        return gr_history
    internal = []
    for pair in gr_history:
        if not pair:
            continue
        if isinstance(pair, (list, tuple)):
            user_txt = pair[0]
            assistant_txt = pair[1] if len(pair) >= 2 else None
            # Explicit None/"" checks (not plain truthiness) so falsy but
            # meaningful content like 0 is still kept, as before.
            if user_txt is not None and user_txt != "":
                internal.append({"role": "user", "content": str(user_txt)})
            if assistant_txt is not None and assistant_txt != "":
                internal.append({"role": "assistant", "content": str(assistant_txt)})
        else:
            # Fallback: treat a bare item as a user message.
            internal.append({"role": "user", "content": str(pair)})
    return internal
def internal_history_to_gradio(internal_history):
    """Convert internal {"role", "content"} messages into Gradio tuple pairs.

    A user message is held until the next assistant message arrives, forming
    a (user, assistant) pair. Unmatched messages are padded with "": a user
    turn with no reply becomes (user, ""), a bare assistant turn ("", text).
    Roles other than "user"/"assistant" (e.g. "system") are not displayed.
    """
    tuples = []
    pending_user = None
    for entry in internal_history:
        role = entry.get("role")
        text = entry.get("content", "")
        if role == "user":
            if pending_user is not None:
                # Previous user turn never got a reply; emit it unanswered.
                tuples.append((pending_user, ""))
            pending_user = text
        elif role == "assistant":
            if pending_user is None:
                tuples.append(("", text))
            else:
                tuples.append((pending_user, text))
            pending_user = None
    # Flush a trailing unanswered user turn.
    if pending_user is not None:
        tuples.append((pending_user, ""))
    return tuples
# ====== Message formatting for the model ======
def format_chat_history(history, system_prompt):
    """Build the message list fed to the chat template.

    Prepends ``system_prompt`` (when non-empty) as a system message, then
    copies every history entry that has both a truthy "role" and a
    "content" key.
    """
    formatted = [{"role": "system", "content": system_prompt}] if system_prompt else []
    formatted.extend(
        {"role": entry["role"], "content": entry["content"]}
        for entry in history
        if entry.get("role") and "content" in entry
    )
    return formatted
def apply_chat_template(messages):
    """Render a message list into a single prompt string.

    Prefers the tokenizer's built-in chat template (tokenize=False, with a
    generation prompt appended). If that is unavailable or raises for any
    reason, falls back to simple role-marker formatting ending in an open
    assistant turn.
    """
    try:
        return tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
    except Exception:
        # Manual fallback: only the three known roles are rendered.
        markers = {"system": "<|system|>", "user": "<|user|>", "assistant": "<|assistant|>"}
        chunks = []
        for msg in messages:
            tag = markers.get(msg["role"])
            if tag is not None:
                chunks.append(f"{tag}\n{msg['content']}\n")
        chunks.append("<|assistant|>\n")
        return "".join(chunks)
# ====== Response generation (supports streaming) ======
def generate_response(message, history, system_prompt, temperature, max_tokens, top_p):
    """
    Generator that yields (partial_text, internal_history) while streaming.

    Lazy-loads the model on first use, appends ``message`` as a user turn to
    a copy of ``history`` (list of {"role", "content"} dicts), then streams
    tokens via TextIteratorStreamer with generation running on a background
    thread. If streaming setup/iteration raises, falls back to a single
    synchronous generate() call; if that also fails, yields an error string
    as the assistant message.
    """
    global model, tokenizer, is_model_loaded
    # Ensure the model is loaded (lazy load on first request).
    if not is_model_loaded:
        if not load_model():
            yield "❌ Error: Failed to load model. Please check the logs.", history
            return
    # Append the user message into a copied internal history so the caller's
    # list is not mutated.
    history = list(history)  # copy
    history.append({"role": "user", "content": message})
    # Format messages for the model (system prompt + history -> prompt string).
    messages_for_model = format_chat_history(history, system_prompt)
    prompt = apply_chat_template(messages_for_model)
    # Tokenize, moving tensors to GPU when available.
    inputs = tokenizer(prompt, return_tensors="pt")
    if torch.cuda.is_available():
        inputs = {k: v.cuda() for k, v in inputs.items()}
    # Try streaming via TextIteratorStreamer; if it fails, fall back to
    # non-streaming generation.
    try:
        # timeout=20.0: streamer iteration raises if no new token arrives in
        # 20s, which drops us into the synchronous fallback below.
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=20.0)
        generation_kwargs = {
            **inputs,
            "streamer": streamer,
            "max_new_tokens": int(max_tokens),
            "temperature": float(temperature),
            "top_p": float(top_p),
            # NOTE(review): with the UI's temperature slider min of 0.1 this is
            # always True; do_sample=False would make temperature/top_p unused.
            "do_sample": float(temperature) > 0.0,
            "pad_token_id": tokenizer.eos_token_id,
        }
        # Run generation on a worker thread so we can consume the streamer here.
        gen_thread = Thread(target=model.generate, kwargs=generation_kwargs)
        gen_thread.start()
        response = ""
        for new_text in streamer:
            response += new_text
            # Update the trailing assistant entry in history in place:
            # the last entry is the user turn on the first token, so append an
            # assistant entry once, then overwrite its content on later tokens.
            if len(history) == 0 or history[-1].get("role") != "assistant":
                history.append({"role": "assistant", "content": response})
            else:
                history[-1]["content"] = response
            yield response, history
        gen_thread.join()
    except Exception as e:
        # Fallback: synchronous non-streaming generation (less interactive).
        # NOTE(review): if streaming failed mid-stream, partial assistant text
        # already sits in `history` and this path appends a second assistant
        # entry generated from scratch — confirm this duplication is intended.
        try:
            outputs = model.generate(
                **inputs,
                max_new_tokens=int(max_tokens),
                temperature=float(temperature),
                top_p=float(top_p),
                do_sample=float(temperature) > 0.0,
                pad_token_id=tokenizer.eos_token_id,
            )
            # Decode only the newly generated tokens (slice off the prompt).
            decoded = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
            # Record the reply in the history.
            history.append({"role": "assistant", "content": decoded})
            yield decoded, history
        except Exception as e2:
            err = f"❌ Generation error: {e} | fallback error: {e2}"
            history.append({"role": "assistant", "content": err})
            yield err, history
# ====== Chat wrapper: error handling and format conversion ======
def chat_with_model(message, gr_chat_history, system_prompt, temperature, max_tokens, top_p):
    """Gradio event handler: stream model output into the Chatbot.

    Receives the textbox value and the current Chatbot state plus the
    generation settings; yields successive ("", history) pairs so the input
    box is cleared while the Chatbot receives the growing conversation.

    Fix: the Chatbot component is created with ``type="messages"``, which
    expects a list of {"role", "content"} dicts — the previous version
    converted the history back into (user, assistant) tuples before
    yielding, a format Gradio rejects for messages-type Chatbots. The
    internal history already IS the messages format, so it is now yielded
    directly; ``gradio_history_to_internal`` accepts it unchanged on the
    next call.
    """
    # Empty/whitespace message: leave the history untouched.
    if not message or not str(message).strip():
        yield "", gr_chat_history
        return
    # Normalize whatever the component handed us into the internal format.
    internal_history = gradio_history_to_internal(gr_chat_history)
    try:
        # Stream partial responses; each yield updates the UI.
        for _response_text, updated_internal in generate_response(
            message, internal_history, system_prompt, temperature, max_tokens, top_p
        ):
            yield "", updated_internal
    except Exception as e:
        # Surface the failure as an assistant message instead of crashing the event.
        error_msg = f"❌ Error: {str(e)}"
        internal_history.append({"role": "assistant", "content": error_msg})
        yield "", internal_history
def clear_conversation():
    """Reset both the Chatbot history and the message input box."""
    empty_history, empty_input = [], ""
    return empty_history, empty_input
def get_model_info():
    """Return the Markdown blurb shown in the "Model Info" accordion."""
    info_lines = [
        " ### 🧠 LFM2.5-1.2B-Thinking",
        f"**Model:** {MODEL_ID}",
        "**Description:** An advanced reasoning model optimized for step-by-step thinking and complex problem-solving.",
        "**Parameters:** ~1.2 Billion",
        "**Capabilities:** - Logical reasoning - Mathematical problem solving - Code generation and analysis - Step-by-step thinking",
        "**Tips:** Use the system prompt to guide the model's behavior and adjust temperature for creativity vs. precision.",
    ]
    return "\n".join(info_lines) + "\n"
# ====== Gradio UI ======
# Layout: a wide left column with the Chatbot + input row + action buttons,
# and a narrow right column with Settings / Model Info accordions and examples.
with gr.Blocks(title="LFM2.5-1.2B-Thinking Trial", fill_height=True) as demo:
    gr.Markdown(
        """
# 🧠 LFM2.5-1.2B-Thinking
### Advanced Reasoning Model by LiquidAI
"""
    )
    with gr.Row():
        with gr.Column(scale=3):
            # Note: avoid using `show_copy_button` directly (it may not exist in installed Gradio).
            # If you want a copy button in newer Gradio versions, you could use `buttons=["copy"]`.
            # NOTE(review): `bubble_full_width` has been removed in recent Gradio
            # releases, and `type="messages"` expects dict histories while the
            # helpers above emit (user, assistant) tuples — verify both against
            # the installed Gradio version.
            chatbot = gr.Chatbot(label="Conversation", height=500, bubble_full_width=False, type="messages")
            with gr.Row():
                msg_input = gr.Textbox(
                    label="Your Message",
                    placeholder="Ask me anything... Press Enter to send, Shift+Enter for new line",
                    lines=2,
                    show_label=False,
                    container=False,
                )
                send_btn = gr.Button("🚀 Send", variant="primary")
            with gr.Row():
                clear_btn = gr.Button("🗑️ Clear Conversation", variant="secondary")
                retry_btn = gr.Button("🔄 Retry Last", variant="secondary")
        with gr.Column(scale=1):
            with gr.Accordion("⚙️ Settings", open=False):
                system_prompt = gr.Textbox(label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, lines=4)
                temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
                max_tokens = gr.Slider(minimum=64, maximum=2048, value=512, step=64, label="Max Tokens")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top P")
            with gr.Accordion("ℹ️ Model Info", open=False):
                model_info = gr.Markdown(get_model_info())
            gr.Markdown("### 💡 Example Prompts")
            examples = gr.Examples(
                examples=[
                    "Explain quantum entanglement in simple terms.",
                    "Solve this math problem: If a train travels at 60 mph for 2.5 hours, how far does it go?",
                    "Write a Python function to check if a number is prime.",
                    "What are the steps to debug a React application?",
                    "Explain the difference between supervised and unsupervised learning.",
                ],
                inputs=msg_input,
                label="Click to try:",
            )
    # Events
    # msg_input.submit and send_btn.click both call chat_with_model.
    # NOTE(review): `api_visibility` is not a standard Gradio event kwarg
    # (the documented ones are `api_name` / `show_api`) — confirm it exists
    # in the pinned Gradio version, otherwise these calls raise TypeError.
    msg_input.submit(
        fn=chat_with_model,
        inputs=[msg_input, chatbot, system_prompt, temperature, max_tokens, top_p],
        outputs=[msg_input, chatbot],
        api_visibility="public",
    )
    send_btn.click(
        fn=chat_with_model,
        inputs=[msg_input, chatbot, system_prompt, temperature, max_tokens, top_p],
        outputs=[msg_input, chatbot],
        api_visibility="public",
    )
    clear_btn.click(fn=clear_conversation, inputs=None, outputs=[chatbot, msg_input], api_visibility="private")

    # Optional: retry last — naive implementation: re-send last user message.
    def retry_last(gr_chat_history, system_prompt, temperature, max_tokens, top_p):
        """Re-run generation for the most recent non-empty user message."""
        internal = gradio_history_to_internal(gr_chat_history)
        # Find the last user message with non-blank content.
        last_user = None
        for msg in reversed(internal):
            if msg.get("role") == "user" and msg.get("content", "").strip():
                last_user = msg["content"]
                break
        if last_user is None:
            return "", gr_chat_history
        # Drain the generator; only the final state is returned (non-streaming).
        # NOTE(review): `internal[:-1]` drops the *last* message regardless of
        # role — if the history ends with an assistant reply, the original user
        # turn stays and generate_response appends `last_user` again, producing
        # a duplicate user entry. Also, `updated_internal` is unbound if the
        # generator yields nothing. Both need confirming/fixing.
        for response_text, updated_internal in generate_response(last_user, internal[:-1], system_prompt, temperature, max_tokens, top_p):
            # continue streaming until finished
            pass
        return "", internal_history_to_gradio(updated_internal)

    retry_btn.click(
        fn=retry_last,
        inputs=[chatbot, system_prompt, temperature, max_tokens, top_p],
        outputs=[msg_input, chatbot],
        api_visibility="private",
    )
    # Load placeholder (avoid heavy work on import; model will lazy-load on first request).
    demo.load(fn=lambda: None)
# Launch
if __name__ == "__main__":
    # Fix: `theme=` is a `gr.Blocks()` constructor argument, not a parameter of
    # `Blocks.launch()`, and `footer_links=` is not a documented launch()
    # parameter either — passing unknown kwargs raises TypeError at startup
    # (the likely cause of the Space's "Runtime error"). Both have been
    # removed; to style the app, pass `theme=gr.themes.Soft(...)` to
    # `gr.Blocks(...)` where `demo` is constructed.
    demo.launch(
        server_name="0.0.0.0",  # listen on all interfaces (required on Spaces)
        server_port=7860,       # default Hugging Face Spaces port
        show_error=True,        # surface handler exceptions in the UI
    )