# NOTE: removed non-source residue (Hugging Face Spaces page header, git hashes,
# and a line-number dump from a web scrape) that made this file unparseable.
# app.py — corrected and integrated version
import inspect
import threading
from threading import Thread
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
# ====== Model configuration ======
MODEL_ID = "LiquidAI/LFM2.5-1.2B-Thinking"
# Default system prompt; shown (and editable) in the Settings accordion.
DEFAULT_SYSTEM_PROMPT = """You are LFM2.5, an advanced reasoning model developed by LiquidAI. You excel at breaking down complex problems, thinking step-by-step, and providing clear, well-reasoned answers. Always think through problems systematically before providing your final answer."""
# ====== Global state (populated lazily by load_model) ======
model = None  # AutoModelForCausalLM instance once loaded
tokenizer = None  # AutoTokenizer instance once loaded
is_model_loaded = False  # guard so load_model does the heavy work only once
def load_model():
    """Load the tokenizer and model once, caching them in module globals.

    Returns:
        bool: True if the model is (or already was) loaded, False on failure.
    """
    global model, tokenizer, is_model_loaded
    if is_model_loaded:
        return True
    try:
        print("Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

        print("Loading model...")
        # Pick precision/placement once instead of duplicating the whole
        # from_pretrained call per device (the two branches differed only
        # in torch_dtype and device_map).
        use_cuda = torch.cuda.is_available()
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            device_map="auto" if use_cuda else "cpu",
            trust_remote_code=True,
        )
        is_model_loaded = True
        print("Model loaded successfully!")
        return True
    except Exception as e:
        # Broad catch is deliberate: this is a top-level boundary and callers
        # only need a success flag; the error is logged for debugging.
        print(f"Error loading model: {e}")
        return False
# ====== Format conversion between Gradio and the app's internal history ======
def gradio_history_to_internal(gr_history):
    """Normalize a Gradio Chatbot history into role/content dicts.

    Accepts either the tuple-pair format ``[(user, assistant), ...]`` or a
    history that is already ``[{"role": ..., "content": ...}, ...]`` (the
    latter is returned unchanged).
    """
    if not gr_history:
        return []
    # Already in the internal dict format? Pass it through untouched.
    if isinstance(gr_history, list) and gr_history and isinstance(gr_history[0], dict):
        return gr_history

    messages = []
    for entry in gr_history:
        if not entry:
            continue
        if isinstance(entry, (list, tuple)) and len(entry) >= 2:
            user_part, assistant_part = entry[0], entry[1]
            # Skip None/empty halves so half-finished turns don't add blanks.
            if user_part is not None and user_part != "":
                messages.append({"role": "user", "content": str(user_part)})
            if assistant_part is not None and assistant_part != "":
                messages.append({"role": "assistant", "content": str(assistant_part)})
        else:
            # Anything else is treated as a bare user message.
            messages.append({"role": "user", "content": str(entry)})
    return messages
def internal_history_to_gradio(internal_history):
    """Convert role/content dicts back into Gradio's (user, assistant) tuples.

    A user message followed by an assistant message becomes one pair; an
    unanswered user message becomes ``(user, "")``; an assistant message with
    no preceding user becomes ``("", assistant)``. Other roles are ignored.
    """
    tuples = []
    pending_user = None
    for message in internal_history:
        role = message.get("role")
        text = message.get("content", "")
        if role == "user":
            if pending_user is not None:
                # Previous user turn never got an answer; flush it alone.
                tuples.append((pending_user, ""))
            pending_user = text
        elif role == "assistant":
            tuples.append((pending_user if pending_user is not None else "", text))
            pending_user = None
    # Trailing unanswered user turn.
    if pending_user is not None:
        tuples.append((pending_user, ""))
    return tuples
# ====== Formatting messages for the model ======
def format_chat_history(history, system_prompt):
    """Build the message list for the chat template.

    Args:
        history: list of ``{"role": ..., "content": ...}`` dicts; entries
            missing a truthy role or a "content" key are dropped.
        system_prompt: optional system message text, prepended when non-empty.

    Returns:
        A message list suitable for ``apply_chat_template``.
    """
    prefix = [{"role": "system", "content": system_prompt}] if system_prompt else []
    return prefix + [
        {"role": entry["role"], "content": entry["content"]}
        for entry in history
        if entry.get("role") and "content" in entry
    ]
def apply_chat_template(messages):
    """Render a message list into a single prompt string.

    Prefers the tokenizer's built-in chat template; if that is unavailable
    (or the tokenizer has not been loaded yet), falls back to plain
    role-marker formatting.
    """
    try:
        # tokenize=False: we only need the text here; tokenization happens later.
        return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    except Exception:
        # Manual fallback with simple role markers.
        marker_for = {"system": "<|system|>", "user": "<|user|>", "assistant": "<|assistant|>"}
        pieces = []
        for msg in messages:
            marker = marker_for.get(msg["role"])
            if marker is not None:
                pieces.append(f"{marker}\n{msg['content']}\n")
        # Trailing assistant marker cues the model to start its reply.
        pieces.append("<|assistant|>\n")
        return "".join(pieces)
# ====== Response generation (supports streaming) ======
def generate_response(message, history, system_prompt, temperature, max_tokens, top_p):
    """
    Generator that yields (partial_text, internal_history) while streaming.

    Args:
        message: the new user message (str).
        history: internal history, a list of {"role", "content"} dicts;
            a copy is taken, so the caller's list is not mutated.
        system_prompt: optional system message prepended to the conversation.
        temperature, max_tokens, top_p: sampling controls forwarded to
            ``model.generate``.

    Yields:
        (partial_text, updated_history) tuples; the final yield carries the
        complete assistant reply.
    """
    global model, tokenizer, is_model_loaded
    # Lazy-load the model on the first request so importing stays cheap.
    if not is_model_loaded:
        if not load_model():
            yield "❌ Error: Failed to load model. Please check the logs.", history
            return
    # Append the user message to a private copy of the history.
    history = list(history)  # copy
    history.append({"role": "user", "content": message})
    # Format messages for the model
    messages_for_model = format_chat_history(history, system_prompt)
    prompt = apply_chat_template(messages_for_model)
    # Tokenize
    inputs = tokenizer(prompt, return_tensors="pt")
    if torch.cuda.is_available():
        inputs = {k: v.cuda() for k, v in inputs.items()}
    # Try streaming via TextIteratorStreamer; if it fails, fall back to
    # non-streaming generation.
    try:
        # timeout guards against a stalled generation thread blocking forever.
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=20.0)
        generation_kwargs = {
            **inputs,
            "streamer": streamer,
            "max_new_tokens": int(max_tokens),
            "temperature": float(temperature),
            "top_p": float(top_p),
            "do_sample": float(temperature) > 0.0,  # greedy decode at temperature 0
            "pad_token_id": tokenizer.eos_token_id,
        }
        # Run generate() in a worker thread; the streamer feeds tokens back here.
        gen_thread = Thread(target=model.generate, kwargs=generation_kwargs)
        gen_thread.start()
        response = ""
        for new_text in streamer:
            response += new_text
            # Update the last assistant entry in history. The last entry is
            # the user message on the first token, so append the assistant
            # entry once and overwrite it on subsequent tokens.
            if len(history) == 0 or history[-1].get("role") != "assistant":
                history.append({"role": "assistant", "content": response})
            else:
                history[-1]["content"] = response
            yield response, history
        gen_thread.join()
    except Exception as e:
        # Fallback: synchronous non-streaming generation (less interactive).
        # NOTE(review): if the streamer raised mid-stream (e.g. timeout), the
        # first generation thread may still be running while this fallback
        # starts a second generate() — confirm that is acceptable on the
        # target hardware.
        try:
            outputs = model.generate(
                **inputs,
                max_new_tokens=int(max_tokens),
                temperature=float(temperature),
                top_p=float(top_p),
                do_sample=float(temperature) > 0.0,
                pad_token_id=tokenizer.eos_token_id,
            )
            # Decode only the newly generated tokens (slice off the prompt).
            decoded = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
            # update history
            history.append({"role": "assistant", "content": decoded})
            yield decoded, history
        except Exception as e2:
            err = f"❌ Generation error: {e} | fallback error: {e2}"
            history.append({"role": "assistant", "content": err})
            yield err, history
# ====== Chat wrapper with error handling and format conversion ======
def chat_with_model(message, gr_chat_history, system_prompt, temperature, max_tokens, top_p):
    """Gradio event handler: stream the model's reply into the Chatbot.

    Args:
        message: text from the input box.
        gr_chat_history: current Chatbot value.
        system_prompt, temperature, max_tokens, top_p: generation settings.

    Yields:
        ("", history) pairs — the input box is cleared on every update and
        the Chatbot receives ``{"role", "content"}`` dicts, matching the
        component's ``type="messages"`` configuration.
    """
    # Ignore empty / whitespace-only submissions.
    if not message or not str(message).strip():
        yield "", gr_chat_history
        return

    # Normalize whatever format the Chatbot state is in.
    internal_history = gradio_history_to_internal(gr_chat_history)
    try:
        for _partial_text, updated_internal in generate_response(
            message, internal_history, system_prompt, temperature, max_tokens, top_p
        ):
            # BUG FIX: the Chatbot is created with type="messages", which
            # expects role/content dicts; the original converted back to
            # (user, assistant) tuples, a format mismatch. Yield the dict
            # history directly.
            yield "", updated_internal
    except Exception as e:
        error_msg = f"❌ Error: {str(e)}"
        internal_history.append({"role": "assistant", "content": error_msg})
        yield "", internal_history
def clear_conversation():
    """Reset the UI: empty Chatbot history and a blank input box."""
    fresh_history, fresh_input = [], ""
    return fresh_history, fresh_input
def get_model_info():
    """Return the Markdown shown in the "Model Info" accordion."""
    # Continuation lines stay at column 0 so the rendered Markdown text is
    # unchanged (indenting them would alter the literal's content).
    return f""" ### 🧠 LFM2.5-1.2B-Thinking
**Model:** {MODEL_ID}
**Description:** An advanced reasoning model optimized for step-by-step thinking and complex problem-solving.
**Parameters:** ~1.2 Billion
**Capabilities:** - Logical reasoning - Mathematical problem solving - Code generation and analysis - Step-by-step thinking
**Tips:** Use the system prompt to guide the model's behavior and adjust temperature for creativity vs. precision.
"""
# ====== Gradio UI ======
with gr.Blocks(title="LFM2.5-1.2B-Thinking Trial", fill_height=True) as demo:
    # Markdown body kept at column 0: indenting it would turn the headings
    # into a Markdown code block.
    gr.Markdown(
        """
# 🧠 LFM2.5-1.2B-Thinking
### Advanced Reasoning Model by LiquidAI
"""
    )
    with gr.Row():
        with gr.Column(scale=3):
            # NOTE(review): `bubble_full_width` is deprecated/removed in some
            # Gradio 5.x releases — confirm against the pinned Gradio version.
            chatbot = gr.Chatbot(label="Conversation", height=500, bubble_full_width=False, type="messages")
            with gr.Row():
                msg_input = gr.Textbox(
                    label="Your Message",
                    placeholder="Ask me anything... Press Enter to send, Shift+Enter for new line",
                    lines=2,
                    show_label=False,
                    container=False,
                )
                send_btn = gr.Button("🚀 Send", variant="primary")
            with gr.Row():
                clear_btn = gr.Button("🗑️ Clear Conversation", variant="secondary")
                retry_btn = gr.Button("🔄 Retry Last", variant="secondary")
        with gr.Column(scale=1):
            with gr.Accordion("⚙️ Settings", open=False):
                system_prompt = gr.Textbox(label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, lines=4)
                temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
                max_tokens = gr.Slider(minimum=64, maximum=2048, value=512, step=64, label="Max Tokens")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top P")
            with gr.Accordion("ℹ️ Model Info", open=False):
                model_info = gr.Markdown(get_model_info())
            gr.Markdown("### 💡 Example Prompts")
            examples = gr.Examples(
                examples=[
                    "Explain quantum entanglement in simple terms.",
                    "Solve this math problem: If a train travels at 60 mph for 2.5 hours, how far does it go?",
                    "Write a Python function to check if a number is prime.",
                    "What are the steps to debug a React application?",
                    "Explain the difference between supervised and unsupervised learning.",
                ],
                inputs=msg_input,
                label="Click to try:",
            )

    # Event wiring: Enter in the textbox and the Send button behave identically.
    # NOTE(review): `api_visibility` exists only in recent Gradio releases —
    # confirm the installed version supports it.
    msg_input.submit(
        fn=chat_with_model,
        inputs=[msg_input, chatbot, system_prompt, temperature, max_tokens, top_p],
        outputs=[msg_input, chatbot],
        api_visibility="public",
    )
    send_btn.click(
        fn=chat_with_model,
        inputs=[msg_input, chatbot, system_prompt, temperature, max_tokens, top_p],
        outputs=[msg_input, chatbot],
        api_visibility="public",
    )
    clear_btn.click(fn=clear_conversation, inputs=None, outputs=[chatbot, msg_input], api_visibility="private")

    def retry_last(gr_chat_history, system_prompt, temperature, max_tokens, top_p):
        """Re-generate the reply to the most recent non-empty user message."""
        internal = gradio_history_to_internal(gr_chat_history)
        # Locate the last non-empty user turn (index needed for truncation).
        last_user_idx = None
        for idx in range(len(internal) - 1, -1, -1):
            entry = internal[idx]
            if entry.get("role") == "user" and entry.get("content", "").strip():
                last_user_idx = idx
                break
        if last_user_idx is None:
            return "", gr_chat_history
        last_user = internal[last_user_idx]["content"]
        # BUG FIX: the original sliced `internal[:-1]`, which is only correct
        # when the history ends exactly one entry after the user turn (it
        # duplicated the user message when the history ended with a reply).
        # Truncate at the user turn instead; generate_response re-appends it.
        truncated = internal[:last_user_idx]
        # BUG FIX: initialize so a generator that yields nothing cannot leave
        # the name unbound (the original raised UnboundLocalError then).
        updated = truncated
        for _text, updated in generate_response(
            last_user, truncated, system_prompt, temperature, max_tokens, top_p
        ):
            pass  # drain the stream; retry shows only the final answer
        # BUG FIX: the Chatbot uses type="messages", so return role/content
        # dicts rather than (user, assistant) tuples.
        return "", updated

    retry_btn.click(
        fn=retry_last,
        inputs=[chatbot, system_prompt, temperature, max_tokens, top_p],
        outputs=[msg_input, chatbot],
        api_visibility="private",
    )

    # Lightweight load hook; the model itself lazy-loads on the first request.
    demo.load(fn=lambda: None)
# Launch
if __name__ == "__main__":
# You can pin a Gradio version in your environment instead of changing the code.
# The app below avoids `show_copy_button` to be compatible with multiple Gradio releases.
demo.launch(
theme=gr.themes.Soft(
primary_hue="blue",
secondary_hue="indigo",
neutral_hue="slate",
font=gr.themes.GoogleFont("Inter"),
text_size="md",
spacing_size="md",
radius_size="md",
).set(
button_primary_background_fill="*primary_600",
button_primary_background_fill_hover="*primary_700",
block_title_text_weight="600",
),
footer_links=[
{"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
{"label": "LiquidAI", "url": "https://huggingface.co/LiquidAI"},
{"label": "Model Card", "url": "https://huggingface.co/LiquidAI/LFM2.5-1.2B-Thinking"},
],
server_name="0.0.0.0",
server_port=7860,
show_error=True,
) |