# Source: Hugging Face Space file "app.py" (commit 9a3021b, verified) by kate-line.
# app.py — corrected and consolidated version
import inspect
import threading
from threading import Thread
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
# ====== إعدادات النموذج ======
MODEL_ID = "LiquidAI/LFM2.5-1.2B-Thinking"
DEFAULT_SYSTEM_PROMPT = """You are LFM2.5, an advanced reasoning model developed by LiquidAI. You excel at breaking down complex problems, thinking step-by-step, and providing clear, well-reasoned answers. Always think through problems systematically before providing your final answer."""
# ====== متغيرات عالمية ======
model = None
tokenizer = None
is_model_loaded = False
def load_model():
"""Load the model and tokenizer (مرّة واحدة)."""
global model, tokenizer, is_model_loaded
if is_model_loaded:
return True
try:
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
print("Loading model...")
if torch.cuda.is_available():
model = AutoModelForCausalLM.from_pretrained(
MODEL_ID,
torch_dtype=torch.float16,
device_map="auto",
trust_remote_code=True,
)
else:
model = AutoModelForCausalLM.from_pretrained(
MODEL_ID,
torch_dtype=torch.float32,
device_map="cpu",
trust_remote_code=True,
)
is_model_loaded = True
print("Model loaded successfully!")
return True
except Exception as e:
print(f"Error loading model: {e}")
return False
# ====== Format conversion between Gradio and the internal history ======
def gradio_history_to_internal(gr_history):
"""
Gradio Chatbot state is typically a list of (user, assistant) tuples.
We convert to a list of dicts: {"role": "user"|"assistant", "content": str}
"""
if not gr_history:
return []
# If already in internal dict format, return as-is
if isinstance(gr_history, list) and len(gr_history) > 0 and isinstance(gr_history[0], dict):
return gr_history
internal = []
for pair in gr_history:
if not pair:
continue
# pair may be a tuple/list of length 2 or a single string
if isinstance(pair, (list, tuple)) and len(pair) >= 2:
user_txt, assistant_txt = pair[0], pair[1]
if user_txt is not None and user_txt != "":
internal.append({"role": "user", "content": str(user_txt)})
if assistant_txt is not None and assistant_txt != "":
internal.append({"role": "assistant", "content": str(assistant_txt)})
else:
# fallback: treat item as a user message
internal.append({"role": "user", "content": str(pair)})
return internal
def internal_history_to_gradio(internal_history):
"""
Convert internal list of dicts to Gradio Chatbot format:
list of (user, assistant) tuples. We group sequential pairs.
"""
pairs = []
user_buf = None
assistant_buf = None
for msg in internal_history:
role = msg.get("role")
content = msg.get("content", "")
if role == "user":
# If previous user buffered without assistant, flush it as (user, "")
if user_buf is not None and assistant_buf is None:
pairs.append((user_buf, ""))
user_buf = content
assistant_buf = None
elif role == "assistant":
assistant_buf = content
if user_buf is None:
# assistant message without explicit user -> push as ("", assistant)
pairs.append(("", assistant_buf))
user_buf = None
assistant_buf = None
else:
pairs.append((user_buf, assistant_buf))
user_buf = None
assistant_buf = None
# flush any leftover user
if user_buf is not None and assistant_buf is None:
pairs.append((user_buf, ""))
return pairs
# ====== Message formatting for the model ======
def format_chat_history(history, system_prompt):
"""
history: list of dicts {"role":..., "content":...}
Returns list of messages formatted for apply_chat_template or manual fallback.
"""
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
for msg in history:
if msg.get("role") and "content" in msg:
messages.append({"role": msg["role"], "content": msg["content"]})
return messages
def apply_chat_template(messages):
"""
Use tokenizer.apply_chat_template when available; otherwise fallback to simple markers.
"""
try:
# Some tokenizers expose apply_chat_template
# tokenize=False because we will tokenize later
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
return prompt
except Exception:
# manual fallback
prompt = ""
for msg in messages:
if msg["role"] == "system":
prompt += f"<|system|>\n{msg['content']}\n"
elif msg["role"] == "user":
prompt += f"<|user|>\n{msg['content']}\n"
elif msg["role"] == "assistant":
prompt += f"<|assistant|>\n{msg['content']}\n"
prompt += "<|assistant|>\n"
return prompt
# ====== Response generation (supports streaming) ======
def generate_response(message, history, system_prompt, temperature, max_tokens, top_p):
"""
Generator that yields (partial_text, internal_history) while streaming.
"""
global model, tokenizer, is_model_loaded
# ensure model loaded
if not is_model_loaded:
if not load_model():
yield "❌ Error: Failed to load model. Please check the logs.", history
return
# Append user message into internal history
history = list(history) # copy
history.append({"role": "user", "content": message})
# Format messages for the model
messages_for_model = format_chat_history(history, system_prompt)
prompt = apply_chat_template(messages_for_model)
# Tokenize
inputs = tokenizer(prompt, return_tensors="pt")
if torch.cuda.is_available():
inputs = {k: v.cuda() for k, v in inputs.items()}
# Try streaming via TextIteratorStreamer; if it fails, fallback to non-streaming generation
try:
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=20.0)
generation_kwargs = {
**inputs,
"streamer": streamer,
"max_new_tokens": int(max_tokens),
"temperature": float(temperature),
"top_p": float(top_p),
"do_sample": float(temperature) > 0.0,
"pad_token_id": tokenizer.eos_token_id,
}
# start generation in a thread
gen_thread = Thread(target=model.generate, kwargs=generation_kwargs)
gen_thread.start()
response = ""
for new_text in streamer:
response += new_text
# update last assistant entry in history
# ensure we don't duplicate user entry — we know last entry is user, append/update assistant
if len(history) == 0 or history[-1].get("role") != "assistant":
history.append({"role": "assistant", "content": response})
else:
history[-1]["content"] = response
yield response, history
gen_thread.join()
except Exception as e:
# Fallback: synchronous non-streaming generation (less interactive)
try:
outputs = model.generate(
**inputs,
max_new_tokens=int(max_tokens),
temperature=float(temperature),
top_p=float(top_p),
do_sample=float(temperature) > 0.0,
pad_token_id=tokenizer.eos_token_id,
)
decoded = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
# update history
history.append({"role": "assistant", "content": decoded})
yield decoded, history
except Exception as e2:
err = f"❌ Generation error: {e} | fallback error: {e2}"
history.append({"role": "assistant", "content": err})
yield err, history
# ====== Chat wrapper with error handling and format conversion ======
def chat_with_model(message, gr_chat_history, system_prompt, temperature, max_tokens, top_p):
"""
This function is connected to Gradio. It receives:
- message (str)
- gr_chat_history (Gradio Chatbot state)
It should return:
- cleared msg_input (""), updated gr_chat_history (list of tuples)
We implement streaming by yielding successive (msg_input, gr_chat_history) pairs.
"""
# If empty message, do nothing
if not message or not str(message).strip():
# return unchanged history and empty input
yield "", gr_chat_history
return
# Convert gradio history format to internal
internal_history = gradio_history_to_internal(gr_chat_history)
try:
# stream generator
for response_text, updated_internal in generate_response(
message, internal_history, system_prompt, temperature, max_tokens, top_p
):
# convert to Gradio format for display
gr_history_for_component = internal_history_to_gradio(updated_internal)
# clear input box on each yield (keeps behavior consistent)
yield "", gr_history_for_component
except Exception as e:
error_msg = f"❌ Error: {str(e)}"
internal_history.append({"role": "assistant", "content": error_msg})
yield "", internal_history_to_gradio(internal_history)
def clear_conversation():
return [], ""
def get_model_info():
return f""" ### 🧠 LFM2.5-1.2B-Thinking
**Model:** {MODEL_ID}
**Description:** An advanced reasoning model optimized for step-by-step thinking and complex problem-solving.
**Parameters:** ~1.2 Billion
**Capabilities:** - Logical reasoning - Mathematical problem solving - Code generation and analysis - Step-by-step thinking
**Tips:** Use the system prompt to guide the model's behavior and adjust temperature for creativity vs. precision.
"""
# ====== Gradio UI ======
with gr.Blocks(title="LFM2.5-1.2B-Thinking Trial", fill_height=True) as demo:
gr.Markdown(
"""
# 🧠 LFM2.5-1.2B-Thinking
### Advanced Reasoning Model by LiquidAI
"""
)
with gr.Row():
with gr.Column(scale=3):
# Note: avoid using `show_copy_button` directly (it may not exist in installed Gradio).
# If you want a copy button in newer Gradio versions, you could use `buttons=["copy"]`.
chatbot = gr.Chatbot(label="Conversation", height=500, bubble_full_width=False, type="messages")
with gr.Row():
msg_input = gr.Textbox(
label="Your Message",
placeholder="Ask me anything... Press Enter to send, Shift+Enter for new line",
lines=2,
show_label=False,
container=False,
)
send_btn = gr.Button("🚀 Send", variant="primary")
with gr.Row():
clear_btn = gr.Button("🗑️ Clear Conversation", variant="secondary")
retry_btn = gr.Button("🔄 Retry Last", variant="secondary")
with gr.Column(scale=1):
with gr.Accordion("⚙️ Settings", open=False):
system_prompt = gr.Textbox(label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, lines=4)
temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
max_tokens = gr.Slider(minimum=64, maximum=2048, value=512, step=64, label="Max Tokens")
top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top P")
with gr.Accordion("ℹ️ Model Info", open=False):
model_info = gr.Markdown(get_model_info())
gr.Markdown("### 💡 Example Prompts")
examples = gr.Examples(
examples=[
"Explain quantum entanglement in simple terms.",
"Solve this math problem: If a train travels at 60 mph for 2.5 hours, how far does it go?",
"Write a Python function to check if a number is prime.",
"What are the steps to debug a React application?",
"Explain the difference between supervised and unsupervised learning.",
],
inputs=msg_input,
label="Click to try:",
)
# Events
# msg_input.submit and send_btn.click both call chat_with_model.
msg_input.submit(
fn=chat_with_model,
inputs=[msg_input, chatbot, system_prompt, temperature, max_tokens, top_p],
outputs=[msg_input, chatbot],
api_visibility="public",
)
send_btn.click(
fn=chat_with_model,
inputs=[msg_input, chatbot, system_prompt, temperature, max_tokens, top_p],
outputs=[msg_input, chatbot],
api_visibility="public",
)
clear_btn.click(fn=clear_conversation, inputs=None, outputs=[chatbot, msg_input], api_visibility="private")
# Optional: retry last — naive implementation: re-send last user message
def retry_last(gr_chat_history, system_prompt, temperature, max_tokens, top_p):
internal = gradio_history_to_internal(gr_chat_history)
# find last user message
last_user = None
for msg in reversed(internal):
if msg.get("role") == "user" and msg.get("content", "").strip():
last_user = msg["content"]
break
if last_user is None:
return "", gr_chat_history
# call chat_with_model generator directly (non-streaming here for retry convenience)
for response_text, updated_internal in generate_response(last_user, internal[:-1], system_prompt, temperature, max_tokens, top_p):
# continue streaming until finished
pass
return "", internal_history_to_gradio(updated_internal)
retry_btn.click(
fn=retry_last,
inputs=[chatbot, system_prompt, temperature, max_tokens, top_p],
outputs=[msg_input, chatbot],
api_visibility="private",
)
# load placeholder (avoid heavy work on import; model will lazy-load on first request)
demo.load(fn=lambda: None)
# Launch
if __name__ == "__main__":
# You can pin a Gradio version in your environment instead of changing the code.
# The app below avoids `show_copy_button` to be compatible with multiple Gradio releases.
demo.launch(
theme=gr.themes.Soft(
primary_hue="blue",
secondary_hue="indigo",
neutral_hue="slate",
font=gr.themes.GoogleFont("Inter"),
text_size="md",
spacing_size="md",
radius_size="md",
).set(
button_primary_background_fill="*primary_600",
button_primary_background_fill_hover="*primary_700",
block_title_text_weight="600",
),
footer_links=[
{"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
{"label": "LiquidAI", "url": "https://huggingface.co/LiquidAI"},
{"label": "Model Card", "url": "https://huggingface.co/LiquidAI/LFM2.5-1.2B-Thinking"},
],
server_name="0.0.0.0",
server_port=7860,
show_error=True,
)