Spaces:

Derr11
/

Der11

Paused

App Files Files Community

Der11 / app.py

Derr11

Update app.py

3f46c32 verified 16 days ago

raw

history blame

2.68 kB

	import gradio as gr
	from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
	import torch
	import spaces # مكتبة ZeroGPU

	# 1. Model setup (Qwen3-Omni-Thinking).
	#    Everything below runs once, at import time, so the weights are ready
	#    before the first chat request arrives.
	MODEL_ID = "Qwen/Qwen3-Omni-30B-A3B-Thinking"

	print(f"جاري تحميل النموذج العملاق {MODEL_ID}... هذا سيستغرق بضعة دقائق.")

	# 4-bit NF4 quantization (with double quantization) so the checkpoint fits
	# in the memory available on ZeroGPU; matmuls are computed in bfloat16.
	nf4_config = BitsAndBytesConfig(
	    load_in_4bit=True,
	    bnb_4bit_compute_dtype=torch.bfloat16,
	    bnb_4bit_quant_type="nf4",
	    bnb_4bit_use_double_quant=True,
	)

	# The tokenizer is loaded first; it also supplies the chat template used
	# when building prompts.
	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

	# Load the quantized model. AutoModel is used deliberately here instead of
	# AutoModelForCausalLM (an earlier revision of this file used the latter).
	model = AutoModel.from_pretrained(
	    MODEL_ID,
	    trust_remote_code=True,
	    device_map="auto",
	    quantization_config=nf4_config,
	)

	print("تم تحميل النموذج بنجاح! المعلم جاهز.")

	# 2. Chat handler: build the prompt, generate, and decode the reply.
	@spaces.GPU(duration=120)
	def chat_with_thinking_model(message, history):
	    """Generate the model's reply to ``message`` given the prior ``history``.

	    Args:
	        message: The latest user message.
	        history: Earlier turns of the conversation. Accepted either as
	            ``(user, assistant)`` pairs or as ``{"role": ..., "content": ...}``
	            dicts; ``None`` or an empty list means there are no prior turns.

	    Returns:
	        The decoded text of the newly generated tokens only (the prompt
	        tokens are stripped and special tokens are skipped).
	    """
	    messages = []

	    for turn in history or []:
	        if isinstance(turn, dict):
	            # "messages"-style history: the entry already carries its role.
	            # Unpacking such a dict as a pair would yield its *keys*, so it
	            # has to be handled separately.
	            if turn.get("content"):
	                messages.append(
	                    {"role": turn["role"], "content": turn["content"]}
	                )
	            continue

	        # Pair-style history: (user_text, assistant_text). Either side may
	        # be empty/None for an incomplete turn; skip those instead of
	        # feeding an empty message to the chat template.
	        user_msg, bot_msg = turn
	        if user_msg:
	            messages.append({"role": "user", "content": user_msg})
	        if bot_msg:
	            messages.append({"role": "assistant", "content": bot_msg})

	    messages.append({"role": "user", "content": message})

	    # Render the conversation to a single prompt string that ends with the
	    # assistant generation prompt.
	    text = tokenizer.apply_chat_template(
	        messages,
	        tokenize=False,
	        add_generation_prompt=True,
	    )

	    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

	    generated_ids = model.generate(
	        **model_inputs,
	        max_new_tokens=1024,
	        # temperature only takes effect when sampling is enabled, so turn
	        # it on explicitly instead of relying on the checkpoint's defaults.
	        do_sample=True,
	        temperature=0.7,
	    )

	    # Drop the prompt tokens from each sequence so only the new tokens are
	    # decoded.
	    generated_ids = [
	        output_ids[len(input_ids):]
	        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
	    ]

	    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
	    return response

	# 3. User interface.
	#    Extra CSS handed to gr.Blocks; it targets the element with id "chatbot".
	custom_css = """
	#chatbot {min-height: 400px;}
	"""

	with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
	    # Page title followed by a short note (in Arabic) telling the user the
	    # model may print its reasoning steps before the final answer.
	    gr.Markdown("# 🧠 Nasaq AI Tutor (Thinking Mode)")
	    gr.Markdown("هذا النموذج يستخدم Qwen3-Omni-Thinking. ستلاحظ أنه قد يكتب خطوات تفكيره قبل الإجابة النهائية.")

	    # Chat widget wired to the generation function. Example caching is
	    # disabled, so the examples are not pre-computed.
	    chatbot = gr.ChatInterface(
	        fn=chat_with_thinking_model,
	        cache_examples=False,
	        examples=[
	            "اشرح لي النظرية النسبية وكأنني طفل في الخامسة",
	            "حل المعادلة: س^2 + 5س + 6 = 0 مع الشرح",
	        ],
	    )

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()