Spaces:

Riy777
/

Orgteh0.1

Sleeping

App Files Files Community

Orgteh0.1 / app.py

Riy777

Update app.py

88c37a8 verified about 2 months ago

raw

history blame contribute delete

4.07 kB

	import gradio as gr
	from llama_cpp import Llama
	import re

	print("🧠 جاري تحميل دماغ فرانكشتاين...")
	llm = Llama(
	model_path="model-q4_k_m.gguf",
	n_ctx=2048, # تقليل إذا لم تحتاج 4096
	n_threads=4, # زود حسب أنوية معالجك (4 أو 8)
	n_batch=512, # ي speeding كبير في time-to-first-token
	verbose=False
	)

	def generate_response(message, history, context):
	# تعليمات صارمة + حقن التفكير
	system_msg = """You MUST think step-by-step first, then answer.
	Format: <\|deep_think\|>your analysis</\|deep_think\|> then your answer."""

	user_content = message
	if context and context.strip():
	user_content = f"Context:\n{context}\n\nQuery:\n{message}"

	# 🔴 الحقن الإجباري: نجبر النموذج على البدء بالتفكير
	prompt = f"<\|im_start\|>system\n{system_msg}<\|im_end\|>\n<\|im_start\|>user\n{user_content}<\|im_end\|>\n<\|im_start\|>assistant\n<\|deep_think\|>Let me analyze this:\n"

	response = llm(
	prompt,
	max_tokens=1024,
	stop=["<\|im_end\|>"],
	temperature=0.1,
	stream=True
	)

	buffer = ""
	thinking = ""
	answer = ""
	mode = "thinking" # نبدأ مباشرة بوضع التفكير

	for chunk in response:
	if "choices" in chunk and len(chunk["choices"]) > 0:
	text = chunk["choices"][0].get("text", "")
	if not text:
	continue

	buffer += text

	if mode == "thinking":
	# البحث عن نهاية التفكير
	if "</\|deep_think\|>" in buffer:
	parts = buffer.split("</\|deep_think\|>", 1)
	thinking = parts[0]
	answer = parts[1] if len(parts) > 1 else ""
	mode = "answer"

	# عرض الفصل الكامل
	yield f"""<div style="background:#2d2d2d; border-left:4px solid #ff9800; padding:12px; margin:5px 0;">
	<details open>
	<summary style="color:#ff9800; font-weight:bold;">🧠 التفكير</summary>
	<div style="color:#ccc; margin-top:10px; white-space:pre-wrap;">{thinking}</div>
	</details>
	</div>

	<div style="background:#0d2818; border:2px solid #2e7d32; padding:16px; margin:8px 0; border-radius:8px; color:#e8f5e9;">
	<div style="font-weight:bold; color:#4caf50; margin-bottom:10px;">✅ الإجابة النهائية</div>
	<div style="line-height:1.7; white-space:pre-wrap;">{answer}</div>
	</div>"""
	else:
	thinking = buffer
	yield f"""<div style="background:#2d2d2d; border-left:4px solid #ff9800; padding:12px; margin:5px 0;">
	<details open>
	<summary style="color:#ff9800; font-weight:bold;">🧠 جاري التفكير...</summary>
	<div style="color:#ccc; margin-top:10px; white-space:pre-wrap;">{thinking}</div>
	</details>
	</div>"""

	else: # answer mode
	answer += text
	yield f"""<div style="background:#2d2d2d; border-left:4px solid #ff9800; padding:12px; margin:5px 0;">
	<details>
	<summary style="color:#888; cursor:pointer;">🧠 عرض التفكير</summary>
	<div style="color:#aaa; margin-top:8px; padding:8px; background:#252525; white-space:pre-wrap;">{thinking}</div>
	</details>
	</div>

	<div style="background:#0d2818; border:2px solid #2e7d32; padding:16px; margin:8px 0; border-radius:8px; color:#e8f5e9;">
	<div style="font-weight:bold; color:#4caf50; margin-bottom:10px;">✅ الإجابة النهائية</div>
	<div style="line-height:1.7; white-space:pre-wrap;">{answer}</div>
	</div>"""

	demo = gr.ChatInterface(
	fn=generate_response,
	additional_inputs=[gr.Textbox(label="📦 صندوق السياق (RAG)", lines=4)],
	title="🤖 Frankenstein Agent",
	description="وكيل ذكي مع تفكير إجباري + RAG"
	)

	if __name__ == "__main__":
	demo.launch(server_name="0.0.0.0", server_port=7860)