import html
import re

import gradio as gr
from llama_cpp import Llama
|
|
| print("🧠 جاري تحميل دماغ فرانكشتاين...") |
| llm = Llama( |
| model_path="model-q4_k_m.gguf", |
| n_ctx=2048, |
| n_threads=4, |
| n_batch=512, |
| verbose=False |
| ) |
|
|
| def generate_response(message, history, context): |
| |
| system_msg = """You MUST think step-by-step first, then answer. |
| Format: <|deep_think|>your analysis</|deep_think|> then your answer.""" |
|
|
| user_content = message |
| if context and context.strip(): |
| user_content = f"Context:\n{context}\n\nQuery:\n{message}" |
| |
| |
| prompt = f"<|im_start|>system\n{system_msg}<|im_end|>\n<|im_start|>user\n{user_content}<|im_end|>\n<|im_start|>assistant\n<|deep_think|>Let me analyze this:\n" |
|
|
| response = llm( |
| prompt, |
| max_tokens=1024, |
| stop=["<|im_end|>"], |
| temperature=0.1, |
| stream=True |
| ) |
| |
| buffer = "" |
| thinking = "" |
| answer = "" |
| mode = "thinking" |
| |
| for chunk in response: |
| if "choices" in chunk and len(chunk["choices"]) > 0: |
| text = chunk["choices"][0].get("text", "") |
| if not text: |
| continue |
| |
| buffer += text |
| |
| if mode == "thinking": |
| |
| if "</|deep_think|>" in buffer: |
| parts = buffer.split("</|deep_think|>", 1) |
| thinking = parts[0] |
| answer = parts[1] if len(parts) > 1 else "" |
| mode = "answer" |
| |
| |
| yield f"""<div style="background:#2d2d2d; border-left:4px solid #ff9800; padding:12px; margin:5px 0;"> |
| <details open> |
| <summary style="color:#ff9800; font-weight:bold;">🧠 التفكير</summary> |
| <div style="color:#ccc; margin-top:10px; white-space:pre-wrap;">{thinking}</div> |
| </details> |
| </div> |
| |
| <div style="background:#0d2818; border:2px solid #2e7d32; padding:16px; margin:8px 0; border-radius:8px; color:#e8f5e9;"> |
| <div style="font-weight:bold; color:#4caf50; margin-bottom:10px;">✅ الإجابة النهائية</div> |
| <div style="line-height:1.7; white-space:pre-wrap;">{answer}</div> |
| </div>""" |
| else: |
| thinking = buffer |
| yield f"""<div style="background:#2d2d2d; border-left:4px solid #ff9800; padding:12px; margin:5px 0;"> |
| <details open> |
| <summary style="color:#ff9800; font-weight:bold;">🧠 جاري التفكير...</summary> |
| <div style="color:#ccc; margin-top:10px; white-space:pre-wrap;">{thinking}</div> |
| </details> |
| </div>""" |
| |
| else: |
| answer += text |
| yield f"""<div style="background:#2d2d2d; border-left:4px solid #ff9800; padding:12px; margin:5px 0;"> |
| <details> |
| <summary style="color:#888; cursor:pointer;">🧠 عرض التفكير</summary> |
| <div style="color:#aaa; margin-top:8px; padding:8px; background:#252525; white-space:pre-wrap;">{thinking}</div> |
| </details> |
| </div> |
| |
| <div style="background:#0d2818; border:2px solid #2e7d32; padding:16px; margin:8px 0; border-radius:8px; color:#e8f5e9;"> |
| <div style="font-weight:bold; color:#4caf50; margin-bottom:10px;">✅ الإجابة النهائية</div> |
| <div style="line-height:1.7; white-space:pre-wrap;">{answer}</div> |
| </div>""" |
|
|
| demo = gr.ChatInterface( |
| fn=generate_response, |
| additional_inputs=[gr.Textbox(label="📦 صندوق السياق (RAG)", lines=4)], |
| title="🤖 Frankenstein Agent", |
| description="وكيل ذكي مع تفكير إجباري + RAG" |
| ) |
|
|
| if __name__ == "__main__": |
| demo.launch(server_name="0.0.0.0", server_port=7860) |
|
|