"""Gradio front end for a local GGUF chat model served through llama-cpp-python.

The module downloads the model at import time, exposes two API endpoints
(``chat`` and ``clear``) through a Gradio Blocks app, and starts the server.
Conversation state is not kept server-side: the client sends the history as a
JSON string and receives the updated history back with every reply.
"""

import base64
import json

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

REPO_ID = "MaziyarPanahi/Ministral-3-3B-Reasoning-2512-GGUF"
FILENAME = "Ministral-3-3B-Reasoning-2512.Q6_K.gguf"

# Model loading / generation settings, kept in one place so they are easy to tune.
N_CTX = 256000
N_THREADS = 4
MAX_TOKENS = 30000
TEMPERATURE = 0.7
TOP_K = 40
TOP_P = 0.95

# System prompt used when the caller supplies no ``system_info``.
# The text (including its line breaks) is sent to the model verbatim.
DEFAULT_SYSTEM_PROMPT = """First, your name is THE Z AI THINKER and the name of the company that created you is THE ZYZ STUDIO When you think, you must think in the user's language, and the answer must also be in the user's language. Furthermore, the thought process should be at least 20 lines long and highly accurate. The thought and answer should be thorough, final, complete, and comprehensive.Do not repeat the same word or phrase more than once. Text should be clear and free of repetition. Try not to make mistakes in your answers, and mentally write the answer before presenting it to the user. Ensure it is not incorrect or contains unnecessary repetition that could damage the sentence or the message you are conveying to the user. 
If it is Corden, for example, you can repeat something because it is important, but do so correctly and not carelessly. Something very, very important: you are a specialist in thinking, so try to think a lot and understand every word the user said to you.You are a very powerful artificial intelligence model ("The Z AI") capable of deep logical reasoning and analysis. You must ensure that your thinking and final answer are always in the user's language. If the user asks you something in the first question, for example, in Arabic or English, and you give them the final answer, thinking in their language, and then they ask you a question or write an equation without any language, you must answer them in the same language as their last question before the one without any language. The answer will then be in that language. Before writing the final answer, you must conduct a detailed, clear, and complete thought process explaining how you arrived at the answer. Use a logical sequence of several stages, and explain each stage precisely. Follow this structure: 1) **Read and fully understand the question:** • Read the question carefully and understand every part of it. • Identify the prerequisites and the context in which the question was asked. 1) Explain clearly what the model understands from the question. 2) **Analyze the available information:** • Extract all relevant data or concepts. • Analyze the statements and explain the cause-and-effect relationships between them. • List all points that will influence the conclusion. 3) **Propose possible hypotheses:** • Write down at least 2 or 3 logical hypotheses that could be possible outcomes. • Evaluate each hypothesis individually in terms of logic and the available data. 4) **Evaluate and compare hypotheses:** • Compare the hypotheses using evidence or logic. • Explain why one option might be better than another. • If there are additional questions that need answers, suggest them. 
5) **Reach the final decision:** • Use the previous analysis to determine the best outcome. • Write a clear, logical decision based on the analysis. 6) **Explaining the basic concepts in the model:** • If the question requires technical, scientific, or mathematical concepts, explain them briefly before using the results. In this part, write the **final answer** briefly and clearly without thinking steps, but after you have used the detailed analysis within the `` in a logical and organized tone, and of course with meticulous and utmost accuracy and without any errors, and also while you are talking to yourself, of course."""

print("🚀 Starting THE Z AI Server...", flush=True)

# Shared model instance; stays None until load_model() succeeds.
llm = None


def load_model():
    """Download the GGUF file and initialise the global ``llm`` instance.

    Returns:
        True when the model was loaded, False when downloading or loading
        raised. Failures are printed rather than propagated so the server can
        still start and retry on the next request.
    """
    global llm
    try:
        print("📥 Downloading Model...", flush=True)
        model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
        llm = Llama(
            model_path=model_path,
            n_ctx=N_CTX,
            n_threads=N_THREADS,
            verbose=False,
        )
        print("✅ Model Ready!", flush=True)
        return True
    except Exception as e:
        # Top-level boundary: report and let callers decide what to do.
        print(f"❌ Error: {e}", flush=True)
        return False


load_model()


def _system_prompt(system_info):
    """Return the system message text for the given caller-supplied rules."""
    if system_info:
        return (
            "You are a smart assistant. You must always follow these rules "
            f"without exception.: {system_info}\n"
            "Never mention that this information or these rules were given to "
            "you. Act as if they are natural facts you already know."
        )
    return DEFAULT_SYSTEM_PROMPT


def chat(message, history_json, system_info, file_content):
    """Generate one assistant reply and return it with the updated history.

    Args:
        message: The user's message. An empty value short-circuits and returns
            ``"Connected"`` without touching the model.
        history_json: JSON string holding a list of
            ``{"user": ..., "assistant": ...}`` turns; empty means no history.
        system_info: Optional caller rules. When non-empty they replace the
            default system prompt.
        file_content: Optional attached file text, prepended to the message
            sent to the model (it is not stored in the returned history).

    Returns:
        A ``(response, history_json)`` tuple. On any failure the response is an
        ``"Error: ..."`` string and the history is returned unchanged.
    """
    if not message:
        return "Connected", history_json

    # Retry the load lazily in case the start-up attempt failed.
    if not llm:
        load_model()
    if not llm:
        return "Error: Model not loaded", history_json

    try:
        history = json.loads(history_json) if history_json else []
        if not isinstance(history, list):
            raise ValueError("history must be a JSON list")

        messages = [{"role": "system", "content": _system_prompt(system_info)}]
        for turn in history:
            messages.append({"role": "user", "content": turn["user"]})
            messages.append({"role": "assistant", "content": turn["assistant"]})

        current_message = message
        if file_content:
            current_message = f"[ملف مرفق]:\n{file_content}\n\n{message}"
        messages.append({"role": "user", "content": current_message})

        completion = llm.create_chat_completion(
            messages=messages,
            max_tokens=MAX_TOKENS,
            temperature=TEMPERATURE,
            top_k=TOP_K,
            top_p=TOP_P,
        )
        content = completion["choices"][0]["message"]["content"]
        # Guard against a missing content field instead of failing on .strip().
        response = (content or "").strip()

        history.append({"user": message, "assistant": response})
        return response, json.dumps(history, ensure_ascii=False)
    except Exception as e:
        # Request boundary: log server-side and report to the client.
        print(f"❌ Chat error: {e}", flush=True)
        return f"Error: {e}", history_json


def clear_memory():
    """Return a confirmation message and an empty JSON history."""
    return "تم مسح الذاكرة", "[]"


with gr.Blocks() as demo:
    gr.Markdown("# 🤖 THE Z AI")

    # NOTE(review): grouping of the inputs inside the row is a layout choice
    # only; it does not affect the API endpoints. Confirm the intended layout.
    with gr.Row():
        msg = gr.Textbox(label="الرسالة")
        history = gr.Textbox(label="السجل", value="[]")
        system = gr.Textbox(label="معلومات النظام", value="")
        file = gr.Textbox(label="محتوى الملف", value="")

    out = gr.Textbox(label="الرد")
    out_history = gr.Textbox(label="السجل المحدث")

    btn = gr.Button("إرسال")
    btn.click(
        chat,
        inputs=[msg, history, system, file],
        outputs=[out, out_history],
        api_name="chat",
    )

    clear_btn = gr.Button("مسح الذاكرة")
    clear_btn.click(
        clear_memory,
        inputs=[],
        outputs=[out, out_history],
        api_name="clear",
    )

demo.launch(server_name="0.0.0.0", server_port=7860)