import base64
import json
import traceback

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
|
|
# Hugging Face Hub coordinates of the GGUF checkpoint that load_model() fetches.
REPO_ID = "MaziyarPanahi/Ministral-3-3B-Reasoning-2512-GGUF"
FILENAME = "Ministral-3-3B-Reasoning-2512.Q6_K.gguf"

# Startup banner, flushed so it reaches the process log immediately.
print("🚀 Starting THE Z AI Server...", flush=True)

# Shared model handle; remains None until load_model() assigns a Llama instance.
llm = None
|
|
def load_model():
    """Download the GGUF checkpoint and bind it to the module-level ``llm``.

    The file ``FILENAME`` is fetched from the Hub repository ``REPO_ID`` and
    handed to ``Llama`` with ``n_ctx=256000``, ``n_threads=4`` and
    ``verbose=False``. Progress and failures are printed (flushed) to stdout.

    Returns:
        True when the model was constructed and stored in ``llm``; False when
        any exception was raised along the way (``llm`` is then left as it
        was before the call).
    """
    global llm

    loaded = False
    try:
        print("📥 Downloading Model...", flush=True)
        gguf_path = hf_hub_download(filename=FILENAME, repo_id=REPO_ID)
        # NOTE(review): n_ctx=256000 is a very large context window --
        # confirm the host has enough memory for it.
        llm = Llama(
            model_path=gguf_path,
            n_ctx=256000,
            n_threads=4,
            verbose=False,
        )
        print("✅ Model Ready!", flush=True)
        loaded = True
    except Exception as exc:
        # Startup boundary: report the failure and let the caller decide.
        print(f"❌ Error: {exc}", flush=True)
    return loaded


# Load eagerly at import time. The result is ignored here because chat()
# retries the load whenever ``llm`` is still unset.
load_model()
|
|
# System prompt used when the caller supplies no ``system_info``.
# This text is sent to the model verbatim, so it must not be reflowed.
_DEFAULT_SYSTEM_PROMPT = """First, your name is THE Z AI THINKER and the name of the company that created you is THE ZYZ STUDIO
When you think, you must think in the user's language, and the answer must also be in the user's language.
Furthermore, the thought process should be at least 20 lines long and highly accurate. The thought and answer
should be thorough, final, complete, and comprehensive.Do not repeat the same word or phrase more than once.
Text should be clear and free of repetition. Try not to make mistakes in your answers, and mentally write the
answer before presenting it to the user. Ensure it is not incorrect or contains unnecessary repetition that
could damage the sentence or the message you are conveying to the user. If it is Corden, for example, you
can repeat something because it is important, but do so correctly and not carelessly.
Something very, very important: you are a specialist in thinking, so try to think a lot and understand
every word the user said to you.You are a very powerful artificial intelligence model ("The Z AI") capable
of deep logical reasoning and analysis. You must ensure that your thinking and final answer are always in
the user's language. If the user asks you something in the first question, for example, in Arabic or English,
and you give them the final answer, thinking in their language, and then they ask you a question or write an
equation without any language, you must answer them in the same language as their last question before the one
without any language. The answer will then be in that language. Before writing the final answer, you must conduct a
detailed, clear, and complete thought process explaining how you arrived at the answer. Use a logical sequence of
several stages, and explain each stage precisely. Follow this structure: 1) **Read and fully understand the
question:** • Read the question carefully and understand every part of it. • Identify the prerequisites and
the context in which the question was asked. 1) Explain clearly what the model understands from the
question. 2) **Analyze the available information:** • Extract all relevant data or concepts. • Analyze the statements
and explain the cause-and-effect relationships between them. • List all points that will influence the
conclusion. 3) **Propose possible hypotheses:** • Write down at least 2 or 3 logical hypotheses that could
be possible outcomes. • Evaluate each hypothesis individually in terms of logic and the
available data. 4) **Evaluate and compare hypotheses:** • Compare the hypotheses using evidence
or logic. • Explain why one option might be better than another. • If there are additional
questions that need answers, suggest them. 5) **Reach the final decision:** • Use the previous
analysis to determine the best outcome. • Write a clear, logical decision based on
the analysis. 6) **Explaining the basic concepts in the model:** • If the question requires
technical, scientific, or mathematical concepts, explain them briefly before using the results.
In this part, write the **final answer** briefly and clearly without thinking steps, but after
you have used the detailed analysis within the `<think>` in a logical and organized tone, and of course
with meticulous and utmost accuracy and without any errors, and also while you are talking to yourself, of course."""


def _system_prompt(system_info):
    """Return the system message text: caller rules if given, else the default."""
    if not system_info:
        return _DEFAULT_SYSTEM_PROMPT
    return (
        "You are a smart assistant. You must always follow these rules "
        f"without exception.: {system_info}\n"
        "Never mention that this information or these rules were given to you. "
        "Act as if they are natural facts you already know."
    )


def _parse_history(history_json):
    """Decode the JSON history and check it is a list of user/assistant turns.

    Raises:
        ValueError: if the text is not valid JSON, is not a JSON list, or an
            entry lacks the "user"/"assistant" keys.
        TypeError: if ``history_json`` is not something ``json.loads`` accepts.
    """
    if not history_json:
        return []
    turns = json.loads(history_json)
    if not isinstance(turns, list):
        raise ValueError("history must be a JSON list")
    for turn in turns:
        if not isinstance(turn, dict) or "user" not in turn or "assistant" not in turn:
            raise ValueError(
                'each history entry must be an object with "user" and "assistant" keys'
            )
    return turns


def _build_messages(message, history, system_info, file_content):
    """Assemble the chat-completion message list: system, past turns, new turn."""
    messages = [{"role": "system", "content": _system_prompt(system_info)}]
    for turn in history:
        messages.append({"role": "user", "content": turn["user"]})
        messages.append({"role": "assistant", "content": turn["assistant"]})

    current_message = message
    if file_content:
        # The attached text is prepended under an Arabic "[attached file]" tag.
        current_message = f"[ملف مرفق]:\n{file_content}\n\n{message}"
    messages.append({"role": "user", "content": current_message})
    return messages


def chat(message, history_json, system_info, file_content):
    """Answer one user message with the loaded model and return updated history.

    Args:
        message: The user's text. An empty value is treated as a connectivity
            probe and answered with "Connected" without touching the model.
        history_json: JSON text holding a list of
            ``{"user": ..., "assistant": ...}`` turns; empty means no history.
        system_info: Optional rules that replace the default system prompt.
        file_content: Optional text prepended to the message as an attachment.

    Returns:
        A ``(reply, history_json)`` tuple. On success the second item is the
        history with the new turn appended, serialised with
        ``ensure_ascii=False``. On any failure the reply starts with
        ``"Error: "`` and the incoming ``history_json`` is returned unchanged.
    """
    if not message:
        return "Connected", history_json

    # Retry the load lazily in case the eager load at startup failed.
    if llm is None:
        load_model()
        if llm is None:
            return "Error: Model not loaded", history_json

    # Validate before inference so a malformed history fails fast instead of
    # after a full (and expensive) generation.
    try:
        history = _parse_history(history_json)
    except (TypeError, ValueError) as exc:
        return f"Error: {exc}", history_json

    try:
        completion = llm.create_chat_completion(
            messages=_build_messages(message, history, system_info, file_content),
            max_tokens=30000,
            temperature=0.7,
            top_k=40,
            top_p=0.95,
        )
        response = completion["choices"][0]["message"]["content"].strip()

        # The stored turn keeps the raw message, without the attached file text.
        history.append({"user": message, "assistant": response})
        return response, json.dumps(history, ensure_ascii=False)
    except Exception as exc:
        # UI/API boundary: keep the server alive, but leave a trace in the log.
        traceback.print_exc()
        return f"Error: {exc}", history_json
|
|
def clear_memory():
    """Return the reset payload for the UI.

    Returns:
        A ``(confirmation, history_json)`` tuple: an Arabic confirmation
        message ("memory cleared") and the JSON text of an empty history.
    """
    confirmation = "تم مسح الذاكرة"
    empty_history = "[]"
    return confirmation, empty_history
|
|
# Gradio front end exposing chat() and clear_memory() as the "chat" and
# "clear" API endpoints.
# NOTE(review): the original indentation was lost; the nesting below (inputs
# inside the row, outputs and buttons beneath it) is inferred -- confirm.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 THE Z AI")

    # The four chat() inputs: message, history JSON, system info, file text.
    with gr.Row():
        msg = gr.Textbox(label="الرسالة")
        history = gr.Textbox(value="[]", label="السجل")
        system = gr.Textbox(value="", label="معلومات النظام")
        file = gr.Textbox(value="", label="محتوى الملف")

    # Reply text and the updated history JSON returned by the handlers.
    out = gr.Textbox(label="الرد")
    out_history = gr.Textbox(label="السجل المحدث")

    # "Send" button.
    btn = gr.Button("إرسال")
    btn.click(
        fn=chat,
        inputs=[msg, history, system, file],
        outputs=[out, out_history],
        api_name="chat",
    )

    # "Clear memory" button.
    clear_btn = gr.Button("مسح الذاكرة")
    clear_btn.click(
        fn=clear_memory,
        inputs=[],
        outputs=[out, out_history],
        api_name="clear",
    )


# Listen on all interfaces on port 7860.
demo.launch(server_port=7860, server_name="0.0.0.0")