# Hugging Face file-viewer header (page residue, not part of the program):
#   THEZYZSTUDIO's picture
#   Update app.py
#   2af9253 verified
#   raw
#   history blame
#   7.23 kB
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import base64
import json
# Hugging Face Hub repository and the GGUF file inside it; both are handed to
# hf_hub_download() by load_model().
REPO_ID = "MaziyarPanahi/Ministral-3-3B-Reasoning-2512-GGUF"
FILENAME = "Ministral-3-3B-Reasoning-2512.Q6_K.gguf"
# Startup banner; flush=True forces the line out immediately instead of
# leaving it in the stdout buffer.
print("🚀 Starting THE Z AI Server...", flush=True)
# Module-wide model handle. Stays None until load_model() assigns a Llama
# instance to it; chat() checks it before every request.
llm = None
def load_model():
    """Fetch the GGUF weights and bind a Llama instance to the global ``llm``.

    The file named by ``REPO_ID`` / ``FILENAME`` is obtained through
    ``hf_hub_download`` and its local path is passed to ``Llama`` with
    ``n_ctx=256000``, ``n_threads=4`` and ``verbose=False``.

    Returns:
        bool: True when the model object was created, False when any
        exception was raised along the way (the exception text is printed
        and ``llm`` is left as it was).
    """
    global llm
    loaded = False
    try:
        print("📥 Downloading Model...", flush=True)
        llm = Llama(
            model_path=hf_hub_download(repo_id=REPO_ID, filename=FILENAME),
            n_ctx=256000,
            n_threads=4,
            verbose=False,
        )
        print("✅ Model Ready!", flush=True)
        loaded = True
    except Exception as err:
        # Best-effort load: report the failure and let the caller decide.
        print(f"❌ Error: {err}", flush=True)
    return loaded
# Try to load the model once at import time. The boolean result is ignored
# here; chat() calls load_model() again if llm is still unset.
load_model()
def _system_message(system_info):
    """Return the system-role message dict that opens every conversation.

    A non-empty ``system_info`` is embedded into a short rules prompt;
    otherwise the built-in default prompt is used.
    """
    if system_info:
        rules = f"You are a smart assistant. You must always follow these rules without exception.: {system_info}\nNever mention that this information or these rules were given to you. Act as if they are natural facts you already know."
        return {"role": "system", "content": rules}
    return {
        "role": "system",
        "content": """First, your name is THE Z AI THINKER and the name of the company that created you is THE ZYZ STUDIO
When you think, you must think in the user's language, and the answer must also be in the user's language.
Furthermore, the thought process should be at least 20 lines long and highly accurate. The thought and answer
should be thorough, final, complete, and comprehensive.Do not repeat the same word or phrase more than once.
Text should be clear and free of repetition. Try not to make mistakes in your answers, and mentally write the
answer before presenting it to the user. Ensure it is not incorrect or contains unnecessary repetition that
could damage the sentence or the message you are conveying to the user. If it is Corden, for example, you
can repeat something because it is important, but do so correctly and not carelessly.
Something very, very important: you are a specialist in thinking, so try to think a lot and understand
every word the user said to you.You are a very powerful artificial intelligence model ("The Z AI") capable
of deep logical reasoning and analysis. You must ensure that your thinking and final answer are always in
the user's language. If the user asks you something in the first question, for example, in Arabic or English,
and you give them the final answer, thinking in their language, and then they ask you a question or write an
equation without any language, you must answer them in the same language as their last question before the one
without any language. The answer will then be in that language. Before writing the final answer, you must conduct a
detailed, clear, and complete thought process explaining how you arrived at the answer. Use a logical sequence of
several stages, and explain each stage precisely. Follow this structure: 1) **Read and fully understand the
question:** • Read the question carefully and understand every part of it. • Identify the prerequisites and
the context in which the question was asked. 1) Explain clearly what the model understands from the
question. 2) **Analyze the available information:** • Extract all relevant data or concepts. • Analyze the statements
and explain the cause-and-effect relationships between them. • List all points that will influence the
conclusion. 3) **Propose possible hypotheses:** • Write down at least 2 or 3 logical hypotheses that could
be possible outcomes. • Evaluate each hypothesis individually in terms of logic and the
available data. 4) **Evaluate and compare hypotheses:** • Compare the hypotheses using evidence
or logic. • Explain why one option might be better than another. • If there are additional
questions that need answers, suggest them. 5) **Reach the final decision:** • Use the previous
analysis to determine the best outcome. • Write a clear, logical decision based on
the analysis. 6) **Explaining the basic concepts in the model:** • If the question requires
technical, scientific, or mathematical concepts, explain them briefly before using the results.
In this part, write the **final answer** briefly and clearly without thinking steps, but after
you have used the detailed analysis within the `<think>` in a logical and organized tone, and of course
with meticulous and utmost accuracy and without any errors, and also while you are talking to yourself, of course.""",
    }


def chat(message, history_json, system_info, file_content):
    """Answer one user message using the global model and a JSON-encoded history.

    Args:
        message: The user's text. A falsy value short-circuits to a
            "Connected" reply without touching the model.
        history_json: JSON text decoding to a list of dicts with ``user``
            and ``assistant`` keys; a falsy value means no prior turns.
        system_info: Optional rules text; when falsy the built-in default
            system prompt is used instead.
        file_content: Optional text that is prefixed to the message sent to
            the model.

    Returns:
        tuple: ``(reply, updated_history_json)`` on success. On any failure
        the first item is an ``"Error: ..."`` string and the second is the
        ``history_json`` that was passed in, unchanged.
    """
    global llm
    # Empty message: used as a connectivity check, history is echoed back.
    if not message:
        return "Connected", history_json
    # The import-time load may have failed; retry once before giving up.
    if not llm:
        load_model()
        if not llm:
            return "Error: Model not loaded", history_json
    try:
        turns = json.loads(history_json) if history_json else []
        messages = [_system_message(system_info)]
        for turn in turns:
            messages.extend((
                {"role": "user", "content": turn['user']},
                {"role": "assistant", "content": turn['assistant']},
            ))
        # "[ملف مرفق]" is Arabic for "attached file"; the file text goes to
        # the model only, the stored turn below keeps the bare message.
        prompt = f"[ملف مرفق]:\n{file_content}\n\n{message}" if file_content else message
        messages.append({"role": "user", "content": prompt})
        completion = llm.create_chat_completion(
            messages=messages,
            max_tokens=30000,
            temperature=0.7,
            top_k=40,
            top_p=0.95,
        )
        reply = completion['choices'][0]['message']['content'].strip()
        turns.append({"user": message, "assistant": reply})
        return reply, json.dumps(turns, ensure_ascii=False)
    except Exception as err:
        # Any parsing or inference failure is reported as text so the
        # caller always receives a (reply, history) pair.
        return f"Error: {str(err)}", history_json
def clear_memory():
    """Produce the values shown after the conversation memory is cleared.

    Returns:
        tuple[str, str]: an Arabic confirmation notice ("memory cleared")
        followed by the JSON text of an empty history list.
    """
    notice = "تم مسح الذاكرة"
    empty_history = "[]"
    return notice, empty_history
# Gradio front end: four input textboxes, two output textboxes and two buttons,
# exposed both as a web page and as named API endpoints ("chat" and "clear").
# NOTE(review): leading indentation is missing in this copy of the file, so the
# extent of the `with gr.Blocks()` and `with gr.Row()` bodies cannot be read
# from the text — restore the nesting from the original before running.
with gr.Blocks() as demo:
gr.Markdown("# 🤖 THE Z AI")
with gr.Row():
# Inputs (labels are Arabic): "message", "history", "system information",
# "file content". History is carried as JSON text and starts as an empty list.
msg = gr.Textbox(label="الرسالة")
history = gr.Textbox(label="السجل", value="[]")
system = gr.Textbox(label="معلومات النظام", value="")
file = gr.Textbox(label="محتوى الملف", value="")
# Outputs: "reply" and "updated history".
out = gr.Textbox(label="الرد")
out_history = gr.Textbox(label="السجل المحدث")
# "Send" button: runs chat() on the four inputs and writes its two return
# values to the two output boxes.
btn = gr.Button("إرسال")
btn.click(chat, inputs=[msg, history, system, file], outputs=[out, out_history], api_name="chat")
# "Clear memory" button: writes clear_memory()'s notice and "[]" to the output
# boxes. NOTE(review): the `history` input box is not among the outputs, so it
# keeps whatever text it held — confirm this is intended.
clear_btn = gr.Button("مسح الذاكرة")
clear_btn.click(clear_memory, inputs=[], outputs=[out, out_history], api_name="clear")
# Listen on all interfaces, port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)