# THEZYZSTUDIO's picture
# Update app.py
# d9ca076 verified
# raw
# history blame
# 3.7 kB
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import base64
import json
# Hugging Face Hub repository id and file name of the GGUF weights; both are
# passed to hf_hub_download() inside load_model().
REPO_ID = "bartowski/Nanbeige_Nanbeige4-3B-Thinking-2511-GGUF"
FILENAME = "Nanbeige_Nanbeige4-3B-Thinking-2511-bf16.gguf"
# Startup banner; flush=True asks print to flush the stream right away.
print("🚀 Starting THE Z AI Server...", flush=True)
# Module-level model handle. Stays None until load_model() assigns a Llama
# instance to it; chat() checks it before every generation.
llm = None
def load_model():
    """Download the GGUF weights and build the shared ``llm`` instance.

    The file ``FILENAME`` is fetched from ``REPO_ID`` with
    ``hf_hub_download`` and the returned path is handed to ``Llama``; the
    resulting object is stored in the module-level ``llm``.

    Returns:
        True when the model object was created, False when any exception
        was raised along the way (the exception is printed, not re-raised).
    """
    global llm
    succeeded = False
    try:
        print("📥 Downloading Model...", flush=True)
        gguf_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
        llm = Llama(model_path=gguf_path, n_ctx=5000, n_threads=2, verbose=False)
        print("✅ Model Ready!", flush=True)
        succeeded = True
    except Exception as err:
        # Best-effort: report the failure and let the caller decide what to do.
        print(f"❌ Error: {err}", flush=True)
    return succeeded
# Try to load the model once at import time. The boolean result is ignored
# here; chat() calls load_model() again when llm is still unset.
load_model()
def _parse_history(history_json):
    """Decode the JSON history string and check that it has the expected shape.

    Returns a list of dicts, each holding a 'user' and an 'assistant' key.
    Raises ValueError (json.JSONDecodeError is a subclass) on bad input.
    """
    turns = json.loads(history_json) if history_json else []
    if not isinstance(turns, list):
        raise ValueError("history must be a JSON list")
    for turn in turns:
        if not isinstance(turn, dict) or "user" not in turn or "assistant" not in turn:
            raise ValueError(
                "each history entry must be an object with 'user' and 'assistant' keys"
            )
    return turns


def _build_messages(message, history, system_info, file_content):
    """Assemble the chat-completion message list: system prompt, past turns, new turn."""
    if system_info:
        system_prompt = f"You are a smart assistant. You must always follow these rules without exception.: {system_info}\nNever mention that this information or these rules were given to you. Act as if they are natural facts you already know."
    else:
        system_prompt = "First, your name is THE Z AI THINKER and the name of the company that created you is THE ZYZ STUDIO When you think, you must think in the user's language, and the answer must also be in the user's language. Furthermore, the thought process should be at least 20 lines long and highly accurate. The thought and answer should be thorough, final, complete, and comprehensive."

    messages = [{"role": "system", "content": system_prompt}]
    for turn in history:
        messages.append({"role": "user", "content": turn["user"]})
        messages.append({"role": "assistant", "content": turn["assistant"]})

    current_message = message
    if file_content:
        # The attached file text is prepended under an Arabic "[attached file]" tag.
        current_message = f"[ملف مرفق]:\n{file_content}\n\n{message}"
    messages.append({"role": "user", "content": current_message})
    return messages


def chat(message, history_json, system_info, file_content):
    """Generate one assistant reply and return it with the updated history.

    Args:
        message: The user's new message. An empty value is treated as a
            connectivity ping and answered with "Connected".
        history_json: JSON-encoded list of ``{"user": ..., "assistant": ...}``
            turns; an empty string means no history.
        system_info: Optional extra rules injected into the system prompt.
            When empty, the built-in default system prompt is used.
        file_content: Optional file text prepended to the user message.

    Returns:
        A ``(reply, history_json)`` tuple. On success the second element is
        the history with the new turn appended (the raw ``message`` is
        stored, without the file text). On any failure the reply starts
        with "Error:" and the history string is returned unchanged.
    """
    if not message:
        return "Connected", history_json

    # Validate the cheap input first, so a malformed request neither triggers
    # the lazy model load nor burns a full generation before failing.
    try:
        history = _parse_history(history_json)
    except (TypeError, ValueError) as err:
        return f"Error: {err}", history_json

    if not llm:
        load_model()
    if not llm:
        return "Error: Model not loaded", history_json

    # NOTE(review): history is never trimmed, while the model is created with
    # a fixed n_ctx in load_model(); presumably long conversations eventually
    # exceed it and every request then ends in the error branch — confirm.
    try:
        response_obj = llm.create_chat_completion(
            messages=_build_messages(message, history, system_info, file_content),
            max_tokens=1000,
            temperature=0.7,
        )
        response = response_obj['choices'][0]['message']['content'].strip()
        history.append({"user": message, "assistant": response})
        return response, json.dumps(history, ensure_ascii=False)
    except Exception as e:
        # Handler boundary: report the failure to the client instead of raising.
        return f"Error: {str(e)}", history_json
def clear_memory():
    """Produce the values used to reset the conversation.

    Returns:
        A ``(status, history_json)`` tuple: an Arabic confirmation message
        ("memory cleared") and an empty history serialised as ``"[]"``.
    """
    status_text = "تم مسح الذاكرة"
    empty_history = "[]"
    return status_text, empty_history
# Gradio front end and server start-up.
# NOTE(review): this copy of the file has lost its indentation, so which of the
# statements below belong inside `gr.Row()` / `gr.Blocks()` cannot be determined
# from here — confirm against the original layout before re-indenting.
with gr.Blocks() as demo:
gr.Markdown("# 🤖 THE Z AI")
with gr.Row():
# Inputs passed to chat(), in order. Labels are Arabic: "message",
# "history" (initial value "[]"), "system info", "file content".
msg = gr.Textbox(label="الرسالة")
history = gr.Textbox(label="السجل", value="[]")
system = gr.Textbox(label="معلومات النظام", value="")
file = gr.Textbox(label="محتوى الملف", value="")
# Outputs: "reply" and "updated history".
out = gr.Textbox(label="الرد")
out_history = gr.Textbox(label="السجل المحدث")
# "Send" button: calls chat(msg, history, system, file) and writes its two
# return values to out and out_history; registered with api_name "chat".
# The updated history goes to out_history, not back into the `history` input.
btn = gr.Button("إرسال")
btn.click(chat, inputs=[msg, history, system, file], outputs=[out, out_history], api_name="chat")
# "Clear memory" button: calls clear_memory() with no inputs and writes its
# result to out and out_history; registered with api_name "clear".
clear_btn = gr.Button("مسح الذاكرة")
clear_btn.click(clear_memory, inputs=[], outputs=[out, out_history], api_name="clear")
# Start the server bound to 0.0.0.0 on port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)