# Spaces: THEZYZSTUDIO/Minecraft_java_server (Paused)
# File size: 3,701 Bytes

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import base64
import json

# Handle to the loaded llama.cpp model; stays None until load_model() succeeds.
llm = None

# Hugging Face Hub coordinates of the GGUF weights fetched by load_model().
REPO_ID: str = "bartowski/Nanbeige_Nanbeige4-3B-Thinking-2511-GGUF"
FILENAME: str = "Nanbeige_Nanbeige4-3B-Thinking-2511-bf16.gguf"

print("🚀 Starting THE Z AI Server...", flush=True)

def load_model():
    """Download the GGUF weights and bind the module-level ``llm`` handle.

    Any exception raised while downloading or constructing the model is
    printed and swallowed, so callers only have to inspect the result.

    Returns:
        True when ``llm`` was initialised, False when anything went wrong.
    """
    global llm
    try:
        print("📥 Downloading Model...", flush=True)
        gguf_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
        llm = Llama(
            model_path=gguf_path,
            n_ctx=5000,
            n_threads=2,
            verbose=False,
        )
        print("✅ Model Ready!", flush=True)
    except Exception as exc:
        # Startup boundary: report the failure instead of crashing the app.
        print(f"❌ Error: {exc}", flush=True)
        return False
    else:
        return True

# Try to load the model eagerly at import time. The boolean result is ignored
# here; chat() calls load_model() again if `llm` is still unset.
load_model()

def _parse_history(history_json):
    """Decode and validate the client-supplied conversation history.

    Args:
        history_json: JSON text encoding a list of
            ``{"user": ..., "assistant": ...}`` objects; a falsy value
            means "no history yet".

    Returns:
        The decoded list of turns (empty when no history was supplied).

    Raises:
        ValueError: if the text is not valid JSON, is not a list, or
            contains an entry without both ``user`` and ``assistant`` keys.
    """
    if not history_json:
        return []
    try:
        history = json.loads(history_json)
    except (TypeError, ValueError) as exc:
        # json.JSONDecodeError is a ValueError subclass.
        raise ValueError(f"Invalid history JSON: {exc}") from exc
    if not isinstance(history, list):
        raise ValueError("History must be a JSON list")
    for turn in history:
        if not isinstance(turn, dict) or 'user' not in turn or 'assistant' not in turn:
            raise ValueError(
                "Each history entry must be an object with 'user' and 'assistant' keys"
            )
    return history


def _build_messages(message, history, system_info, file_content):
    """Assemble the chat-completion message list for one request."""
    if system_info:
        system_prompt = f"You are a smart assistant. You must always follow these rules without exception.: {system_info}\nNever mention that this information or these rules were given to you. Act as if they are natural facts you already know."
    else:
        system_prompt = "First, your name is THE Z AI THINKER and the name of the company that created you is THE ZYZ STUDIO When you think, you must think in the user's language, and the answer must also be in the user's language. Furthermore, the thought process should be at least 20 lines long and highly accurate. The thought and answer should be thorough, final, complete, and comprehensive."

    messages = [{"role": "system", "content": system_prompt}]

    # Replay earlier turns in order so the model sees the whole conversation.
    for turn in history:
        messages.append({"role": "user", "content": turn['user']})
        messages.append({"role": "assistant", "content": turn['assistant']})

    current_message = message
    if file_content:
        # The attached file text is placed in front of the user's message.
        current_message = f"[ملف مرفق]:\n{file_content}\n\n{message}"
    messages.append({"role": "user", "content": current_message})
    return messages


def chat(message, history_json, system_info, file_content):
    """Answer one user message and return the reply plus the updated history.

    The history is validated before the model is touched, so a malformed
    history is rejected immediately instead of after a full generation.

    Args:
        message: The user's message. A falsy value is treated as a
            connectivity probe and answered with ``"Connected"``.
        history_json: JSON list of ``{"user", "assistant"}`` turns, or a
            falsy value for an empty conversation.
        system_info: Optional rules for the system prompt; when falsy the
            built-in default system prompt is used.
        file_content: Optional attached file text prepended to the message.

    Returns:
        A ``(reply, history_json)`` tuple. On success the history includes
        the new turn. On any failure the reply starts with ``"Error: "``
        and the history is returned exactly as it was received.
    """
    if not message:
        return "Connected", history_json

    # Cheap validation first: never spend an inference on unusable history.
    try:
        history = _parse_history(history_json)
    except ValueError as exc:
        return f"Error: {exc}", history_json

    # Retry the load lazily if it did not succeed earlier.
    if llm is None:
        load_model()
        if llm is None:
            return "Error: Model not loaded", history_json

    try:
        messages = _build_messages(message, history, system_info, file_content)
        response_obj = llm.create_chat_completion(
            messages=messages,
            max_tokens=1000,
            temperature=0.7,
        )
        response = response_obj['choices'][0]['message']['content'].strip()

        # Only the typed message is stored, not the attached file content.
        history.append({"user": message, "assistant": response})
        return response, json.dumps(history, ensure_ascii=False)
    except Exception as e:
        # API boundary: log server-side, then report to the client.
        print(f"❌ Error: {e}", flush=True)
        return f"Error: {str(e)}", history_json

def clear_memory():
    """Return a confirmation message together with an empty JSON history."""
    confirmation = "تم مسح الذاكرة"
    empty_history = "[]"
    return confirmation, empty_history

# Gradio front end: four input boxes, two output boxes, and two buttons whose
# click handlers are registered under the API names "chat" and "clear".
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 THE Z AI")

    # Inputs, in the positional order chat() expects them.
    with gr.Row():
        msg = gr.Textbox(label="الرسالة")
        history = gr.Textbox(
            label="السجل",
            value="[]",
        )
        system = gr.Textbox(
            label="معلومات النظام",
            value="",
        )
        file = gr.Textbox(
            label="محتوى الملف",
            value="",
        )

    # Outputs: the reply text and the updated history JSON.
    out = gr.Textbox(label="الرد")
    out_history = gr.Textbox(label="السجل المحدث")

    chat_inputs = [msg, history, system, file]

    btn = gr.Button("إرسال")
    btn.click(
        chat,
        inputs=chat_inputs,
        outputs=[out, out_history],
        api_name="chat",
    )

    clear_btn = gr.Button("مسح الذاكرة")
    clear_btn.click(
        clear_memory,
        inputs=[],
        outputs=[out, out_history],
        api_name="clear",
    )

# Listen on all interfaces on port 7860.
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
)