Spaces:
Build error
Build error
| import os | |
| import gradio as gr | |
| from huggingface_hub import hf_hub_download | |
| from llama_cpp import Llama | |
| # 模型配置 | |
| REPO_ID = "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF" | |
| MODEL_FILE = "DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf" | |
| LORA_FILE = "ark_soul_adapter.gguf" | |
def load_engine():
    """Download the base GGUF model and construct the Llama engine.

    If the LoRA adapter file is present in the current working directory
    it is applied on top of the base model; otherwise the base model runs
    alone.  Returns a configured ``llama_cpp.Llama`` instance.

    Fix: the original fallback branch used a different, reduced
    configuration (n_ctx=2048, no n_batch/use_mlock/verbose settings),
    so behavior silently diverged depending on whether the adapter file
    existed.  Both branches now share one engine configuration.
    """
    print("--- [SYSTEM] 正在加载基础燃料... ---")
    # Downloads the model (or reuses the local HF cache) and returns its path.
    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILE)
    lora_path = os.path.join(os.getcwd(), LORA_FILE)

    # Single source of truth for engine settings, used by both branches.
    common_kwargs = dict(
        n_ctx=4096,
        n_batch=512,
        n_threads=2,
        use_mlock=False,
        verbose=False,
        check_tensors=False,
    )

    if os.path.exists(lora_path):
        print(f"---[IGNITION] 注入灵魂补丁: {LORA_FILE} ---")
        return Llama(
            model_path=model_path,
            lora_path=lora_path,
            lora_scale=0.65,  # partial adapter strength — TODO confirm tuning
            **common_kwargs,
        )

    print("---[WARNING] 未检测到 LoRA 补丁,以基础架构运行 ---")
    return Llama(model_path=model_path, **common_kwargs)
# Lazy singleton: avoid blocking module import on the model download.
_llm = None


def get_llm():
    """Return the shared Llama engine, constructing it on first use."""
    global _llm
    if _llm is not None:
        return _llm
    _llm = load_engine()
    return _llm
def stark_infer(message, history):
    """Run one inference turn against the local Llama engine.

    Args:
        message: The user's latest input string; falsy input returns "".
        history: Prior (user, assistant) pairs from the Gradio chatbot.
            Fix: the original implementation accepted but ignored this,
            so the bot had no conversation memory — prior turns are now
            folded into the prompt.  When history is empty the prompt is
            byte-identical to the original single-turn prompt.

    Returns:
        The model's reply, stripped, or a fallback error message if
        inference raises.
    """
    if not message:
        return ""

    # Minimal system prompt (example — adjust for the actual deployment).
    system_logic = (
        "你是一个智能助手,专注于技术支持和逻辑推理。"
    )

    # Rebuild the conversation so the model sees previous turns.
    lines = [f"System: {system_logic}"]
    for user_turn, bot_turn in history or []:
        lines.append(f"User: {user_turn}")
        lines.append(f"Assistant: {bot_turn}")
    lines.append(f"User: {message}")
    lines.append("Assistant:")
    prompt = "\n".join(lines)

    llm = get_llm()
    try:
        output = llm(
            prompt,
            max_tokens=1024,
            stop=["User:", "###", "</s>"],  # cut off before a hallucinated next turn
            echo=False,
            temperature=0.2,
            repeat_penalty=1.2,
        )
        return str(output["choices"][0]["text"]).strip()
    except Exception as e:
        # Boundary handler: log and degrade gracefully instead of crashing the UI.
        print(f"模型推理错误: {e}")
        return "抱歉,模型出现异常,请稍后重试。"
# Gradio interface wiring
with gr.Blocks(title="ARK-AI SOVEREIGN CONSOLE") as demo:
    gr.Markdown("# 🚀 550C SOVEREIGN CONSOLE (KERNEL OVERRIDE)\n大西洋坐标已锁定")
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(placeholder="Input command...")

    def respond(message, chat_history):
        """Append one (user, bot) exchange to the chat and clear the input box."""
        reply = stark_infer(message, chat_history)
        chat_history.append((message, reply))
        return "", chat_history

    # Submitting the textbox updates both the textbox (cleared) and the chat.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
if __name__ == "__main__":
    # Entry point: bind on all interfaces so the hosting container can reach it.
    print("--- [IGNITION] 550C 引擎物理锁定,强制点火... ---")
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_api": False,
    }
    demo.launch(**launch_options)