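# ARK-AI chat demo: serves the DeepSeek-R1-Distill-Llama-8B GGUF model through
# llama-cpp-python behind a Gradio chat UI, applying an optional LoRA adapter
# when one is found next to this file.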
import os
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# Model configuration
REPO_ID = "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF"
MODEL_FILE = "DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf"
LORA_FILE = "ark_soul_adapter.gguf"
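# The LoRA adapter is looked up in the current working directory, so it must be
# uploaded alongside app.py for the patched engine to load.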
def load_engine():
    print("--- [SYSTEM] Loading base fuel... ---")
    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILE)
    lora_path = os.path.join(os.getcwd(), LORA_FILE)
    if os.path.exists(lora_path):
        print(f"--- [IGNITION] Injecting soul patch: {LORA_FILE} ---")
        return Llama(
            model_path=model_path,
            lora_path=lora_path,
            lora_scale=0.65,      # blend the adapter at 65% strength
            n_ctx=4096,
            n_batch=512,
            n_threads=2,
            use_mlock=False,
            verbose=False,
            check_tensors=False
        )
    else:
        print("--- [WARNING] No LoRA patch detected; running on the base model ---")
        return Llama(model_path=model_path, n_ctx=2048, n_threads=2)
# Lazy-load the engine so the model download does not block app start-up.
_llm = None

def get_llm():
    global _llm
    if _llm is None:
        _llm = load_engine()
    return _llm
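# Single-turn inference: `history` is accepted to match the Gradio callback
# signature but is not folded into the prompt, so each reply is context-free.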
def stark_infer(message, history):
    if not message:
        return ""
    # Minimal system prompt (placeholder; adjust to your actual use case).
    system_logic = (
        "You are an intelligent assistant focused on technical support and logical reasoning."
    )
    prompt = f"System: {system_logic}\nUser: {message}\nAssistant:"
    llm = get_llm()
    try:
        output = llm(
            prompt,
            max_tokens=1024,
            stop=["User:", "###", "</s>"],
            echo=False,
            temperature=0.2,
            repeat_penalty=1.2
        )
        res = output["choices"][0]["text"]
        return str(res).strip()
    except Exception as e:
        print(f"Model inference error: {e}")  # log the failure
        return "Sorry, the model ran into a problem. Please try again later."
# Gradio UI
with gr.Blocks(title="ARK-AI SOVEREIGN CONSOLE") as demo:
    gr.Markdown("# 🚀 550C SOVEREIGN CONSOLE (KERNEL OVERRIDE)\nAtlantic coordinates locked")
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(placeholder="Input command...")

    def respond(message, chat_history):
        bot_message = stark_infer(message, chat_history)
        chat_history.append((message, bot_message))
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
if __name__ == "__main__":
    print("--- [IGNITION] 550C engine physically locked, forcing ignition... ---")
    demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False)