| |
| |
| |
import gc
import json
import os
import pathlib
from typing import Any, List, Optional, Tuple

import gradio as gr
from huggingface_hub import snapshot_download
from llama_cpp import Llama
|
|
| |
| |
| |
MODEL_REPO = "neuphonic/neutts-air"
# NOTE(review): the file name spells "neutss" but the repo is "neutts-air" —
# this looks like a typo; confirm against the repo's actual GGUF file list.
MODEL_FILE = "neutss-air-BF16.gguf"
# HF_HOME env var (a str) wins; otherwise the default hub cache path.
CACHE_DIR = os.getenv("HF_HOME", pathlib.Path.home() / ".cache" / "huggingface" / "hub")

print("🔎 Скачиваем модель (может занять несколько минут)...")
# Download only the single GGUF file from the repo snapshot.
model_path = snapshot_download(
    repo_id=MODEL_REPO,
    revision="main",
    cache_dir=str(CACHE_DIR),
    local_files_only=False,
    allow_patterns=[MODEL_FILE],
)

gguf_path = os.path.join(model_path, MODEL_FILE)
# Fail fast with a clear message: snapshot_download succeeds even when
# allow_patterns matched no file, and Llama() would later fail cryptically.
if not os.path.isfile(gguf_path):
    raise FileNotFoundError(
        f"Expected GGUF file not found: {gguf_path}. "
        f"Check MODEL_FILE against the files available in {MODEL_REPO}."
    )
print(f"✅ Модель скачана в {gguf_path}")
|
|
| |
| |
| |
# Load the GGUF model into memory once at startup; the instance is shared
# by every request handled below.
llm = Llama(
    model_path=gguf_path,
    n_ctx=2048,        # context window size in tokens
    n_threads=8,       # CPU threads used for inference
    n_gpu_layers=-1,   # -1 = offload all layers to GPU (CPU-only builds ignore this)
    verbose=False,     # suppress llama.cpp's per-load diagnostics
)
|
|
| |
| |
| |
# System prompt sent on every turn: instructs the model to reply with a
# single JSON object. The braces below are literal text shown to the model
# (this is a plain string, not a Python format string).
SYSTEM_PROMPT = """You are a Web‑assistant. For every user request return **exactly one JSON object**
with the following possible fields:
{
 "TEXT": "<optional short explanation>",
 "WEBSITE": "<full URL to open>",
 "SEARCH": "<search query>",
 "SUGGESTIONS": [
     {"title":"...", "url":"..."},
     {"title":"...", "url":"..."}
 ],
 "TOOL": {
     "action":"click|type|scroll|none",
     "selector":"CSS selector (optional)",
     "value":"text to type (if action==type)"
 }
}
If you don't need any action, set all fields to null or empty strings.
"""
|
|
| |
| |
| |
def build_chat(messages: List[Tuple[str, str]], system_prompt: Optional[str] = None) -> str:
    """
    Serialize a chat history into a single llama.cpp prompt string.

    Parameters
    ----------
    messages : list of (human, assistant) turns. An empty/falsy assistant
        string marks an *open* turn: its <|assistant|> tag is emitted
        without a closing <|end|>, so the model generates the reply there.
    system_prompt : overrides the module-level SYSTEM_PROMPT when given.

    Returns the prompt with <|system|>/<|user|>/<|assistant|> role tags,
    each completed turn terminated with <|end|>.
    """
    if system_prompt is None:
        system_prompt = SYSTEM_PROMPT
    parts = [f"<|system|>{system_prompt}<|end|>"]
    for human, assistant in messages:
        parts.append(f"<|user|>{human}<|end|>")
        if assistant:
            parts.append(f"<|assistant|>{assistant}<|end|>")
        else:
            # Fix: the previous version closed the empty assistant turn with
            # <|end|>, telling the model the reply was already finished.
            parts.append("<|assistant|>")
    return "".join(parts)
|
|
|
|
| |
| |
| |
def _fallback_payload(text: str) -> dict:
    """Build the default response object used when parsing/generation fails."""
    return {"TEXT": text, "WEBSITE": "", "SEARCH": "", "SUGGESTIONS": [], "TOOL": {}}


def _extract_json(raw: str) -> dict:
    """
    Extract the first JSON object embedded in *raw* model output.

    Tolerates prose before the opening '{' AND trailing text after the
    closing '}' (raw_decode stops at the end of the first valid object,
    which plain json.loads on a suffix slice did not). Falls back to a
    TEXT-only payload wrapping the whole text.
    """
    start = raw.find("{")
    if start != -1:
        try:
            parsed, _ = json.JSONDecoder().raw_decode(raw[start:])
            if isinstance(parsed, dict):
                return parsed
        except json.JSONDecodeError:
            pass
    return _fallback_payload(raw)


def respond(message: str, history: List[List[str]]) -> List[List[Any]]:
    """
    Handle one user turn of the chat.

    Parameters
    ----------
    message : the new user message.
    history : current chat as [user, assistant] pairs (Gradio format);
        entries may hold None placeholders (e.g. after "retry").

    Returns the updated history; the assistant side of the new pair is the
    model's JSON object pretty-printed (or an error payload on failure).
    """
    # Coerce None placeholders to "" so they don't render as the literal
    # string "None" inside the prompt.
    chat_history = [(h or "", a or "") for h, a in history]

    # Append the new user turn with an empty assistant slot — build_chat
    # leaves that turn open for the model to complete.
    prompt = build_chat(chat_history + [(message, "")])

    try:
        out = llm(
            prompt,
            max_tokens=512,
            temperature=0.2,
            top_p=0.95,
            repeat_penalty=1.1,
            # Stop on any role tag so the model doesn't invent extra turns.
            stop=["<|assistant|>", "<|user|>", "<|system|>"],
        )
        raw = out["choices"][0]["text"].strip()
        parsed = _extract_json(raw)
    except Exception as exc:
        # Surface generation errors in the chat instead of crashing the UI.
        parsed = _fallback_payload(f"Ошибка модели: {str(exc)}")

    bot_message = json.dumps(parsed, ensure_ascii=False, indent=2)

    return history + [[message, bot_message]]
|
|
|
|
| |
| |
| |
# ---------------------------------------------------------------------------
# Gradio UI: chat window plus send / retry / undo / clear controls.
# Building the interface inside `gr.Blocks` registers all components and
# event handlers on `demo`, launched at the bottom of the file.
# ---------------------------------------------------------------------------
with gr.Blocks(title="ESP Brain – локальная LLaMA") as demo:
    gr.Markdown("## 🤖 Web‑assistant powered by **neutts‑air** (LLaMA‑CPP)")

    # Chat transcript as [user, assistant] pairs.
    chatbot = gr.Chatbot(height=600)

    with gr.Row():
        txt = gr.Textbox(
            placeholder="Напиши сообщение…",
            show_label=False,
            scale=8,
        )
        submit_btn = gr.Button("Отправить", scale=2)

    with gr.Row():
        retry_btn = gr.Button("🔄 Повторить")
        undo_btn = gr.Button("↩️ Отменить")
        clear_btn = gr.Button("🗑️ Очистить")

    # Both Enter-in-textbox and the button trigger the same handler.
    txt.submit(fn=respond, inputs=[txt, chatbot], outputs=chatbot)
    submit_btn.click(fn=respond, inputs=[txt, chatbot], outputs=chatbot)

    def retry_last(history):
        """Clear the last answer so the user can write it again."""
        if history:
            last_user = history[-1][0]
            # Keep the user's message, drop the assistant's reply.
            return history[:-1] + [[last_user, None]]
        return history

    # queue=False: history edits are instant, no need to go through the queue.
    retry_btn.click(fn=retry_last, inputs=chatbot, outputs=chatbot, queue=False)

    def undo_last(history):
        """Remove the last (user, assistant) pair."""
        return history[:-1]

    undo_btn.click(fn=undo_last, inputs=chatbot, outputs=chatbot, queue=False)

    # Reset the transcript entirely.
    clear_btn.click(lambda: [], outputs=chatbot, queue=False)
|
|
| |
| |
| |
if __name__ == "__main__":
    # Enable request queuing so concurrent users don't collide on the
    # single shared Llama instance.
    demo.queue()
    demo.launch(
        share=True,      # create a public gradio.live tunnel link
        ssr_mode=False,
        debug=True,      # show tracebacks in the console
    )
|
|