Spaces:

Kenan023214
/

PyroNet-mini

Sleeping

App Files Files Community

Kenan023214 committed on Aug 23, 2025

Commit

67c514f

verified ·

1 Parent(s): 6694a15

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -34

app.py CHANGED Viewed

@@ -1,55 +1,38 @@
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from huggingface_hub import hf_hub_download
 from functools import lru_cache
-# --- Hugging Face Space Configuration ---
 MODEL_NAME = "Kenan023214/PyroNet-mini"
-DEVICE = "cpu"  # Use CPU for basic Space
-MAX_NEW_TOKENS = 1024
 MAX_CONTEXT_TOKENS = 2048
-# Dictionary to store the full paths of downloaded templates
-TEMPLATE_PATHS = {}
 @lru_cache(maxsize=1)
 def load_model():
-    """Loads the model and tokenizer, caching them for performance."""
     print("Loading model and tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
         device_map=DEVICE,
-        torch_dtype=torch.float32  # Use float32 for CPU compatibility
     )
     print("Model loaded.")
     return tokenizer, model
-def download_templates():
-    """Downloads template files from the model repository and stores their paths."""
-    print("Downloading chat templates...")
-    for lang in ["ru", "en", "uk"]:
-        filename = f"chat_template_{lang}.jinja"
-        file_path = hf_hub_download(
-            repo_id=MODEL_NAME,
-            filename=filename,
-            local_dir=".",
-            local_dir_use_symlinks=False
-        )
-        TEMPLATE_PATHS[lang] = file_path
-    print("Templates downloaded.")
 tokenizer, model = load_model()
-download_templates()
-# --- Utilities ---
 def num_tokens_of_text(text: str) -> int:
-    """Approximate number of tokens for a given text."""
     return len(tokenizer.encode(text, add_special_tokens=False))
 def trim_history_to_max_tokens(messages, max_tokens):
-    """Trims the message history to fit within a token limit."""
     rev = list(reversed(messages))
     total = 0
     kept = []
@@ -62,7 +45,7 @@ def trim_history_to_max_tokens(messages, max_tokens):
     return list(reversed(kept))
 def build_messages_for_template(history_messages, reasoning: bool, language: str):
-    """Prepares messages for the chat template."""
     if language == 'ru':
         system_message = "Ты — дружелюбный ассистент, который говорит на русском. Отвечай кратко, но по делу."
         reasoning_instruction = ("[REASONING MODE]\n"
@@ -87,7 +70,7 @@ def build_messages_for_template(history_messages, reasoning: bool, language: str
     return messages
 def extract_assistant_reply(raw_generated_text: str) -> str:
-    """Removes extra tokens and returns only the assistant's reply."""
     text = raw_generated_text
     if "<|assistant|>" in text:
         text = text.split("<|assistant|>")[-1]
@@ -95,9 +78,9 @@ def extract_assistant_reply(raw_generated_text: str) -> str:
         text = text.replace(tag, "")
     return text.strip()
-# --- Main function for Gradio ---
 def generate_response(user_text: str, history, reasoning: bool, language: str):
-    """Processes user input and generates a response."""
     history.append({"role": "user", "content": user_text})
@@ -105,8 +88,8 @@ def generate_response(user_text: str, history, reasoning: bool, language: str):
     messages_for_template = build_messages_for_template(trimmed_history, reasoning, language)
-    # Use the full path from the TEMPLATE_PATHS dictionary
-    template_file = TEMPLATE_PATHS.get(language, TEMPLATE_PATHS["en"])
     text = tokenizer.apply_chat_template(
         messages_for_template,
@@ -134,7 +117,7 @@ def generate_response(user_text: str, history, reasoning: bool, language: str):
     return "", history
-# --- Gradio Interface ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# PyroNet-mini Chat")
     gr.Markdown("A demonstration of PyroNet-mini with multilingual templates and a reasoning mode.")

 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from functools import lru_cache
+# --- Hugging Face Space configuration ---
+# The model and tokenizer are loaded once, at application startup
 MODEL_NAME = "Kenan023214/PyroNet-mini"
+DEVICE = "cpu"  # Use the CPU, as specified for a Basic Space
+MAX_NEW_TOKENS = 256
 MAX_CONTEXT_TOKENS = 2048
+# Loading the model and tokenizer
 @lru_cache(maxsize=1)
 def load_model():
+    """Load the tokenizer and model for MODEL_NAME; lru_cache lets repeat calls reuse them."""
     print("Loading model and tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
         device_map=DEVICE,
+        torch_dtype=torch.float32  # Use float32 for CPU compatibility
     )
     print("Model loaded.")
     return tokenizer, model
 tokenizer, model = load_model()
+# --- Utilities ---
 def num_tokens_of_text(text: str) -> int:
+    """Return the number of tokens `text` encodes to, with special tokens excluded."""
     return len(tokenizer.encode(text, add_special_tokens=False))
 def trim_history_to_max_tokens(messages, max_tokens):
+    """Обрезает историю сообщений, чтобы она соответствовала лимиту токенов."""
     rev = list(reversed(messages))
     total = 0
     kept = []
     return list(reversed(kept))
 def build_messages_for_template(history_messages, reasoning: bool, language: str):
+    """Подготавливает сообщения для шаблона, включая системное сообщение."""
     if language == 'ru':
         system_message = "Ты — дружелюбный ассистент, который говорит на русском. Отвечай кратко, но по делу."
         reasoning_instruction = ("[REASONING MODE]\n"
     return messages
 def extract_assistant_reply(raw_generated_text: str) -> str:
+    """Убирает лишние токены и возвращает только ответ ассистента."""
     text = raw_generated_text
     if "<|assistant|>" in text:
         text = text.split("<|assistant|>")[-1]
         text = text.replace(tag, "")
     return text.strip()
+# --- Main function for Gradio ---
 def generate_response(user_text: str, history, reasoning: bool, language: str):
+    """Обрабатывает пользовательский запрос и генерирует ответ."""
     history.append({"role": "user", "content": user_text})
     messages_for_template = build_messages_for_template(trimmed_history, reasoning, language)
+    # Выбираем шаблон из файлов в репозитории
+    template_file = f"chat_template_{language}.jinja"
     text = tokenizer.apply_chat_template(
         messages_for_template,
     return "", history
+# --- Gradio interface ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# PyroNet-mini Chat")
     gr.Markdown("A demonstration of PyroNet-mini with multilingual templates and a reasoning mode.")