# Spaces: SRP-base-model-training / Multilingual-Translation-App
# Status: Runtime error
# File size: 7,589 Bytes

import warnings
warnings.filterwarnings("ignore")

import os
import sys
from typing import List, Tuple
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
from llama_cpp_agent.messages_formatter import MessagesFormatter, PromptMarkers
from huggingface_hub import hf_hub_download
import gradio as gr
from logger import logging
from exception import CustomExceptionHandling

# Read the Hugging Face access token from the process environment. Nothing in
# this file loads a .env file, so the variable must already be set in the
# environment; os.getenv returns None when it is not.
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")

# Download the GGUF model file into ./models. exist_ok=True replaces the
# separate existence check, so there is no gap between checking and creating.
os.makedirs("./models", exist_ok=True)

hf_hub_download(
    repo_id="SRP-base-model-training/gemma_3_800M_sft_v2_translation-kazparc_latest",
    filename="gemma_3_800M_sft_v2_translation-kazparc_latest.gguf",
    local_dir="./models",
    # Forward the token so gated/private repositories can be fetched. An unset
    # or empty variable becomes None, which keeps the library's default
    # authentication behaviour.
    token=huggingface_token or None,
)

# Turn delimiters used by the Gemma 3 prompt layout below. They are shared by
# the per-role markers and by the stop sequences so both stay in sync.
_TURN_START = "<start_of_turn>"
_TURN_END = "<end_of_turn>"

# Text placed before and after each role's message when a conversation is
# rendered into a prompt: PromptMarkers(text_before, text_after).
# NOTE(review): the assistant marker has no trailing newline and no closing
# delimiter, and uses the role name "assistant" (stock Gemma chat templates use
# "model") - confirm this matches the format the fine-tuned model was trained on.
gemma_3_prompt_markers = {
    Roles.system: PromptMarkers(f"{_TURN_START}system\n", f"{_TURN_END}\n"),
    Roles.user: PromptMarkers(f"{_TURN_START}user\n", f"{_TURN_END}\n"),
    Roles.assistant: PromptMarkers(f"{_TURN_START}assistant", ""),
    Roles.tool: PromptMarkers("", ""),
}

# Formatter that applies the markers above; generation stops as soon as the
# model emits either turn delimiter.
gemma_3_formatter = MessagesFormatter(
    pre_prompt="",
    prompt_markers=gemma_3_prompt_markers,
    include_sys_prompt_in_first_user_message=True,
    default_stop_sequences=[_TURN_END, _TURN_START],
    strip_prompt=False,
    bos_token="<bos>",
    eos_token="<eos>",
)

# Maps each translation direction label to the prompts used for it:
#   "system" - the system prompt handed to the agent
#   "prefix" - the <src=..><tgt=..> tag prepended to the text to translate
# Each row below is (direction label, system prompt, language-tag prefix).
direction_to_prompts = {
    label: {"system": system_prompt, "prefix": tag_prefix}
    for label, system_prompt, tag_prefix in (
        (
            "English to Kazakh",
            "You are a professional translator. Translate the following sentence into қазақ.",
            "<src=en><tgt=kk>",
        ),
        (
            "Kazakh to English",
            "Сіз кәсіби аудармашысыз. Төмендегі сөйлемді English тіліне аударыңыз.",
            "<src=kk><tgt=en>",
        ),
        (
            "Kazakh to Russian",
            "Сіз кәсіби аудармашысыз. Төмендегі сөйлемді орыс тіліне аударыңыз.",
            "<src=kk><tgt=ru>",
        ),
        (
            "Russian to Kazakh",
            "Вы профессиональный переводчик. Переведите следующее предложение на қазақ язык.",
            "<src=ru><tgt=kk>",
        ),
    )
}

# Module-level model cache: the loaded Llama instance and the model file name
# it was created from. Both start empty and are filled in on first use.
llm = llm_model = None

def respond(
    message: str,
    history: List[Tuple[str, str]],
    direction: str,
    model: str = "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf",
    max_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.95,
    top_k: int = 40,
    repeat_penalty: float = 1.1,
):
    """
    Translate a message in the selected direction, streaming the result.

    The model is loaded on the first call and cached in the module-level
    ``llm`` / ``llm_model`` globals; it is reloaded only when ``model`` names
    a different file than the cached one.

    Args:
        message (str): The text to translate.
        history (List[Tuple[str, str]]): Earlier (user, assistant) pairs of
            the conversation. ``None`` is treated as an empty history and
            pairs with a missing side are skipped.
        direction (str): The translation direction; must be a key of
            ``direction_to_prompts`` (e.g., "English to Kazakh").
        model (str): File name of the GGUF model inside ``models/``.
        max_tokens (int): Maximum number of tokens to generate.
        temperature (float): Sampling temperature.
        top_p (float): Top-p sampling parameter.
        top_k (int): Top-k sampling parameter.
        repeat_penalty (float): Penalty for repetition.

    Yields:
        str: The translation accumulated so far, growing with every streamed
        chunk. When the direction is unknown or the model file is missing, a
        single "Error: ..." message is yielded instead.

    Raises:
        CustomExceptionHandling: Wraps any exception raised while loading the
            model or generating the response.
    """
    global llm, llm_model

    try:
        # Validate the direction before the (expensive) model load. A missing
        # or unknown selection is reported as a readable message rather than
        # surfacing as a KeyError, mirroring the missing-model case below.
        prompts = direction_to_prompts.get(direction)
        if prompts is None:
            valid_directions = ", ".join(direction_to_prompts)
            yield (
                f"Error: Unknown translation direction {direction!r}. "
                f"Choose one of: {valid_directions}."
            )
            return
        system_message = prompts["system"]
        user_prefix = prompts["prefix"]

        # (Re)load the model only when nothing is cached yet or a different
        # model file was requested.
        if llm is None or llm_model != model:
            model_path = f"models/{model}"
            if not os.path.exists(model_path):
                yield f"Error: Model file not found at {model_path}."
                return
            llm = Llama(
                model_path=model_path,
                flash_attn=False,
                n_gpu_layers=0,
                n_batch=8,
                n_ctx=2048,
                n_threads=8,
                n_threads_batch=8,
            )
            llm_model = model
        provider = LlamaCppPythonProvider(llm)

        agent = LlamaCppAgent(
            provider,
            system_prompt=system_message,
            custom_messages_formatter=gemma_3_formatter,
            debug_output=True,
        )

        settings = provider.get_provider_default_settings()
        settings.temperature = temperature
        settings.top_k = top_k
        settings.top_p = top_p
        settings.max_tokens = max_tokens
        settings.repeat_penalty = repeat_penalty
        settings.stream = True

        # Replay earlier turns so the agent sees the conversation so far. Each
        # past user message gets the prefix of the currently selected direction.
        messages = BasicChatHistory()
        for user_msg, assistant_msg in history or []:
            if user_msg is None or assistant_msg is None:
                # An incomplete pair cannot be concatenated with the prefix.
                continue
            full_user_msg = user_prefix + " " + user_msg
            messages.add_message({"role": Roles.user, "content": full_user_msg})
            messages.add_message({"role": Roles.assistant, "content": assistant_msg})

        full_message = user_prefix + " " + message

        stream = agent.get_chat_response(
            full_message,
            llm_sampling_settings=settings,
            chat_history=messages,
            returns_streaming_generator=True,
            print_output=False,
        )

        logging.info("Response stream generated successfully")
        # Yield the running text, not just the newest chunk, so the caller
        # always receives the full translation produced so far.
        outputs = ""
        for output in stream:
            outputs += output
            yield outputs

    except Exception as e:
        raise CustomExceptionHandling(e, sys) from e

# Chat UI around respond(). The additional inputs are handed to respond()
# positionally after (message, history), so their order MUST match the
# function's parameter order:
#   direction, model, max_tokens, temperature, top_p, top_k, repeat_penalty
# Leaving out the model input shifts every slider by one parameter (the
# "Max Tokens" value would arrive as the model file name).
demo = gr.ChatInterface(
    respond,
    examples=[["Hello"], ["Сәлем"], ["Привет"]],
    additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
    additional_inputs=[
        gr.Dropdown(
            choices=["English to Kazakh", "Kazakh to English", "Kazakh to Russian", "Russian to Kazakh"],
            # Explicit default so respond() always receives a valid direction.
            value="English to Kazakh",
            label="Translation Direction",
            info="Select the direction of translation"
        ),
        gr.Dropdown(
            choices=["gemma_3_800M_sft_v2_translation-kazparc_latest.gguf"],
            value="gemma_3_800M_sft_v2_translation-kazparc_latest.gguf",
            label="Model",
            info="GGUF model file used for translation"
        ),
        gr.Slider(
            minimum=512,
            maximum=2048,
            value=1024,
            step=1,
            label="Max Tokens",
            info="Maximum length of the translation"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=0.7,
            step=0.1,
            label="Temperature",
            info="Controls randomness (higher = more creative)"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p",
            info="Nucleus sampling threshold"
        ),
        gr.Slider(
            minimum=1,
            maximum=100,
            value=40,
            step=1,
            label="Top-k",
            info="Limits vocabulary to top K tokens"
        ),
        gr.Slider(
            minimum=1.0,
            maximum=2.0,
            value=1.1,
            step=0.1,
            label="Repetition Penalty",
            info="Penalizes repeated words"
        ),
    ],
    theme="Ocean",
    submit_btn="Translate",
    stop_btn="Stop",
    title="Kazakh Translation Model",
    description="Translate text between Kazakh, English, and Russian using a specialized language model.",
    chatbot=gr.Chatbot(scale=1, show_copy_button=True),
    cache_examples=False,
)

# Start the web server only when this file is run as a script, not on import.
if __name__ == "__main__":
    launch_options = {
        "share": False,            # no public share link
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "show_api": False,
    }
    demo.launch(**launch_options)