Spaces:
Sleeping
Sleeping
| """ | |
| Processing utilities for Vietnamese translation integration | |
| """ | |
| import logging | |
| from typing import Dict, Any, List, Optional, Callable | |
| logger = logging.getLogger(__name__) | |
def translate_sft_row(
    row: Dict[str, Any],
    translator,
    text_fields: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """
    Translate specific text fields in an SFT row from English to Vietnamese.

    Args:
        row: SFT row dictionary.
        translator: Translator object; it must provide ``is_loaded()`` and
            ``translate_dict(row, fields)``.
        text_fields: List of field names to translate. If None, the default
            fields ``["instruction", "input", "output"]`` are used.

    Returns:
        The value returned by ``translator.translate_dict`` on success.
        The original ``row`` object is returned as-is when the translator is
        falsy, reports that it is not loaded, or raises during translation.
    """
    if not translator or not translator.is_loaded():
        logger.warning("Translator not available, skipping translation")
        return row

    if text_fields is None:
        # Default fields to translate in SFT format
        text_fields = ["instruction", "input", "output"]

    try:
        translated_row = translator.translate_dict(row, text_fields)
    except Exception as e:
        # Deliberate best-effort fallback: hand back the untranslated row
        # instead of propagating. exc_info keeps the traceback in the log so
        # the failure can still be diagnosed.
        logger.error(f"Failed to translate SFT row: {e}", exc_info=True)
        return row
    else:
        logger.debug(f"Translated SFT row with fields: {text_fields}")
        return translated_row
def translate_rag_row(
    row: Dict[str, Any],
    translator,
    text_fields: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """
    Translate specific text fields in a RAG row from English to Vietnamese.

    Args:
        row: RAG row dictionary.
        translator: Translator object; it must provide ``is_loaded()`` and
            ``translate_dict(row, fields)``.
        text_fields: List of field names to translate. If None, the default
            fields ``["instruction", "input", "output"]`` are used.

    Returns:
        The value returned by ``translator.translate_dict`` on success.
        The original ``row`` object is returned as-is when the translator is
        falsy, reports that it is not loaded, or raises during translation.
    """
    if not translator or not translator.is_loaded():
        logger.warning("Translator not available, skipping translation")
        return row

    if text_fields is None:
        # Default fields to translate in RAG format
        # NOTE(review): these defaults are identical to the SFT defaults;
        # confirm they match the actual RAG row schema.
        text_fields = ["instruction", "input", "output"]

    try:
        translated_row = translator.translate_dict(row, text_fields)
    except Exception as e:
        # Deliberate best-effort fallback: hand back the untranslated row
        # instead of propagating. exc_info keeps the traceback in the log so
        # the failure can still be diagnosed.
        logger.error(f"Failed to translate RAG row: {e}", exc_info=True)
        return row
    else:
        logger.debug(f"Translated RAG row with fields: {text_fields}")
        return translated_row
def should_translate(vietnamese_translation: bool, translator) -> bool:
    """
    Decide whether Vietnamese translation should run.

    Args:
        vietnamese_translation: Flag from user input.
        translator: Translator object; ``is_loaded()`` is only consulted when
            the flag is truthy and the translator itself is truthy.

    Returns:
        True only when the flag is set and the translator is present and
        reports itself as loaded; False otherwise. A warning is logged when
        translation was requested but the translator is unavailable.
    """
    if vietnamese_translation:
        translator_ready = bool(translator) and bool(translator.is_loaded())
        if translator_ready:
            return True
        logger.warning("Vietnamese translation requested but translator not available")
    return False
def log_translation_stats(stats: Dict[str, Any], translated_count: int) -> None:
    """
    Record the translated-item count and report it at INFO level.

    Args:
        stats: Statistics dictionary; its ``"vietnamese_translated"`` entry
            is set (overwritten if present) to ``translated_count``.
        translated_count: Number of items translated.
    """
    # Store first, then log, so nothing is logged if the store fails.
    stats["vietnamese_translated"] = translated_count
    summary = f"Vietnamese translation completed: {translated_count} items translated"
    logger.info(summary)