Spaces:
Sleeping
Sleeping
File size: 3,163 Bytes
1d46eb9 95a3120 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
"""
Processing utilities for Vietnamese translation integration
"""
import logging
from typing import Dict, Any, List, Optional, Callable
logger = logging.getLogger(__name__)
def translate_sft_row(row: Dict[str, Any], translator, text_fields: Optional[List[str]] = None) -> Dict[str, Any]:
    """
    Translate selected text fields of an SFT row using the given translator.

    Translation is best-effort: if the translator is falsy, reports that it is
    not loaded, or raises while translating, the input row is returned as-is.

    Args:
        row: SFT row dictionary.
        translator: Object exposing ``is_loaded()`` and
            ``translate_dict(row, fields)`` (documented by this module as a
            VietnameseTranslator instance).
        text_fields: List of field names to translate. If None, defaults to
            ``["instruction", "input", "output"]``.

    Returns:
        The result of ``translator.translate_dict(row, text_fields)`` on
        success; otherwise the original ``row`` object, untouched by this
        function.
    """
    if not translator or not translator.is_loaded():
        logger.warning("Translator not available, skipping translation")
        return row

    if text_fields is None:
        # Default fields to translate in SFT format
        text_fields = ["instruction", "input", "output"]

    # Keep the try body limited to the call that can actually fail so that
    # unrelated errors are not attributed to translation.
    try:
        translated_row = translator.translate_dict(row, text_fields)
    except Exception as e:
        # Deliberate best-effort: a failed translation must not drop the row.
        logger.error("Failed to translate SFT row: %s", e)
        return row

    logger.debug("Translated SFT row with fields: %s", text_fields)
    return translated_row
def translate_rag_row(row: Dict[str, Any], translator, text_fields: Optional[List[str]] = None) -> Dict[str, Any]:
    """
    Translate selected text fields of a RAG row using the given translator.

    Translation is best-effort: if the translator is falsy, reports that it is
    not loaded, or raises while translating, the input row is returned as-is.

    Args:
        row: RAG row dictionary.
        translator: Object exposing ``is_loaded()`` and
            ``translate_dict(row, fields)`` (documented by this module as a
            VietnameseTranslator instance).
        text_fields: List of field names to translate. If None, defaults to
            ``["instruction", "input", "output"]``.

    Returns:
        The result of ``translator.translate_dict(row, text_fields)`` on
        success; otherwise the original ``row`` object, untouched by this
        function.
    """
    if not translator or not translator.is_loaded():
        logger.warning("Translator not available, skipping translation")
        return row

    if text_fields is None:
        # Default fields to translate in RAG format
        # NOTE(review): these defaults are identical to the SFT defaults;
        # confirm that RAG rows really use these keys.
        text_fields = ["instruction", "input", "output"]

    # Keep the try body limited to the call that can actually fail so that
    # unrelated errors are not attributed to translation.
    try:
        translated_row = translator.translate_dict(row, text_fields)
    except Exception as e:
        # Deliberate best-effort: a failed translation must not drop the row.
        logger.error("Failed to translate RAG row: %s", e)
        return row

    logger.debug("Translated RAG row with fields: %s", text_fields)
    return translated_row
def should_translate(vietnamese_translation: bool, translator) -> bool:
    """
    Decide whether Vietnamese translation should run.

    Args:
        vietnamese_translation: Flag from user input requesting translation.
        translator: Translator object; must be truthy and report
            ``is_loaded()`` as truthy for translation to proceed.

    Returns:
        True only when translation was requested and the translator is
        available and loaded; False otherwise.
    """
    if vietnamese_translation:
        if translator and translator.is_loaded():
            return True
        # Requested, but nothing usable to perform it with.
        logger.warning("Vietnamese translation requested but translator not available")
    return False
def log_translation_stats(stats: Dict[str, Any], translated_count: int) -> None:
    """
    Record the translated-item count in ``stats`` and log a summary line.

    Args:
        stats: Statistics dictionary; updated in place under the
            ``"vietnamese_translated"`` key.
        translated_count: Number of items translated.
    """
    # Record first, then report, so the log line is only emitted once the
    # stats dictionary has actually been updated.
    stats["vietnamese_translated"] = translated_count
    summary = f"Vietnamese translation completed: {translated_count} items translated"
    logger.info(summary)