LiamKhoaLe committed on
Commit
e2fd976
·
1 Parent(s): 4056c2c

Update valid output SFT saver

Browse files
Files changed (2) hide show
  1. vi/processing.py +10 -2
  2. vi/translator.py +3 -1
vi/processing.py CHANGED
@@ -104,12 +104,20 @@ def translate_sft_row(row: Dict[str, Any], translator, text_fields: List[str] =
104
  original = sft_data[field]
105
  translated = translator.translate_text(original)
106
 
 
 
 
 
 
 
107
  # Validate and sanitize translated field
108
  if _validate_vi_translation(original, translated):
109
  translated_sft[field] = _vi_sanitize_text(translated)
110
- logger.debug(f"Translated field '{field}': '{original[:50]}...' -> '{translated[:50]}...'")
111
  else:
112
- logger.warning(f"Invalid Vietnamese translation for field {field}, keeping original")
 
 
113
  translated_sft[field] = original
114
  except Exception as e:
115
  logger.error(f"Failed to translate field '{field}': {e}")
 
104
  original = sft_data[field]
105
  translated = translator.translate_text(original)
106
 
107
+ # Debug logging
108
+ logger.debug(f"Translation attempt for field '{field}':")
109
+ logger.debug(f" Original: '{original[:50]}...'")
110
+ logger.debug(f" Translated: '{translated[:50]}...'")
111
+ logger.debug(f" Are they the same? {original == translated}")
112
+
113
  # Validate and sanitize translated field
114
  if _validate_vi_translation(original, translated):
115
  translated_sft[field] = _vi_sanitize_text(translated)
116
+ logger.debug(f" Successfully translated field '{field}'")
117
  else:
118
+ logger.warning(f"Invalid Vietnamese translation for field {field}, keeping original")
119
+ logger.warning(f" Original: '{original[:50]}...'")
120
+ logger.warning(f" Translated: '{translated[:50]}...'")
121
  translated_sft[field] = original
122
  except Exception as e:
123
  logger.error(f"Failed to translate field '{field}': {e}")
vi/translator.py CHANGED
@@ -127,7 +127,9 @@ class VietnameseTranslator:
127
  # Decode
128
  translated = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
129
 
130
- logger.debug(f"Translated: '{text[:50]}...' -> '{translated[:50]}...'")
 
 
131
  return translated.strip()
132
 
133
  except Exception as e:
 
127
  # Decode
128
  translated = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
129
 
130
+ logger.debug(f"Translation result: '{text[:50]}...' -> '{translated[:50]}...'")
131
+ logger.debug(f"Are original and translated the same? {text.strip() == translated.strip()}")
132
+
133
  return translated.strip()
134
 
135
  except Exception as e: