anggars committed on
Commit
1b1ab37
·
verified ·
1 Parent(s): a63526c

Sync from GitHub Actions: e47275f6fa73e62abbc2b5982f8d3a30ddeec898

Browse files
Files changed (2) hide show
  1. api/core/nlp_handler.py +20 -2
  2. api/requirements.txt +1 -0
api/core/nlp_handler.py CHANGED
@@ -4,6 +4,10 @@ import requests
4
  import html
5
  from deep_translator import GoogleTranslator
6
  from youtube_transcript_api import YouTubeTranscriptApi
 
 
 
 
7
 
8
  # --- CONFIG ---
9
  BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -86,8 +90,21 @@ class NLPHandler:
86
 
87
 
88
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  @staticmethod
90
  def translate_to_english(text):
 
91
  try:
92
  if len(text) > 4500: text = text[:4500]
93
  return GoogleTranslator(source='auto', target='en').translate(text)
@@ -113,7 +130,8 @@ class NLPHandler:
113
  @staticmethod
114
  def predict_all(raw_text):
115
  NLPHandler.load_models()
116
- processed_text = NLPHandler.translate_to_english(raw_text)
 
117
 
118
  # --- MBTI PREDICTION (anggars/xlm-mbti) ---
119
  mbti_result = "UNKNOWN"
@@ -212,7 +230,7 @@ class NLPHandler:
212
  return {
213
  "mbti": mbti_result,
214
  "emotion": emotion_data,
215
- "keywords": NLPHandler.extract_keywords(processed_text),
216
  "reasoning": {
217
  "mbti": mbti_desc,
218
  "emotion": emotion_reasoning,
 
4
  import html
5
  from deep_translator import GoogleTranslator
6
  from youtube_transcript_api import YouTubeTranscriptApi
7
+ from langdetect import detect, DetectorFactory
8
+
9
+ # Force consistent language detection
10
+ DetectorFactory.seed = 0
11
 
12
  # --- CONFIG ---
13
  BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
90
 
91
 
92
 
93
@staticmethod
def prepare_text(text):
    """Return text for the models, translating only when it is needed.

    The input is capped at 4500 characters and its language is detected.
    Indonesian ('id') and English ('en') text is returned untranslated,
    since XLM-RoBERTa handles id/en natively; any other detected language
    is translated to English.

    Args:
        text: Raw input string.

    Returns:
        The (possibly truncated) input text, or its English translation.
        This is best effort: if detection or translation fails, the text
        is returned untranslated instead of raising.
    """
    # Nothing to detect or translate for empty/None input.
    if not text:
        return text
    try:
        text = text[:4500]
        if detect(text) in ('id', 'en'):
            return text
        translated = GoogleTranslator(source='auto', target='en').translate(text)
        # Fall back to the untranslated text if the translator yields nothing.
        return translated or text
    except Exception:
        # Deliberate best-effort fallback; unlike a bare `except:` this does
        # not swallow KeyboardInterrupt / SystemExit.
        return text
104
+
105
  @staticmethod
106
  def translate_to_english(text):
107
+ """Force translate to English (used for keywords extraction)."""
108
  try:
109
  if len(text) > 4500: text = text[:4500]
110
  return GoogleTranslator(source='auto', target='en').translate(text)
 
130
  @staticmethod
131
  def predict_all(raw_text):
132
  NLPHandler.load_models()
133
+ processed_text = NLPHandler.prepare_text(raw_text)
134
+ english_text = NLPHandler.translate_to_english(raw_text) # For keywords only
135
 
136
  # --- MBTI PREDICTION (anggars/xlm-mbti) ---
137
  mbti_result = "UNKNOWN"
 
230
  return {
231
  "mbti": mbti_result,
232
  "emotion": emotion_data,
233
+ "keywords": NLPHandler.extract_keywords(english_text),
234
  "reasoning": {
235
  "mbti": mbti_desc,
236
  "emotion": emotion_reasoning,
api/requirements.txt CHANGED
@@ -4,6 +4,7 @@ python-dotenv
4
  pydantic
5
  numpy
6
  deep-translator
 
7
  requests
8
  youtube-transcript-api
9
  google-genai
 
4
  pydantic
5
  numpy
6
  deep-translator
7
+ langdetect
8
  requests
9
  youtube-transcript-api
10
  google-genai