kenleeyx committed on
Commit
01355c1
·
1 Parent(s): ea8f271

feat: add translation to English for quotes

Browse files

Add quote translation. Translation is not threaded, so it takes a short
while to finish after all the quotes have been tagged. Translation is done
by OpenAI, so results may differ from those of Google Translate.

Files changed (1) hide show
  1. app.py +19 -1
app.py CHANGED
@@ -74,6 +74,21 @@ def tag_quote(quote: str, tags_list: list) -> list:
74
  logger.warning(f"Invalid tag {tag} found and has been filtered out.")
75
  return valid_tags
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  def count_tags(tags_list: list, tags_col: pd.Series )->pd.DataFrame:
78
  """
79
  Creates a DataFrame indicating number of occurences of each tag from a DataFrame column containing lists of tags.
@@ -201,10 +216,13 @@ def process_quotes(quotes_file_path: str, quotes_col_name: str, retained_columns
201
  for tag in tags_counter_df['Tag']:
202
  tagged_quotes_list = quotes_df.loc[quotes_df[tag]==1, quotes_col_name].tolist()
203
  sample_quotes = random.sample(tagged_quotes_list, min(2, len(tagged_quotes_list)))
 
204
  while len(sample_quotes) < 2:
205
  sample_quotes.append(None)
 
206
  [tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Quote 1'], tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Quote 2']] = sample_quotes
207
-
 
208
  #Convert values in tags column from list to str
209
  quotes_df['Tags'] = quotes_df["Tags"].apply(lambda x: ", ".join(x))
210
 
 
74
  logger.warning(f"Invalid tag {tag} found and has been filtered out.")
75
  return valid_tags
76
 
77
+ def translate_quote(quote: str) -> str:
78
+ """
79
+ Translates a quote to English.
80
+ """
81
+ logger.info(f"Translating quote {quote}")
82
+ response = client.chat.completions.create(
83
+ model = "gpt-4o-mini",
84
+ messages=[
85
+ {"role": "user", "content": f"Translate the following quote into English. Do not return anything other than the translated quote. {quote}"}
86
+ ]
87
+ )
88
+ logger.info("Content")
89
+ logger.info(response.choices[0].message.content)
90
+ return response.choices[0].message.content
91
+
92
  def count_tags(tags_list: list, tags_col: pd.Series )->pd.DataFrame:
93
  """
94
  Creates a DataFrame indicating number of occurences of each tag from a DataFrame column containing lists of tags.
 
216
  for tag in tags_counter_df['Tag']:
217
  tagged_quotes_list = quotes_df.loc[quotes_df[tag]==1, quotes_col_name].tolist()
218
  sample_quotes = random.sample(tagged_quotes_list, min(2, len(tagged_quotes_list)))
219
+ translated_quotes = [translate_quote(quote) for quote in sample_quotes]
220
  while len(sample_quotes) < 2:
221
  sample_quotes.append(None)
222
+ translated_quotes.append(None)
223
  [tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Quote 1'], tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Quote 2']] = sample_quotes
224
+ [tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Translated Quote 1'], tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Translated Quote 2']] = translated_quotes
225
+
226
  #Convert values in tags column from list to str
227
  quotes_df['Tags'] = quotes_df["Tags"].apply(lambda x: ", ".join(x))
228