Spaces:

Perceptechai
/

code-tagging

Sleeping

kenleeyx committed on Jun 17, 2025

Commit

01355c1

1 Parent(s): ea8f271

feat: add translation to English for quotes

Add quote translation. Translation is not threaded, so it runs
sequentially and takes a short while after all the quotes have been tagged.
Translation is also done by OpenAI, so results may differ from Google Translate.

Files changed (1) hide show

app.py +19 -1

app.py CHANGED Viewed

@@ -74,6 +74,21 @@ def tag_quote(quote: str, tags_list: list) -> list:
             logger.warning(f"Invalid tag {tag} found and has been filtered out.")
     return valid_tags
 def count_tags(tags_list: list, tags_col: pd.Series )->pd.DataFrame:
     """
     Creates a DataFrame indicating number of occurences of each tag from a DataFrame column containing lists of tags.
@@ -201,10 +216,13 @@ def process_quotes(quotes_file_path: str, quotes_col_name: str, retained_columns
     for tag in tags_counter_df['Tag']:
         tagged_quotes_list = quotes_df.loc[quotes_df[tag]==1, quotes_col_name].tolist()
         sample_quotes = random.sample(tagged_quotes_list, min(2, len(tagged_quotes_list)))
         while len(sample_quotes) < 2:
             sample_quotes.append(None)
         [tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Quote 1'], tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Quote 2']] = sample_quotes
     #Convert values in tags column from list to str
     quotes_df['Tags'] = quotes_df["Tags"].apply(lambda x: ", ".join(x))

             logger.warning(f"Invalid tag {tag} found and has been filtered out.")
     return valid_tags
+def translate_quote(quote: str) -> str:
+    """
+    Translates a quote to English via a chat-completions request.
+
+    Sends one user message to the "gpt-4o-mini" model asking it to translate
+    `quote` and to return nothing other than the translated text. The input
+    quote and the returned content are both logged at INFO level.
+
+    :param quote: Text to translate; it is interpolated directly into the prompt.
+    :return: The message content of the first choice in the response.
+        NOTE(review): the content may be None for some responses, which the
+        `-> str` annotation does not reflect - confirm against the SDK.
+    """
+    # `logger` and `client` are not defined in this hunk; presumably they are
+    # module-level objects in app.py - verify against the full file.
+    logger.info(f"Translating quote {quote}")
+    # Synchronous call: one request is made per quote, with no retry or
+    # error handling at this level.
+    response = client.chat.completions.create(
+        model = "gpt-4o-mini",
+        messages=[
+            {"role": "user", "content": f"Translate the following quote into English. Do not return anything other than the translated quote. {quote}"}
+        ]
+    )
+    # Log a fixed label, then the raw model output, as two separate records.
+    logger.info("Content")
+    logger.info(response.choices[0].message.content)
+    return response.choices[0].message.content
 def count_tags(tags_list: list, tags_col: pd.Series )->pd.DataFrame:
     """
     Creates a DataFrame indicating number of occurences of each tag from a DataFrame column containing lists of tags.
     for tag in tags_counter_df['Tag']:
         tagged_quotes_list = quotes_df.loc[quotes_df[tag]==1, quotes_col_name].tolist()
         sample_quotes = random.sample(tagged_quotes_list, min(2, len(tagged_quotes_list)))
+        translated_quotes = [translate_quote(quote) for quote in sample_quotes]
         while len(sample_quotes) < 2:
             sample_quotes.append(None)
+            translated_quotes.append(None)
         [tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Quote 1'], tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Quote 2']] = sample_quotes
+        [tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Translated Quote 1'], tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Translated Quote 2']] = translated_quotes
     #Convert values in tags column from list to str
     quotes_df['Tags'] = quotes_df["Tags"].apply(lambda x: ", ".join(x))