Spaces:
Sleeping
Sleeping
feat: add translation to English for quotes
Browse files
Add quote translation. Translation is not threaded, so it takes a
short while to finish after all the quotes have been tagged. Translation is
also done by OpenAI, so results may differ from those of Google Translate.
app.py
CHANGED
|
@@ -74,6 +74,21 @@ def tag_quote(quote: str, tags_list: list) -> list:
|
|
| 74 |
logger.warning(f"Invalid tag {tag} found and has been filtered out.")
|
| 75 |
return valid_tags
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
def count_tags(tags_list: list, tags_col: pd.Series )->pd.DataFrame:
|
| 78 |
"""
|
| 79 |
Creates a DataFrame indicating number of occurences of each tag from a DataFrame column containing lists of tags.
|
|
@@ -201,10 +216,13 @@ def process_quotes(quotes_file_path: str, quotes_col_name: str, retained_columns
|
|
| 201 |
for tag in tags_counter_df['Tag']:
|
| 202 |
tagged_quotes_list = quotes_df.loc[quotes_df[tag]==1, quotes_col_name].tolist()
|
| 203 |
sample_quotes = random.sample(tagged_quotes_list, min(2, len(tagged_quotes_list)))
|
|
|
|
| 204 |
while len(sample_quotes) < 2:
|
| 205 |
sample_quotes.append(None)
|
|
|
|
| 206 |
[tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Quote 1'], tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Quote 2']] = sample_quotes
|
| 207 |
-
|
|
|
|
| 208 |
#Convert values in tags column from list to str
|
| 209 |
quotes_df['Tags'] = quotes_df["Tags"].apply(lambda x: ", ".join(x))
|
| 210 |
|
|
|
|
| 74 |
logger.warning(f"Invalid tag {tag} found and has been filtered out.")
|
| 75 |
return valid_tags
|
| 76 |
|
| 77 |
+
def translate_quote(quote: str) -> str:
    """
    Requests an English translation of a quote through the chat completions client.

    Args:
        quote: The quote text to translate.

    Returns:
        The message content of the first choice in the completion response.
    """
    logger.info(f"Translating quote {quote}")

    # The quote is appended directly after the instruction in a single user message.
    prompt = f"Translate the following quote into English. Do not return anything other than the translated quote. {quote}"
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
    )

    translation = completion.choices[0].message.content
    logger.info("Content")
    logger.info(translation)
    return translation
|
| 91 |
+
|
| 92 |
def count_tags(tags_list: list, tags_col: pd.Series )->pd.DataFrame:
|
| 93 |
"""
|
| 94 |
Creates a DataFrame indicating number of occurences of each tag from a DataFrame column containing lists of tags.
|
|
|
|
| 216 |
for tag in tags_counter_df['Tag']:
|
| 217 |
tagged_quotes_list = quotes_df.loc[quotes_df[tag]==1, quotes_col_name].tolist()
|
| 218 |
sample_quotes = random.sample(tagged_quotes_list, min(2, len(tagged_quotes_list)))
|
| 219 |
+
translated_quotes = [translate_quote(quote) for quote in sample_quotes]
|
| 220 |
while len(sample_quotes) < 2:
|
| 221 |
sample_quotes.append(None)
|
| 222 |
+
translated_quotes.append(None)
|
| 223 |
[tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Quote 1'], tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Quote 2']] = sample_quotes
|
| 224 |
+
[tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Translated Quote 1'], tags_counter_df.loc[tags_counter_df['Tag'] == tag, 'Translated Quote 2']] = translated_quotes
|
| 225 |
+
|
| 226 |
#Convert values in tags column from list to str
|
| 227 |
quotes_df['Tags'] = quotes_df["Tags"].apply(lambda x: ", ".join(x))
|
| 228 |
|