Spaces:
Build error
Build error
disable lfs for 00b notebook
Browse files- .gitattributes +0 -1
- llm_toolkit/translation_utils.py +35 -10
- notebooks/00b_Data Analysis_Few_Shots.ipynb +0 -0
- requirements.txt +1 -0
.gitattributes
CHANGED
|
@@ -56,7 +56,6 @@ results/mac-results_few_shots.csv filter=lfs diff=lfs merge=lfs -text
|
|
| 56 |
results/mac-results_metrics.csv filter=lfs diff=lfs merge=lfs -text
|
| 57 |
notebooks/00_Data[[:space:]]Analysis.ipynb filter=lfs diff=lfs merge=lfs -text
|
| 58 |
notebooks/00a_Data[[:space:]]Analysis_greedy_decoding.ipynb filter=lfs diff=lfs merge=lfs -text
|
| 59 |
-
notebooks/00b_Data[[:space:]]Analysis_Few_Shots.ipynb filter=lfs diff=lfs merge=lfs -text
|
| 60 |
notebooks/01_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
|
| 61 |
notebooks/01a_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
|
| 62 |
logs/l40-1gpu.txt filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 56 |
results/mac-results_metrics.csv filter=lfs diff=lfs merge=lfs -text
|
| 57 |
notebooks/00_Data[[:space:]]Analysis.ipynb filter=lfs diff=lfs merge=lfs -text
|
| 58 |
notebooks/00a_Data[[:space:]]Analysis_greedy_decoding.ipynb filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 59 |
notebooks/01_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
|
| 60 |
notebooks/01a_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
|
| 61 |
logs/l40-1gpu.txt filter=lfs diff=lfs merge=lfs -text
|
llm_toolkit/translation_utils.py
CHANGED
|
@@ -19,6 +19,7 @@ rouge = evaluate.load("rouge")
|
|
| 19 |
meteor = evaluate.load("meteor")
|
| 20 |
accuracy = evaluate.load("accuracy")
|
| 21 |
sacrebleu = evaluate.load("sacrebleu")
|
|
|
|
| 22 |
|
| 23 |
|
| 24 |
def extract_answer(text, debug=False):
|
|
@@ -43,7 +44,7 @@ def extract_answer(text, debug=False):
|
|
| 43 |
return text
|
| 44 |
|
| 45 |
|
| 46 |
-
def calc_metrics(references, predictions, debug=False):
|
| 47 |
assert len(references) == len(
|
| 48 |
predictions
|
| 49 |
), f"lengths are difference: {len(references)} != {len(predictions)}"
|
|
@@ -51,6 +52,10 @@ def calc_metrics(references, predictions, debug=False):
|
|
| 51 |
predictions = [extract_answer(text) for text in predictions]
|
| 52 |
results = {}
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
results["meteor"] = meteor.compute(predictions=predictions, references=references)[
|
| 55 |
"meteor"
|
| 56 |
]
|
|
@@ -233,7 +238,18 @@ def detect_repetition_scores(row, col, debug=False):
|
|
| 233 |
)
|
| 234 |
|
| 235 |
|
| 236 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
chinese_char_pattern = re.compile(r"[\u4e00-\u9fff]")
|
| 238 |
return 1 if chinese_char_pattern.search(text) else 0
|
| 239 |
|
|
@@ -264,7 +280,7 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
|
|
| 264 |
repetition_score = []
|
| 265 |
total_repetitions = []
|
| 266 |
num_max_output_tokens = []
|
| 267 |
-
|
| 268 |
columns = df.columns[2:]
|
| 269 |
|
| 270 |
df[
|
|
@@ -275,8 +291,13 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
|
|
| 275 |
]
|
| 276 |
] = df["english"].apply(detect_scores)
|
| 277 |
|
|
|
|
|
|
|
|
|
|
| 278 |
for col in columns:
|
| 279 |
-
metrics = calc_metrics(
|
|
|
|
|
|
|
| 280 |
print(f"{col}: {metrics}")
|
| 281 |
|
| 282 |
meteor.append(metrics["meteor"])
|
|
@@ -298,9 +319,11 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
|
|
| 298 |
lambda x: len(tokenizers[model](x)["input_ids"])
|
| 299 |
)
|
| 300 |
|
| 301 |
-
new_col = f"
|
| 302 |
-
df[new_col] = df[col].apply(
|
| 303 |
-
|
|
|
|
|
|
|
| 304 |
|
| 305 |
new_col = f"output_tokens-{col}"
|
| 306 |
df[new_col] = df[col].apply(lambda x: len(tokenizers[model](x)["input_ids"]))
|
|
@@ -320,7 +343,7 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
|
|
| 320 |
lambda x: x["meteor"] / math.log10(10 + x["total_repetitions"]), axis=1
|
| 321 |
)
|
| 322 |
|
| 323 |
-
metrics_df["
|
| 324 |
metrics_df["num_max_output_tokens"] = num_max_output_tokens
|
| 325 |
|
| 326 |
if variant != "rpp":
|
|
@@ -359,8 +382,10 @@ def analyze_translation_results(df, col, max_new_tokens=300, repetition_threshol
|
|
| 359 |
)
|
| 360 |
print_row_details(df2, range(len(df2)))
|
| 361 |
|
| 362 |
-
|
| 363 |
-
df3 = df[df[
|
|
|
|
|
|
|
| 364 |
|
| 365 |
print(f"\n*** Found {len(df3)} rows with incomplete translations for {col}")
|
| 366 |
print_row_details(df3, range(len(df3)))
|
|
|
|
| 19 |
meteor = evaluate.load("meteor")
|
| 20 |
accuracy = evaluate.load("accuracy")
|
| 21 |
sacrebleu = evaluate.load("sacrebleu")
|
| 22 |
+
comet = evaluate.load("comet")
|
| 23 |
|
| 24 |
|
| 25 |
def extract_answer(text, debug=False):
|
|
|
|
| 44 |
return text
|
| 45 |
|
| 46 |
|
| 47 |
+
def calc_metrics(references, predictions, sources=None, debug=False):
|
| 48 |
assert len(references) == len(
|
| 49 |
predictions
|
| 50 |
), f"lengths are difference: {len(references)} != {len(predictions)}"
|
|
|
|
| 52 |
predictions = [extract_answer(text) for text in predictions]
|
| 53 |
results = {}
|
| 54 |
|
| 55 |
+
results["comet"] = comet.compute(
|
| 56 |
+
predictions=predictions, references=references, sources=sources
|
| 57 |
+
)["mean_score"]
|
| 58 |
+
|
| 59 |
results["meteor"] = meteor.compute(predictions=predictions, references=references)[
|
| 60 |
"meteor"
|
| 61 |
]
|
|
|
|
| 238 |
)
|
| 239 |
|
| 240 |
|
| 241 |
+
def count_chinese_characters(text):
|
| 242 |
+
# Define a regular expression pattern for Chinese characters
|
| 243 |
+
chinese_char_pattern = r"[\u4e00-\u9fff]"
|
| 244 |
+
|
| 245 |
+
# Use re.findall to find all Chinese characters in the text
|
| 246 |
+
chinese_chars = re.findall(chinese_char_pattern, text)
|
| 247 |
+
|
| 248 |
+
# Return the count of Chinese characters
|
| 249 |
+
return len(chinese_chars)
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def count_chinese_characters(text):
|
| 253 |
chinese_char_pattern = re.compile(r"[\u4e00-\u9fff]")
|
| 254 |
return 1 if chinese_char_pattern.search(text) else 0
|
| 255 |
|
|
|
|
| 280 |
repetition_score = []
|
| 281 |
total_repetitions = []
|
| 282 |
num_max_output_tokens = []
|
| 283 |
+
translation_completeness = []
|
| 284 |
columns = df.columns[2:]
|
| 285 |
|
| 286 |
df[
|
|
|
|
| 291 |
]
|
| 292 |
] = df["english"].apply(detect_scores)
|
| 293 |
|
| 294 |
+
new_col = f"count_chinese_characters-ground_truth"
|
| 295 |
+
df[new_col] = df["chinese"].apply(count_chinese_characters)
|
| 296 |
+
|
| 297 |
for col in columns:
|
| 298 |
+
metrics = calc_metrics(
|
| 299 |
+
df["english"], df[col], sources=df["chinese"], debug=True
|
| 300 |
+
)
|
| 301 |
print(f"{col}: {metrics}")
|
| 302 |
|
| 303 |
meteor.append(metrics["meteor"])
|
|
|
|
| 319 |
lambda x: len(tokenizers[model](x)["input_ids"])
|
| 320 |
)
|
| 321 |
|
| 322 |
+
new_col = f"count_chinese_characters-{col}"
|
| 323 |
+
df[new_col] = df[col].apply(count_chinese_characters)
|
| 324 |
+
translation_completeness.append(
|
| 325 |
+
1 - df[new_col].sum() / df["count_chinese_characters-ground_truth"].sum()
|
| 326 |
+
)
|
| 327 |
|
| 328 |
new_col = f"output_tokens-{col}"
|
| 329 |
df[new_col] = df[col].apply(lambda x: len(tokenizers[model](x)["input_ids"]))
|
|
|
|
| 343 |
lambda x: x["meteor"] / math.log10(10 + x["total_repetitions"]), axis=1
|
| 344 |
)
|
| 345 |
|
| 346 |
+
metrics_df["translation_completeness"] = translation_completeness
|
| 347 |
metrics_df["num_max_output_tokens"] = num_max_output_tokens
|
| 348 |
|
| 349 |
if variant != "rpp":
|
|
|
|
| 382 |
)
|
| 383 |
print_row_details(df2, range(len(df2)))
|
| 384 |
|
| 385 |
+
count_chinese_characters = f"count_chinese_characters-{col}"
|
| 386 |
+
df3 = df[df[count_chinese_characters] > 0][
|
| 387 |
+
["chinese", "english", col, count_chinese_characters]
|
| 388 |
+
]
|
| 389 |
|
| 390 |
print(f"\n*** Found {len(df3)} rows with incomplete translations for {col}")
|
| 391 |
print_row_details(df3, range(len(df3)))
|
notebooks/00b_Data Analysis_Few_Shots.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
requirements.txt
CHANGED
|
@@ -19,3 +19,4 @@ einops==0.8.0
|
|
| 19 |
accelerate==0.32.0
|
| 20 |
peft==0.11.1
|
| 21 |
sacrebleu==2.4.2
|
|
|
|
|
|
| 19 |
accelerate==0.32.0
|
| 20 |
peft==0.11.1
|
| 21 |
sacrebleu==2.4.2
|
| 22 |
+
unbabel-comet==2.2.2
|