Spaces:
Paused
Paused
update
Browse files
examples/api_eval/eval_gemini_google.py
CHANGED
|
@@ -16,6 +16,11 @@ from project_settings import environment, project_path
|
|
| 16 |
|
| 17 |
|
| 18 |
def get_args():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
parser = argparse.ArgumentParser()
|
| 20 |
parser.add_argument(
|
| 21 |
"--google_application_credentials",
|
|
@@ -25,7 +30,8 @@ def get_args():
|
|
| 25 |
parser.add_argument(
|
| 26 |
"--model_name",
|
| 27 |
# default="gemini-2.5-pro",
|
| 28 |
-
default="gemini-2.5-flash",
|
|
|
|
| 29 |
type=str
|
| 30 |
)
|
| 31 |
parser.add_argument(
|
|
@@ -76,7 +82,7 @@ def main():
|
|
| 76 |
|
| 77 |
with open(args.eval_data, "r", encoding="utf-8") as fin, open(args.eval_result, "a+", encoding="utf-8") as fout:
|
| 78 |
for row in fin:
|
| 79 |
-
if total >
|
| 80 |
break
|
| 81 |
|
| 82 |
row = json.loads(row)
|
|
@@ -126,7 +132,7 @@ Your output is:
|
|
| 126 |
config=generate_content_config,
|
| 127 |
)
|
| 128 |
time_cost = time.time() - time_begin
|
| 129 |
-
print(time_cost)
|
| 130 |
try:
|
| 131 |
prediction = response.candidates[0].content.parts[0].text
|
| 132 |
except TypeError:
|
|
@@ -152,7 +158,7 @@ Your output is:
|
|
| 152 |
row_ = json.dumps(row_, ensure_ascii=False)
|
| 153 |
fout.write(f"{row_}\n")
|
| 154 |
|
| 155 |
-
|
| 156 |
|
| 157 |
return
|
| 158 |
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
def get_args():
|
| 19 |
+
"""
|
| 20 |
+
python3 eval_gemini_google.py --model_name gemini-2.5-pro --eval_result eval_math_result_gemini-2.5-pro.jsonl
|
| 21 |
+
python3 eval_gemini_google.py --model_name gemini-2.5-flash --eval_result eval_math_result_gemini-2.5-flash.jsonl
|
| 22 |
+
:return:
|
| 23 |
+
"""
|
| 24 |
parser = argparse.ArgumentParser()
|
| 25 |
parser.add_argument(
|
| 26 |
"--google_application_credentials",
|
|
|
|
| 30 |
parser.add_argument(
|
| 31 |
"--model_name",
|
| 32 |
# default="gemini-2.5-pro",
|
| 33 |
+
# default="gemini-2.5-flash",
|
| 34 |
+
default="gemini-2.5-flash-lite-preview-06-17",
|
| 35 |
type=str
|
| 36 |
)
|
| 37 |
parser.add_argument(
|
|
|
|
| 82 |
|
| 83 |
with open(args.eval_data, "r", encoding="utf-8") as fin, open(args.eval_result, "a+", encoding="utf-8") as fout:
|
| 84 |
for row in fin:
|
| 85 |
+
if total > 1000:
|
| 86 |
break
|
| 87 |
|
| 88 |
row = json.loads(row)
|
|
|
|
| 132 |
config=generate_content_config,
|
| 133 |
)
|
| 134 |
time_cost = time.time() - time_begin
|
| 135 |
+
# print(time_cost)
|
| 136 |
try:
|
| 137 |
prediction = response.candidates[0].content.parts[0].text
|
| 138 |
except TypeError:
|
|
|
|
| 158 |
row_ = json.dumps(row_, ensure_ascii=False)
|
| 159 |
fout.write(f"{row_}\n")
|
| 160 |
|
| 161 |
+
print(f"score: {score}")
|
| 162 |
|
| 163 |
return
|
| 164 |
|
examples/api_eval/eval_openai.py
CHANGED
|
@@ -16,6 +16,11 @@ from project_settings import environment, project_path
|
|
| 16 |
|
| 17 |
|
| 18 |
def get_args():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
parser = argparse.ArgumentParser()
|
| 20 |
parser.add_argument(
|
| 21 |
"--api_key",
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
def get_args():
|
| 19 |
+
"""
|
| 20 |
+
python3 eval_openai.py --model_name gpt-4o --eval_result eval_math_result_gpt-4o.jsonl
|
| 21 |
+
python3 eval_openai.py --model_name gpt-4o-mini --eval_result eval_math_result_gpt-4o-mini.jsonl
|
| 22 |
+
:return:
|
| 23 |
+
"""
|
| 24 |
parser = argparse.ArgumentParser()
|
| 25 |
parser.add_argument(
|
| 26 |
"--api_key",
|