File size: 8,351 Bytes
2b267d0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
import datetime
import html
import json
import multiprocessing
import os
import random
import sys

from tqdm import tqdm
# Add the parent directory to sys.path so that utils.py can be imported.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from utils import track_api_cost
from llm_functions import generate_answer_flow
# Load environment variables for testing (if needed).
# The `dotenv` package is optional: if it is not installed the ImportError is
# ignored and the script continues with the existing process environment.
try:
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    pass
# Example data
# NOTE(review): the non-ASCII literals below look mis-decoded (UTF-8 text shown
# through a single-byte Greek code page) and several of them are split across
# physical lines, which is not valid Python. Presumably an extraction artifact:
# restore this block from the original UTF-8 file before running. TODO confirm.

# Company names; main() picks one at random as `company_name`.
example_companies = ["μΌμ±μ μ", "μΉ΄μΉ΄μ€", "λ€μ΄λ²", "μΏ ν‘", "ν μ€", "νλμλμ°¨", "CJμ μΌμ λΉ", "νμ΄λΈ"]
# Job title -> job description (JD) text; main() picks one (title, JD) pair.
example_jobs_jds = {
"λ°±μλ κ°λ°μ": "Spring Boot, JPA, MySQL κ²½νμ. MSA νκ²½ κ²½ν μ°λ. ν΄λΌμ°λ(AWS) νκ²½ λ°°ν¬ λ° μ΄μ κ²½νμ.",
"νλ‘ νΈμλ κ°λ°μ": "React, TypeScript, Redux μ¬μ©. λ°μν μΉ κ°λ° κ²½ν νμ. UI/UXμ λν μ΄ν΄λκ° λμ λΆ.",
"λ°μ΄ν° λΆμκ°": "SQL, Python(Pandas, Scikit-learn) νμ© λ₯λ ₯. ν΅κ³μ μ§μκ³Ό λ°μ΄ν° μκ°ν(Tableau λ±) λ₯λ ₯.",
"νλ‘λνΈ λ§€λμ (PM)": "IT νλ‘λνΈ κΈ°ν λ° κ΄λ¦¬ κ²½ν. μ¬μ©μ μ€ν 리 μμ±, λ°±λ‘κ·Έ κ΄λ¦¬. λ°μ΄ν° κΈ°λ° μμ¬κ²°μ λ₯λ ₯.",
"λ§μΌν°": "λμ§νΈ λ§μΌν
(SA, DA, SEO) κ²½ν. μ½ν
μΈ κΈ°ν λ° μ μ, μ±κ³Ό λΆμ. SNS μ±λ μ΄μ κ²½ν."
}
# Question templates; values may contain {company_name} / {job_title}
# placeholders, which main() fills in with str.format().
example_questions = {
"μ§μλκΈ°": "{company_name}μ μ§μν λκΈ°μ λν΄ κΈ°μ ν΄μ£Όμμμ€.",
"μ±μ₯κ³Όμ ": "λ³ΈμΈμ μ±μ₯κ³Όμ μ κ°λ΅ν κΈ°μ νλ νμ¬μ μμ μκ² κ°μ₯ ν° μν₯μ λΌμΉ μ¬κ±΄, μΈλ¬Ό λ±μ ν¬ν¨νμ¬ κΈ°μ ν΄μ£Όμμμ€.",
"μ§λ¬΄μλ": "{job_title} μ§λ¬΄ μνμ νμν μλμ 무μμ΄λΌκ³ μκ°νλ©°, μ΄λ₯Ό κ°μΆκΈ° μν΄ μ΄λ€ λ
Έλ ₯μ ν΄μλμ§ κΈ°μ ν΄μ£Όμμμ€.",
"μ
μ¬νν¬λΆ": "μ
μ¬ ν 10λ
λμμ νμ¬μν μλ리μ€μ κ·Έκ²μ μΆκ΅¬νλ μ΄μ λ₯Ό κΈ°μ ν΄μ£Όμμμ€."
}
# Experience levels; main() picks one at random as `experience_level`.
experience_levels = ["μ μ
", "κ²½λ ₯", "μΈν΄"]
# Sample "User:" / "AI:" chat transcripts; main() picks one as `conversation`.
example_conversations = [
"User: μλ
νμΈμ, μκΈ°μκ°μ 컨μ€ν
μ λ°κ³ μΆμ΅λλ€.\nAI: λ€, μ΄λ€ μ§λ¬΄μ νμ¬μ μ§μνμλμ?\nUser: μΏ ν‘μ PM μ§λ¬΄μ
λλ€. μ κ°μ μ λ°μ΄ν° λΆμ λ₯λ ₯μ
λλ€.",
"User: μ±μ₯κ³Όμ νλͺ©μ μ΄λ»κ² μ°λ κ² μ’μκΉμ?\nAI: μ§λ¬΄μ κ΄λ ¨λ κ²½νμ μ€μ¬μΌλ‘, κ·Έ κ²½νμ ν΅ν΄ 무μμ λ°°μ°κ³ μ΄λ»κ² μ±μ₯νλμ§ κ΅¬μ²΄μ μΈ μ¬λ‘λ₯Ό λ€μ΄ μμ±νλ κ²μ΄ μ€μν©λλ€.",
"User: μ κ²½ν μ€ μ΄λ€ κ²μ κ°μ‘°ν΄μΌ ν μ§ λͺ¨λ₯΄κ² μ΄μ.\nAI: μ§μνμλ μ§λ¬΄μ JD(μ§λ¬΄κΈ°μ μ)λ₯Ό 보면 μ΄λ€ μλμ μ€μνκ² μκ°νλμ§ μ μ μμ΅λλ€. κ·Έμ κ΄λ ¨λ κ²½νμ μ°μ μ μΌλ‘ μ΄νν΄λ³΄μΈμ."
]
# Number of randomized test cases that main() generates and runs.
NUM_TESTS = 100
# Number of worker processes in the multiprocessing.Pool created by main().
NUM_PROCESSES = 10
# Model name handed to track_api_cost(); run_test records that call's result
# as the cost of a test case.
# NOTE(review): this file never passes MODEL_NAME to generate_answer_flow, so
# the model actually called is presumably configured elsewhere. Confirm the
# two stay in sync.
MODEL_NAME = "gpt-4o-mini"
def run_test(test_input_with_id):
    """Run a single answer-flow test case; worker function for the process pool.

    Args:
        test_input_with_id: ``(test_id, test_input)`` tuple. ``test_input`` is
            a dict that is expanded into keyword arguments for
            ``generate_answer_flow``.

    Returns:
        A dict with the keys ``id``, ``input``, ``parsed_result``, ``error``,
        ``cost`` and ``status``. ``status`` contains the word "Success" only
        when the parsed flow is a dict without an ``"error"`` key and with a
        non-empty ``"flow"`` entry. Exceptions raised while running the case
        are not propagated; they are reported with an "Error" status.
    """
    test_id, test_input = test_input_with_id
    total_cost = 0
    try:
        parsed_flow, response = generate_answer_flow(**test_input)
        if response and hasattr(response, 'usage'):
            # track_api_cost's return contract is not visible from this file;
            # coerce a falsy result (e.g. None) to 0 so the value can still be
            # summed and formatted with ':.6f' by the report code.
            total_cost = track_api_cost(response, MODEL_NAME, None) or 0

        if not isinstance(parsed_flow, dict):
            # Report a clear validation failure instead of letting the
            # membership test below raise an opaque TypeError.
            status = "❌ Failure"
            error_message = (
                "Invalid flow format: expected dict, "
                f"got {type(parsed_flow).__name__}"
            )
            parsed_flow = {}
        elif "error" in parsed_flow or not parsed_flow.get("flow"):
            status = "❌ Failure"
            error_message = str(parsed_flow.get("error", "Invalid flow format"))
        else:
            status = "✅ Success"
            error_message = ""

        return {
            "id": test_id,
            "input": test_input,
            "parsed_result": parsed_flow,
            "error": error_message,
            "cost": total_cost,
            "status": status,
        }
    except Exception as e:
        # Deliberately broad: one failing case must not abort the whole run.
        # Any cost recorded before the failure is kept.
        return {
            "id": test_id,
            "input": test_input,
            "parsed_result": {},
            "error": f"Exception: {str(e)}",
            "cost": total_cost,
            "status": "❌ Error",
        }
# HTML skeleton of the report. Literal CSS braces are doubled because the
# template is filled in with str.format().
_REPORT_TEMPLATE = """
<!DOCTYPE html><html lang="ko"><head><meta charset="UTF-8"><title>답변 흐름 생성 테스트 보고서</title>
<style>body{{font-family:sans-serif;margin:20px;}} h1,h2{{text-align:center;}} .summary{{border:1px solid #ddd;padding:20px;margin-bottom:20px;}} table{{width:100%;border-collapse:collapse;}} th,td{{border:1px solid #ddd;padding:8px;text-align:left;vertical-align:top;}} th{{background-color:#f2f2f2;}} .status-success{{color:green;font-weight:bold;}} .status-failure{{color:red;font-weight:bold;}} pre{{white-space:pre-wrap;word-wrap:break-word;background-color:#f9f9f9;padding:10px;border:1px solid #ddd;}} .container{{max-width:1400px;margin:auto;}}</style>
</head><body><div class="container"><h1>답변 흐름 생성 테스트 보고서</h1><div class="summary"><h2>요약</h2><p><strong>테스트 시간:</strong> {now}</p><p><strong>총 테스트 수:</strong> {total_tests}</p><p><strong>성공:</strong> {success_count}</p><p><strong>실패/에러:</strong> {failure_count}</p><p><strong>실패율:</strong> <span class="{status_class}">{failure_rate:.2f}%</span></p><p><strong>총 예상 비용:</strong> ${total_cost:.6f}</p></div>
<h2>상세 결과</h2><table><thead><tr><th>ID</th><th>입력 (Input)</th><th>파싱 결과 (Parsed)</th><th>에러</th><th>비용</th><th>상태</th></tr></thead><tbody>{table_rows}</tbody></table></div></body></html>
"""


def _build_test_inputs(num_tests):
    """Return ``num_tests`` randomly assembled ``(test_id, kwargs)`` pairs; ids start at 1."""
    # The candidate lists never change, so build them once outside the loop.
    jobs = list(example_jobs_jds.items())
    question_templates = list(example_questions.values())

    test_inputs = []
    for test_id in range(1, num_tests + 1):
        company = random.choice(example_companies)
        job_title, jd = random.choice(jobs)
        question_template = random.choice(question_templates)
        test_inputs.append((test_id, {
            "company_name": company,
            "jd": jd,
            # Templates without placeholders are returned unchanged by format().
            "question": question_template.format(
                company_name=company, job_title=job_title
            ),
            "experience_level": random.choice(experience_levels),
            "conversation": random.choice(example_conversations),
        }))
    return test_inputs


def _render_table_row(res):
    """Render one result dict as an HTML ``<tr>``, escaping every text cell."""
    status_class = "status-success" if "Success" in res["status"] else "status-failure"
    # default=str: the report is diagnostic output, so an unserializable value
    # is shown as text rather than aborting the report for the whole run.
    input_json = json.dumps(res['input'], indent=2, ensure_ascii=False, default=str)
    parsed_json = json.dumps(res['parsed_result'], indent=2, ensure_ascii=False, default=str)
    # Model output and exception text may contain '<', '>' or '&'; escape them
    # so they are displayed verbatim instead of being parsed as markup.
    return (
        "<tr>\n"
        f"<td>{res['id']}</td>\n"
        f"<td><pre>{html.escape(input_json)}</pre></td>\n"
        f"<td><pre>{html.escape(parsed_json)}</pre></td>\n"
        f"<td><pre>{html.escape(str(res['error']))}</pre></td>\n"
        f"<td>${res['cost']:.6f}</td>\n"
        f"<td class=\"{status_class}\">{html.escape(str(res['status']))}</td>\n"
        "</tr>\n"
    )


def _render_report_html(results, now):
    """Return the complete HTML report for ``results``, generated at ``now``."""
    total_tests = len(results)
    failure_count = sum(1 for r in results if "Success" not in r["status"])
    failure_rate = (failure_count / total_tests) * 100 if total_tests > 0 else 0
    return _REPORT_TEMPLATE.format(
        now=now.strftime('%Y-%m-%d %H:%M:%S'),
        total_tests=total_tests,
        success_count=total_tests - failure_count,
        failure_count=failure_count,
        # The summary is shown as a success while fewer than 10% of cases fail.
        status_class="status-success" if failure_rate < 10 else "status-failure",
        failure_rate=failure_rate,
        total_cost=sum(r['cost'] for r in results),
        table_rows="".join(_render_table_row(r) for r in results),
    )


def main():
    """Prepare the test cases, run them in parallel and write the HTML report.

    Builds ``NUM_TESTS`` random inputs, runs ``run_test`` on them in a pool of
    ``NUM_PROCESSES`` processes, and writes
    ``answer_flow_report_<YYYYmmdd_HHMMSS>.html`` to the current working
    directory. Progress and the total cost are printed to stdout.
    """
    print(f"총 {NUM_TESTS}개의 테스트를 {NUM_PROCESSES}개 프로세스로 병렬 실행합니다...")
    test_inputs = _build_test_inputs(NUM_TESTS)

    results = []
    with multiprocessing.Pool(processes=NUM_PROCESSES) as pool, \
            tqdm(total=NUM_TESTS, desc="답변 흐름 생성 테스트") as pbar:
        # imap_unordered yields results as workers finish, so the progress bar
        # advances per completed case; order is restored by the sort below.
        for result in pool.imap_unordered(run_test, test_inputs):
            results.append(result)
            pbar.update()
    results.sort(key=lambda x: x['id'])
    print("모든 테스트가 완료되었습니다.")

    now = datetime.datetime.now()
    report_filename = f"answer_flow_report_{now.strftime('%Y%m%d_%H%M%S')}.html"
    total_cost = sum(r['cost'] for r in results)
    final_html = _render_report_html(results, now)

    with open(report_filename, "w", encoding="utf-8") as f:
        f.write(final_html)
    print(f"'{report_filename}' 파일로 보고서가 저장되었습니다.")
    print(f"총 예상 비용: ${total_cost:.6f}")
# Run main() only when this file is executed as a script, not when it is
# imported. multiprocessing worker processes started with the "spawn" method
# import this module, so the guard also keeps them from re-running main().
if __name__ == "__main__":
    main()
|