Spaces:
Runtime error
Runtime error
Muhammad Risqi Firdaus committed on
Commit ·
f452678
1
Parent(s): 2d91242
add new lang
Browse files
- app.py +1 -1
- classificator.py +1 -1
- evaluator.py +180 -80
- models.py +1 -0
app.py
CHANGED
|
@@ -93,4 +93,4 @@ async def extract(link: InsertedLink):
|
|
| 93 |
@app.post("/eval", response_model=EvalResult)
|
| 94 |
async def eval(eva: EvaModul):
|
| 95 |
transcript = extractor_helper.extract_technical(eva.competences, eva.transcript)
|
| 96 |
-
return evaluator.evaluate_interview(competences=eva.competences, transcript=transcript)
|
|
|
|
| 93 |
@app.post("/eval", response_model=EvalResult)
|
| 94 |
async def eval(eva: EvaModul):
|
| 95 |
transcript = extractor_helper.extract_technical(eva.competences, eva.transcript)
|
| 96 |
+
return evaluator.evaluate_interview(competences=eva.competences, transcript=transcript, lang=eva.lang)
|
classificator.py
CHANGED
|
@@ -34,7 +34,7 @@ with open(filename, 'rb') as file:
|
|
| 34 |
# }
|
| 35 |
|
| 36 |
|
| 37 |
-
geolocator = Nominatim(user_agent="geo_distance_calculator")
|
| 38 |
|
| 39 |
def get_coordinates(city):
|
| 40 |
location = geolocator.geocode(city)
|
|
|
|
| 34 |
# }
|
| 35 |
|
| 36 |
|
| 37 |
+
geolocator = Nominatim(user_agent="geo_distance_calculator", timeout=10)
|
| 38 |
|
| 39 |
def get_coordinates(city):
|
| 40 |
location = geolocator.geocode(city)
|
evaluator.py
CHANGED
|
@@ -5,93 +5,187 @@ import json
|
|
| 5 |
tags = {'AI': "This one is the competence description"} #list of competence to save, better to hit db.
|
| 6 |
client = OpenAI()
|
| 7 |
|
| 8 |
-
def generate_model_parameters(skill: str, transcript: str):
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
EXAMPLE
|
| 31 |
-
SKILL TO BE EVALUATED: Python
|
| 32 |
|
| 33 |
-
INTERVIEWER:
|
| 34 |
-
What
|
| 35 |
-
name=["swati","shweta"]
|
| 36 |
-
age=[10,20]
|
| 37 |
-
new_entity-zip(name,age)
|
| 38 |
-
new_entity-set(new_entity)
|
| 39 |
-
print(new_entity)
|
| 40 |
|
| 41 |
-
INTERVIEWEE:
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
OUTPUT: SUCCESS
|
| 45 |
|
| 46 |
-
|
| 47 |
-
SKILL TO BE EVALUATED:
|
|
|
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
|
| 55 |
-
|
| 56 |
-
The output is: ['1a', '2b', '3c']
|
| 57 |
|
| 58 |
-
|
|
|
|
| 59 |
|
| 60 |
-
|
| 61 |
-
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
str="apple#banana#kiwi#orange"
|
| 66 |
-
print(str.split("#",2))
|
| 67 |
|
| 68 |
-
|
| 69 |
-
['apple', 'banana', 'kiwi#orange']
|
| 70 |
|
| 71 |
-
|
|
|
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
| 75 |
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
-
|
| 80 |
-
Python modules are files containing Python code. This code can either be function classes or variables. A Python module is a .py file containing executable code. Some of the commonly used built-in modules are:
|
| 81 |
-
- os
|
| 82 |
-
- sys
|
| 83 |
-
- math
|
| 84 |
-
- random
|
| 85 |
-
- data time
|
| 86 |
-
- json
|
| 87 |
|
| 88 |
-
|
|
|
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
{transcript}
|
| 93 |
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
]
|
| 96 |
}
|
| 97 |
|
|
@@ -129,15 +223,11 @@ def extract_competences_and_responses(competences: list[str], transcripts: list[
|
|
| 129 |
|
| 130 |
return responses
|
| 131 |
|
| 132 |
-
def evaluate_interview(competences: list[str], transcript: list):
|
| 133 |
# global tags
|
| 134 |
model_inputs = []
|
| 135 |
|
| 136 |
responses = extract_competences_and_responses(transcript["comp_beha"], transcript["behavioral"])
|
| 137 |
-
|
| 138 |
-
print(len(competences))
|
| 139 |
-
print(len(responses))
|
| 140 |
-
|
| 141 |
# pprint(transcript)
|
| 142 |
|
| 143 |
for i in range(len(transcript["comp_beha"])):
|
|
@@ -171,8 +261,8 @@ def evaluate_interview(competences: list[str], transcript: list):
|
|
| 171 |
# print(text)
|
| 172 |
print("------")
|
| 173 |
## TODO: change to gpt
|
| 174 |
-
|
| 175 |
-
|
| 176 |
Here are 5 examples:
|
| 177 |
EXAMPLE 1:
|
| 178 |
SKILL TO BE EVALUATED: Honest
|
|
@@ -193,13 +283,23 @@ def evaluate_interview(competences: list[str], transcript: list):
|
|
| 193 |
"score": 0.1
|
| 194 |
}]
|
| 195 |
}
|
| 196 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
Evaluations
|
| 198 |
)
|
| 199 |
## output:
|
| 200 |
final_score = 0
|
| 201 |
behavioral_scores = generate_behavioral_score(result.value)
|
| 202 |
-
technical_scores = generate_technical_score(transcript["comp_tech"], transcript["technical"])
|
| 203 |
|
| 204 |
final_score = aggregate_scores(behavioral_scores, technical_scores)
|
| 205 |
|
|
@@ -225,7 +325,7 @@ def generate_behavioral_score(eval_array):
|
|
| 225 |
|
| 226 |
return scores
|
| 227 |
|
| 228 |
-
def generate_technical_score(skills: str, transcript: str):
|
| 229 |
# total_score = 0
|
| 230 |
scores = []
|
| 231 |
for idx, skill in enumerate(skills):
|
|
@@ -234,7 +334,7 @@ def generate_technical_score(skills: str, transcript: str):
|
|
| 234 |
# print(chat)
|
| 235 |
transcript_text = f"INTERVIEWEE:\n{chat[0]['question'].lstrip('TECHNICAL: ')}\n\nINTERVIEWER:\n{chat[0]['answer']}"
|
| 236 |
# TODO: change to structured output
|
| 237 |
-
model_parameters = generate_model_parameters(skill, transcript_text)
|
| 238 |
completion = client.chat.completions.create(
|
| 239 |
**model_parameters
|
| 240 |
)
|
|
|
|
| 5 |
tags = {'AI': "This one is the competence description"} #list of competence to save, better to hit db.
|
| 6 |
client = OpenAI()
|
| 7 |
|
| 8 |
+
def generate_model_parameters(skill: str, transcript: str, lang: str):
|
| 9 |
+
eng = f"""
|
| 10 |
+
You are tasked with evaluating a transcript of an IT job interview. The interview that is conducted in the transcript is technical.
|
| 11 |
+
You need sufficient IT knowledge since you will evaluate the answer of the interviewee to determine whether the interviewee answer correctly or not.
|
| 12 |
+
You will output "SUCCESS" if the interviewee's answer is deemed correct and "FAIL" if it's deemed false.
|
| 13 |
+
Below are 5 examples of correct answers.
|
| 14 |
+
|
| 15 |
+
Here are 5 examples:
|
| 16 |
+
EXAMPLE 1:
|
| 17 |
+
SKILL TO BE EVALUATED: Python
|
| 18 |
+
|
| 19 |
+
INTERVIEWER:
|
| 20 |
+
What is the use of zip () in python?
|
| 21 |
+
|
| 22 |
+
INTERVIEWEE:
|
| 23 |
+
The zip returns an iterator and takes iterable as argument. These iterables can be list, tuple, dictionary etc. It maps similar index of every iterable to make a single entity.
|
| 24 |
+
|
| 25 |
+
OUTPUT: SUCCESS
|
| 26 |
+
|
| 27 |
+
EXAMPLE 2:
|
| 28 |
+
SKILL TO BE EVALUATED: Python
|
| 29 |
+
|
| 30 |
+
INTERVIEWER:
|
| 31 |
+
What will be the output of the following?
|
| 32 |
+
name=["swati","shweta"]
|
| 33 |
+
age=[10,20]
|
| 34 |
+
new_entity-zip(name,age)
|
| 35 |
+
new_entity-set(new_entity)
|
| 36 |
+
print(new_entity)
|
| 37 |
+
|
| 38 |
+
INTERVIEWEE:
|
| 39 |
+
The output is {{('shweta', 20), ('swati', 10)}}
|
| 40 |
+
|
| 41 |
+
OUTPUT: SUCCESS
|
| 42 |
+
|
| 43 |
+
EXAMPLE 3:
|
| 44 |
+
SKILL TO BE EVALUATED: Python
|
| 45 |
+
|
| 46 |
+
INTERVIEWER:
|
| 47 |
+
What will be the output of the following?
|
| 48 |
+
a=["1","2","3"]
|
| 49 |
+
b=["a","b","c"]
|
| 50 |
+
c=[x+y for x, y in zip(a,b)] print(c)
|
| 51 |
+
|
| 52 |
+
INTERVIEWEE:
|
| 53 |
+
The output is: ['1a', '2b', '3c']
|
| 54 |
+
|
| 55 |
+
OUTPUT: SUCCESS
|
| 56 |
+
|
| 57 |
+
EXAMPLE 4:
|
| 58 |
+
SKILL TO BE EVALUATED: Python
|
| 59 |
+
|
| 60 |
+
INTERVIEWER:
|
| 61 |
+
What will be the output of the following?
|
| 62 |
+
str="apple#banana#kiwi#orange"
|
| 63 |
+
print(str.split("#",2))
|
| 64 |
+
|
| 65 |
+
INTERVIEWEE:
|
| 66 |
+
['apple', 'banana', 'kiwi#orange']
|
| 67 |
+
|
| 68 |
+
OUTPUT: SUCCESS
|
| 69 |
|
| 70 |
+
EXAMPLE 5:
|
| 71 |
+
SKILL TO BE EVALUATED: Python
|
| 72 |
|
| 73 |
+
INTERVIEWER:
|
| 74 |
+
What are python modules? Name some commonly used built-in modules in Python?
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
+
INTERVIEWEE:
|
| 77 |
+
Python modules are files containing Python code. This code can either be function classes or variables. A Python module is a .py file containing executable code. Some of the commonly used built-in modules are:
|
| 78 |
+
- os
|
| 79 |
+
- sys
|
| 80 |
+
- math
|
| 81 |
+
- random
|
| 82 |
+
- data time
|
| 83 |
+
- json
|
| 84 |
|
| 85 |
+
OUTPUT: SUCCESS
|
| 86 |
|
| 87 |
+
Note that the examples that I give above have the correct answer. Your job is to generate the output only (SUCCESS OR FAIL). You don't need to explain your justification.
|
| 88 |
+
SKILL TO BE EVALUATED: {skill}
|
| 89 |
+
{transcript}
|
| 90 |
|
| 91 |
+
"""
|
| 92 |
+
idn = f"""
|
| 93 |
+
Anda ditugaskan untuk mengevaluasi transkrip dari sebuah wawancara kerja di bidang IT. Wawancara dalam transkrip tersebut bersifat teknis.
|
| 94 |
+
Anda perlu memiliki pengetahuan yang cukup tentang IT karena Anda akan mengevaluasi jawaban dari peserta wawancara untuk menentukan apakah jawaban peserta tersebut benar atau tidak.
|
| 95 |
+
Anda akan mengeluarkan output "SUCCESS" jika jawaban peserta dianggap benar dan "FAIL" jika dianggap salah.
|
| 96 |
|
| 97 |
+
Berikut adalah 5 contoh jawaban yang benar.
|
|
|
|
| 98 |
|
| 99 |
+
CONTOH 1:
|
| 100 |
+
KEMAMPUAN YANG DIEVALUASI: Python
|
| 101 |
|
| 102 |
+
PEWAWANCARA:
|
| 103 |
+
Apa kegunaan dari fungsi zip() di Python?
|
| 104 |
|
| 105 |
+
PESERTA:
|
| 106 |
+
Fungsi zip mengembalikan sebuah iterator dan menerima iterable sebagai argumen. Iterable ini bisa berupa list, tuple, dictionary, dll. Fungsi ini mencocokkan indeks yang sama dari setiap iterable untuk membentuk satu entitas.
|
|
|
|
|
|
|
| 107 |
|
| 108 |
+
OUTPUT: SUCCESS
|
|
|
|
| 109 |
|
| 110 |
+
CONTOH 2:
|
| 111 |
+
KEMAMPUAN YANG DIEVALUASI: Python
|
| 112 |
|
| 113 |
+
PEWAWANCARA:
|
| 114 |
+
Apa output dari kode berikut?
|
| 115 |
|
| 116 |
+
python
|
| 117 |
+
Copy
|
| 118 |
+
Edit
|
| 119 |
+
name = ["swati", "shweta"]
|
| 120 |
+
age = [10, 20]
|
| 121 |
+
new_entity = zip(name, age)
|
| 122 |
+
new_entity = set(new_entity)
|
| 123 |
+
print(new_entity)
|
| 124 |
+
PESERTA:
|
| 125 |
+
Output-nya adalah: {('shweta', 20), ('swati', 10)}
|
| 126 |
|
| 127 |
+
OUTPUT: SUCCESS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
+
CONTOH 3:
|
| 130 |
+
KEMAMPUAN YANG DIEVALUASI: Python
|
| 131 |
|
| 132 |
+
PEWAWANCARA:
|
| 133 |
+
Apa output dari kode berikut?
|
|
|
|
| 134 |
|
| 135 |
+
python
|
| 136 |
+
Copy
|
| 137 |
+
Edit
|
| 138 |
+
a = ["1", "2", "3"]
|
| 139 |
+
b = ["a", "b", "c"]
|
| 140 |
+
c = [x + y for x, y in zip(a, b)]
|
| 141 |
+
print(c)
|
| 142 |
+
PESERTA:
|
| 143 |
+
Output-nya adalah: ['1a', '2b', '3c']
|
| 144 |
+
|
| 145 |
+
OUTPUT: SUCCESS
|
| 146 |
+
|
| 147 |
+
CONTOH 4:
|
| 148 |
+
KEMAMPUAN YANG DIEVALUASI: Python
|
| 149 |
+
|
| 150 |
+
PEWAWANCARA:
|
| 151 |
+
Apa output dari kode berikut?
|
| 152 |
+
|
| 153 |
+
python
|
| 154 |
+
Copy
|
| 155 |
+
Edit
|
| 156 |
+
str = "apple#banana#kiwi#orange"
|
| 157 |
+
print(str.split("#", 2))
|
| 158 |
+
PESERTA:
|
| 159 |
+
['apple', 'banana', 'kiwi#orange']
|
| 160 |
+
|
| 161 |
+
OUTPUT: SUCCESS
|
| 162 |
+
|
| 163 |
+
CONTOH 5:
|
| 164 |
+
KEMAMPUAN YANG DIEVALUASI: Python
|
| 165 |
+
|
| 166 |
+
PEWAWANCARA:
|
| 167 |
+
Apa itu modul Python? Sebutkan beberapa modul built-in yang umum digunakan di Python?
|
| 168 |
+
|
| 169 |
+
PESERTA:
|
| 170 |
+
Modul Python adalah file yang berisi kode Python. Kode ini bisa berupa fungsi, kelas, atau variabel. Sebuah modul Python adalah file .py yang berisi kode yang bisa dijalankan. Beberapa modul built-in yang sering digunakan adalah:
|
| 171 |
+
os
|
| 172 |
+
sys
|
| 173 |
+
math
|
| 174 |
+
random
|
| 175 |
+
datetime
|
| 176 |
+
json
|
| 177 |
+
|
| 178 |
+
OUTPUT: SUCCESS
|
| 179 |
+
|
| 180 |
+
Catatan: Contoh-contoh di atas memberikan jawaban yang benar. Tugas Anda adalah menghasilkan output saja (SUCCESS atau FAIL). Anda tidak perlu menjelaskan alasan Anda.
|
| 181 |
+
|
| 182 |
+
KEMAMPUAN YANG DIEVALUASI:{skill}
|
| 183 |
+
{transcript}
|
| 184 |
+
"""
|
| 185 |
+
model_parameters = {
|
| 186 |
+
"model":"gpt-4-0125-preview",
|
| 187 |
+
"messages":[
|
| 188 |
+
{"role": "system", "content": eng if lang == 'en' else idn},
|
| 189 |
]
|
| 190 |
}
|
| 191 |
|
|
|
|
| 223 |
|
| 224 |
return responses
|
| 225 |
|
| 226 |
+
def evaluate_interview(competences: list[str], transcript: list, lang: str = 'en'):
|
| 227 |
# global tags
|
| 228 |
model_inputs = []
|
| 229 |
|
| 230 |
responses = extract_competences_and_responses(transcript["comp_beha"], transcript["behavioral"])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
# pprint(transcript)
|
| 232 |
|
| 233 |
for i in range(len(transcript["comp_beha"])):
|
|
|
|
| 261 |
# print(text)
|
| 262 |
print("------")
|
| 263 |
## TODO: change to gpt
|
| 264 |
+
|
| 265 |
+
eng = """
|
| 266 |
Here are 5 examples:
|
| 267 |
EXAMPLE 1:
|
| 268 |
SKILL TO BE EVALUATED: Honest
|
|
|
|
| 283 |
"score": 0.1
|
| 284 |
}]
|
| 285 |
}
|
| 286 |
+
"""
|
| 287 |
+
idn = """
|
| 288 |
+
{
|
| 289 |
+
"value": [{
|
| 290 |
+
"Judgement": "It is impossible for someone to have never had any nightmare. Fear is a common human experience, so the interviewee is likely not being truthful.",
|
| 291 |
+
"score": 0.1
|
| 292 |
+
}]
|
| 293 |
+
}
|
| 294 |
+
"""
|
| 295 |
+
result = gpt_evaluator(model_inputs,
|
| 296 |
+
eng if lang == 'en' else idn,
|
| 297 |
Evaluations
|
| 298 |
)
|
| 299 |
## output:
|
| 300 |
final_score = 0
|
| 301 |
behavioral_scores = generate_behavioral_score(result.value)
|
| 302 |
+
technical_scores = generate_technical_score(transcript["comp_tech"], transcript["technical"], lang)
|
| 303 |
|
| 304 |
final_score = aggregate_scores(behavioral_scores, technical_scores)
|
| 305 |
|
|
|
|
| 325 |
|
| 326 |
return scores
|
| 327 |
|
| 328 |
+
def generate_technical_score(skills: str, transcript: str, lang: str):
|
| 329 |
# total_score = 0
|
| 330 |
scores = []
|
| 331 |
for idx, skill in enumerate(skills):
|
|
|
|
| 334 |
# print(chat)
|
| 335 |
transcript_text = f"INTERVIEWEE:\n{chat[0]['question'].lstrip('TECHNICAL: ')}\n\nINTERVIEWER:\n{chat[0]['answer']}"
|
| 336 |
# TODO: change to structured output
|
| 337 |
+
model_parameters = generate_model_parameters(skill, transcript_text, lang)
|
| 338 |
completion = client.chat.completions.create(
|
| 339 |
**model_parameters
|
| 340 |
)
|
models.py
CHANGED
|
@@ -70,6 +70,7 @@ class Evaluations(BaseModel):
|
|
| 70 |
class EvaModul(BaseModel):
|
| 71 |
competences: list[str]
|
| 72 |
transcript: list[list[Dict[str,str]]]
|
|
|
|
| 73 |
|
| 74 |
class EvalResult(BaseModel):
|
| 75 |
final_score: float
|
|
|
|
| 70 |
class EvaModul(BaseModel):
|
| 71 |
competences: list[str]
|
| 72 |
transcript: list[list[Dict[str,str]]]
|
| 73 |
+
lang: str = Field(...)
|
| 74 |
|
| 75 |
class EvalResult(BaseModel):
|
| 76 |
final_score: float
|