Spaces:
Paused
Paused
Tao Wu committed on
Commit ·
e789d9b
1
Parent(s): ffd107a
add skills query
Browse files - app/app.py +15 -1
app/app.py
CHANGED
|
@@ -20,6 +20,7 @@ def retrieve_documents(occupation,skills):
|
|
| 20 |
output.append(f"<div style=\"text-align: center; font-size: 24px;\">Empfehlungsergebnisse:</div>")
|
| 21 |
oc_uri = occupations.get(occupation, "")
|
| 22 |
skill_query = ''
|
|
|
|
| 23 |
if isinstance(oc_uri, int):
|
| 24 |
df = pd.read_csv("/app/data/berufe_info.csv")
|
| 25 |
target_occupation = df[df['id'] == oc_uri]
|
|
@@ -32,13 +33,26 @@ def retrieve_documents(occupation,skills):
|
|
| 32 |
target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
|
| 33 |
for german_label in skills:
|
| 34 |
skill_query += german_label + ' '
|
|
|
|
|
|
|
| 35 |
query = 'target occupation: ' + target_occupation_query + ' Skills gap:' + skill_query
|
| 36 |
llama_query = 'info:' + target_occupation_name + ' ' + 'Skills gap:' + skill_query
|
| 37 |
print(query)
|
| 38 |
docs = retriever.get_relevant_documents(query)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
partial_compare_docs = functools.partial(compare_docs_with_context, target_occupation_name=target_occupation_name, target_occupation_dsp=target_occupation_dsp,skill_gap = skill_query)
|
| 41 |
-
sorted_docs = sorted(
|
| 42 |
|
| 43 |
|
| 44 |
batch_prompts = []
|
|
|
|
| 20 |
output.append(f"<div style=\"text-align: center; font-size: 24px;\">Empfehlungsergebnisse:</div>")
|
| 21 |
oc_uri = occupations.get(occupation, "")
|
| 22 |
skill_query = ''
|
| 23 |
+
candidate_doc = []
|
| 24 |
if isinstance(oc_uri, int):
|
| 25 |
df = pd.read_csv("/app/data/berufe_info.csv")
|
| 26 |
target_occupation = df[df['id'] == oc_uri]
|
|
|
|
| 33 |
target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
|
| 34 |
for german_label in skills:
|
| 35 |
skill_query += german_label + ' '
|
| 36 |
+
skills_doc = retriever.get_relevant_documents(german_label)
|
| 37 |
+
candidate_doc.extend(skills_doc[:2])
|
| 38 |
query = 'target occupation: ' + target_occupation_query + ' Skills gap:' + skill_query
|
| 39 |
llama_query = 'info:' + target_occupation_name + ' ' + 'Skills gap:' + skill_query
|
| 40 |
print(query)
|
| 41 |
docs = retriever.get_relevant_documents(query)
|
| 42 |
+
candidate_doc.extend(docs[:5])
|
| 43 |
+
|
| 44 |
+
#remove duplicates
|
| 45 |
+
seen_course_ids = set()
|
| 46 |
+
candidate_doc_unique = []
|
| 47 |
+
|
| 48 |
+
for doc in candidate_doc:
|
| 49 |
+
course_id = doc['metadata'].get('course_id')
|
| 50 |
+
if course_id not in seen_course_ids:
|
| 51 |
+
candidate_doc_unique.append(doc)
|
| 52 |
+
seen_course_ids.add(course_id)
|
| 53 |
|
| 54 |
partial_compare_docs = functools.partial(compare_docs_with_context, target_occupation_name=target_occupation_name, target_occupation_dsp=target_occupation_dsp,skill_gap = skill_query)
|
| 55 |
+
sorted_docs = sorted(candidate_doc, key=functools.cmp_to_key(partial_compare_docs), reverse=True)
|
| 56 |
|
| 57 |
|
| 58 |
batch_prompts = []
|