Spaces:
Paused
Paused
Tao Wu committed on
Commit ·
6fc2fca
1
Parent(s): a5dc95f
add skills query
Browse files
- app/app.py +6 -6
app/app.py
CHANGED
|
@@ -20,7 +20,7 @@ def retrieve_documents(occupation,skills):
|
|
| 20 |
output.append(f"<div style=\"text-align: center; font-size: 24px;\">Empfehlungsergebnisse:</div>")
|
| 21 |
oc_uri = occupations.get(occupation, "")
|
| 22 |
skill_query = ''
|
| 23 |
-
|
| 24 |
if isinstance(oc_uri, int):
|
| 25 |
df = pd.read_csv("/app/data/berufe_info.csv")
|
| 26 |
target_occupation = df[df['id'] == oc_uri]
|
|
@@ -33,20 +33,20 @@ def retrieve_documents(occupation,skills):
|
|
| 33 |
target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
|
| 34 |
for german_label in skills:
|
| 35 |
skill_query += german_label + ' '
|
| 36 |
-
|
| 37 |
-
|
| 38 |
query = 'target occupation: ' + target_occupation_query + ' Skills gap:' + skill_query
|
| 39 |
llama_query = 'info:' + target_occupation_name + ' ' + 'Skills gap:' + skill_query
|
| 40 |
print(query)
|
| 41 |
docs = retriever.get_relevant_documents(query)
|
| 42 |
-
|
| 43 |
|
| 44 |
#remove duplicates
|
| 45 |
seen_course_ids = set()
|
| 46 |
candidate_doc_unique = []
|
| 47 |
|
| 48 |
-
for doc in
|
| 49 |
-
course_id = doc.metadata.get('
|
| 50 |
if course_id not in seen_course_ids:
|
| 51 |
candidate_doc_unique.append(doc)
|
| 52 |
seen_course_ids.add(course_id)
|
|
|
|
| 20 |
output.append(f"<div style=\"text-align: center; font-size: 24px;\">Empfehlungsergebnisse:</div>")
|
| 21 |
oc_uri = occupations.get(occupation, "")
|
| 22 |
skill_query = ''
|
| 23 |
+
candidate_docs = []
|
| 24 |
if isinstance(oc_uri, int):
|
| 25 |
df = pd.read_csv("/app/data/berufe_info.csv")
|
| 26 |
target_occupation = df[df['id'] == oc_uri]
|
|
|
|
| 33 |
target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
|
| 34 |
for german_label in skills:
|
| 35 |
skill_query += german_label + ' '
|
| 36 |
+
skills_docs = retriever.get_relevant_documents(german_label)
|
| 37 |
+
candidate_docs.extend(skills_docs[:2])
|
| 38 |
query = 'target occupation: ' + target_occupation_query + ' Skills gap:' + skill_query
|
| 39 |
llama_query = 'info:' + target_occupation_name + ' ' + 'Skills gap:' + skill_query
|
| 40 |
print(query)
|
| 41 |
docs = retriever.get_relevant_documents(query)
|
| 42 |
+
candidate_docs.extend(docs[:5])
|
| 43 |
|
| 44 |
#remove duplicates
|
| 45 |
seen_course_ids = set()
|
| 46 |
candidate_doc_unique = []
|
| 47 |
|
| 48 |
+
for doc in candidate_docs:
|
| 49 |
+
course_id = doc.metadata.get('id','')
|
| 50 |
if course_id not in seen_course_ids:
|
| 51 |
candidate_doc_unique.append(doc)
|
| 52 |
seen_course_ids.add(course_id)
|