Spaces:
Paused
Paused
| import gradio as gr | |
| import pandas as pd | |
| import redis | |
| import json | |
| import requests | |
| from config import * | |
| import functools | |
| from embedding_setup import retriever, find_similar_occupation, compare_docs_with_context,generate_exp,generate_prompt_exp | |
| from data_process import get_occupations_from_csv, get_courses_from_BA, get_occupation_detial, build_occupation_query | |
# Per-occupation skill data is loaded once at import time from a JSON file.
# The encoding is given explicitly so that labels with umlauts decode the same
# way regardless of the container's locale.
with open('/app/data/redis_data.json', 'r', encoding='utf-8') as data_file:
    data_dict = json.load(data_file)
# The live Redis lookup is disabled; data_dict above is used in its place.
#r = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB, decode_responses=True)
# Maps a German skill label to its full skill record; filled lazily by
# get_occupation_skills() and get_occupation_skills_BA().
skill_details_mapping = {}
| # Function to retrieve documents based on selected skills | |
def retrieve_documents(occupation, skills):
    """Build the HTML recommendation report for an occupation and a skill gap.

    Args:
        occupation: Display name of the target occupation; looked up in the
            module-level ``occupations`` mapping.
        skills: Iterable of skill labels the user wants to learn. ``None`` is
            treated as an empty selection.

    Returns:
        An HTML string (fragments joined with ``<br>``) listing up to five
        recommended courses, each followed by a generated explanation.
    """
    skills = skills or []

    output = []
    output.append(f"<div style=\"text-align: center; font-size: 24px;\">Empfehlungsergebnisse:</div>")
    oc_uri = occupations.get(occupation, "")

    # Integer ids refer to rows of berufe_info.csv; anything else is resolved
    # through get_occupation_detial().
    if isinstance(oc_uri, int):
        df = pd.read_csv("/app/data/berufe_info.csv")
        target_occupation = df[df['id'] == oc_uri]
        target_occupation_name = target_occupation['short name'].values[0]
        target_occupation_dsp = target_occupation['description'].values[0]
        target_occupation_query = target_occupation_name + ' ' + target_occupation_dsp
    else:
        target_occupation = get_occupation_detial(oc_uri)
        target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)

    # Each label is followed by a single space (including the last one) so the
    # query and prompt strings stay exactly as before.
    skill_query = ''.join(german_label + ' ' for german_label in skills)

    # Per-skill retrieval: keep the two best hits for "<occupation> <skill>".
    candidate_docs = []
    for german_label in skills:
        ocsk_query = target_occupation_name + ' ' + german_label
        skills_docs = retriever.get_relevant_documents(ocsk_query)
        candidate_docs.extend(skills_docs[:2])

    # Whole-profile retrieval: keep the five best hits for the combined query.
    query = target_occupation_query + ' ' + skill_query
    llama_query = 'info:' + target_occupation_name + ' ' + 'Skills gap:' + skill_query
    print(query)
    docs = retriever.get_relevant_documents(query)
    candidate_docs.extend(docs[:5])

    # Remove duplicates by course id, keeping the first occurrence.
    seen_course_ids = set()
    candidate_doc_unique = []
    for doc in candidate_docs:
        course_id = doc.metadata.get('id', '')
        if course_id not in seen_course_ids:
            candidate_doc_unique.append(doc)
            seen_course_ids.add(course_id)

    # compare_docs_with_context is a cmp-style comparator; reverse=True puts
    # the documents it ranks highest first.
    partial_compare_docs = functools.partial(
        compare_docs_with_context,
        target_occupation_name=target_occupation_name,
        target_occupation_dsp=target_occupation_dsp,
        skill_gap=skill_query,
    )
    sorted_docs = sorted(candidate_doc_unique, key=functools.cmp_to_key(partial_compare_docs), reverse=True)
    top_docs = sorted_docs[:5]

    batch_prompts = []
    for doc in top_docs:
        doc_name = doc.metadata.get('name', 'Unnamed Document')
        doc_skill = doc.metadata.get('skills', '')
        input_text = f"target occupation: {llama_query}\n Recommended course: name: {doc_name}, learning objectives: {doc_skill[:2000]}"
        batch_prompts.append(generate_prompt_exp(input_text))

    # Skip the generation call entirely when there is nothing to explain.
    batch_output = generate_exp(batch_prompts) if batch_prompts else []

    output.append(f"<b>Zielberuf:</b> {target_occupation_name}")
    output.append(f"<b>Qualifikationslücke:</b> {skill_query}")
    output.append(f"<b>Empfohlene Kurse:</b>")
    # Iterate over what was actually retrieved instead of a fixed range(5), so
    # fewer than five unique courses no longer raises IndexError.
    for doc, explanation in zip(top_docs, batch_output):
        doc_name = doc.metadata.get('name', 'Unnamed Document')
        doc_url = doc.metadata.get('url', '#')
        output.append(f"<a href='{doc_url}' target='_blank'>{doc_name}</a>")
        output.append(f"<b>Empfehlungsgrund:</b> {explanation}")
        output.append(f"<br>")
    return "<br>".join(output)
def get_candidate_courses(occupation, skills):
    """List candidate courses reached via ESCO skills and related occupations.

    The selected skills are resolved through the ESCO API to the occupations
    that use them, those occupations' ISCO codes are mapped to KldB codes, and
    the matching rows of berufe_info.csv are ranked against the target
    occupation. Courses are then requested for the top-ranked entries.

    Args:
        occupation: Display name of the target occupation; looked up in the
            module-level ``occupations`` mapping.
        skills: Iterable of German skill labels previously registered in
            ``skill_details_mapping``. ``None`` is treated as empty.

    Returns:
        An HTML string (fragments joined with ``<br>``) with one link per
        course. Only the heading is returned when nothing matches.
    """
    request_timeout = 30  # seconds; keeps a stalled HTTP call from hanging the UI

    output = []
    output.append(f"<div style=\"text-align: center; font-size: 24px;\">Empfehlungsergebnisse:</div>")
    df_lookup = pd.read_csv('/app/data/kldb_isco_lookup.csv')
    df_berufe = pd.read_csv('/app/data/berufe_info.csv')
    occupation_codes = set()
    kldB_set = set()
    occupation_hrefs = set()
    BA_berufe = set()

    oc_uri = occupations.get(occupation, "")
    target_occupation = get_occupation_detial(oc_uri)
    # build_occupation_query returns (name, description, query), as unpacked in
    # retrieve_documents; only the query text is wanted here.
    _, _, target_occupation_query = build_occupation_query(target_occupation)

    # Step 1: collect the occupations each selected skill is linked to.
    for german_label in skills or []:
        skill_uri = skill_details_mapping.get(german_label, {}).get("uri")
        if not skill_uri:
            # Unknown label or a record without a URI: nothing to look up.
            continue
        uri = f'https://ec.europa.eu/esco/api/resource/skill?selectedVersion=v1.0.9&language=en&uri={skill_uri}'
        try:
            skill_response = requests.get(uri, timeout=request_timeout)
            skill_response.raise_for_status()
            skill_json = skill_response.json()
            links = skill_json.get('_links', {})
            # Combine essential and optional occupations
            skill_related_occupations = (links.get('isEssentialForOccupation', []) +
                                         links.get('isOptionalForOccupation', []))
            for related_occupation in skill_related_occupations:
                href = related_occupation.get('href')
                if href:
                    occupation_hrefs.add(href)
        except requests.RequestException as e:
            print(f"Error while fetching skill details: {e}")

    # Step 2: fetch each occupation and keep the part of its code before the
    # first dot.
    for href in occupation_hrefs:
        try:
            occupation_response = requests.get(href, timeout=request_timeout)
            occupation_response.raise_for_status()
            occupation_details = occupation_response.json()
            code = occupation_details.get('code')
            if code:
                occupation_codes.add(code.split('.')[0])
        except requests.RequestException as e:
            print(f"Error while fetching occupation details: {e}")

    # Step 3: translate ISCO codes to KldB codes via the lookup table.
    for isco_code in occupation_codes:
        try:
            isco_number = int(isco_code)
        except ValueError:
            continue  # a non-numeric code cannot match the integer column
        kldB_codes = df_lookup[df_lookup['isco08'] == isco_number]['kldb2010'].values
        for code in kldB_codes:
            kldB_set.add(str(code))

    # Step 4: gather the berufe rows for those KldB codes.
    dfs = [df_berufe[df_berufe['KldB codes'] == 'B ' + kldb] for kldb in kldB_set]
    if not dfs:
        # pd.concat raises on an empty list; there is nothing to recommend.
        return "<br>".join(output)
    merged_df = pd.concat(dfs, ignore_index=True)

    # Step 5: rank the rows against the target occupation and collect the ids
    # referenced by their entry requirements.
    top_k_berufe = find_similar_occupation(target_occupation_query, merged_df, 5, 'cosine')
    for beruf in top_k_berufe:
        entry_requirement = beruf.metadata['entry_requirements']
        # NOTE(review): the quote swap assumes a single-quoted list of dicts
        # with no apostrophes inside the values; confirm against the data.
        corrected_json_string = entry_requirement.replace("'", '"')
        entry_requirement_json = json.loads(corrected_json_string)
        for js in entry_requirement_json:
            BA_berufe.add(str(js['data_idref']))

    # Step 6: fetch the courses and render one link per entry.
    result = get_courses_from_BA(BA_berufe)
    courses = (result or {}).get('_embedded', {}).get('termine', [])
    for course in courses:
        output.append(f"<a href='{course['angebot']['link']}' target='_blank'>{course['angebot']['titel']}</a>")
    return "<br>".join(output)
def get_occupation_skills(oc_uri):
    """Return the German skill labels stored for an occupation key.

    The key is looked up in the module-level ``data_dict``, whose values are
    JSON-encoded lists of skill records. Every record found is also stored in
    ``skill_details_mapping`` under its German label.

    Args:
        oc_uri: Key into ``data_dict``.

    Returns:
        A list of German labels in stored order; an empty list when the key is
        missing or its value is empty.
    """
    #skills_json = r.get(oc_uri)
    raw_skills = data_dict.get(oc_uri, None)
    if not raw_skills:
        return []

    labels = []
    for record in json.loads(raw_skills):
        label_de = record['preferredLabel']['de']
        skill_details_mapping[label_de] = record
        labels.append(label_de)
    return labels
def get_occupation_skills_BA(oc_uri):
    """Return the skill labels stored in berufe_info.csv for an occupation id.

    The 'essential skills' and 'optional skills' cells each hold a list of
    skill records written as text. Each cell is parsed on its own, so an empty
    list in one column no longer produces invalid JSON the way splicing the two
    strings together did. Every record is also stored in
    ``skill_details_mapping`` under its label.

    Args:
        oc_uri: Value matched against the 'id' column of the CSV.

    Returns:
        A list of labels, essential skills first and optional skills after. An
        empty list is returned when no row matches the id.
    """
    df = pd.read_csv("/app/data/berufe_info.csv")
    row = df[df['id'] == oc_uri]
    if row.empty:
        return []

    skills = []
    for column in ('essential skills', 'optional skills'):
        raw = row[column].values[0]
        if not isinstance(raw, str) or not raw.strip():
            # An empty CSV cell is read as NaN (a float): no skills there.
            continue
        # NOTE(review): the quote swap assumes single-quoted records with no
        # apostrophes inside the values; confirm against the data.
        skills.extend(json.loads(raw.replace("'", "\"")))

    skill_labels = []
    for skill in skills:
        german_label = skill['skill']
        skill_details_mapping[german_label] = skill
        skill_labels.append(german_label)
    return skill_labels
| # Function to update the skills dropdown | |
def update_skills(occupation):
    """Return the "current skills" dropdown populated for an occupation.

    Args:
        occupation: Display name of the selected occupation; looked up in the
            module-level ``occupations`` mapping.

    Returns:
        A multiselect ``gr.Dropdown`` whose choices are the occupation's
        skill labels.
    """
    oc_uri = occupations.get(occupation, "")
    # Integer ids come from berufe_info.csv; everything else is looked up in
    # the preloaded skill data.
    if isinstance(oc_uri, int):
        skills = get_occupation_skills_BA(oc_uri)
    else:
        skills = get_occupation_skills(oc_uri)
    # Both sources feed the same widget, so it is built once here.
    return gr.Dropdown(skills, label="aktuelle Fähigkeiten", multiselect=True, info='Bitte wählen Sie die Fähigkeiten aus, die Sie derzeit besitzen')
def update_skillgap(occupation, current_skills):
    """Return the skill-gap dropdown: occupation skills the user lacks.

    Args:
        occupation: Display name of the selected occupation; looked up in the
            module-level ``occupations`` mapping.
        current_skills: Skill labels the user already has. ``None`` is treated
            as "no skills selected".

    Returns:
        A multiselect ``gr.Dropdown`` whose choices are the occupation's
        skills not present in ``current_skills``, in their original order.
    """
    oc_uri = occupations.get(occupation, "")
    if isinstance(oc_uri, int):
        occupation_skills = get_occupation_skills_BA(oc_uri)
    else:
        occupation_skills = get_occupation_skills(oc_uri)
    # A set gives constant-time membership tests; `or []` guards against a
    # None selection, which would make the `in` test raise TypeError.
    owned_skills = set(current_skills or [])
    skill_gap = [skill for skill in occupation_skills if skill not in owned_skills]
    return gr.Dropdown(skill_gap, label="Qualifikationslücke", multiselect=True, info='Bitte wählen Sie die Fähigkeiten aus, die Sie lernen möchten.')
if __name__ == "__main__":
    # Build the occupation lookup used by the callbacks above: entries from
    # the configured CSV first, then entries from berufe_info.csv, which
    # override on a name clash.
    occupations_esco = get_occupations_from_csv(CSV_FILE_PATH)
    df = pd.read_csv("/app/data/berufe_info.csv")
    # Series.to_dict() yields plain Python ints, which the callbacks rely on
    # for their isinstance(..., int) checks.
    occupations_BA = df.set_index('short name')['id'].to_dict()
    occupations = dict(occupations_esco)
    occupations.update(occupations_BA)

    # Gradio interface. Components are created in display order.
    with gr.Blocks(title="MyEduLife Kursempfehlungssystem") as demo:
        occupation_dropdown = gr.Dropdown(
            list(occupations),
            label="Zielberuf",
            info='Bitte wählen Sie Ihren Zielberuf aus.',
        )
        currentskill_dropdown = gr.Dropdown(
            [],
            label="aktuelle Fähigkeiten",
            multiselect=True,
            info='Bitte wählen Sie die Fähigkeiten aus, die Sie derzeit besitzen',
        )
        sb_btn = gr.Button("Absenden")
        skillgap_dropdown = gr.Dropdown(
            [],
            label="Fähigkeiten",
            multiselect=True,
            info='Bitte wählen Sie die Fähigkeiten aus, die Sie lernen möchten.',
        )
        button = gr.Button("Kursempfehlungen")
        # The recommendation report is HTML, so it is rendered with gr.HTML.
        documents_output = gr.HTML()

        # Choosing an occupation refreshes the list of selectable skills.
        occupation_dropdown.change(
            fn=update_skills,
            inputs=occupation_dropdown,
            outputs=currentskill_dropdown,
        )
        # "Absenden" computes the skill gap from the current selection.
        sb_btn.click(
            fn=update_skillgap,
            inputs=[occupation_dropdown, currentskill_dropdown],
            outputs=skillgap_dropdown,
        )
        # "Kursempfehlungen" renders the recommendations for the chosen gap.
        button.click(
            fn=retrieve_documents,
            inputs=[occupation_dropdown, skillgap_dropdown],
            outputs=documents_output,
        )

    print('Initialization completed')
    demo.launch(server_name="0.0.0.0", server_port=7860)