Spaces:

wt3639
/

Course_rec

Paused

Tao Wu committed on Jul 2, 2024

Commit

12baee3

1 Parent(s): d0487d6

update

Files changed (3) hide show

app/app.py CHANGED Viewed

@@ -32,7 +32,7 @@ def retrieve_documents(occupation,skills):
         target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
     for german_label in skills:
         skill_query += german_label + ' '
-    query = target_occupation_query + ' ' + skill_query
     llama_query = 'info:' + target_occupation_name + ' ' + 'skills gap:' + skill_query
     print(query)
     docs = retriever.get_relevant_documents(query)

         target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
     for german_label in skills:
         skill_query += german_label + ' '
+    query = 'target occupation: ' + target_occupation_query + ' skills gap:' + skill_query
     llama_query = 'info:' + target_occupation_name + ' ' + 'skills gap:' + skill_query
     print(query)
     docs = retriever.get_relevant_documents(query)

app/config.py CHANGED Viewed

@@ -8,15 +8,19 @@ REDIS_DB = int(os.getenv('REDIS_DB', 0))
 # Model and embedding configuration
 #MODEL_NAME = os.getenv('MODEL_NAME', "intfloat/multilingual-e5-large-instruct")
-MODEL_NAME = os.getenv('MODEL_NAME', "wt3639/EduGBERT_CourseRec")
 ENCODE_KWARGS = {
     'normalize_embeddings': os.getenv('NORMALIZE_EMBEDDINGS', 'True') == 'True',
     'convert_to_tensor': os.getenv('CONVERT_TO_TENSOR', 'True') == 'True'
 }
-QUERY_INSTRUCTION = os.getenv('QUERY_INSTRUCTION', '')
 # Other configurations
 TOP_K = int(os.getenv('TOP_K', 10))
 #PERSIST_DIRECTORY = os.getenv('PERSIST_DIRECTORY', "/app/data/course_emb_db")
 PERSIST_DIRECTORY = os.getenv('PERSIST_DIRECTORY', "/app/data/EduGBERT_cos_escoai")
 CSV_FILE_PATH = os.getenv('CSV_FILE_PATH', '/app/data/occupations_de.csv')

 # Model and embedding configuration
 #MODEL_NAME = os.getenv('MODEL_NAME', "intfloat/multilingual-e5-large-instruct")
+MODEL_NAME = os.getenv('MODEL_NAME', "intfloat/multilingual-e5-large-instruct")
 ENCODE_KWARGS = {
     'normalize_embeddings': os.getenv('NORMALIZE_EMBEDDINGS', 'True') == 'True',
     'convert_to_tensor': os.getenv('CONVERT_TO_TENSOR', 'True') == 'True'
 }
+QUERY_INSTRUCTION = os.getenv('QUERY_INSTRUCTION', 'Find the course that relates to the given occupation and cover the skills gap')
 # Other configurations
 TOP_K = int(os.getenv('TOP_K', 10))
 #PERSIST_DIRECTORY = os.getenv('PERSIST_DIRECTORY', "/app/data/course_emb_db")
 PERSIST_DIRECTORY = os.getenv('PERSIST_DIRECTORY', "/app/data/EduGBERT_cos_escoai")
 CSV_FILE_PATH = os.getenv('CSV_FILE_PATH', '/app/data/occupations_de.csv')
+REC_LORA_MODEL = os.getenv('REC_LORA_MODEL', 'wt3639/Llama-3-8B-Instruct_CourseRec_lora')
+EXP_LORA_MODEL = os.getenv('EXP_LORA_MODEL', 'wt3639/alpaca_german_english')
+LLM_MODEL = os.getenv('LLM_MODEL', 'meta-llama/Meta-Llama-3-8B-Instruct')

app/embedding_setup.py CHANGED Viewed

@@ -30,9 +30,9 @@ db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embedding_in
 retriever = db.as_retriever(search_kwargs={"k": TOP_K})
-LLM_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct"
-lora_weights_rec = "wt3639/Llama-3-8B-Instruct_CourseRec_lora"
-lora_weights_exp = "wt3639/alpaca_german_english"
 hf_auth  = os.environ.get("hf_token")

 retriever = db.as_retriever(search_kwargs={"k": TOP_K})
+lora_weights_rec = REC_LORA_MODEL
+lora_weights_exp = EXP_LORA_MODEL
 hf_auth  = os.environ.get("hf_token")