Spaces:
Paused
Paused
Tao Wu committed on
Commit ·
12baee3
1
Parent(s): d0487d6
update
Browse files
- app/app.py +1 -1
- app/config.py +6 -2
- app/embedding_setup.py +3 -3
app/app.py
CHANGED
|
@@ -32,7 +32,7 @@ def retrieve_documents(occupation,skills):
|
|
| 32 |
target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
|
| 33 |
for german_label in skills:
|
| 34 |
skill_query += german_label + ' '
|
| 35 |
-
query = target_occupation_query + ' ' + skill_query
|
| 36 |
llama_query = 'info:' + target_occupation_name + ' ' + 'skills gap:' + skill_query
|
| 37 |
print(query)
|
| 38 |
docs = retriever.get_relevant_documents(query)
|
|
|
|
| 32 |
target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
|
| 33 |
for german_label in skills:
|
| 34 |
skill_query += german_label + ' '
|
| 35 |
+
query = 'target occupation: ' + target_occupation_query + ' skills gap:' + skill_query
|
| 36 |
llama_query = 'info:' + target_occupation_name + ' ' + 'skills gap:' + skill_query
|
| 37 |
print(query)
|
| 38 |
docs = retriever.get_relevant_documents(query)
|
app/config.py
CHANGED
|
@@ -8,15 +8,19 @@ REDIS_DB = int(os.getenv('REDIS_DB', 0))
|
|
| 8 |
|
| 9 |
# Model and embedding configuration
|
| 10 |
#MODEL_NAME = os.getenv('MODEL_NAME', "intfloat/multilingual-e5-large-instruct")
|
| 11 |
-
MODEL_NAME = os.getenv('MODEL_NAME', "
|
| 12 |
ENCODE_KWARGS = {
|
| 13 |
'normalize_embeddings': os.getenv('NORMALIZE_EMBEDDINGS', 'True') == 'True',
|
| 14 |
'convert_to_tensor': os.getenv('CONVERT_TO_TENSOR', 'True') == 'True'
|
| 15 |
}
|
| 16 |
-
QUERY_INSTRUCTION = os.getenv('QUERY_INSTRUCTION', '')
|
| 17 |
|
| 18 |
# Other configurations
|
| 19 |
TOP_K = int(os.getenv('TOP_K', 10))
|
| 20 |
#PERSIST_DIRECTORY = os.getenv('PERSIST_DIRECTORY', "/app/data/course_emb_db")
|
| 21 |
PERSIST_DIRECTORY = os.getenv('PERSIST_DIRECTORY', "/app/data/EduGBERT_cos_escoai")
|
| 22 |
CSV_FILE_PATH = os.getenv('CSV_FILE_PATH', '/app/data/occupations_de.csv')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
# Model and embedding configuration
|
| 10 |
#MODEL_NAME = os.getenv('MODEL_NAME', "intfloat/multilingual-e5-large-instruct")
|
| 11 |
+
MODEL_NAME = os.getenv('MODEL_NAME', "intfloat/multilingual-e5-large-instruct")
|
| 12 |
ENCODE_KWARGS = {
|
| 13 |
'normalize_embeddings': os.getenv('NORMALIZE_EMBEDDINGS', 'True') == 'True',
|
| 14 |
'convert_to_tensor': os.getenv('CONVERT_TO_TENSOR', 'True') == 'True'
|
| 15 |
}
|
| 16 |
+
QUERY_INSTRUCTION = os.getenv('QUERY_INSTRUCTION', 'Find the course that relates to the given occupation and cover the skills gap')
|
| 17 |
|
| 18 |
# Other configurations
|
| 19 |
TOP_K = int(os.getenv('TOP_K', 10))
|
| 20 |
#PERSIST_DIRECTORY = os.getenv('PERSIST_DIRECTORY', "/app/data/course_emb_db")
|
| 21 |
PERSIST_DIRECTORY = os.getenv('PERSIST_DIRECTORY', "/app/data/EduGBERT_cos_escoai")
|
| 22 |
CSV_FILE_PATH = os.getenv('CSV_FILE_PATH', '/app/data/occupations_de.csv')
|
| 23 |
+
|
| 24 |
+
REC_LORA_MODEL = os.getenv('REC_LORA_MODEL', 'wt3639/Llama-3-8B-Instruct_CourseRec_lora')
|
| 25 |
+
EXP_LORA_MODEL = os.getenv('EXP_LORA_MODEL', 'wt3639/alpaca_german_english')
|
| 26 |
+
LLM_MODEL = os.getenv('LLM_MODEL', '"meta-llama/Meta-Llama-3-8B-Instruct"')
|
app/embedding_setup.py
CHANGED
|
@@ -30,9 +30,9 @@ db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embedding_in
|
|
| 30 |
retriever = db.as_retriever(search_kwargs={"k": TOP_K})
|
| 31 |
|
| 32 |
|
| 33 |
-
|
| 34 |
-
lora_weights_rec =
|
| 35 |
-
lora_weights_exp =
|
| 36 |
hf_auth = os.environ.get("hf_token")
|
| 37 |
|
| 38 |
|
|
|
|
| 30 |
retriever = db.as_retriever(search_kwargs={"k": TOP_K})
|
| 31 |
|
| 32 |
|
| 33 |
+
|
| 34 |
+
lora_weights_rec = REC_LORA_MODEL
|
| 35 |
+
lora_weights_exp = EXP_LORA_MODEL
|
| 36 |
hf_auth = os.environ.get("hf_token")
|
| 37 |
|
| 38 |
|