# Spaces: Sleeping
import pandas as pd
from E_Model_utils import fine_tune_and_save_model
from sentence_transformers import SentenceTransformer
from A_Preprocess import load_pdf_data
from pathlib import Path

# Script flow: locate the intents CSV relative to the project root, load it
# with load_pdf_data, then pass it with the chosen checkpoint name to
# fine_tune_and_save_model.

# Project root: the directory one level above the folder holding this script.
# Resolving from __file__ keeps the script independent of the current working
# directory and of any machine-specific absolute path.
BASE_DIR = Path(__file__).resolve().parent.parent

# Checkpoint handed to fine_tune_and_save_model.
# Other identifiers noted in earlier revisions of this script as candidates
# (kept for reference only; none of them is used or validated here):
#   intfloat/multilingual-e5-small
#   sentence-transformers/paraphrase-multilingual-mpnet-base-v2
#   sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
#   sentence-transformers/all-MiniLM-L6-v2
#   sentence-transformers/all-mpnet-base-v2
#   sentence-transformers/all-distilroberta-v1
#   sentence-transformers/bert-base-nli-mean-tokens
#   sentence-transformers/bert-base-romanian-cased-v1
#   dumitrescustefan/bert-base-romanian-cased-v1
#   dumitrescustefan/bert-base-romanian-uncased-v1
#   bert-base-multilingual-cased
#   xlm-roberta-base
#   xlm-r-distilroberta-base-paraphrase-v1
#   McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp
model_name = 'BlackKakapo/stsb-xlm-r-multilingual-ro'

# Location of the dataset inside the project's data folder; printed so the
# resolved path is visible when the script runs.
data_file_path = BASE_DIR.joinpath("data", "Pager_Intents_cleaned.csv")
print(data_file_path)

# The loader is called with the path converted to a plain string.
data = load_pdf_data(str(data_file_path))

# Fine-tune the selected checkpoint on the loaded data and save the result.
fine_tune_and_save_model(model_name, data)