# bc-test / Makefile
# lamossta's picture
# updated readme and added report
# 8159f14
# --- Environment setup -------------------------------------------------------
# These targets are commands, not files: declare them phony so a stray file
# named `install` can never make them look "up to date".
.PHONY: install install-runpod

# Install the Python dependencies listed in requirements.txt.
install:
	pip install -r requirements.txt

# Same as `install`, but first reinstalls `blinker` while ignoring any copy
# that is already present, then installs requirements.txt.
# NOTE(review): presumably works around a pre-installed blinker on RunPod
# images that pip cannot uninstall — confirm.
install-runpod:
	pip install --ignore-installed blinker && pip install -r requirements.txt
# --- Prepared data archive ---------------------------------------------------
# ID handed to gdown to fetch the data archive. `?=` keeps any value already
# set in the environment or on the command line, e.g.
#   make download_data DATA_GDRIVE_ID=<other-id>
DATA_GDRIVE_ID ?= 1e0ld3FXNYJUQFre0fwgQOUTnTaO_ONnJ

.PHONY: download_data

# Download the archive as data.zip, unpack it into the current directory
# (-o: overwrite existing files without prompting), then delete the archive.
# Each recipe line runs in its own shell; make stops at the first failing
# line, so data.zip is left in place if the unzip step fails.
download_data:
	gdown $(DATA_GDRIVE_ID) -O data.zip
	unzip -o data.zip
	rm data.zip
.PHONY: zip_data

# Recursively pack the data/ directory into data.zip in the current directory.
# Declared phony so the archive is rebuilt on every invocation.
zip_data:
	zip -r data.zip data/
# --- Raw dataset downloads ---------------------------------------------------
# Each target runs one downloader module from src.datasets. They are commands,
# not files, so they are declared phony.
.PHONY: download_tedseg download_pubmed download_wikipedia

# Run the TED segmentation downloader with its default arguments.
download_tedseg:
	python -m src.datasets.download_tedseg

# Run the PubMed downloader with --k 100.
# NOTE(review): --k presumably caps the number of documents fetched — confirm
# in src/datasets/download_pubmed.
download_pubmed:
	python -m src.datasets.download_pubmed --k 100

# Run the Wikipedia downloader with --k 100 (same flag as download_pubmed).
download_wikipedia:
	python -m src.datasets.download_wikipedia --k 100
# --- Preprocessing -----------------------------------------------------------
# All targets invoke src.datasets.preprocess with a dataset name as the first
# argument. They are declared phony so they always run when requested.
.PHONY: preprocess_all preprocess_pubmed preprocess_wikipedia \
        preprocess_gutenberg preprocess_ted

# Aggregate target: run every preprocess_* target below.
preprocess_all: preprocess_pubmed preprocess_wikipedia preprocess_gutenberg preprocess_ted

# data/pubmed/raw/pubmed_raw.jsonl -> data/pubmed/pubmed_data.jsonl
preprocess_pubmed:
	python -m src.datasets.preprocess pubmed --input data/pubmed/raw/pubmed_raw.jsonl --output data/pubmed/pubmed_data.jsonl

# data/wikipedia/raw/wikipedia_raw.jsonl -> data/wikipedia/wikipedia_data.jsonl
preprocess_wikipedia:
	python -m src.datasets.preprocess wikipedia --input data/wikipedia/raw/wikipedia_raw.jsonl --output data/wikipedia/wikipedia_data.jsonl

# Gutenberg takes a raw directory (--raw_dir) rather than a single input file.
# data/gutenberg/raw/ -> data/gutenberg/gutenberg_data.jsonl
preprocess_gutenberg:
	python -m src.datasets.preprocess gutenberg --raw_dir data/gutenberg/raw --output data/gutenberg/gutenberg_data.jsonl

# TED works directory-to-directory: data/tedseg/raw/ -> data/tedseg/
preprocess_ted:
	python -m src.datasets.preprocess ted --input_dir data/tedseg/raw --output_dir data/tedseg
.PHONY: create_recipes

# Run the recipes dataset builder module with its default arguments.
# NOTE(review): presumably writes data/recipes/recipes_data.jsonl, the file
# sentence_split_recipes reads — confirm in src/datasets/create_recipes_dataset.
create_recipes:
	python -m src.datasets.create_recipes_dataset
# --- Sentence splitting ------------------------------------------------------
# All targets invoke src.datasets.sentence_splitter with a dataset name and
# pass --device cuda.
# NOTE(review): presumably this means a CUDA-capable GPU is required — confirm.
.PHONY: sentence_split_all sentence_split_gutenberg sentence_split_ted \
        sentence_split_pubmed sentence_split_wikipedia sentence_split_recipes

# Aggregate target for the gutenberg, pubmed, wikipedia and recipes splits.
# NOTE(review): sentence_split_ted is NOT part of this aggregate, although
# preprocess_all does include preprocess_ted — confirm whether that is
# intentional before adding it here.
sentence_split_all: sentence_split_gutenberg sentence_split_pubmed sentence_split_wikipedia sentence_split_recipes

# data/gutenberg/gutenberg_data.jsonl -> data/gutenberg/gutenberg_sentences.jsonl
sentence_split_gutenberg:
	python -m src.datasets.sentence_splitter gutenberg --input data/gutenberg/gutenberg_data.jsonl --output data/gutenberg/gutenberg_sentences.jsonl --device cuda

# Directory-to-directory: data/tedseg/raw/ -> data/tedseg/
sentence_split_ted:
	python -m src.datasets.sentence_splitter ted --input_dir data/tedseg/raw --output_dir data/tedseg --device cuda

# data/pubmed/pubmed_data.jsonl -> data/pubmed/pubmed_sentences.jsonl
sentence_split_pubmed:
	python -m src.datasets.sentence_splitter pubmed --input data/pubmed/pubmed_data.jsonl --output data/pubmed/pubmed_sentences.jsonl --device cuda

# data/wikipedia/wikipedia_data.jsonl -> data/wikipedia/wikipedia_sentences.jsonl
sentence_split_wikipedia:
	python -m src.datasets.sentence_splitter wikipedia --input data/wikipedia/wikipedia_data.jsonl --output data/wikipedia/wikipedia_sentences.jsonl --device cuda

# data/recipes/recipes_data.jsonl -> data/recipes/recipes_sentences.jsonl
sentence_split_recipes:
	python -m src.datasets.sentence_splitter recipes --input data/recipes/recipes_data.jsonl --output data/recipes/recipes_sentences.jsonl --device cuda
.PHONY: build_recipes_pairs

# Run the pair builder for the `recipes` dataset.
build_recipes_pairs:
	python -m src.datasets.build_pairs recipes
# --- Training ----------------------------------------------------------------
# Each target trains one model via src.models.train and writes its output
# under checkpoints/<model>. Declared phony: the targets name commands, not
# the checkpoint directories.
.PHONY: train_distilbert train_bert train_deberta train_all

train_distilbert:
	python -m src.models.train --model distilbert --out checkpoints/distilbert

train_bert:
	python -m src.models.train --model bert --out checkpoints/bert

train_deberta:
	python -m src.models.train --model deberta --out checkpoints/deberta

# Aggregate target: train all three models. The prerequisites are independent,
# so `make -j` would start the trainings concurrently; run without -j to train
# them one after another.
train_all: train_distilbert train_bert train_deberta
# --- Export ------------------------------------------------------------------
# Each target runs src.models.export_and_download on the `best` checkpoint of
# one model (checkpoints/<model>/best). Declared phony because they are
# commands, not files.
.PHONY: export_distilbert export_bert export_deberta export_all

export_distilbert:
	python -m src.models.export_and_download --checkpoint checkpoints/distilbert/best

export_bert:
	python -m src.models.export_and_download --checkpoint checkpoints/bert/best

export_deberta:
	python -m src.models.export_and_download --checkpoint checkpoints/deberta/best

# Aggregate target: export all three models.
export_all: export_distilbert export_bert export_deberta
# --- Upload ------------------------------------------------------------------
# Each target runs src.models.export_and_download on checkpoints/<model>/best
# and additionally passes --repo slamos/bc-models-<model>.
# NOTE(review): --repo presumably names the remote model repository the export
# is pushed to, which would require valid credentials — confirm in
# src/models/export_and_download.
.PHONY: upload_distilbert upload_bert upload_deberta upload_all

upload_distilbert:
	python -m src.models.export_and_download --checkpoint checkpoints/distilbert/best --repo slamos/bc-models-distilbert

upload_bert:
	python -m src.models.export_and_download --checkpoint checkpoints/bert/best --repo slamos/bc-models-bert

upload_deberta:
	python -m src.models.export_and_download --checkpoint checkpoints/deberta/best --repo slamos/bc-models-deberta

# Aggregate target: upload all three models.
upload_all: upload_distilbert upload_bert upload_deberta
# --- Model download ----------------------------------------------------------
# Each target runs src.models.export_and_download with --download <model>.
# NOTE(review): presumably fetches a previously uploaded model — confirm the
# source and destination in src/models/export_and_download.
.PHONY: download_distilbert download_bert download_deberta download_all

download_distilbert:
	python -m src.models.export_and_download --download distilbert

download_bert:
	python -m src.models.export_and_download --download bert

download_deberta:
	python -m src.models.export_and_download --download deberta

# Aggregate target: download all three models. This does not fetch the
# datasets; those have their own download_data / download_<dataset> targets.
download_all: download_distilbert download_bert download_deberta
# --- Inference ---------------------------------------------------------------
# Each target runs src.models.inference for one model. The inference_local_*
# variants pass the extra --local flag.
# NOTE(review): --local presumably selects locally stored weights instead of
# remotely hosted ones — confirm in src/models/inference.
.PHONY: inference_distilbert inference_bert inference_deberta \
        inference_local_distilbert inference_local_bert inference_local_deberta

inference_distilbert:
	python -m src.models.inference --model distilbert

inference_bert:
	python -m src.models.inference --model bert

inference_deberta:
	python -m src.models.inference --model deberta

inference_local_distilbert:
	python -m src.models.inference --model distilbert --local

inference_local_bert:
	python -m src.models.inference --model bert --local

inference_local_deberta:
	python -m src.models.inference --model deberta --local
# --- Tests -------------------------------------------------------------------
# All targets run pytest in verbose mode (-v) through `python -m pytest`.
# Declared phony so they always run when requested.
.PHONY: test-api test-dataset test-pipelines test-all

# Run only tests/test_api.py.
test-api:
	python -m pytest tests/test_api.py -v

# Run only tests/test_dataset.py.
test-dataset:
	python -m pytest tests/test_dataset.py -v

# Run only tests/test_pipelines.py.
test-pipelines:
	python -m pytest tests/test_pipelines.py -v

# Run everything pytest collects under tests/. This is a single pytest
# invocation, not an aggregate of the three targets above.
test-all:
	python -m pytest tests/ -v
# --- Cleanup -----------------------------------------------------------------
# Phony so a file or directory named `clean` cannot disable the target.
.PHONY: clean

# Remove the per-model checkpoint directories and checkpoints/plots.
# Only these four paths are deleted; data/ is left untouched. The paths are
# literals, so there is no risk of an empty variable widening the `rm -rf`.
clean:
	rm -rf checkpoints/distilbert checkpoints/bert checkpoints/deberta checkpoints/plots
# --- Application -------------------------------------------------------------
# Both targets start long-running foreground processes; they are commands,
# not files, so they are declared phony.
.PHONY: run-be run-fe

# Backend: serve the `app` object from module `main` with uvicorn on port 8000.
# Host 0.0.0.0 binds to all network interfaces, so the server is reachable
# from other machines, not only from localhost.
run-be:
	uvicorn main:app --host 0.0.0.0 --port 8000

# Frontend: launch the Streamlit application defined in app.py.
run-fe:
	streamlit run app.py