# Dependency installation. Neither target produces a file of its own name, so
# both are declared phony and run on every invocation.
.PHONY: install install-runpod

# Install the Python packages listed in requirements.txt.
install:
	pip install -r requirements.txt

# Same as `install`, but first reinstall blinker with --ignore-installed.
# NOTE(review): presumably works around a pre-installed blinker on RunPod
# images that pip cannot uninstall -- confirm.
install-runpod:
	pip install --ignore-installed blinker && pip install -r requirements.txt

# Identifier handed to gdown by download_data. `?=` keeps any value already
# supplied through the environment or the command line.
DATA_GDRIVE_ID ?= 1e0ld3FXNYJUQFre0fwgQOUTnTaO_ONnJ

# Command targets, not files: declare them phony so a stray file with the same
# name cannot make them look up to date.
.PHONY: download_data zip_data

# Fetch the archive as data.zip, unpack it in place (unzip -o overwrites
# existing files without prompting) and delete the archive afterwards.
download_data:
	gdown $(DATA_GDRIVE_ID) -O data.zip
	unzip -o data.zip
	rm data.zip

# Pack the data/ directory recursively into data.zip.
zip_data:
	zip -r data.zip data/

# Value passed as --k to the PubMed and Wikipedia downloaders. Defaults to the
# previously hard-coded 100; override with `make download_pubmed DOWNLOAD_K=...`.
# NOTE(review): presumably the number of documents to fetch -- confirm against
# the downloader scripts.
DOWNLOAD_K ?= 100

# Command targets, not files.
.PHONY: download_tedseg download_pubmed download_wikipedia

# Run the TED segmentation dataset downloader module.
download_tedseg:
	python -m src.datasets.download_tedseg

# Run the PubMed downloader module with --k $(DOWNLOAD_K).
download_pubmed:
	python -m src.datasets.download_pubmed --k $(DOWNLOAD_K)

# Run the Wikipedia downloader module with --k $(DOWNLOAD_K).
download_wikipedia:
	python -m src.datasets.download_wikipedia --k $(DOWNLOAD_K)

# Preprocessing targets. All of them are commands rather than files, so they
# are declared phony and always run when requested.
.PHONY: preprocess_all preprocess_pubmed preprocess_wikipedia preprocess_gutenberg preprocess_ted

# Aggregate: run every per-corpus preprocessing step.
preprocess_all: preprocess_pubmed preprocess_wikipedia preprocess_gutenberg preprocess_ted

# PubMed: raw JSONL in, preprocessed JSONL out.
preprocess_pubmed:
	python -m src.datasets.preprocess pubmed --input data/pubmed/raw/pubmed_raw.jsonl --output data/pubmed/pubmed_data.jsonl

# Wikipedia: raw JSONL in, preprocessed JSONL out.
preprocess_wikipedia:
	python -m src.datasets.preprocess wikipedia --input data/wikipedia/raw/wikipedia_raw.jsonl --output data/wikipedia/wikipedia_data.jsonl

# Gutenberg: takes a raw directory (--raw_dir) instead of a single input file.
preprocess_gutenberg:
	python -m src.datasets.preprocess gutenberg --raw_dir data/gutenberg/raw --output data/gutenberg/gutenberg_data.jsonl

# TED: directory in (--input_dir), directory out (--output_dir).
preprocess_ted:
	python -m src.datasets.preprocess ted --input_dir data/tedseg/raw --output_dir data/tedseg

# Command target, not a file.
.PHONY: create_recipes

# Run the recipes dataset creation module.
create_recipes:
	python -m src.datasets.create_recipes_dataset

# Value passed to the sentence splitter's --device option. Defaults to the
# previously hard-coded cuda; override with `make ... SPLIT_DEVICE=<device>`.
SPLIT_DEVICE ?= cuda

# Targets whose splitter call follows the same layout:
#   data/<corpus>/<corpus>_data.jsonl -> data/<corpus>/<corpus>_sentences.jsonl
sentence_split_file_targets := sentence_split_gutenberg sentence_split_pubmed sentence_split_wikipedia sentence_split_recipes

# Command targets, not files.
.PHONY: sentence_split_all sentence_split_ted $(sentence_split_file_targets)

# Aggregate over the file-based corpora.
# NOTE(review): sentence_split_ted is not part of this aggregate, matching the
# original rule -- confirm whether that omission is intentional.
sentence_split_all: $(sentence_split_file_targets)

# Static pattern rule: $* is the corpus name taken from the target name.
$(sentence_split_file_targets): sentence_split_%:
	python -m src.datasets.sentence_splitter $* --input data/$*/$*_data.jsonl --output data/$*/$*_sentences.jsonl --device $(SPLIT_DEVICE)

# TED works on directories (--input_dir / --output_dir) rather than one file.
sentence_split_ted:
	python -m src.datasets.sentence_splitter ted --input_dir data/tedseg/raw --output_dir data/tedseg --device $(SPLIT_DEVICE)

# Command target, not a file.
.PHONY: build_recipes_pairs

# Run the pair builder module for the recipes corpus.
build_recipes_pairs:
	python -m src.datasets.build_pairs recipes

# One training target per model; the model name is the part after `train_`.
train_targets := train_distilbert train_bert train_deberta

# Command targets, not files. A static pattern rule is used below because
# phony targets are skipped by implicit (plain pattern) rule search.
.PHONY: train_all $(train_targets)

# Aggregate: train every model. Under `make -j` the trainings may run
# concurrently, since they do not depend on each other.
train_all: $(train_targets)

# $* is the model name; checkpoints are written under checkpoints/<model>.
$(train_targets): train_%:
	python -m src.models.train --model $* --out checkpoints/$*

# One export target per model; the model name is the part after `export_`.
export_targets := export_distilbert export_bert export_deberta

# Command targets, not files. A static pattern rule is used below because
# phony targets are skipped by implicit (plain pattern) rule search.
.PHONY: export_all $(export_targets)

# Aggregate: export every model.
export_all: $(export_targets)

# $* is the model name; the module is pointed at checkpoints/<model>/best.
$(export_targets): export_%:
	python -m src.models.export_and_download --checkpoint checkpoints/$*/best

# One upload target per model; the model name is the part after `upload_`.
upload_targets := upload_distilbert upload_bert upload_deberta

# Command targets, not files. A static pattern rule is used below because
# phony targets are skipped by implicit (plain pattern) rule search.
.PHONY: upload_all $(upload_targets)

# Aggregate: upload every model.
upload_all: $(upload_targets)

# $* is the model name. Passes checkpoints/<model>/best together with
# --repo slamos/bc-models-<model>.
# NOTE(review): --repo is presumably the remote model repository to push to --
# confirm against export_and_download.
$(upload_targets): upload_%:
	python -m src.models.export_and_download --checkpoint checkpoints/$*/best --repo slamos/bc-models-$*

# One model download target per model; the model name is the part after
# `download_`.
model_download_targets := download_distilbert download_bert download_deberta

# Command targets, not files. A static pattern rule is used below because
# phony targets are skipped by implicit (plain pattern) rule search.
.PHONY: download_all $(model_download_targets)

# Aggregate: download every model (models only; no dataset target is a
# prerequisite here).
download_all: $(model_download_targets)

# $* is the model name handed to --download.
$(model_download_targets): download_%:
	python -m src.models.export_and_download --download $*

# Inference targets come in two flavours per model: plain and --local.
inference_targets := inference_distilbert inference_bert inference_deberta
inference_local_targets := inference_local_distilbert inference_local_bert inference_local_deberta

# Command targets, not files. Static pattern rules are used below because
# phony targets are skipped by implicit (plain pattern) rule search.
.PHONY: $(inference_targets) $(inference_local_targets)

# $* is the model name taken from the part after `inference_`.
$(inference_targets): inference_%:
	python -m src.models.inference --model $*

# Same call with --local added; $* is the part after `inference_local_`.
# NOTE(review): --local presumably selects locally stored weights -- confirm
# against src.models.inference.
$(inference_local_targets): inference_local_%:
	python -m src.models.inference --model $* --local

# Test targets are commands, not files; declare them phony so they always run.
.PHONY: test-api test-dataset test-pipelines test-all

# Run only tests/test_api.py, verbosely.
test-api:
	python -m pytest tests/test_api.py -v

# Run only tests/test_dataset.py, verbosely.
test-dataset:
	python -m pytest tests/test_dataset.py -v

# Run only tests/test_pipelines.py, verbosely.
test-pipelines:
	python -m pytest tests/test_pipelines.py -v

# Run everything pytest collects under tests/, verbosely.
test-all:
	python -m pytest tests/ -v

# Without .PHONY, a file or directory named `clean` would make this target
# look up to date and the recipe would never run.
.PHONY: clean

# Remove the per-model checkpoint directories and checkpoints/plots. Paths are
# fixed literals, so the recursive delete cannot expand to an empty or
# unintended location.
clean:
	rm -rf checkpoints/distilbert checkpoints/bert checkpoints/deberta checkpoints/plots

# Address and port handed to uvicorn by run-be. The defaults are the previously
# hard-coded values; override with e.g. `make run-be BE_PORT=9000`.
BE_HOST ?= 0.0.0.0
BE_PORT ?= 8000

# Command targets, not files.
.PHONY: run-be run-fe

# Serve the `app` object from module `main` with uvicorn.
run-be:
	uvicorn main:app --host $(BE_HOST) --port $(BE_PORT)

# Launch app.py through the Streamlit runner.
run-fe:
	streamlit run app.py
