# Task runner for the project: environment setup, dataset download and
# preparation, model training/export/inference, tests, and local servers.
#
# Every target here is a command, not a file, so each one is declared .PHONY.
# Without that, a file or directory that happens to share a target's name
# would make `make <target>` report "up to date" and do nothing.

# ---------------------------------------------------------------------------
# Environment setup
# ---------------------------------------------------------------------------
.PHONY: install install-runpod

# Install the Python dependencies listed in requirements.txt.
# This is the first target in the file, so a bare `make` runs it.
install:
	pip install -r requirements.txt

# Same as `install`, but first reinstalls `blinker` with --ignore-installed.
# NOTE(review): presumably works around a pre-installed blinker on RunPod
# images that pip cannot uninstall -- confirm.
install-runpod:
	pip install --ignore-installed blinker && pip install -r requirements.txt

# ---------------------------------------------------------------------------
# Data archive (download / package)
# ---------------------------------------------------------------------------

# ID handed to `gdown` by download_data (presumably a Google Drive file ID).
# `?=` keeps any value already supplied by the environment or on the command
# line, e.g. `make download_data DATA_GDRIVE_ID=<other-id>`.
DATA_GDRIVE_ID ?= 1e0ld3FXNYJUQFre0fwgQOUTnTaO_ONnJ

.PHONY: download_data zip_data

# Fetch data.zip with gdown, unpack it in place (overwriting existing files),
# then delete the archive.
download_data:
	gdown $(DATA_GDRIVE_ID) -O data.zip
	unzip -o data.zip
	rm data.zip

# Pack the data/ directory recursively into data.zip.
zip_data:
	zip -r data.zip data/

# ---------------------------------------------------------------------------
# Raw dataset download
# ---------------------------------------------------------------------------
.PHONY: download_tedseg download_pubmed download_wikipedia

download_tedseg:
	python -m src.datasets.download_tedseg

# NOTE(review): the meaning of `--k 100` is defined by the download scripts,
# not visible here -- presumably a document count; confirm.
download_pubmed:
	python -m src.datasets.download_pubmed --k 100

download_wikipedia:
	python -m src.datasets.download_wikipedia --k 100

# ---------------------------------------------------------------------------
# Preprocessing (raw data -> *_data.jsonl)
# ---------------------------------------------------------------------------
.PHONY: preprocess_all preprocess_pubmed preprocess_wikipedia \
        preprocess_gutenberg preprocess_ted create_recipes

# Run every per-dataset preprocess target below.
preprocess_all: preprocess_pubmed preprocess_wikipedia preprocess_gutenberg preprocess_ted

preprocess_pubmed:
	python -m src.datasets.preprocess pubmed --input data/pubmed/raw/pubmed_raw.jsonl --output data/pubmed/pubmed_data.jsonl

preprocess_wikipedia:
	python -m src.datasets.preprocess wikipedia --input data/wikipedia/raw/wikipedia_raw.jsonl --output data/wikipedia/wikipedia_data.jsonl

# Gutenberg takes a raw directory (--raw_dir) rather than a single input file.
preprocess_gutenberg:
	python -m src.datasets.preprocess gutenberg --raw_dir data/gutenberg/raw --output data/gutenberg/gutenberg_data.jsonl

# TED works directory-to-directory (--input_dir / --output_dir).
preprocess_ted:
	python -m src.datasets.preprocess ted --input_dir data/tedseg/raw --output_dir data/tedseg

# Not part of preprocess_all; run explicitly.
create_recipes:
	python -m src.datasets.create_recipes_dataset

# ---------------------------------------------------------------------------
# Sentence splitting (*_data.jsonl -> *_sentences.jsonl)
# All splitter invocations pass `--device cuda`.
# ---------------------------------------------------------------------------
.PHONY: sentence_split_all sentence_split_gutenberg sentence_split_ted \
        sentence_split_pubmed sentence_split_wikipedia sentence_split_recipes

# NOTE(review): sentence_split_ted is not included in this aggregate --
# confirm that is intentional.
sentence_split_all: sentence_split_gutenberg sentence_split_pubmed sentence_split_wikipedia sentence_split_recipes

sentence_split_gutenberg:
	python -m src.datasets.sentence_splitter gutenberg --input data/gutenberg/gutenberg_data.jsonl --output data/gutenberg/gutenberg_sentences.jsonl --device cuda

# Reads from the raw TED directory (data/tedseg/raw), the same input that
# preprocess_ted uses.
sentence_split_ted:
	python -m src.datasets.sentence_splitter ted --input_dir data/tedseg/raw --output_dir data/tedseg --device cuda

sentence_split_pubmed:
	python -m src.datasets.sentence_splitter pubmed --input data/pubmed/pubmed_data.jsonl --output data/pubmed/pubmed_sentences.jsonl --device cuda

sentence_split_wikipedia:
	python -m src.datasets.sentence_splitter wikipedia --input data/wikipedia/wikipedia_data.jsonl --output data/wikipedia/wikipedia_sentences.jsonl --device cuda

sentence_split_recipes:
	python -m src.datasets.sentence_splitter recipes --input data/recipes/recipes_data.jsonl --output data/recipes/recipes_sentences.jsonl --device cuda

# ---------------------------------------------------------------------------
# Pair building
# ---------------------------------------------------------------------------
.PHONY: build_recipes_pairs

build_recipes_pairs:
	python -m src.datasets.build_pairs recipes

# ---------------------------------------------------------------------------
# Training (output goes to checkpoints/<model>)
# ---------------------------------------------------------------------------
.PHONY: train_all train_distilbert train_bert train_deberta

train_distilbert:
	python -m src.models.train --model distilbert --out checkpoints/distilbert

train_bert:
	python -m src.models.train --model bert --out checkpoints/bert

train_deberta:
	python -m src.models.train --model deberta --out checkpoints/deberta

train_all: train_distilbert train_bert train_deberta

# ---------------------------------------------------------------------------
# Export / upload / download
# All three groups call src.models.export_and_download with different flags.
# ---------------------------------------------------------------------------
.PHONY: export_all export_distilbert export_bert export_deberta

# Run the export module on the `best` checkpoint of each model.
export_distilbert:
	python -m src.models.export_and_download --checkpoint checkpoints/distilbert/best

export_bert:
	python -m src.models.export_and_download --checkpoint checkpoints/bert/best

export_deberta:
	python -m src.models.export_and_download --checkpoint checkpoints/deberta/best

export_all: export_distilbert export_bert export_deberta

.PHONY: upload_all upload_distilbert upload_bert upload_deberta

# Same invocation as export_*, plus a --repo argument.
# NOTE(review): presumably --repo names the remote model repository to push
# to; confirm against src/models/export_and_download.
upload_distilbert:
	python -m src.models.export_and_download --checkpoint checkpoints/distilbert/best --repo slamos/bc-models-distilbert

upload_bert:
	python -m src.models.export_and_download --checkpoint checkpoints/bert/best --repo slamos/bc-models-bert

upload_deberta:
	python -m src.models.export_and_download --checkpoint checkpoints/deberta/best --repo slamos/bc-models-deberta

upload_all: upload_distilbert upload_bert upload_deberta

.PHONY: download_all download_distilbert download_bert download_deberta

# Invoke the module with --download <model>.
download_distilbert:
	python -m src.models.export_and_download --download distilbert

download_bert:
	python -m src.models.export_and_download --download bert

download_deberta:
	python -m src.models.export_and_download --download deberta

download_all: download_distilbert download_bert download_deberta

# ---------------------------------------------------------------------------
# Inference
# The inference_local_* variants add the --local flag.
# NOTE(review): presumably --local selects a checkpoint on disk instead of a
# downloaded one; confirm against src/models/inference.
# ---------------------------------------------------------------------------
.PHONY: inference_distilbert inference_bert inference_deberta \
        inference_local_distilbert inference_local_bert inference_local_deberta

inference_distilbert:
	python -m src.models.inference --model distilbert

inference_bert:
	python -m src.models.inference --model bert

inference_deberta:
	python -m src.models.inference --model deberta

inference_local_distilbert:
	python -m src.models.inference --model distilbert --local

inference_local_bert:
	python -m src.models.inference --model bert --local

inference_local_deberta:
	python -m src.models.inference --model deberta --local

# ---------------------------------------------------------------------------
# Tests (pytest, verbose)
# ---------------------------------------------------------------------------
.PHONY: test-api test-dataset test-pipelines test-all

test-api:
	python -m pytest tests/test_api.py -v

test-dataset:
	python -m pytest tests/test_dataset.py -v

test-pipelines:
	python -m pytest tests/test_pipelines.py -v

# Runs everything under tests/, not just the three files above.
test-all:
	python -m pytest tests/ -v

# ---------------------------------------------------------------------------
# Housekeeping
# ---------------------------------------------------------------------------
.PHONY: clean

# Remove the per-model checkpoint directories and checkpoints/plots.
# Nothing under data/ is touched.
clean:
	rm -rf checkpoints/distilbert checkpoints/bert checkpoints/deberta checkpoints/plots

# ---------------------------------------------------------------------------
# Local servers
# ---------------------------------------------------------------------------
.PHONY: run-be run-fe

# Serve `main:app` with uvicorn on all interfaces (0.0.0.0), port 8000.
run-be:
	uvicorn main:app --host 0.0.0.0 --port 8000

# Launch the Streamlit app defined in app.py.
run-fe:
	streamlit run app.py