jaothan committed on
Commit
6f20934
·
verified ·
1 Parent(s): 13735ca

Upload 24 files

Browse files
api/Dockerfile ADDED
@@ -0,0 +1 @@
 
 
1
+ FROM python:3.8-slimWORKDIR /appCOPY requirements.txt requirements.txtRUN pip install -r requirements.txtCOPY . .CMD ["python", "app.py"]
api/app.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from flask import Flask, jsonify, requestimport mlflow.pyfuncapp = Flask(__name__)# Load the model as a PyFuncModel.model = mlflow.pyfunc.load_model(model_uri="models:/deployed_model/1")@app.route('/predict', methods=['POST'])def predict(): data = request.get_json() predictions = model.predict(data) return jsonify(predictions.tolist())if __name__ == '__main__': app.run(host='0.0.0.0', port=5000)
api/requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
flask
mlflow
config/config.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ database: uri: postgresql://username:password@localhost:5432/dbnamestorage: path: /data/storagemodel: save_path: /data/modelslogging: level: INFO file: /logs/app.log
data/datasets.py ADDED
@@ -0,0 +1 @@
 
 
1
+ import pandas as pddef load_dataset(file_path): """ Load dataset from a CSV file. """ return pd.read_csv(file_path)def preprocess_data(df): """ Preprocess the dataset. """ # Example preprocessing steps df = df.dropna() df = df[df['column_name'] > 0] return df
docker-compose.yml ADDED
@@ -0,0 +1 @@
 
 
1
+ version: '3.7' services: minio: image: minio/minio container_name: minio environment: MINIO_ROOT_USER: minioadmin MINIO_ROOT_PASSWORD: minioadmin volumes: - ./minio/data:/data ports: - "9000:9000" command: server /data postgres: image: postgres:13 container_name: postgres environment: POSTGRES_USER: youruser POSTGRES_PASSWORD: yourpassword POSTGRES_DB: yourdb volumes: - ./postgres/data:/var/lib/postgresql/data ports: - "5432:5432" keycloak: image: quay.io/keycloak/keycloak:latest container_name: keycloak environment: KEYCLOAK_USER: admin KEYCLOAK_PASSWORD: admin ports: - "8080:8080" command: start-dev vault: image: vault container_name: vault environment: VAULT_DEV_ROOT_TOKEN_ID: root ports: - "8200:8200" command: server -dev nifi: image: apache/nifi:latest container_name: nifi ports: - "8081:8080" environment: NIFI_WEB_HTTP_PORT: 8080 spark: image: bitnami/spark:latest container_name: spark environment: SPARK_MODE: master ports: - "7077:7077" - "8082:8080" mlflow: image: jupyter/mlflow container_name: mlflow ports: - "5000:5000" environment: MLFLOW_TRACKING_URI: http://localhost:5000 volumes: - ./mlruns:/mlflow/mlruns command: mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root /mlflow/mlruns flask-api: build: ./api container_name: flask-api ports: - "5001:5001" environment: MODEL_URI: 'models:/deployed_model/1'
evaluations/evaluate_models.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from sklearn.metrics import accuracy_score, f1_scoredef evaluate_model(model, X_test, y_test): """ Evaluate the model's performance on the test set. """ predictions = model.predict(X_test) accuracy = accuracy_score(y_test, predictions) f1 = f1_score(y_test, predictions, average='weighted') return {'accuracy': accuracy, 'f1_score': f1}
evaluations/prompt_engineering.py ADDED
@@ -0,0 +1 @@
 
 
1
+ def generate_prompt(data): """ Generate prompts for the model based on the input data. """ prompts = [] for item in data: prompt = f"Generate a response for the following input: {item}" prompts.append(prompt) return prompts
models/full_finetune_model.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from transformers import AutoModelForSequenceClassification, Trainer, TrainingArgumentsdef train_full_finetune_model(train_dataset, val_dataset): """ Train a model with full fine-tuning. """ model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased") training_args = TrainingArguments( output_dir='./results', num_train_epochs=3, per_device_train_batch_size=16, per_device_eval_batch_size=16, evaluation_strategy="epoch", save_tota
models/lora_model.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from transformers import AutoModelForSequenceClassification, Trainer, TrainingArgumentsdef train_lora_model(train_dataset, val_dataset): """ Train a LoRA model. """ model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased") training_args = TrainingArguments( output_dir='./results', num_train_epochs=3, per_device_train_batch_size=16, per_device_eval_batch_size=16, evaluation_strategy="epoch", save_total_limit=1 ) trainer
models/pert_model.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from transformers import AutoModelForSequenceClassification, Trainer, TrainingArgumentsdef train_pert_model(train_dataset, val_dataset): """ Train a PERT model. """ model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased") training_args = TrainingArguments( output_dir='./results', num_train_epochs=3, per_device_train_batch_size=16, per_device_eval_batch_size=16, evaluation_strategy="epoch", save_total_limit=1 ) trainer
models/student_model.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from transformers import AutoModelForSequenceClassification, Trainer, TrainingArgumentsdef train_student_model(train_dataset, val_dataset): """ Train a student model. """ model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased") training_args = TrainingArguments( output_dir='./results', num_train_epochs=3, per_device_train_batch_size=16, per_device_eval_batch_size=16, evaluation_strategy="epoch", save_total_limit=1 )
nomad/jobs/service.nomad ADDED
@@ -0,0 +1 @@
 
 
1
+ job "example-service" { datacenters = ["dc1"] group "example-group" { count = 1 task "example-task" { driver = "docker" config { image = "example/image:latest" ports = ["http"] } resources { cpu = 500 memory = 256 } service { name = "example-service" port = "http" check { name = "alive" type = "http" path = "/health" interval = "10s" timeout = "2s" } } env { EXAMPLE_ENV_VAR = "value" } volume_mount { volume = "data" destination = "/mnt/data" } } } volume "data" { type = "host" read_only = false source = "path/to/data" } }
nomad/jobs/service_autoscale.nomad ADDED
@@ -0,0 +1 @@
 
 
1
+ job "example-service-autoscale" { datacenters = ["dc1"] group "example-group" { count = 1 scaling { min = 1 max = 10 policy { cooldown = "1m" check "cpu" { source = "nomad" metric = "cpu.percent" target = 75 type = "horizontal" direction = "increase" value = 1 } check "cpu" { source = "nomad" metric = "cpu.percent" target = 25 type = "horizontal" direction = "decrease" value = 1 } } } task "example-task" { driver = "docker" config { image = "example/image:latest" ports = ["http"] } resources { cpu = 500 memory = 256 } service { name = "example-service" port = "http" check { name = "alive" type = "http" path = "/health" interval = "10s" timeout = "2s" } } env { EXAMPLE_ENV_VAR = "value" } volume_mount { volume = "data" destination = "/mnt/data" } } } volume "data" { type = "host" read_only = false source = "path/to/data" } }
scripts/deploy_model.py ADDED
@@ -0,0 +1 @@
 
 
1
+ import mlflow.pyfuncdef deploy_model(model, model_path): """ Deploy the model using MLflow. """ # Save the model mlflow.pyfunc.save_model(model_path=model_path, python_model=model) # Deploy the model to a serving endpoint mlflow.pyfunc.log_model(artifact_path=model_path, python_model=model) print(f"Model deployed at {model_path}")# Example usagetrained_model = train_full_finetune_model(train_data, val_data)deploy_model(trained_model, 'models/deployed_model')
scripts/distill_student.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from data.datasets import load_dataset, preprocess_datafrom models.student_model import train_student_model# Load and preprocess datatrain_data = preprocess_data(load_dataset('data/train.csv'))val_data = preprocess_data(load_dataset('data/val.csv'))# Train the student modelmodel = train_student_model(train_data, val_data)
scripts/evaluate.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from data.datasets import load_dataset, preprocess_datafrom models.full_finetune_model import train_full_finetune_modelfrom evaluations.evaluate_models import evaluate_model# Load and preprocess datatrain_data = preprocess_data(load_dataset('data/train.csv'))val_data = preprocess_data(load_dataset('data/val.csv'))# Train the modelmodel = train_full_finetune_model(train_data, val_data)# Evaluate the modelresults = evaluate_model(model, val_data)print(f"Accuracy: {results['accuracy']}, F1 Score: {results['f
scripts/train_full_finetune.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from data.datasets import load_dataset, preprocess_datafrom models.full_finetune_model import train_full_finetune_model# Load and preprocess datatrain_data = preprocess_data(load_dataset('data/train.csv'))val_data = preprocess_data(load_dataset('data/val.csv'))# Train the modelmodel = train_full_finetune_model(train_data, val_data)
scripts/train_lora.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from data.datasets import load_dataset, preprocess_datafrom models.lora_model import train_lora_model# Load and preprocess datatrain_data = preprocess_data(load_dataset('data/train.csv'))val_data = preprocess_data(load_dataset('data/val.csv'))# Train the modelmodel = train_lora_model(train_data, val_data)
scripts/train_pert.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from data.datasets import load_dataset, preprocess_datafrom models.pert_model import train_pert_model# Load and preprocess datatrain_data = preprocess_data(load_dataset('data/train.csv'))val_data = preprocess_data(load_dataset('data/val.csv'))# Train the modelmodel = train_pert_model(train_data, val_data)
tests/test_datasets.py ADDED
@@ -0,0 +1 @@
 
 
1
+ import unittestfrom data.datasets import load_dataset, preprocess_dataclass TestDatasets(unittest.TestCase): def test_load_dataset(self): df = load_dataset('data/test.csv') self.assertIsNotNone(df) def test_preprocess_data(self): df = load_dataset('data/test.csv') preprocessed_df = preprocess_data(df) self.assertFalse(preprocessed_df.isnull().values.any())if __name__ == '__main__': unittest.main()
tests/test_metrics.py ADDED
@@ -0,0 +1 @@
 
 
1
+ import unittestfrom evaluations.evaluate_models import evaluate_modelclass TestMetrics(unittest.TestCase): def test_evaluate_model(self): model = train_full_finetune_model(train_data, val_data) results = evaluate_model(model, val_data) self.assertIn('accuracy', results) self.assertIn('f1_score', results)if __name__ == '__main__': unittest.main()
tests/test_models.py ADDED
@@ -0,0 +1 @@
 
 
1
+ import unittestfrom models.full_finetune_model import train_full_finetune_modelclass TestModels(unittest.TestCase): def test_train_full_finetune_model(self): train_data = preprocess_data(load_dataset('data/train.csv')) val_data = preprocess_data(load_dataset('data/val.csv')) model = train_full_finetune_model(train_data, val_data) self.assertIsNotNone(model)if __name__ == '__main__': unittest.main()
utils/monitor.py ADDED
@@ -0,0 +1 @@
 
 
1
+ import psutildef monitor_resources(): """ Monitor system resources such as CPU, memory, and disk usage. """ cpu_usage = psutil.cpu_percent(interval=1) memory_usage = psutil.virtual_memory().percent disk_usage = psutil.disk_usage('/').percent print(f"CPU Usage: {cpu_usage}%, Memory Usage: {memory_usage}%, Disk Usage: {disk_usage}%")# Example usagemonitor_resources()