---
# Pipeline configuration for the coffee-quality model:
# data locations, feature columns, training parameters, and serving endpoint.
# NOTE(review): this file had been collapsed onto a single line (invalid YAML);
# block structure below is reconstructed from the key names — top-level nesting
# of train/paths/artifacts/api_url should be confirmed against the consuming
# scripts.

data:
  # Remote source takes precedence; the ingest script falls back to local_path
  # when url is left empty.
  url: "https://storage.googleapis.com/coffee-quality-data/preprocessed_data.csv"
  local_path: "data/raw/raw_data.csv"
  preprocessed_path: "data/preprocessed/preprocessed_data.csv"
  target: "Total.Cup.Points"
  input_columns:
    - Number.of.Bags
    - Category.One.Defects
    - Category.Two.Defects
    - Aroma
    - Flavor
    - Aftertaste
    - Acidity
    - Body
    - Balance
    - Uniformity
    - Clean.Cup
    - Sweetness
    - Cupper.Points
    - Moisture
    - Quakers
    - altitude_low_meters
    - altitude_high_meters
    - altitude_mean_meters
    - Species
    - Owner
    - Country.of.Origin
    - Mill
    - ICO.Number
    - Company
    - Altitude
    - Region
    - Producer
    - Bag.Weight
    - In.Country.Partner
    - Harvest.Year
    - Grading.Date
    - Owner.1
    - Variety
    - Processing.Method
    - Color
    - Expiration
    - Certification.Body
    - Certification.Address
    - Certification.Contact
    - unit_of_measurement

# Split and model hyperparameters consumed by train.py.
train:
  test_size: 0.2
  random_state: 42
  model_params:
    n_estimators: 100
    random_state: 42
    n_jobs: -1

# Cleaned train/test split outputs.
paths:
  X_train: "data/cleaned/X_train.csv"
  X_test: "data/cleaned/X_test.csv"
  y_train: "data/cleaned/y_train.csv"
  y_test: "data/cleaned/y_test.csv"

# Serialized training outputs.
artifacts:
  model: "artifacts/model.joblib"
  preprocessor: "artifacts/preprocessor.joblib"
  metrics: "artifacts/metrics.json"

# (Provenance note from original file: snippet generated by chatGPT 5.1,
# 10:20p on 11/20/25.)

api_url:
  # Local development endpoint — uncomment to run against a local server:
  # FastAPI: "http://127.0.0.1:8000/predict_named"
  FastAPI: "https://coffee-api-354131048216.us-central1.run.app/predict_named"