
eval

The XGBoost evaluation logic.

kelp.xgb.training.eval.EvalConfig

Bases: PredictConfig

The config for running XGBoost model evaluation.

Source code in kelp/xgb/training/eval.py (lines 33-39)
class EvalConfig(PredictConfig):
    """The config for running XGBoost model evaluation."""

    metadata_fp: Path
    eval_split: int = 8
    experiment_name: str = "model-eval-exp"
    decision_threshold: float = 0.5
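
The eval_split field selects which cross-validation split column of the metadata parquet is used: main() below keeps only the rows where split_{eval_split} equals "val". A minimal sketch of that filtering, using a hypothetical metadata file path:

import pandas as pd

eval_split = 8  # EvalConfig.eval_split default
metadata = pd.read_parquet("metadata.parquet")  # hypothetical path passed as --metadata_fp
val_metadata = metadata[metadata[f"split_{eval_split}"] == "val"]  # rows used for evaluation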

kelp.xgb.training.eval.eval

Runs evaluation using data from the specified directory.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| model | XGBClassifier | The XGBoost model. | required |
| data_dir | Path | The data directory. | required |
| metadata | DataFrame | The metadata dataframe. | required |
| spectral_indices | List[str] | The spectral indices to append to the input image. | required |
| prefix | str | The prefix for logged metrics. | required |
| decision_threshold | float | The decision threshold. | 0.5 |
Source code in kelp/xgb/training/eval.py (lines 61-119)
def eval(
    model: XGBClassifier,
    data_dir: Path,
    metadata: pd.DataFrame,
    spectral_indices: List[str],
    prefix: str,
    decision_threshold: float = 0.5,
) -> None:
    """
    Runs evaluation using data from the specified directory.

    Args:
        model: The XGBoost model.
        data_dir: The data directory.
        metadata: The metadata dataframe.
        spectral_indices: The spectral indices to append to the input image.
        prefix: The prefix for logged metrics.
        decision_threshold: The decision threshold.

    """
    tile_ids = metadata["tile_id"].tolist()
    metrics = MetricCollection(
        metrics={
            "dice": Dice(num_classes=2, average="macro"),
            "iou": JaccardIndex(task="binary"),
            "accuracy": Accuracy(task="binary"),
            "recall": Recall(task="binary", average="macro"),
            "precision": Precision(task="binary", average="macro"),
            "f1": F1Score(task="binary", average="macro"),
            "auroc": AUROC(task="binary"),
        },
        prefix=f"{prefix}/",
    ).to(DEVICE)
    transforms = build_append_index_transforms(spectral_indices)

    for tile in tqdm(tile_ids, desc="Running evaluation on images"):
        with rasterio.open(data_dir / "images" / f"{tile}_satellite.tif") as src:
            input_arr = src.read()
        with rasterio.open(data_dir / "masks" / f"{tile}_kelp.tif") as src:
            y_true = src.read(1)
        y_pred = predict_on_single_image(
            model=model,
            x=input_arr,
            transforms=transforms,
            columns=list(consts.data.ORIGINAL_BANDS) + spectral_indices,
            decision_threshold=decision_threshold,
        )
        metrics(
            torch.tensor(y_pred, device=DEVICE, dtype=torch.int32),
            torch.tensor(y_true, device=DEVICE, dtype=torch.int32),
        )

    metrics_dict = metrics.compute()

    for name, value in metrics_dict.items():
        metrics_dict[name] = value.item()

    mlflow.log_metrics(metrics_dict)
    _logger.info(f"{prefix.upper()} metrics: {json.dumps(metrics_dict, indent=4)}")
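
A minimal sketch of calling eval directly on a held-out directory; the model file name, directory layout, and spectral index names below are assumptions, and the metadata frame must contain a tile_id column matching the files under data_dir/images and data_dir/masks:

from pathlib import Path

import pandas as pd
from xgboost import XGBClassifier

from kelp.xgb.training.eval import eval

model = XGBClassifier()
model.load_model("model.json")  # hypothetical path to a previously trained model

metadata = pd.read_parquet("metadata.parquet")  # must contain a "tile_id" column

eval(
    model=model,
    data_dir=Path("data/eval"),  # expects images/<tile_id>_satellite.tif and masks/<tile_id>_kelp.tif
    metadata=metadata,
    spectral_indices=["NDVI", "NDWI"],  # assumed index names; use the ones the model was trained with
    prefix="val",  # metrics are logged as "val/dice", "val/iou", ...
    decision_threshold=0.5,
)

Metrics are accumulated per tile and logged to MLflow once at the end, so this is best called inside an active MLflow run (run_eval below handles that).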

kelp.xgb.training.eval.main

Main entry point for running XGBoost model evaluation.

Source code in kelp/xgb/training/eval.py (lines 170-183)
def main() -> None:
    """Main entry point for running XGBoost model evaluation."""
    cfg = parse_args()
    metadata = pd.read_parquet(cfg.metadata_fp)
    metadata = metadata[metadata[f"split_{cfg.eval_split}"] == "val"]
    run_eval(
        run_dir=cfg.run_dir,
        data_dir=cfg.data_dir,
        metadata=metadata,
        model_dir=cfg.model_path,
        train_cfg=cfg.training_config,
        experiment_name=cfg.experiment_name,
        decision_threshold=cfg.decision_threshold,
    )
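
main() is intended to be invoked from the command line. A hedged sketch of driving it programmatically by populating sys.argv; only the four flags added in parse_args below are shown, and the base arguments added by build_prediction_arg_parser (run directory, data directory, model path, etc.) are not listed on this page and must be supplied as well:

import sys

from kelp.xgb.training.eval import main

sys.argv = [
    "eval",
    "--metadata_fp", "metadata.parquet",
    "--eval_split", "8",
    "--experiment_name", "model-eval-exp",
    "--decision_threshold", "0.5",
    # ... plus the arguments required by build_prediction_arg_parser (not shown here)
]
main()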

kelp.xgb.training.eval.parse_args

Parse command line arguments.

Returns: An instance of EvalConfig.

Source code in kelp/xgb/training/eval.py (lines 42-58)
def parse_args() -> EvalConfig:
    """
    Parse command line arguments.

    Returns: An instance of :class:`EvalConfig`.

    """
    parser = build_prediction_arg_parser()
    parser.add_argument("--metadata_fp", type=str, required=True)
    parser.add_argument("--eval_split", type=int, default=8)
    parser.add_argument("--experiment_name", type=str, default="model-eval-exp")
    parser.add_argument("--decision_threshold", type=float, default=0.5)
    args = parser.parse_args()
    cfg = EvalConfig(**vars(args))
    cfg.log_self()
    cfg.output_dir.mkdir(exist_ok=True, parents=True)
    return cfg

kelp.xgb.training.eval.run_eval

Runs XGBoost model evaluation and logs metrics to MLflow.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| run_dir | Path | The run directory. | required |
| data_dir | Path | The data directory. | required |
| metadata | DataFrame | The metadata dataframe. | required |
| model_dir | Path | The model directory to load the model from. | required |
| train_cfg | TrainConfig | The original training configuration. | required |
| experiment_name | str | The experiment name. | required |
| decision_threshold | float | The decision threshold. | 0.5 |
Source code in kelp/xgb/training/eval.py (lines 122-167)
def run_eval(
    run_dir: Path,
    data_dir: Path,
    metadata: pd.DataFrame,
    model_dir: Path,
    train_cfg: TrainConfig,
    experiment_name: str,
    decision_threshold: float = 0.5,
) -> None:
    """
    Runs XGBoost model evaluation and logs metrics to MLflow.

    Args:
        run_dir: The run directory.
        data_dir: The data directory.
        metadata: The metadata dataframe.
        model_dir: The model directory to load the model from.
        train_cfg: The original training configuration.
        experiment_name: The experiment name.
        decision_threshold: The decision threshold.

    """
    mlflow.set_experiment(experiment_name)
    mlflow.pytorch.autolog()
    run = mlflow.start_run(run_name=run_dir.parts[-1])

    with run:
        mlflow.log_dict(train_cfg.model_dump(mode="json"), artifact_file="config.yaml")
        mlflow.log_params(train_cfg.model_dump(mode="json"))
        mlflow.log_param("decision_threshold", decision_threshold)
        mlflow.set_tags(
            {
                "evaluated_at": datetime.utcnow().isoformat(),
                "original_run_id": run_dir.parts[-1],
                "original_experiment_id": model_dir.parts[-2],
            }
        )
        model = load_model(model_path=model_dir)
        eval(
            model=model,
            metadata=metadata,
            data_dir=data_dir,
            spectral_indices=train_cfg.spectral_indices,
            prefix="test",
            decision_threshold=decision_threshold,
        )
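
Since run_eval logs everything under the test/ prefix to the configured MLflow experiment, the results can be read back with the standard MLflow client; a minimal sketch, assuming a local tracking store and the default experiment name:

import mlflow

client = mlflow.tracking.MlflowClient()
experiment = client.get_experiment_by_name("model-eval-exp")
runs = client.search_runs(
    experiment_ids=[experiment.experiment_id],
    order_by=["attributes.start_time DESC"],
    max_results=1,
)
print(runs[0].data.metrics)  # e.g. {"test/dice": ..., "test/iou": ..., "test/auroc": ...}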