eval

The segmentation neural network evaluation logic.

kelp.nn.training.eval.EvalConfig

Bases: PredictConfig

Config for running NN model evaluation.

Source code in kelp/nn/training/eval.py
class EvalConfig(PredictConfig):
    """Config for running NN model evaluation."""

    metadata_dir: Path
    experiment_name: str = "model-eval-exp"
    log_model: bool = False

    @property
    def training_config(self) -> TrainConfig:
        cfg = super().training_config
        cfg.metadata_fp = self.metadata_dir / cfg.metadata_fp.name
        return cfg
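
The training_config property rebases the metadata file path recorded in the original training config onto metadata_dir, keeping only the file name. A minimal sketch of that rewrite using plain pathlib; the paths below are hypothetical:

from pathlib import Path

# Hypothetical paths illustrating what EvalConfig.training_config does:
# keep the metadata file name from the training config, but replace its
# parent directory with the metadata_dir supplied at evaluation time.
original_metadata_fp = Path("/runs/train-1/metadata.parquet")  # cfg.metadata_fp from training
metadata_dir = Path("/data/metadata")                          # EvalConfig.metadata_dir

rebased = metadata_dir / original_metadata_fp.name
print(rebased)  # /data/metadata/metadata.parquet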

kelp.nn.training.eval.main

Main entrypoint for model evaluation.

Source code in kelp/nn/training/eval.py
def main() -> None:
    """Main entrypoint for model evaluation."""
    cfg = parse_args()
    run_eval(
        run_dir=cfg.run_dir,
        output_dir=cfg.output_dir,
        model_checkpoint=cfg.model_checkpoint,
        use_mlflow=cfg.use_mlflow,
        train_cfg=cfg.training_config,
        experiment_name=cfg.experiment_name,
        tta=cfg.tta,
        tta_merge_mode=cfg.tta_merge_mode,
        decision_threshold=cfg.decision_threshold,
        log_model=cfg.log_model,
    )
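
A hypothetical end-to-end invocation of this entrypoint from Python. Only --metadata_dir, --experiment_name and --log_model are added by parse_args in this module; the remaining flags are assumed to be exposed by build_prediction_arg_parser under the same names as the corresponding PredictConfig fields, and all paths are placeholders:

import sys
from kelp.nn.training.eval import main

# Simulate a command-line invocation; argparse reads sys.argv[1:].
sys.argv = [
    "eval.py",
    "--metadata_dir", "data/metadata",
    "--run_dir", "mlruns/experiment/run-1",                                   # assumed flag name
    "--output_dir", "outputs/eval",                                           # assumed flag name
    "--model_checkpoint", "mlruns/experiment/run-1/checkpoints/best.ckpt",    # assumed flag name
    "--experiment_name", "model-eval-exp",
]
main()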

kelp.nn.training.eval.parse_args

Parse command line arguments.

Returns: An instance of EvalConfig.

Source code in kelp/nn/training/eval.py
def parse_args() -> EvalConfig:
    """
    Parse command line arguments.

    Returns: An instance of EvalConfig.

    """
    parser = build_prediction_arg_parser()
    parser.add_argument("--metadata_dir", type=str, required=True)
    parser.add_argument("--experiment_name", type=str, default="model-eval-exp")
    parser.add_argument("--log_model", action="store_true")
    args = parser.parse_args()
    cfg = EvalConfig(**vars(args))
    cfg.log_self()
    cfg.output_dir.mkdir(exist_ok=True, parents=True)
    return cfg

kelp.nn.training.eval.run_eval

Runs model evaluation.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| run_dir | Path | The run directory. | required |
| output_dir | Path | The output directory. | required |
| model_checkpoint | Path | The model checkpoint path. | required |
| use_mlflow | bool | A flag indicating whether to use MLflow to load the model. | required |
| train_cfg | TrainConfig | The original training config. | required |
| experiment_name | str | The experiment name. | required |
| log_model | bool | A flag indicating whether to log the model as an artifact. | False |
| tta | bool | A flag indicating whether to use test-time augmentation (TTA). | False |
| tta_merge_mode | str | The TTA merge mode. | 'max' |
| decision_threshold | Optional[float] | An optional decision threshold. torch.argmax is used by default. | None |
Source code in kelp/nn/training/eval.py
def run_eval(
    run_dir: Path,
    output_dir: Path,
    model_checkpoint: Path,
    use_mlflow: bool,
    train_cfg: TrainConfig,
    experiment_name: str,
    log_model: bool = False,
    tta: bool = False,
    tta_merge_mode: str = "max",
    decision_threshold: Optional[float] = None,
) -> None:
    """
    Runs model evaluation.

    Args:
        run_dir: The run directory.
        output_dir: The output directory.
        model_checkpoint: The model checkpoint path.
        use_mlflow: A flag indicating whether to use MLFlow to load the model.
        train_cfg: The original training config.
        experiment_name: The experiment name.
        log_model: A flag indicating whether to log model as an artifact.
        tta: A flag indicating whether to use TTA.
        tta_merge_mode: TTA merge mode.
        decision_threshold: An optional decision threshold. Will use :meth:`torch.argmax` by default.

    """
    set_gpu_power_limit_if_needed()
    mlflow.set_experiment(experiment_name)
    mlflow.pytorch.autolog()
    run = mlflow.start_run(run_name=run_dir.parts[-1])

    with run:
        pl.seed_everything(train_cfg.seed, workers=True)
        mlflow.log_dict(train_cfg.model_dump(mode="json"), artifact_file="config.yaml")
        mlflow.log_params(train_cfg.model_dump(mode="json"))
        mlflow.log_params(
            {
                "actual_tta": tta,
                "actual_tta_merge_mode": tta_merge_mode,
                "actual_decision_threshold": decision_threshold,
                "actual_precision": train_cfg.precision,
            }
        )
        mlflow.set_tags(
            {
                "evaluated_at": datetime.utcnow().isoformat(),
                "original_run_id": run_dir.parts[-1],
                "original_experiment_id": model_checkpoint.parts[-2],
            }
        )
        mlflow_run_dir = get_mlflow_run_dir(current_run=run, output_dir=output_dir)
        dm = KelpForestDataModule.from_metadata_file(**train_cfg.data_module_kwargs)
        model = load_model(
            model_path=model_checkpoint,
            use_mlflow=use_mlflow,
            tta=tta,
            tta_merge_mode=tta_merge_mode,
            decision_threshold=decision_threshold,
        )
        trainer = pl.Trainer(
            logger=make_loggers(
                experiment=train_cfg.resolved_experiment_name,
                tags=train_cfg.tags,
            ),
            callbacks=make_callbacks(
                output_dir=mlflow_run_dir / "artifacts" / "checkpoints",
                **train_cfg.callbacks_kwargs,
            ),
            accelerator="gpu",
            **train_cfg.trainer_kwargs,
        )
        trainer.test(model, datamodule=dm)
        if log_model:
            mlflow.pytorch.log_model(model, "model")
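
A sketch of calling run_eval programmatically with test-time augmentation and an explicit decision threshold, instead of relying on command-line flags. The wiring mirrors main(); the threshold value is a hypothetical choice, and torch.argmax is used when decision_threshold is left as None:

from kelp.nn.training.eval import parse_args, run_eval

# Reuse the parsed EvalConfig so train_cfg is derived the same way as in main().
cfg = parse_args()
run_eval(
    run_dir=cfg.run_dir,
    output_dir=cfg.output_dir,
    model_checkpoint=cfg.model_checkpoint,
    use_mlflow=cfg.use_mlflow,
    train_cfg=cfg.training_config,
    experiment_name=cfg.experiment_name,
    tta=True,                 # enable test-time augmentation
    tta_merge_mode="max",     # the default merge mode
    decision_threshold=0.5,   # hypothetical threshold; None falls back to torch.argmax
    log_model=cfg.log_model,
)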