Skip to content

move_split_files

Logic for moving split validation files to separate directories.

kelp.data_prep.move_split_files.MoveSplitFilesConfig

Bases: ConfigBase

A config for moving split files to a new directory.

Source code in kelp/data_prep/move_split_files.py
12
13
14
15
16
17
class MoveSplitFilesConfig(ConfigBase):
    """A config for moving split files to a new directory."""

    # Root data directory; move_split_files reads from its "images" and "masks" sub-folders.
    data_dir: Path
    # Metadata file that move_split_files loads with pd.read_parquet.
    metadata_fp: Path
    # Destination root directory; parse_args creates it when it does not exist yet.
    output_dir: Path

kelp.data_prep.move_split_files.main

Main entry point for moving split files.

Source code in kelp/data_prep/move_split_files.py
63
64
65
66
def main() -> None:
    """Parse the command line arguments and run the split file transfer."""
    config = parse_args()
    move_split_files(
        data_dir=config.data_dir,
        output_dir=config.output_dir,
        metadata_fp=config.metadata_fp,
    )

kelp.data_prep.move_split_files.move_split_files

Copy the validation files of each split to a new directory (the source files are left in place).

Parameters:

Name Type Description Default
data_dir Path

Path to the data directory.

required
output_dir Path

Path to the output directory.

required
metadata_fp Path

Path to the metadata parquet file.

required
Source code in kelp/data_prep/move_split_files.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def move_split_files(data_dir: Path, output_dir: Path, metadata_fp: Path) -> None:
    """
    Copy the validation tiles of every split into per-split directories.

    For each metadata column whose name starts with ``split_``, the tiles marked
    as validation in that column are copied (via ``shutil.copy``, so the source
    files stay in place) into ``<output_dir>/<split column>/images`` and
    ``<output_dir>/<split column>/masks``.

    Args:
        data_dir: Path to the data directory. Must contain `images` and `masks` sub-folders.
        output_dir: Path to the output directory.
        metadata_fp: Path to the metadata parquet file.

    """
    metadata = pd.read_parquet(metadata_fp)
    split_names = [name for name in metadata.columns if name.startswith("split_")]

    for split_name in tqdm(split_names, desc="Moving CV split files"):
        # Tiles assigned to the validation part of this particular split.
        is_val = metadata[split_name] == consts.data.VAL
        tile_ids = metadata.loc[is_val, "tile_id"].tolist()

        split_root = output_dir / split_name
        images_dst = split_root / "images"
        masks_dst = split_root / "masks"
        for dst in (images_dst, masks_dst):
            dst.mkdir(exist_ok=True, parents=True)

        for tile_id in tqdm(tile_ids, desc=f"Moving val files for {split_name}"):
            image_name = f"{tile_id}_satellite.tif"
            mask_name = f"{tile_id}_kelp.tif"
            shutil.copy(data_dir / "images" / image_name, images_dst / image_name)
            shutil.copy(data_dir / "masks" / mask_name, masks_dst / mask_name)

kelp.data_prep.move_split_files.parse_args

Parse command line arguments.

Returns: An instance of MoveSplitFilesConfig.

Source code in kelp/data_prep/move_split_files.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
def parse_args() -> MoveSplitFilesConfig:
    """
    Build the run configuration from the command line.

    All of `--data_dir`, `--output_dir` and `--metadata_fp` are required.
    The parsed config is logged and the output directory is created
    (including parents) before the config is returned.

    Returns: An instance of MoveSplitFilesConfig.

    """
    arg_parser = argparse.ArgumentParser()
    for flag in ("--data_dir", "--output_dir", "--metadata_fp"):
        arg_parser.add_argument(flag, type=str, required=True)
    namespace = arg_parser.parse_args()

    config = MoveSplitFilesConfig(
        data_dir=namespace.data_dir,
        output_dir=namespace.output_dir,
        metadata_fp=namespace.metadata_fp,
    )
    config.log_self()
    # Make sure the destination exists before any downstream code writes to it.
    config.output_dir.mkdir(exist_ok=True, parents=True)
    return config