
Train 3D images

Short Description

These functions train a deep learning model (a 3D autoencoder) on the selected marker(s) in the provided training data. To train with ae3dTrain (single marker) or ae3dTrainMulti (multiple markers), point the function at the dataset_dir folder that holds the training data.

ae3dTrain(dataset_dir, outModelPath, input_channels=1, output_channels=1, embedding_size=256, max_epoch_num=100, batch_size=8, num_workers=10, prefetch_factor=8)

dataset_dir (str): The file path leading to the directory that holds the training data.

outModelPath (str): Output file path for saving the trained model weights (e.g. a .pth file).

input_channels (int, optional): Number of encoder input channels (defaults to 1).

output_channels (int, optional): Number of decoder output channels (defaults to 1).

embedding_size (int, optional): Dimension of the embedding (latent code) produced by the encoder.

max_epoch_num (int, optional): Maximum number of training epochs.

batch_size (int, optional): Batch size for the DataLoader.

num_workers (int, optional): Number of subprocesses to use for data loading; 0 means the data is loaded in the main process.

prefetch_factor (int, optional): Number of batches loaded in advance by each worker.

Example:

input_channels = 1
output_channels = 1
embedding_size = 256
max_epoch_num = 100
batch_size = 8
dataset_dir = "/n/scratch/users/r/roh6824/Results/LSP13626_DNA_padding/SpatialAE/Single3DPatch/DNA1/"
outModelPath = '/n/scratch/users/r/roh6824/Results/LSP13626_DNA_padding/SpatialAE/ln_3Dautoencoder_DNA_validate_300_model_update.pth'

ae3dTrain(dataset_dir, outModelPath, input_channels, output_channels, embedding_size, max_epoch_num, batch_size)
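
After training finishes, ae3dTrain writes the model weights with torch.save(model.state_dict(), outModelPath) (see the source below). The following is a minimal sketch for reloading those weights later; it assumes LitAutoEncoder3D_Complex is importable from spatialae.models and that the same hyperparameters are passed as at training time:

```python
import torch
from spatialae.models import LitAutoEncoder3D_Complex  # assumed import path

outModelPath = '/n/scratch/users/r/roh6824/Results/LSP13626_DNA_padding/SpatialAE/ln_3Dautoencoder_DNA_validate_300_model_update.pth'

# Rebuild the architecture with the same (input_channels, output_channels, embedding_size)
# used for training, then load the state_dict saved by ae3dTrain.
model = LitAutoEncoder3D_Complex(1, 1, 256)
model.load_state_dict(torch.load(outModelPath, map_location='cpu'))
model.eval()  # switch to inference mode
```
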

Source code in spatialae/models/ae3dTrain.py
def ae3dTrain(dataset_dir,
              outModelPath,
              input_channels = 1,
              output_channels = 1, 
              embedding_size = 256,
              max_epoch_num = 100,
              batch_size = 8,
              num_workers = 10,
              prefetch_factor = 8
              ):
    """
    Parameters:
    dataset_dir (str):
        The file path leading to the directory that holds the training data.

    outModelPath (str):
        Output file path for saving the trained model weights (e.g. a .pth file).

    input_channels (int, optional):
        Number of encoder input channels (defaults to 1).

    output_channels (int, optional):
        Number of decoder output channels (defaults to 1).

    embedding_size (int, optional):
        Dimension of the embedding (latent code) produced by the encoder.

    max_epoch_num (int, optional):
        Maximum number of training epochs.

    batch_size (int, optional):
        Batch size for the DataLoader.

    num_workers (int, optional):
        Number of subprocesses to use for data loading; 0 means the data is loaded in the main process.

    prefetch_factor (int, optional):
        Number of batches loaded in advance by each worker.


    Example:
    ```python

    input_channels = 1
    output_channels = 1
    embedding_size = 256
    max_epoch_num = 100
    batch_size = 8
    dataset_dir = "/n/scratch/users/r/roh6824/Results/LSP13626_DNA_padding/SpatialAE/Single3DPatch/DNA1/"
    outModelPath = '/n/scratch/users/r/roh6824/Results/LSP13626_DNA_padding/SpatialAE/ln_3Dautoencoder_DNA_validate_300_model_update.pth'

    ae3dTrain(dataset_dir, outModelPath, input_channels, output_channels, embedding_size, max_epoch_num, batch_size)
    ```

    """
    # model = spatialae.models.LitAutoEncoder3D_update(input_channels, output_channels, embedding_size)
    model = LitAutoEncoder3D_Complex(input_channels, output_channels, embedding_size)

    # Instantiate the dataset
    transform = ToTensor3D()
    train_dataset = Spatial3DImageDataset(dataset_dir, transform=transform, get_train = True)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers = num_workers, prefetch_factor = prefetch_factor)

    validate_dataset = Spatial3DImageDataset(dataset_dir, transform=transform, get_validate = True)
    validate_loader = DataLoader(validate_dataset, batch_size=batch_size, shuffle=False, num_workers = num_workers, prefetch_factor = prefetch_factor)

    trainer = pl.Trainer(max_epochs=max_epoch_num)
    trainer.fit(model, train_loader, validate_loader)

    # save the trained model
    torch.save(model.state_dict(), outModelPath)

ae3dTrainMulti(dataset_dir, outModelPath, channels, embedding_size=256, max_epoch_num=100, batch_size=8, num_workers=10, prefetch_factor=8)

dataset_dir (str): The file path leading to the directory that holds the training data.

outModelPath (str): Output file path for saving the trained model weights (e.g. a .pth file).

channels (list): List of marker names to train on. Each name must match a marker subfolder under dataset_dir; pass a subset of the folder names to limit training to specific markers (e.g. ['CD3D', 'CD4']). A sketch for deriving the full list from the directory is shown after the example below.

embedding_size (int, optional): Dimension of the embedding (latent code) produced by the encoder.

max_epoch_num (int, optional): Maximum number of training epochs.

batch_size (int, optional): Batch size for the DataLoader.

num_workers (int, optional): Number of subprocesses to use for data loading; 0 means the data is loaded in the main process.

prefetch_factor (int, optional): Number of batches loaded in advance by each worker.

Example:

dataset_dir='/n/scratch/users/r/roh6824/Results/LSP13626_DNA_padding/SpatialAE/Single3DPatch/'
outModelPath='/n/scratch/users/r/roh6824/Results/LSP13626_DNA_padding/SpatialAE//ln_3dautoencoder_multi_validate_300_model_dim256_withoutDNA_add.pth'
channels = ["MART-1",  "SOX10", "S100B", "Cytokeratin (pan)","CD31", "CD206", "CD20", "CD163", "CD3E","CD8a", "CD11b", "FOXP3", "CD11c","CD103"]
embedding_size=64
max_epoch_num=100
batch_size=32
ae3dTrainMulti(dataset_dir, outModelPath, channels,  embedding_size, max_epoch_num, batch_size)
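
As noted for the channels parameter above, each marker is expected to live in its own subfolder under dataset_dir, and the folder names double as the channel list. The following small sketch (assuming that layout) derives the full list from the directory instead of typing it by hand:

```python
import os

dataset_dir = '/n/scratch/users/r/roh6824/Results/LSP13626_DNA_padding/SpatialAE/Single3DPatch/'

# Assumes one subfolder per marker under dataset_dir; the folder names
# are what ae3dTrainMulti expects in the channels argument.
channels = sorted(
    d for d in os.listdir(dataset_dir)
    if os.path.isdir(os.path.join(dataset_dir, d))
)
print(channels)  # pass this list (or a subset of it) to ae3dTrainMulti
```
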
Source code in spatialae/models/ae3dTrain.py
def ae3dTrainMulti(dataset_dir,
                   outModelPath,
                   channels,
                   embedding_size = 256,
                   max_epoch_num = 100,
                   batch_size = 8,
                   num_workers = 10,
                   prefetch_factor = 8
                   ):
    """
    Parameters:
    dataset_dir (str):
        The file path leading to the directory that holds the training data.

    outModelPath (str):
        Output file path for saving the trained model weights (e.g. a .pth file).

    channels (list):
        List of marker names to train on. Each name must match a marker subfolder under
        dataset_dir; pass a subset of the folder names to limit training to specific markers (e.g. ['CD3D', 'CD4']).

    embedding_size (int, optional):
        Dimension of the embedding (latent code) produced by the encoder.

    max_epoch_num (int, optional):
        Maximum number of training epochs.

    batch_size (int, optional):
        Batch size for the DataLoader.

    num_workers (int, optional):
        Number of subprocesses to use for data loading; 0 means the data is loaded in the main process.

    prefetch_factor (int, optional):
        Number of batches loaded in advance by each worker.

    Example:

    ```python
    dataset_dir='/n/scratch/users/r/roh6824/Results/LSP13626_DNA_padding/SpatialAE/Single3DPatch/'
    outModelPath='/n/scratch/users/r/roh6824/Results/LSP13626_DNA_padding/SpatialAE//ln_3dautoencoder_multi_validate_300_model_dim256_withoutDNA_add.pth'
    channels = ["MART-1",  "SOX10", "S100B", "Cytokeratin (pan)","CD31", "CD206", "CD20", "CD163", "CD3E","CD8a", "CD11b", "FOXP3", "CD11c","CD103"]
    embedding_size=64
    max_epoch_num=100
    batch_size=32
    ae3dTrainMulti(dataset_dir, outModelPath, channels,  embedding_size, max_epoch_num, batch_size)
    ```
    """
    input_channels = len(channels)
    output_channels = len(channels)
    # model = spatialae.models.LitAutoEncoder3D_update(input_channels, output_channels, embedding_size)
    model = LitAutoEncoder3D_Complex(input_channels, output_channels, embedding_size)

    # Instantiate the dataset
    transform = ToTensor3D()
    train_dataset = MultiChannelSpatial3DImageDataset(dataset_dir, channels, transform=transform, get_train = True)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,  num_workers = num_workers, prefetch_factor = prefetch_factor)

    validate_dataset = MultiChannelSpatial3DImageDataset(dataset_dir, channels, transform=transform, get_validate = True)
    validate_loader = DataLoader(validate_dataset, batch_size=batch_size, shuffle=False,  num_workers = num_workers, prefetch_factor = prefetch_factor)

    trainer = pl.Trainer(max_epochs=max_epoch_num)
    trainer.fit(model, train_loader, validate_loader)

    # save the trained model
    torch.save(model.state_dict(), outModelPath)
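
Because ae3dTrainMulti sets input_channels = output_channels = len(channels), reloading the saved weights requires rebuilding the model with the same channel count and embedding_size used during training. A minimal sketch, again assuming LitAutoEncoder3D_Complex is importable from spatialae.models:

```python
import torch
from spatialae.models import LitAutoEncoder3D_Complex  # assumed import path

channels = ["MART-1", "SOX10", "S100B", "Cytokeratin (pan)", "CD31", "CD206", "CD20",
            "CD163", "CD3E", "CD8a", "CD11b", "FOXP3", "CD11c", "CD103"]
outModelPath = '/n/scratch/users/r/roh6824/Results/LSP13626_DNA_padding/SpatialAE//ln_3dautoencoder_multi_validate_300_model_dim256_withoutDNA_add.pth'

# Same channel count and embedding_size (64 in the example above) as at training time.
model = LitAutoEncoder3D_Complex(len(channels), len(channels), 64)
model.load_state_dict(torch.load(outModelPath, map_location='cpu'))
model.eval()
```
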