
generator

ConstantInput

ConstantInput(channels: int, size: Resolution)

Bases: nn.Module

Constant input image

Source code in stylegan2_torch/generator/__init__.py
def __init__(self, channels: int, size: Resolution):
    super().__init__()
    self.input = Parameter(torch.randn(1, channels, size, size))

__call__ class-attribute

__call__ = proxy(forward)

input instance-attribute

input = Parameter(torch.randn(1, channels, size, size))

forward

forward(input: Tensor) -> Tensor
Source code in stylegan2_torch/generator/__init__.py
def forward(self, input: Tensor) -> Tensor:
    # Broadcast constant input to each sample
    return self.input.repeat(input.shape[0], 1, 1, 1)
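A minimal usage sketch (illustrative shapes, assuming ConstantInput is importable from stylegan2_torch.generator as documented above): only the batch size of the input tensor is used.

import torch
from stylegan2_torch.generator import ConstantInput

const = ConstantInput(channels=512, size=4)
w = torch.randn(8, 512)        # any per-sample tensor works; only w.shape[0] matters
out = const(w)                 # learned constant repeated along the batch dimension
print(out.shape)               # torch.Size([8, 512, 4, 4])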

Generator

Generator(
    resolution: Resolution,
    latent_dim: int = 512,
    n_mlp: int = 8,
    lr_mlp_mult: float = 0.01,
    channels: Dict[Resolution, int] = default_channels,
    blur_kernel: List[int] = [1, 3, 3, 1],
)

Bases: nn.Module

Generator module

Source code in stylegan2_torch/generator/__init__.py
def __init__(
    self,
    resolution: Resolution,
    latent_dim: int = 512,
    n_mlp: int = 8,
    lr_mlp_mult: float = 0.01,
    channels: Dict[Resolution, int] = default_channels,
    blur_kernel: List[int] = [1, 3, 3, 1],
):
    super().__init__()

    self.latent_dim = latent_dim

    # Create mapping network
    self.mapping = MappingNetwork(latent_dim, n_mlp, lr_mlp_mult)

    # Create constant input
    self.input = ConstantInput(channels[4], 4)

    # Create Conv, UpConv and ToRGB Blocks
    self.convs = nn.ModuleList()
    self.up_convs = nn.ModuleList()
    self.to_rgbs = nn.ModuleList()

    self.n_layers = int(math.log(resolution, 2))
    self.n_w_plus = self.n_layers * 2 - 2

    for layer_idx in range(2, self.n_layers + 1):
        # Upsample condition
        upsample = layer_idx > 2

        # Calculate image size and channels at the layer
        prev_layer_size = 2 ** (layer_idx - 1)
        layer_size: Resolution = 2 ** layer_idx
        layer_channel = channels[layer_size]

        # Upsampling Conv Block
        if upsample:
            self.up_convs.append(
                UpModConvBlock(
                    channels[prev_layer_size],
                    layer_channel,
                    3,
                    latent_dim,
                    2,
                    blur_kernel,
                )
            )

        # Normal Conv Block
        self.convs.append(ModConvBlock(layer_channel, layer_channel, 3, latent_dim))

        # ToRGB Block
        self.to_rgbs.append(
            ToRGB(
                layer_channel,
                latent_dim,
                2 if upsample else 1,
                blur_kernel,
            )
        )
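The loop above builds one ModConvBlock and one ToRGB per resolution, plus an UpModConvBlock for every resolution above 4x4. A worked example of the bookkeeping for resolution = 256 (values follow directly from the code above):

import math

resolution = 256
n_layers = int(math.log(resolution, 2))   # 8
n_w_plus = n_layers * 2 - 2               # 14 style vectors consumed per forward pass
n_convs = n_layers - 1                    # 7 ModConvBlocks   (4x4 .. 256x256)
n_up_convs = n_layers - 2                 # 6 UpModConvBlocks (8x8 .. 256x256)
n_to_rgbs = n_layers - 1                  # 7 ToRGB blocks    (one skip connection per resolution)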

__call__ class-attribute

__call__ = proxy(forward)

convs instance-attribute

convs = nn.ModuleList()

input instance-attribute

input = ConstantInput(channels[4], 4)

latent_dim instance-attribute

latent_dim = latent_dim

mapping instance-attribute

mapping = MappingNetwork(latent_dim, n_mlp, lr_mlp_mult)

n_layers instance-attribute

n_layers = int(math.log(resolution, 2))

n_w_plus instance-attribute

n_w_plus = self.n_layers * 2 - 2

to_rgbs instance-attribute

to_rgbs = nn.ModuleList()

up_convs instance-attribute

up_convs = nn.ModuleList()

forward

forward(
    input: Sequence[Tensor],
    *,
    return_latents: bool = False,
    input_type: Literal["z", "w", "w_plus"] = "z",
    trunc_option: Optional[Tuple[float, Tensor]] = None,
    mix_index: Optional[int] = None,
    noises: Optional[List[Optional[Tensor]]] = None
)
Source code in stylegan2_torch/generator/__init__.py
def forward(
    self,
    # Input tensors (N, latent_dim)
    input: Sequence[Tensor],
    *,
    # Return latents
    return_latents: bool = False,
    # Type of input tensor
    input_type: Literal["z", "w", "w_plus"] = "z",
    # Truncation options
    trunc_option: Optional[Tuple[float, Tensor]] = None,
    # Mixing regularization options
    mix_index: Optional[int] = None,
    # Noise vectors
    noises: Optional[List[Optional[Tensor]]] = None,
):
    # Get w vectors (can have 2 w vectors for mixing regularization)
    ws: List[Tensor]

    if input_type == "z":
        ws = [self.mapping(z) for z in input]
    else:
        ws = list(input)

    # Perform truncation
    if trunc_option:
        trunc_coeff, trunc_tensor = trunc_option
        ws = [trunc_tensor + trunc_coeff * (w - trunc_tensor) for w in ws]

    # Mixing regularization (why add dimension 1 not 0 lol)
    w_plus: Tensor
    if len(ws) == 1:
        # No mixing regularization
        mix_index = self.n_w_plus

        if input_type == "w_plus":
            w_plus = ws[0]
        else:
            w_plus = ws[0].unsqueeze(1).repeat(1, mix_index, 1)

    else:
        mix_index = mix_index if mix_index else random.randint(1, self.n_w_plus - 1)

        w_plus1 = ws[0].unsqueeze(1).repeat(1, mix_index, 1)
        w_plus2 = ws[1].unsqueeze(1).repeat(1, self.n_w_plus - mix_index, 1)

        w_plus = torch.cat([w_plus1, w_plus2], 1)
    # Get noise
    noises_: List[Optional[Tensor]] = (
        noises if noises else [None] * (self.n_w_plus - 1)
    )

    # Constant input
    out = self.input(w_plus)

    # References for this weird indexing:
    # https://github.com/NVlabs/stylegan2-ada-pytorch/issues/50
    # https://github.com/rosinality/stylegan2-pytorch/issues/278
    img = None
    for i in range(self.n_layers - 1):
        if i > 0:
            out = self.up_convs[i - 1](
                out, w_plus[:, i * 2 - 1], noises_[i * 2 - 1]
            )

        out = self.convs[i](out, w_plus[:, i * 2], noises_[i * 2])
        img = self.to_rgbs[i](out, w_plus[:, i * 2 + 1], img)

    if return_latents:
        return img, w_plus
    else:
        return img
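An illustrative generation sketch (not from the source), assuming Generator is importable from stylegan2_torch.generator with the default channel map; shapes are for resolution = 256:

import torch
from stylegan2_torch.generator import Generator

g = Generator(resolution=256)
z = torch.randn(4, g.latent_dim)

img = g([z])                                  # single z: no mixing regularization
                                              # img: (4, 1, 256, 256), since ToRGB emits one channel
img, w_plus = g([z], return_latents=True)     # w_plus: (4, 14, 512) for resolution 256

z2 = torch.randn(4, g.latent_dim)
mixed = g([z, z2], mix_index=7)               # style mixing: first 7 w's from z, the rest from z2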

mean_latent

mean_latent(n_sample: int, device: str) -> Tensor
Source code in stylegan2_torch/generator/__init__.py
def mean_latent(self, n_sample: int, device: str) -> Tensor:
    mean_latent = self.mapping(
        torch.randn(n_sample, self.latent_dim, device=device)
    ).mean(0, keepdim=True)
    mean_latent.detach_()
    return mean_latent
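A hedged sketch of the truncation trick built on mean_latent (illustrative values; assumes the Generator import shown earlier):

import torch
from stylegan2_torch.generator import Generator

g = Generator(resolution=256)
mean_w = g.mean_latent(n_sample=4096, device="cpu")   # (1, 512), detached
z = torch.randn(4, g.latent_dim)
img = g([z], trunc_option=(0.7, mean_w))              # w <- mean_w + 0.7 * (w - mean_w)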

conv_block

AddNoise

AddNoise()

Bases: nn.Module

Inject white noise scaled by a learnable scalar (same noise for whole batch)

Source code in stylegan2_torch/generator/conv_block.py
def __init__(self):
    super().__init__()

    # Trainable parameters
    self.weight = Parameter(torch.zeros(1))

__call__ class-attribute

__call__ = proxy(forward)

weight instance-attribute

weight = Parameter(torch.zeros(1))

forward

forward(input: Tensor, noise: Optional[Tensor]) -> Tensor
Source code in stylegan2_torch/generator/conv_block.py
def forward(self, input: Tensor, noise: Optional[Tensor]) -> Tensor:
    if noise is None:
        batch, _, height, width = input.shape
        noise = input.new_empty(batch, 1, height, width).normal_()

    return input + self.weight * noise
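A minimal sketch, assuming AddNoise is importable from stylegan2_torch.generator.conv_block: when noise is None a fresh (N, 1, H, W) map is drawn and broadcast across channels.

import torch
from stylegan2_torch.generator.conv_block import AddNoise

add_noise = AddNoise()
feat = torch.randn(2, 512, 8, 8)
out = add_noise(feat, None)                       # noise drawn internally, scaled by the learned weight
out = add_noise(feat, torch.randn(1, 1, 8, 8))    # or supply a fixed map (broadcast over the batch)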

ModConvBlock

ModConvBlock(
    in_channel: int,
    out_channel: int,
    kernel_size: int,
    latent_dim: int,
)

Bases: nn.Module

Modulated convolution block

disentangled latent vector (w) => affine transformation => style vector
style vector => modulate + demodulate convolution weights => new conv weights
new conv weights & input features => group convolution => output features
output features => add noise & leaky ReLU => final output features

Source code in stylegan2_torch/generator/conv_block.py
def __init__(
    self, in_channel: int, out_channel: int, kernel_size: int, latent_dim: int
):
    super().__init__()

    # Affine mapping from W to style vector
    self.affine = EqualLinear(latent_dim, in_channel, bias_init=1)

    # Trainable parameters
    self.weight = Parameter(
        torch.randn(1, out_channel, in_channel, kernel_size, kernel_size)
    )
    self.scale = 1 / math.sqrt(in_channel * kernel_size**2)

    # Noise and Leaky ReLU
    self.add_noise = AddNoise()
    self.leaky_relu = FusedLeakyReLU(out_channel)

__call__ class-attribute

__call__ = proxy(forward)

add_noise instance-attribute

add_noise = AddNoise()

affine instance-attribute

affine = EqualLinear(latent_dim, in_channel, bias_init=1)

leaky_relu instance-attribute

leaky_relu = FusedLeakyReLU(out_channel)

scale instance-attribute

scale = 1 / math.sqrt(in_channel * kernel_size ** 2)

weight instance-attribute

weight = Parameter(
    torch.randn(
        1, out_channel, in_channel, kernel_size, kernel_size
    )
)

forward

forward(
    input: Tensor, w: Tensor, noise: Optional[Tensor]
) -> Tensor
Source code in stylegan2_torch/generator/conv_block.py
def forward(self, input: Tensor, w: Tensor, noise: Optional[Tensor]) -> Tensor:
    batch, in_channel, _, _ = input.shape

    # Get style vectors (N, 1, C_in, 1, 1)
    style = self.affine(w).view(batch, 1, in_channel, 1, 1)

    # Modulate weights with equalized learning rate (N, C_out, C_in, K_h, K_w)
    weight = mod(self.scale * self.weight, style)

    # Demodulate weights
    weight = demod(weight)

    # Perform convolution
    out = group_conv(input, weight)

    # Add noise
    out = self.add_noise(out, noise)

    # Add learnable bias and activate
    return self.leaky_relu(out)
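Shape sketch for the path above (illustrative sizes; assumes ModConvBlock is importable from stylegan2_torch.generator.conv_block):

import torch
from stylegan2_torch.generator.conv_block import ModConvBlock

block = ModConvBlock(in_channel=512, out_channel=512, kernel_size=3, latent_dim=512)
x = torch.randn(2, 512, 8, 8)
w = torch.randn(2, 512)
y = block(x, w, None)          # (2, 512, 8, 8): spatial size preserved by the kernel_size // 2 padding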

UpModConvBlock

UpModConvBlock(
    in_channel: int,
    out_channel: int,
    kernel_size: int,
    latent_dim: int,
    up: int,
    blur_kernel: List[int],
)

Bases: nn.Module

Modulated convolution block with upsampling

disentangled latent vector (w) => affine transformation => style vector
style vector => modulate + demodulate convolution weights => new conv weights
new conv weights & input features => group convolution and upsampling => output features
output features => add noise & leaky ReLU => final output features

Source code in stylegan2_torch/generator/conv_block.py
def __init__(
    self,
    in_channel: int,
    out_channel: int,
    kernel_size: int,
    latent_dim: int,
    up: int,
    blur_kernel: List[int],
):
    super().__init__()

    # Affine mapping from W to style vector
    self.affine = EqualLinear(latent_dim, in_channel, bias_init=1)

    # Trainable parameters
    self.weight = Parameter(
        torch.randn(1, out_channel, in_channel, kernel_size, kernel_size)
    )
    self.scale = 1 / math.sqrt(in_channel * kernel_size**2)

    # Blurring kernel
    self.up = up
    self.blur = Blur(blur_kernel, up, kernel_size)

    # Noise and Leaky ReLU
    self.add_noise = AddNoise()
    self.leaky_relu = FusedLeakyReLU(out_channel)

__call__ class-attribute

__call__ = proxy(forward)

add_noise instance-attribute

add_noise = AddNoise()

affine instance-attribute

affine = EqualLinear(latent_dim, in_channel, bias_init=1)

blur instance-attribute

blur = Blur(blur_kernel, up, kernel_size)

leaky_relu instance-attribute

leaky_relu = FusedLeakyReLU(out_channel)

scale instance-attribute

scale = 1 / math.sqrt(in_channel * kernel_size ** 2)

up instance-attribute

up = up

weight instance-attribute

weight = Parameter(
    torch.randn(
        1, out_channel, in_channel, kernel_size, kernel_size
    )
)

forward

forward(
    input: Tensor, w: Tensor, noise: Optional[Tensor]
) -> Tensor
Source code in stylegan2_torch/generator/conv_block.py
def forward(self, input: Tensor, w: Tensor, noise: Optional[Tensor]) -> Tensor:
    batch, in_channel, _, _ = input.shape

    # Get style vectors (N, 1, C_in, 1, 1)
    style = self.affine(w).view(batch, 1, in_channel, 1, 1)

    # Modulate weights with equalized learning rate (N, C_out, C_in, K_h, K_w)
    weight = mod(self.scale * self.weight, style)

    # Demodulate weights
    weight = demod(weight)

    # Reshape to use group convolution
    out = group_conv_up(input, weight, self.up)

    # Apply blurring filter for anti-aliasing (linear operation so order doesn't matter?)
    out = self.blur(out)

    # Add noise
    out = self.add_noise(out, noise)

    # Add learnable bias and activate
    return self.leaky_relu(out)
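Shape sketch, assuming UpModConvBlock is importable from stylegan2_torch.generator.conv_block; for up = 2 the spatial size should double (the blur crops the transpose-conv overshoot back to the target size):

import torch
from stylegan2_torch.generator.conv_block import UpModConvBlock

block = UpModConvBlock(512, 256, 3, 512, up=2, blur_kernel=[1, 3, 3, 1])
x = torch.randn(2, 512, 8, 8)
w = torch.randn(2, 512)
y = block(x, w, None)          # expected (2, 256, 16, 16)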

demod

demod(weight: Tensor) -> Tensor

Demodulate convolution weights (normalization = statistically restore output feature map to unit s.d.)

Parameters:

Name Type Description Default
weight Tensor

(N, C_out, C_in, K_h, K_w)

required

Returns:

Name Type Description
Tensor Tensor

(N, C_out, C_in, K_h, K_w)

Source code in stylegan2_torch/generator/conv_block.py
def demod(weight: Tensor) -> Tensor:
    """
    Demodulate convolution weights
    (normalization = statistically restore output feature map to unit s.d.)

    Args:
        weight (Tensor): (N, C_out, C_in, K_h, K_w)

    Returns:
        Tensor: (N, C_out, C_in, K_h, K_w)
    """
    batch, out_channel, _, _, _ = weight.shape
    demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + 1e-8).view(
        batch, out_channel, 1, 1, 1
    )
    return weight * demod
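A quick numeric check of the property described above: after demodulation, each per-sample output filter has (approximately) unit L2 norm.

import torch
from stylegan2_torch.generator.conv_block import demod

weight = torch.randn(4, 64, 32, 3, 3)               # (N, C_out, C_in, K_h, K_w)
norms = demod(weight).pow(2).sum(dim=[2, 3, 4])     # ~1.0 for every (sample, output channel)
print(norms.min().item(), norms.max().item())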

group_conv

group_conv(input: Tensor, weight: Tensor) -> Tensor

Efficiently perform modulated convolution (i.e. grouped convolution)

Parameters:

Name Type Description Default
input Tensor

(N, C_in, H, W)

required
weight Tensor

(N, C_out, C_in, K, K)

required

Returns:

Name Type Description
Tensor Tensor

(N, C_out, H, W)

Source code in stylegan2_torch/generator/conv_block.py
def group_conv(input: Tensor, weight: Tensor) -> Tensor:
    """
    Efficiently perform modulated convolution
    (i.e. grouped convolution)

    Args:
        input (Tensor): (N, C_in, H, W)
        weight (Tensor): (N, C_out, C_in, K, K)

    Returns:
        Tensor: (N, C_out, H, W)
    """
    batch, in_channel, height, width = input.shape
    _, out_channel, _, k_h, k_w = weight.shape

    weight = weight.view(batch * out_channel, in_channel, k_h, k_w)
    input = input.view(1, batch * in_channel, height, width)

    out = conv2d(input=input, weight=weight, padding=k_h // 2, groups=batch)
    return out.view(batch, out_channel, height, width)
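The grouped convolution is just a batched way of giving every sample its own filter bank; a small equivalence check with illustrative sizes:

import torch
from torch.nn.functional import conv2d
from stylegan2_torch.generator.conv_block import group_conv

x = torch.randn(2, 8, 16, 16)                   # (N, C_in, H, W)
w = torch.randn(2, 4, 8, 3, 3)                  # one (C_out, C_in, K, K) filter bank per sample
fast = group_conv(x, w)                         # (2, 4, 16, 16)
slow = torch.cat([conv2d(x[i:i + 1], w[i], padding=1) for i in range(2)])
assert torch.allclose(fast, slow, atol=1e-5)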

group_conv_up

group_conv_up(
    input: Tensor, weight: Tensor, up: int = 2
) -> Tensor

Efficiently perform upsampling + modulated convolution (i.e. grouped transpose convolution)

Parameters:

Name Type Description Default
input Tensor

(N, C_in, H, W)

required
weight Tensor

(N, C_out, C_in, K, K)

required
up int

Upsampling factor U. Defaults to 2.

2

Returns:

Name Type Description
Tensor Tensor

(N, C, (H - 1) * U + K - 1 + 1, (W - 1) * U + K - 1 + 1)

Source code in stylegan2_torch/generator/conv_block.py
def group_conv_up(input: Tensor, weight: Tensor, up: int = 2) -> Tensor:
    """
    Efficiently perform upsampling + modulated convolution
    (i.e. grouped transpose convolution)

    Args:
        input (Tensor): (N, C_in, H, W)
        weight (Tensor): (N, C_out, C_in, K, K)
        up (int, optional): Upsampling factor U. Defaults to 2.

    Returns:
        Tensor: (N, C, (H - 1) * U + K - 1 + 1, (W - 1) * U + K - 1 + 1)
    """
    batch, in_channel, height, width = input.shape
    _, out_channel, _, k_h, k_w = weight.shape

    weight = weight.transpose(1, 2).reshape(batch * in_channel, out_channel, k_h, k_w)
    input = input.view(1, batch * in_channel, height, width)
    out = conv_transpose2d(
        input=input, weight=weight, stride=up, padding=0, groups=batch
    )
    _, _, out_h, out_w = out.shape
    return out.view(batch, out_channel, out_h, out_w)
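Shape sketch for the transpose-convolution path (illustrative sizes; the blur/crop applied later in UpModConvBlock is not included here):

import torch
from stylegan2_torch.generator.conv_block import group_conv_up

x = torch.randn(2, 8, 16, 16)
w = torch.randn(2, 4, 8, 3, 3)
y = group_conv_up(x, w, up=2)      # (2, 4, 33, 33) = (N, C_out, (16 - 1) * 2 + 3, (16 - 1) * 2 + 3)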

mod

mod(weight: Tensor, style: Tensor) -> Tensor

Modulate convolution weights with style vector (styling = scale each input feature map before convolution)

Parameters:

Name Type Description Default
weight Tensor

(1, C_out, C_in, K_h, K_w)

required
style Tensor

(N, 1, C_in, 1, 1)

required

Returns:

Name Type Description
Tensor Tensor

(N, C_out, C_in, K_h, K_w)

Source code in stylegan2_torch/generator/conv_block.py
def mod(weight: Tensor, style: Tensor) -> Tensor:
    """
    Modulate convolution weights with style vector
    (styling = scale each input feature map before convolution)

    Args:
        weight (Tensor): (1, C_out, C_in, K_h, K_w)
        style (Tensor): (N, 1, C_in, 1, 1)

    Returns:
        Tensor: (N, C_out, C_in, K_h, K_w)
    """
    return weight * style
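Broadcasting sketch for the modulation step: a shared (1, C_out, C_in, K, K) weight is scaled per sample along the input-channel axis.

import torch
from stylegan2_torch.generator.conv_block import mod

weight = torch.randn(1, 64, 32, 3, 3)     # shared across the batch
style = torch.randn(8, 1, 32, 1, 1)       # one scale per input channel per sample
print(mod(weight, style).shape)           # torch.Size([8, 64, 32, 3, 3])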

mapping

MappingNetwork

MappingNetwork(
    latent_dim: int, n_mlp: int, lr_mlp_mult: float
)

Bases: nn.Sequential

Mapping network from sampling space (z) to disentangled latent space (w)

Source code in stylegan2_torch/generator/mapping.py
23
24
25
26
27
28
29
30
31
32
33
34
def __init__(self, latent_dim: int, n_mlp: int, lr_mlp_mult: float):
    super().__init__(
        Normalize(),
        *[
            EqualLeakyReLU(
                latent_dim,
                latent_dim,
                lr_mult=lr_mlp_mult,
            )
            for _ in range(n_mlp)
        ]
    )
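Illustrative usage, assuming MappingNetwork is importable from stylegan2_torch.generator.mapping:

import torch
from stylegan2_torch.generator.mapping import MappingNetwork

mapping = MappingNetwork(latent_dim=512, n_mlp=8, lr_mlp_mult=0.01)
z = torch.randn(16, 512)
w = mapping(z)                 # (16, 512): normalized z pushed through 8 equalized-lr MLP layers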

Normalize

Bases: nn.Module

Normalize latent vector for each sample

forward

forward(input: Tensor) -> Tensor
Source code in stylegan2_torch/generator/mapping.py
def forward(self, input: Tensor) -> Tensor:
    # input: (N, style_dim)
    # Normalize z in each sample to N(0,1)
    return input * torch.rsqrt(torch.mean(input ** 2, dim=1, keepdim=True) + 1e-8)
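A quick check of the normalization (assuming Normalize is importable from stylegan2_torch.generator.mapping): every sample ends up with unit second moment across the latent dimension.

import torch
from stylegan2_torch.generator.mapping import Normalize

norm = Normalize()
z = torch.randn(4, 512) * 3.0
out = norm(z)
print(out.pow(2).mean(dim=1))      # ~1.0 for every sample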

rgb

ToRGB

ToRGB(
    in_channel: int,
    latent_dim: int,
    up: int,
    blur_kernel: List[int],
)

Bases: nn.Module

Source code in stylegan2_torch/generator/rgb.py
def __init__(
    self,
    in_channel: int,
    latent_dim: int,
    up: int,
    blur_kernel: List[int],
):
    super().__init__()

    # Affine mapping from W to style vector
    self.affine = EqualLinear(latent_dim, in_channel, bias_init=1)

    # Trainable parameters
    self.weight = Parameter(torch.randn(1, 1, in_channel, 1, 1))
    self.scale = 1 / math.sqrt(in_channel)
    self.bias = Parameter(torch.zeros(1, 1, 1, 1))

    if up > 1:
        self.upsample = Upsample(blur_kernel, up)

affine instance-attribute

affine = EqualLinear(latent_dim, in_channel, bias_init=1)

bias instance-attribute

bias = Parameter(torch.zeros(1, 1, 1, 1))

scale instance-attribute

scale = 1 / math.sqrt(in_channel)

upsample instance-attribute

upsample = Upsample(blur_kernel, up)

weight instance-attribute

weight = Parameter(torch.randn(1, 1, in_channel, 1, 1))

forward

forward(
    input: Tensor,
    w: Tensor,
    prev_output: Optional[Tensor] = None,
) -> Tensor
Source code in stylegan2_torch/generator/rgb.py
def forward(self,
            input: Tensor,
            w: Tensor,
            prev_output: Optional[Tensor] = None) -> Tensor:
    batch, in_channel, _, _ = input.shape

    # Get style vectors (N, 1, C_in, 1, 1)
    style = self.affine(w).view(batch, 1, in_channel, 1, 1)

    # Modulate weights with equalized learning rate (N, C_out, C_in, K_h, K_w)
    weight = mod(self.scale * self.weight, style)

    # Perform convolution and add bias
    out = group_conv(input, weight) + self.bias

    if prev_output is not None:
        out = out + self.upsample(prev_output)

    return out
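Skip-connection sketch (illustrative sizes; assumes ToRGB is importable from stylegan2_torch.generator.rgb): each ToRGB adds its own contribution on top of the upsampled image from the previous resolution.

import torch
from stylegan2_torch.generator.rgb import ToRGB

to_rgb = ToRGB(in_channel=256, latent_dim=512, up=2, blur_kernel=[1, 3, 3, 1])
feat = torch.randn(2, 256, 16, 16)
w = torch.randn(2, 512)
img_8 = torch.randn(2, 1, 8, 8)       # single-channel image from the previous (8x8) resolution
img_16 = to_rgb(feat, w, img_8)       # (2, 1, 16, 16): new contribution + upsampled img_8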

Upsample

Upsample(blur_kernel: List[int], factor: int)

Bases: nn.Module

Upsampling + apply blurring FIR filter

Source code in stylegan2_torch/generator/rgb.py
def __init__(self, blur_kernel: List[int], factor: int):
    super().__init__()

    self.factor = factor

    # Factor to compensate for averaging with zeros
    self.kernel: Tensor
    self.register_buffer("kernel", make_kernel(blur_kernel, self.factor))

    # Since upsampling by factor means there is factor - 1 pad1 already built-in
    """
    UPSAMPLE CASE

       kernel: [kkkkk]................[kkkkk] (k_w = 5)
    upsampled:     [x---x---x---x---x---x---] (in_w = 6, up_x = 4)
       padded: [ppppx---x---x---x---x---x---] (pad0 = 4, pad1 = 0)
       output:   [oooooooooooooooooooooooo]   (out_w = 24)
    Hence, pad0 + pad1 = k_w - 1
           pad0 - pad1 = up_x - 1


    DOWNSAMPLE CASE

       kernel: [kkkkk]...............[kkkkk] (k_w = 5)
        input:   [xxxxxxxxxxxxxxxxxxxxxxxx]  (in_w = 24)
       padded: [ppxxxxxxxxxxxxxxxxxxxxxxxxp] (pad0 = 2, pad1 = 1)
       output:   [o-o-o-o-o-o-o-o-o-o-o-o]   (out_w = 12)
    Since last (factor - 1) elements are discarded anyway,
    they don't need to be padded
    Hence, pad0 + pad1 = k_w - 1 - (factor - 1)
           pad0 - pad1 = 0 or 1
    """
    p = len(blur_kernel) - factor
    pad0 = (p + 1) // 2 + factor - 1
    pad1 = p // 2

    self.pad = (pad0, pad1)
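Worked padding arithmetic for the default 4-tap kernel and factor 2 (values follow from the code above):

blur_kernel = [1, 3, 3, 1]
factor = 2
p = len(blur_kernel) - factor        # 2
pad0 = (p + 1) // 2 + factor - 1     # 2: the extra leading pad compensates for the inserted zeros
pad1 = p // 2                        # 1
# pad0 + pad1 == len(blur_kernel) - 1 and pad0 - pad1 == factor - 1, matching the derivation above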

factor instance-attribute

factor = factor

kernel instance-attribute

kernel: Tensor

pad instance-attribute

pad = (pad0, pad1)

forward

forward(input: Tensor) -> Tensor
Source code in stylegan2_torch/generator/rgb.py
def forward(self, input: Tensor) -> Tensor:
    return upfirdn2d(input,
                     self.kernel,
                     up=self.factor,
                     down=1,
                     pad=self.pad)
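
Illustrative usage (assumes Upsample is importable from stylegan2_torch.generator.rgb): a factor-2 upsample doubles the spatial size.

import torch
from stylegan2_torch.generator.rgb import Upsample

up = Upsample(blur_kernel=[1, 3, 3, 1], factor=2)
img = torch.randn(2, 1, 64, 64)
print(up(img).shape)               # torch.Size([2, 1, 128, 128])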