
generator

ConstantInput

ConstantInput(channels: int, size: Resolution)

Bases: nn.Module

Constant input image

Source code in stylegan2_torch/generator/__init__.py
def __init__(self, channels: int, size: Resolution):
    super().__init__()
    self.input = Parameter(torch.randn(1, channels, size, size))

__call__ class-attribute

__call__ = proxy(forward)

input instance-attribute

input = Parameter(torch.randn(1, channels, size, size))

forward

forward(input: Tensor) -> Tensor
Source code in stylegan2_torch/generator/__init__.py
def forward(self, input: Tensor) -> Tensor:
    # Broadcast constant input to each sample
    return self.input.repeat(input.shape[0], 1, 1, 1)
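A minimal usage sketch (illustrative shapes, assuming ConstantInput is importable from stylegan2_torch.generator as documented above): only the batch size of the input tensor is used.

import torch
from stylegan2_torch.generator import ConstantInput

const = ConstantInput(channels=512, size=4)
w = torch.randn(8, 512)        # any per-sample tensor works; only w.shape[0] matters
out = const(w)                 # learned constant repeated along the batch dimension
print(out.shape)               # torch.Size([8, 512, 4, 4])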

Generator

Generator(
    resolution: Resolution,
    latent_dim: int = 512,
    n_mlp: int = 8,
    lr_mlp_mult: float = 0.01,
    channels: Dict[Resolution, int] = default_channels,
    blur_kernel: List[int] = [1, 3, 3, 1],
)

Bases: nn.Module

Generator module

Source code in stylegan2_torch/generator/__init__.py
def __init__(
    self,
    resolution: Resolution,
    latent_dim: int = 512,
    n_mlp: int = 8,
    lr_mlp_mult: float = 0.01,
    channels: Dict[Resolution, int] = default_channels,
    blur_kernel: List[int] = [1, 3, 3, 1],
):
    super().__init__()

    self.latent_dim = latent_dim

    # Create mapping network
    self.mapping = MappingNetwork(latent_dim, n_mlp, lr_mlp_mult)

    # Create constant input
    self.input = ConstantInput(channels[4], 4)

    # Create Conv, UpConv and ToRGB Blocks
    self.convs = nn.ModuleList()
    self.up_convs = nn.ModuleList()
    self.to_rgbs = nn.ModuleList()

    self.n_layers = int(math.log(resolution, 2))
    self.n_w_plus = self.n_layers * 2 - 2

    for layer_idx in range(2, self.n_layers + 1):
        # Upsample condition
        upsample = layer_idx > 2

        # Calculate image size and channels at the layer
        prev_layer_size = 2 ** (layer_idx - 1)
        layer_size: Resolution = 2 ** layer_idx
        layer_channel = channels[layer_size]

        # Upsampling Conv Block
        if upsample:
            self.up_convs.append(
                UpModConvBlock(
                    channels[prev_layer_size],
                    layer_channel,
                    3,
                    latent_dim,
                    2,
                    blur_kernel,
                )
            )

        # Normal Conv Block
        self.convs.append(ModConvBlock(layer_channel, layer_channel, 3, latent_dim))

        # ToRGB Block
        self.to_rgbs.append(
            ToRGB(
                layer_channel,
                latent_dim,
                2 if upsample else 1,
                blur_kernel,
            )
        )
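The loop above builds one ModConvBlock and one ToRGB per resolution, plus an UpModConvBlock for every resolution above 4x4. A worked example of the bookkeeping for resolution = 256 (values follow directly from the code above):

import math

resolution = 256
n_layers = int(math.log(resolution, 2))   # 8
n_w_plus = n_layers * 2 - 2               # 14 style vectors consumed per forward pass
n_convs = n_layers - 1                    # 7 ModConvBlocks   (4x4 .. 256x256)
n_up_convs = n_layers - 2                 # 6 UpModConvBlocks (8x8 .. 256x256)
n_to_rgbs = n_layers - 1                  # 7 ToRGB blocks    (one skip connection per resolution)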

__call__ class-attribute

__call__ = proxy(forward)

convs instance-attribute

convs = nn.ModuleList()

input instance-attribute

input = ConstantInput(channels[4], 4)

latent_dim instance-attribute

latent_dim = latent_dim

mapping instance-attribute

mapping = MappingNetwork(latent_dim, n_mlp, lr_mlp_mult)

n_layers instance-attribute

n_layers = int(math.log(resolution, 2))

n_w_plus instance-attribute

n_w_plus = self.n_layers * 2 - 2

to_rgbs instance-attribute

to_rgbs = nn.ModuleList()

up_convs instance-attribute

up_convs = nn.ModuleList()

forward

forward(
    input: Sequence[Tensor],
    *,
    return_latents: bool = False,
    input_type: Literal["z", "w", "w_plus"] = "z",
    trunc_option: Optional[Tuple[float, Tensor]] = None,
    mix_index: Optional[int] = None,
    noises: Optional[List[Optional[Tensor]]] = None
)
Source code in stylegan2_torch/generator/__init__.py
def forward(
    self,
    # Input tensors (N, latent_dim)
    input: Sequence[Tensor],
    *,
    # Return latents
    return_latents: bool = False,
    # Type of input tensor
    input_type: Literal["z", "w", "w_plus"] = "z",
    # Truncation options
    trunc_option: Optional[Tuple[float, Tensor]] = None,
    # Mixing regularization options
    mix_index: Optional[int] = None,
    # Noise vectors
    noises: Optional[List[Optional[Tensor]]] = None,
):
    # Get w vectors (can have 2 w vectors for mixing regularization)
    ws: List[Tensor]

    if input_type == "z":
        ws = [self.mapping(z) for z in input]
    else:
        ws = list(input)

    # Perform truncation
    if trunc_option:
        trunc_coeff, trunc_tensor = trunc_option
        ws = [trunc_tensor + trunc_coeff * (w - trunc_tensor) for w in ws]

    # Mixing regularization (why add dimension 1 not 0 lol)
    w_plus: Tensor
    if len(ws) == 1:
        # No mixing regularization
        mix_index = self.n_w_plus

        if input_type == "w_plus":
            w_plus = ws[0]
        else:
            w_plus = ws[0].unsqueeze(1).repeat(1, mix_index, 1)

    else:
        mix_index = mix_index if mix_index else random.randint(1, self.n_w_plus - 1)

        w_plus1 = ws[0].unsqueeze(1).repeat(1, mix_index, 1)
        w_plus2 = ws[1].unsqueeze(1).repeat(1, self.n_w_plus - mix_index, 1)

        w_plus = torch.cat([w_plus1, w_plus2], 1)
    # Get noise
    noises_: List[Optional[Tensor]] = (
        noises if noises else [None] * (self.n_w_plus - 1)
    )

    # Constant input
    out = self.input(w_plus)

    # References for this weird indexing:
    # https://github.com/NVlabs/stylegan2-ada-pytorch/issues/50
    # https://github.com/rosinality/stylegan2-pytorch/issues/278
    img = None
    for i in range(self.n_layers - 1):
        if i > 0:
            out = self.up_convs[i - 1](
                out, w_plus[:, i * 2 - 1], noises_[i * 2 - 1]
            )

        out = self.convs[i](out, w_plus[:, i * 2], noises_[i * 2])
        img = self.to_rgbs[i](out, w_plus[:, i * 2 + 1], img)

    if return_latents:
        return img, w_plus
    else:
        return img
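An illustrative generation sketch (not from the source), assuming Generator is importable from stylegan2_torch.generator with the default channel map; shapes are for resolution = 256:

import torch
from stylegan2_torch.generator import Generator

g = Generator(resolution=256)
z = torch.randn(4, g.latent_dim)

img = g([z])                                  # single z: no mixing regularization
                                              # img: (4, 1, 256, 256), since ToRGB emits one channel
img, w_plus = g([z], return_latents=True)     # w_plus: (4, 14, 512) for resolution 256

z2 = torch.randn(4, g.latent_dim)
mixed = g([z, z2], mix_index=7)               # style mixing: first 7 w's from z, the rest from z2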

mean_latent

mean_latent(n_sample: int, device: str) -> Tensor
Source code in stylegan2_torch/generator/__init__.py
def mean_latent(self, n_sample: int, device: str) -> Tensor:
    mean_latent = self.mapping(
        torch.randn(n_sample, self.latent_dim, device=device)
    ).mean(0, keepdim=True)
    mean_latent.detach_()
    return mean_latent
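A hedged sketch of the truncation trick built on mean_latent (illustrative values; assumes the Generator import shown earlier):

import torch
from stylegan2_torch.generator import Generator

g = Generator(resolution=256)
mean_w = g.mean_latent(n_sample=4096, device="cpu")   # (1, 512), detached
z = torch.randn(4, g.latent_dim)
img = g([z], trunc_option=(0.7, mean_w))              # w <- mean_w + 0.7 * (w - mean_w)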

conv_block

AddNoise

AddNoise()

Bases: nn.Module

Inject white noise scaled by a learnable scalar (same noise for whole batch)

Source code in stylegan2_torch/generator/conv_block.py
def __init__(self):
    super().__init__()

    # Trainable parameters
    self.weight = Parameter(torch.zeros(1))

__call__ class-attribute

__call__ = proxy(forward)

weight instance-attribute

weight = Parameter(torch.zeros(1))

forward

forward(input: Tensor, noise: Optional[Tensor]) -> Tensor
Source code in stylegan2_torch/generator/conv_block.py
def forward(self, input: Tensor, noise: Optional[Tensor]) -> Tensor:
    if noise is None:
        batch, _, height, width = input.shape
        noise = input.new_empty(batch, 1, height, width).normal_()

    return input + self.weight * noise
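A minimal sketch, assuming AddNoise is importable from stylegan2_torch.generator.conv_block: when noise is None a fresh (N, 1, H, W) map is drawn and broadcast across channels.

import torch
from stylegan2_torch.generator.conv_block import AddNoise

add_noise = AddNoise()
feat = torch.randn(2, 512, 8, 8)
out = add_noise(feat, None)                       # noise drawn internally, scaled by the learned weight
out = add_noise(feat, torch.randn(1, 1, 8, 8))    # or supply a fixed map (broadcast over the batch)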

ModConvBlock

ModConvBlock(
    in_channel: int,
    out_channel: int,
    kernel_size: int,
    latent_dim: int,
)

Bases: nn.Module

Modulated convolution block

disentangled latent vector (w) => affine transformation => style vector
style vector => modulate + demodulate convolution weights => new conv weights
new conv weights & input features => group convolution => output features
output features => add noise & leaky ReLU => final output features

Source code in stylegan2_torch/generator/conv_block.py
def __init__(
    self, in_channel: int, out_channel: int, kernel_size: int, latent_dim: int
):
    super().__init__()

    # Affine mapping from W to style vector
    self.affine = EqualLinear(latent_dim, in_channel, bias_init=1)

    # Trainable parameters
    self.weight = Parameter(
        torch.randn(1, out_channel, in_channel, kernel_size, kernel_size)
    )
    self.scale = 1 / math.sqrt(in_channel * kernel_size**2)

    # Noise and Leaky ReLU
    self.add_noise = AddNoise()
    self.leaky_relu = FusedLeakyReLU(out_channel)

__call__ class-attribute

__call__ = proxy(forward)

add_noise instance-attribute

add_noise = AddNoise()

affine instance-attribute

affine = EqualLinear(latent_dim, in_channel, bias_init=1)

leaky_relu instance-attribute

leaky_relu = FusedLeakyReLU(out_channel)

scale instance-attribute

scale = 1 / math.sqrt(in_channel * kernel_size ** 2)

weight instance-attribute

weight = Parameter(
    torch.randn(
        1, out_channel, in_channel, kernel_size, kernel_size
    )
)

forward

forward(
    input: Tensor, w: Tensor, noise: Optional[Tensor]
) -> Tensor
Source code in stylegan2_torch/generator/conv_block.py
def forward(self, input: Tensor, w: Tensor, noise: Optional[Tensor]) -> Tensor:
    batch, in_channel, _, _ = input.shape

    # Get style vectors (N, 1, C_in, 1, 1)
    style = self.affine(w).view(batch, 1, in_channel, 1, 1)

    # Modulate weights with equalized learning rate (N, C_out, C_in, K_h, K_w)
    weight = mod(self.scale * self.weight, style)

    # Demodulate weights
    weight = demod(weight)

    # Perform convolution
    out = group_conv(input, weight)

    # Add noise
    out = self.add_noise(out, noise)

    # Add learnable bias and activate
    return self.leaky_relu(out)
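Shape sketch for the path above (illustrative sizes; assumes ModConvBlock is importable from stylegan2_torch.generator.conv_block):

import torch
from stylegan2_torch.generator.conv_block import ModConvBlock

block = ModConvBlock(in_channel=512, out_channel=512, kernel_size=3, latent_dim=512)
x = torch.randn(2, 512, 8, 8)
w = torch.randn(2, 512)
y = block(x, w, None)          # (2, 512, 8, 8): spatial size preserved by the kernel_size // 2 padding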

UpModConvBlock

UpModConvBlock(
    in_channel: int,
    out_channel: int,
    kernel_size: int,
    latent_dim: int,
    up: int,
    blur_kernel: List[int],
)

Bases: nn.Module

Modulated convolution block with upsampling

disentangled latent vector (w) => affine transformation => style vector
style vector => modulate + demodulate convolution weights => new conv weights
new conv weights & input features => group convolution and upsampling => output features
output features => add noise & leaky ReLU => final output features

Source code in stylegan2_torch/generator/conv_block.py
def __init__(
    self,
    in_channel: int,
    out_channel: int,
    kernel_size: int,
    latent_dim: int,
    up: int,
    blur_kernel: List[int],
):
    super().__init__()

    # Affine mapping from W to style vector
    self.affine = EqualLinear(latent_dim, in_channel, bias_init=1)

    # Trainable parameters
    self.weight = Parameter(
        torch.randn(1, out_channel, in_channel, kernel_size, kernel_size)
    )
    self.scale = 1 / math.sqrt(in_channel * kernel_size**2)

    # Blurring kernel
    self.up = up
    self.blur = Blur(blur_kernel, up, kernel_size)

    # Noise and Leaky ReLU
    self.add_noise = AddNoise()
    self.leaky_relu = FusedLeakyReLU(out_channel)

__call__ class-attribute

__call__ = proxy(forward)

add_noise instance-attribute

add_noise = AddNoise()

affine instance-attribute

affine = EqualLinear(latent_dim, in_channel, bias_init=1)

blur instance-attribute

blur = Blur(blur_kernel, up, kernel_size)

leaky_relu instance-attribute

leaky_relu = FusedLeakyReLU(out_channel)

scale instance-attribute

scale = 1 / math.sqrt(in_channel * kernel_size ** 2)

up instance-attribute

up = up

weight instance-attribute

weight = Parameter(
    torch.randn(
        1, out_channel, in_channel, kernel_size, kernel_size
    )
)

forward

forward(
    input: Tensor, w: Tensor, noise: Optional[Tensor]
) -> Tensor
Source code in stylegan2_torch/generator/conv_block.py
def forward(self, input: Tensor, w: Tensor, noise: Optional[Tensor]) -> Tensor:
    batch, in_channel, _, _ = input.shape

    # Get style vectors (N, 1, C_in, 1, 1)
    style = self.affine(w).view(batch, 1, in_channel, 1, 1)

    # Modulate weights with equalized learning rate (N, C_out, C_in, K_h, K_w)
    weight = mod(self.scale * self.weight, style)

    # Demodulate weights
    weight = demod(weight)

    # Reshape to use group convolution
    out = group_conv_up(input, weight, self.up)

    # Apply blurring filter for anti-aliasing (linear operation so order doesn't matter?)
    out = self.blur(out)

    # Add noise
    out = self.add_noise(out, noise)

    # Add learnable bias and activate
    return self.leaky_relu(out)
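Shape sketch, assuming UpModConvBlock is importable from stylegan2_torch.generator.conv_block; for up = 2 the spatial size should double (the blur crops the transpose-conv overshoot back to the target size):

import torch
from stylegan2_torch.generator.conv_block import UpModConvBlock

block = UpModConvBlock(512, 256, 3, 512, up=2, blur_kernel=[1, 3, 3, 1])
x = torch.randn(2, 512, 8, 8)
w = torch.randn(2, 512)
y = block(x, w, None)          # expected (2, 256, 16, 16)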

demod

demod(weight: Tensor) -> Tensor

Demodulate convolution weights (normalization = statistically restore output feature map to unit s.d.)

Parameters:

Name Type Description Default
weight Tensor

(N, C_out, C_in, K_h, K_w)

required

Returns:

Name Type Description
Tensor Tensor

(N, C_out, C_in, K_h, K_w)

Source code in stylegan2_torch/generator/conv_block.py
def demod(weight: Tensor) -> Tensor:
    """
    Demodulate convolution weights
    (normalization = statistically restore output feature map to unit s.d.)

    Args:
        weight (Tensor): (N, C_out, C_in, K_h, K_w)

    Returns:
        Tensor: (N, C_out, C_in, K_h, K_w)
    """
    batch, out_channel, _, _, _ = weight.shape
    demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + 1e-8).view(
        batch, out_channel, 1, 1, 1
    )
    return weight * demod
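A quick numeric check of the property described above: after demodulation, each per-sample output filter has (approximately) unit L2 norm.

import torch
from stylegan2_torch.generator.conv_block import demod

weight = torch.randn(4, 64, 32, 3, 3)               # (N, C_out, C_in, K_h, K_w)
norms = demod(weight).pow(2).sum(dim=[2, 3, 4])     # ~1.0 for every (sample, output channel)
print(norms.min().item(), norms.max().item())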

group_conv

group_conv(input: Tensor, weight: Tensor) -> Tensor

Efficiently perform modulated convolution (i.e. grouped convolution)

Parameters:

Name Type Description Default
input Tensor

(N, C_in, H, W)

required
weight Tensor

(N, C_out, C_in, K, K)

required

Returns:

Name Type Description
Tensor Tensor

(N, C_out, H, W)

Source code in stylegan2_torch/generator/conv_block.py
def group_conv(input: Tensor, weight: Tensor) -> Tensor:
    """
    Efficiently perform modulated convolution
    (i.e. grouped convolution)

    Args:
        input (Tensor): (N, C_in, H, W)
        weight (Tensor): (N, C_out, C_in, K, K)

    Returns:
        Tensor: (N, C_out, H, W)
    """
    batch, in_channel, height, width = input.shape
    _, out_channel, _, k_h, k_w = weight.shape

    weight = weight.view(batch * out_channel, in_channel, k_h, k_w)
    input = input.view(1, batch * in_channel, height, width)

    out = conv2d(input=input, weight=weight, padding=k_h // 2, groups=batch)
    return out.view(batch, out_channel, height, width)
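The grouped convolution is just a batched way of giving every sample its own filter bank; a small equivalence check with illustrative sizes:

import torch
from torch.nn.functional import conv2d
from stylegan2_torch.generator.conv_block import group_conv

x = torch.randn(2, 8, 16, 16)                   # (N, C_in, H, W)
w = torch.randn(2, 4, 8, 3, 3)                  # one (C_out, C_in, K, K) filter bank per sample
fast = group_conv(x, w)                         # (2, 4, 16, 16)
slow = torch.cat([conv2d(x[i:i + 1], w[i], padding=1) for i in range(2)])
assert torch.allclose(fast, slow, atol=1e-5)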

group_conv_up

group_conv_up(
    input: Tensor, weight: Tensor, up: int = 2
) -> Tensor

Efficiently perform upsampling + modulated convolution (i.e. grouped transpose convolution)

Parameters:

Name Type Description Default
input Tensor

(N, C_in, H, W)

required
weight Tensor

(N, C_out, C_in, K, K)

required
up int

Upsampling factor U. Defaults to 2.

2

Returns:

Name Type Description
Tensor Tensor

(N, C, (H - 1) * U + K - 1 + 1, (W - 1) * U + K - 1 + 1)

Source code in stylegan2_torch/generator/conv_block.py
def group_conv_up(input: Tensor, weight: Tensor, up: int = 2) -> Tensor:
    """
    Efficiently perform upsampling + modulated convolution
    (i.e. grouped transpose convolution)

    Args:
        input (Tensor): (N, C_in, H, W)
        weight (Tensor): (N, C_out, C_in, K, K)
        up (int, optional): Upsampling factor U. Defaults to 2.

    Returns:
        Tensor: (N, C, (H - 1) * U + K - 1 + 1, (W - 1) * U + K - 1 + 1)
    """
    batch, in_channel, height, width = input.shape
    _, out_channel, _, k_h, k_w = weight.shape

    weight = weight.transpose(1, 2).reshape(batch * in_channel, out_channel, k_h, k_w)
    input = input.view(1, batch * in_channel, height, width)
    out = conv_transpose2d(
        input=input, weight=weight, stride=up, padding=0, groups=batch
    )
    _, _, out_h, out_w = out.shape
    return out.view(batch, out_channel, out_h, out_w)
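Shape sketch for the transpose-convolution path (illustrative sizes; the blur/crop applied later in UpModConvBlock is not included here):

import torch
from stylegan2_torch.generator.conv_block import group_conv_up

x = torch.randn(2, 8, 16, 16)
w = torch.randn(2, 4, 8, 3, 3)
y = group_conv_up(x, w, up=2)      # (2, 4, 33, 33) = (N, C_out, (16 - 1) * 2 + 3, (16 - 1) * 2 + 3)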

mod

mod(weight: Tensor, style: Tensor) -> Tensor

Modulate convolution weights with style vector (styling = scale each input feature map before convolution)

Parameters:

Name Type Description Default
weight Tensor

(1, C_out, C_in, K_h, K_w)

required
style Tensor

(N, 1, C_in, 1, 1)

required

Returns:

Name Type Description
Tensor Tensor

(N, C_out, C_in, K_h, K_w)

Source code in stylegan2_torch/generator/conv_block.py
def mod(weight: Tensor, style: Tensor) -> Tensor:
    """
    Modulate convolution weights with style vector
    (styling = scale each input feature map before convolution)

    Args:
        weight (Tensor): (1, C_out, C_in, K_h, K_w)
        style (Tensor): (N, 1, C_in, 1, 1)

    Returns:
        Tensor: (N, C_out, C_in, K_h, K_w)
    """
    return weight * style
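Broadcasting sketch for the modulation step: a shared (1, C_out, C_in, K, K) weight is scaled per sample along the input-channel axis.

import torch
from stylegan2_torch.generator.conv_block import mod

weight = torch.randn(1, 64, 32, 3, 3)     # shared across the batch
style = torch.randn(8, 1, 32, 1, 1)       # one scale per input channel per sample
print(mod(weight, style).shape)           # torch.Size([8, 64, 32, 3, 3])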

mapping

MappingNetwork

MappingNetwork(
    latent_dim: int, n_mlp: int, lr_mlp_mult: float
)

Bases: nn.Sequential

Mapping network from sampling space (z) to disentangled latent space (w)

Source code in stylegan2_torch/generator/mapping.py
23
24
25
26
27
28
29
30
31
32
33
34
def __init__(self, latent_dim: int, n_mlp: int, lr_mlp_mult: float):
    super().__init__(
        Normalize(),
        *[
            EqualLeakyReLU(
                latent_dim,
                latent_dim,
                lr_mult=lr_mlp_mult,
            )
            for _ in range(n_mlp)
        ]
    )
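Illustrative usage, assuming MappingNetwork is importable from stylegan2_torch.generator.mapping:

import torch
from stylegan2_torch.generator.mapping import MappingNetwork

mapping = MappingNetwork(latent_dim=512, n_mlp=8, lr_mlp_mult=0.01)
z = torch.randn(16, 512)
w = mapping(z)                 # (16, 512): normalized z pushed through 8 equalized-lr MLP layers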

Normalize

Bases: nn.Module

Normalize latent vector for each sample

forward

forward(input: Tensor) -> Tensor
Source code in stylegan2_torch/generator/mapping.py
def forward(self, input: Tensor) -> Tensor:
    # input: (N, style_dim)
    # Normalize z in each sample to N(0,1)
    return input * torch.rsqrt(torch.mean(input ** 2, dim=1, keepdim=True) + 1e-8)
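A quick check of the normalization (assuming Normalize is importable from stylegan2_torch.generator.mapping): every sample ends up with unit second moment across the latent dimension.

import torch
from stylegan2_torch.generator.mapping import Normalize

norm = Normalize()
z = torch.randn(4, 512) * 3.0
out = norm(z)
print(out.pow(2).mean(dim=1))      # ~1.0 for every sample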

rgb

ToRGB

ToRGB(
    in_channel: int,
    latent_dim: int,
    up: int,
    blur_kernel: List[int],
)

Bases: nn.Module

Source code in stylegan2_torch/generator/rgb.py
def __init__(
    self,
    in_channel: int,
    latent_dim: int,
    up: int,
    blur_kernel: List[int],
):
    super().__init__()

    # Affine mapping from W to style vector
    self.affine = EqualLinear(latent_dim, in_channel, bias_init=1)

    # Trainable parameters
    self.weight = Parameter(torch.randn(1, 1, in_channel, 1, 1))
    self.scale = 1 / math.sqrt(in_channel)
    self.bias = Parameter(torch.zeros(1, 1, 1, 1))

    if up > 1:
        self.upsample = Upsample(blur_kernel, up)

affine instance-attribute

affine = EqualLinear(latent_dim, in_channel, bias_init=1)

bias instance-attribute

bias = Parameter(torch.zeros(1, 1, 1, 1))

scale instance-attribute

scale = 1 / math.sqrt(in_channel)

upsample instance-attribute

upsample = Upsample(blur_kernel, up)

weight instance-attribute

weight = Parameter(torch.randn(1, 1, in_channel, 1, 1))

forward

forward(
    input: Tensor,
    w: Tensor,
    prev_output: Optional[Tensor] = None,
) -> Tensor
Source code in stylegan2_torch/generator/rgb.py
def forward(self,
            input: Tensor,
            w: Tensor,
            prev_output: Optional[Tensor] = None) -> Tensor:
    batch, in_channel, _, _ = input.shape

    # Get style vectors (N, 1, C_in, 1, 1)
    style = self.affine(w).view(batch, 1, in_channel, 1, 1)

    # Modulate weights with equalized learning rate (N, C_out, C_in, K_h, K_w)
    weight = mod(self.scale * self.weight, style)

    # Perform convolution and add bias
    out = group_conv(input, weight) + self.bias

    if prev_output is not None:
        out = out + self.upsample(prev_output)

    return out
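Skip-connection sketch (illustrative sizes; assumes ToRGB is importable from stylegan2_torch.generator.rgb): each ToRGB adds its own contribution on top of the upsampled image from the previous resolution.

import torch
from stylegan2_torch.generator.rgb import ToRGB

to_rgb = ToRGB(in_channel=256, latent_dim=512, up=2, blur_kernel=[1, 3, 3, 1])
feat = torch.randn(2, 256, 16, 16)
w = torch.randn(2, 512)
img_8 = torch.randn(2, 1, 8, 8)       # single-channel image from the previous (8x8) resolution
img_16 = to_rgb(feat, w, img_8)       # (2, 1, 16, 16): new contribution + upsampled img_8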

Upsample

Upsample(blur_kernel: List[int], factor: int)

Bases: nn.Module

Upsampling + apply blurring FIR filter

Source code in stylegan2_torch/generator/rgb.py
def __init__(self, blur_kernel: List[int], factor: int):
    super().__init__()

    self.factor = factor

    # Factor to compensate for averaging with zeros
    self.kernel: Tensor
    self.register_buffer("kernel", make_kernel(blur_kernel, self.factor))

    # Since upsampling by factor means there is factor - 1 pad1 already built-in
    """
    UPSAMPLE CASE

       kernel: [kkkkk]................[kkkkk] (k_w = 5)
    upsampled:     [x---x---x---x---x---x---] (in_w = 6, up_x = 4)
       padded: [ppppx---x---x---x---x---x---] (pad0 = 4, pad1 = 0)
       output:   [oooooooooooooooooooooooo]   (out_w = 24)
    Hence, pad0 + pad1 = k_w - 1
           pad0 - pad1 = up_x - 1


    DOWNSAMPLE CASE

       kernel: [kkkkk]...............[kkkkk] (k_w = 5)
        input:   [xxxxxxxxxxxxxxxxxxxxxxxx]  (in_w = 24)
       padded: [ppxxxxxxxxxxxxxxxxxxxxxxxxp] (pad0 = 2, pad1 = 1)
       output:   [o-o-o-o-o-o-o-o-o-o-o-o]   (out_w = 12)
    Since last (factor - 1) elements are discarded anyway,
    they don't need to be padded
    Hence, pad0 + pad1 = k_w - 1 - (factor - 1)
           pad0 - pad1 = 0 or 1
    """
    p = len(blur_kernel) - factor
    pad0 = (p + 1) // 2 + factor - 1
    pad1 = p // 2

    self.pad = (pad0, pad1)
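Worked padding arithmetic for the default 4-tap kernel and factor 2 (values follow from the code above):

blur_kernel = [1, 3, 3, 1]
factor = 2
p = len(blur_kernel) - factor        # 2
pad0 = (p + 1) // 2 + factor - 1     # 2: the extra leading pad compensates for the inserted zeros
pad1 = p // 2                        # 1
# pad0 + pad1 == len(blur_kernel) - 1 and pad0 - pad1 == factor - 1, matching the derivation above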

factor instance-attribute

factor = factor

kernel instance-attribute

kernel: Tensor

pad instance-attribute

pad = (pad0, pad1)

forward

forward(input: Tensor) -> Tensor
Source code in stylegan2_torch/generator/rgb.py
def forward(self, input: Tensor) -> Tensor:
    return upfirdn2d(input,
                     self.kernel,
                     up=self.factor,
                     down=1,
                     pad=self.pad)
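
Illustrative usage (assumes Upsample is importable from stylegan2_torch.generator.rgb): a factor-2 upsample doubles the spatial size.

import torch
from stylegan2_torch.generator.rgb import Upsample

up = Upsample(blur_kernel=[1, 3, 3, 1], factor=2)
img = torch.randn(2, 1, 64, 64)
print(up(img).shape)               # torch.Size([2, 1, 128, 128])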