Source code for verskyt.layers.projection

"""
Tversky neural network layers.

Implements TverskySimilarityLayer (Equation 6) and TverskyProjectionLayer (Equation 7)
from the paper.
"""

from typing import Literal, Optional, Union

import torch
import torch.nn as nn

from verskyt.core.similarity import (
    DifferenceReduction,
    IntersectionReduction,
    tversky_contrast_similarity,
    tversky_similarity,
)



[docs]
class TverskySimilarityLayer(nn.Module):
    """
    Tversky Similarity Layer (Equation 6 from paper).

    Computes similarity between two objects using learnable feature bank
    and Tversky parameters (α, β, θ).

    S_Ω,α,β,θ(a,b): ℝ^d × ℝ^d → ℝ

    Attributes:
        feature_bank (nn.Parameter): Learnable feature bank Ω of shape
            [num_features, in_features].
        alpha (nn.Parameter or torch.Tensor): Scalar α. A parameter when the
            layer is built with ``learnable_ab=True``, otherwise a buffer.
        beta (nn.Parameter or torch.Tensor): Scalar β, registered the same
            way as ``alpha``.
        theta (nn.Parameter or torch.Tensor): Scalar θ. A parameter only when
            both ``use_contrast_form`` and ``learnable_theta`` are set,
            otherwise a buffer.
        use_contrast_form (bool): Selects the contrast (linear combination)
            similarity instead of the ratio form in ``forward``.
    """


[docs]
    def __init__(
        self,
        in_features: int,
        num_features: int,
        alpha: float = 0.5,
        beta: float = 0.5,
        learnable_ab: bool = True,
        learnable_theta: bool = False,
        theta: float = 1e-7,
        intersection_reduction: Union[IntersectionReduction, str] = "product",
        difference_reduction: Union[DifferenceReduction, str] = "substractmatch",
        use_contrast_form: bool = False,
        feature_init: Literal[
            "uniform", "normal", "xavier_uniform", "xavier_normal"
        ] = "xavier_uniform",
    ):
        """
        Build the similarity layer and initialize its feature bank.

        Args:
            in_features: Dimension of the input vectors.
            num_features: Number of rows in the feature bank (|Ω|).
            alpha: Starting value of α (weight on a's distinctive features).
            beta: Starting value of β (weight on b's distinctive features).
            learnable_ab: If True, α and β are registered as parameters;
                otherwise they are stored as buffers.
            learnable_theta: If True *and* ``use_contrast_form`` is True, θ is
                registered as a parameter; in every other case θ is a buffer.
            theta: Starting (or constant) value of θ.
            intersection_reduction: Method for computing feature intersections;
                stored and forwarded to the similarity function.
            difference_reduction: Method for computing feature differences;
                stored and forwarded to the similarity function.
            use_contrast_form: Use the linear-combination (contrast) form
                instead of the ratio form.
            feature_init: Name of the initialization scheme applied to the
                feature bank by ``reset_parameters``.
        """
        super().__init__()

        # Plain (non-tensor) configuration.
        self.in_features, self.num_features = in_features, num_features
        self.intersection_reduction = intersection_reduction
        self.difference_reduction = difference_reduction
        self.use_contrast_form = use_contrast_form
        self.feature_init = feature_init

        # Feature bank Ω; allocated uninitialized, filled by reset_parameters().
        self.feature_bank = nn.Parameter(torch.empty(num_features, in_features))

        # α then β: trainable parameters or fixed buffers, as 0-dim tensors.
        for name, initial in (("alpha", alpha), ("beta", beta)):
            scalar = torch.tensor(float(initial))
            if learnable_ab:
                setattr(self, name, nn.Parameter(scalar))
            else:
                self.register_buffer(name, scalar)

        # θ is trainable only in the contrast form with learnable_theta set.
        theta_scalar = torch.tensor(float(theta))
        if not (use_contrast_form and learnable_theta):
            self.register_buffer("theta", theta_scalar)
        else:
            self.theta = nn.Parameter(theta_scalar)

        self.reset_parameters()



[docs]
    def reset_parameters(self):
        """Initialize the feature bank in place according to ``self.feature_init``.

        Supported schemes:
            - ``"uniform"``: uniform in [-1, 1] (as used in the paper's XOR
              experiments).
            - ``"normal"``: zero-mean normal with std 0.02.
            - ``"xavier_uniform"`` / ``"xavier_normal"``: the corresponding
              ``torch.nn.init`` Xavier initializers.

        Raises:
            ValueError: If ``self.feature_init`` is not one of the supported
                names. Without this check the feature bank would silently keep
                the uninitialized memory it was allocated with.
        """
        initializers = {
            "uniform": lambda tensor: nn.init.uniform_(tensor, -1, 1),
            "normal": lambda tensor: nn.init.normal_(tensor, std=0.02),
            "xavier_uniform": nn.init.xavier_uniform_,
            "xavier_normal": nn.init.xavier_normal_,
        }
        if self.feature_init not in initializers:
            raise ValueError(
                f"Unknown feature_init={self.feature_init!r}; "
                f"expected one of {sorted(initializers)}"
            )
        initializers[self.feature_init](self.feature_bank)



[docs]
    def forward(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
        """
        Compute element-wise Tversky similarity between objects a and b.

        Row ``i`` of ``a`` is compared only with row ``i`` of ``b``; each pair
        is passed to the similarity function as a batch of one.

        Args:
            a: First object tensor of shape [batch_size, in_features]
            b: Second object tensor of shape [batch_size, in_features]

        Returns:
            Similarity scores of shape [batch_size]. For an empty batch an
            empty tensor with ``a``'s dtype and device is returned.

        Raises:
            ValueError: If ``a`` and ``b`` have different batch sizes.
        """
        batch_size = a.shape[0]
        if b.shape[0] != batch_size:
            # Previously a longer ``b`` was silently truncated and a shorter
            # one failed later when the per-pair result was indexed.
            raise ValueError(
                "a and b must have the same batch size, "
                f"got {batch_size} and {b.shape[0]}"
            )
        if batch_size == 0:
            # torch.stack rejects an empty list, so handle this case directly.
            return a.new_empty((0,))

        # Loop-invariant choices are made once, outside the per-pair loop.
        if self.use_contrast_form:
            # Linear-combination form; θ is passed as a tensor (it may be a
            # learnable parameter in this mode).
            similarity_fn = tversky_contrast_similarity
            theta = self.theta
        else:
            # Ratio (Tversky Index) form; θ is passed as a plain Python float.
            similarity_fn = tversky_similarity
            theta = self.theta.item()

        similarities = [
            similarity_fn(
                a[i : i + 1],  # slicing keeps the batch dimension
                b[i : i + 1],
                self.feature_bank,
                self.alpha,
                self.beta,
                theta,
                self.intersection_reduction,
                self.difference_reduction,
            )[0, 0]  # result is presumably [1, 1]; take the scalar entry
            for i in range(batch_size)
        ]

        return torch.stack(similarities)





[docs]
class TverskyProjectionLayer(nn.Module):
    """A projection layer based on Tversky similarity (Equation 7 from paper).

    This layer replaces standard linear projections by computing Tversky similarity
    between inputs and learned prototype vectors. Unlike linear layers, it can
    model non-linear functions like XOR with a single layer, making it suitable
    for complex pattern recognition tasks.

    The layer implements: P_Ω,α,β,θ,Π(a): ℝ^d → ℝ^p

    Where:
    - Ω: Learnable feature bank of shape [num_features, in_features]
    - Π: Learnable prototype vectors of shape [num_prototypes, in_features]
    - α, β: Asymmetry parameters controlling feature distinctiveness weights
    - θ: Numerical stability constant

    This layer can serve as a drop-in replacement for nn.Linear in many architectures,
    offering improved interpretability and non-linear modeling capabilities.

    Attributes:
        prototypes (nn.Parameter): Learnable prototype vectors of shape
            [num_prototypes, in_features].
        feature_bank (nn.Parameter): Learnable feature bank of shape
            [num_features, in_features]. When a shared feature bank is passed
            to the constructor, this is that same object.
        alpha (nn.Parameter or torch.Tensor): Tversky weight for input-distinctive
            features.
        beta (nn.Parameter or torch.Tensor): Tversky weight for prototype-distinctive
            features.
        theta (float): Constant θ, stored as given to the constructor.
        bias (nn.Parameter or None): Optional bias term of shape [num_prototypes].
        shared_features (bool): True when the feature bank was supplied by the
            caller rather than created by this layer.
    """


[docs]
    def __init__(
        self,
        in_features: int,
        num_prototypes: int,
        num_features: int,
        alpha: float = 0.5,
        beta: float = 0.5,
        learnable_ab: bool = True,
        theta: float = 1e-7,
        intersection_reduction: Union[IntersectionReduction, str] = "product",
        difference_reduction: Union[DifferenceReduction, str] = "substractmatch",
        normalize_features: bool = False,
        normalize_prototypes: bool = False,
        prototype_init: Literal[
            "uniform", "normal", "xavier_uniform", "xavier_normal"
        ] = "xavier_uniform",
        feature_init: Literal[
            "uniform", "normal", "xavier_uniform", "xavier_normal"
        ] = "xavier_uniform",
        shared_feature_bank: Optional[nn.Parameter] = None,
        bias: bool = False,
    ):
        """Build the projection layer and initialize its parameters.

        Args:
            in_features (int): Embedding dimension of each input sample.
            num_prototypes (int): Number of prototype vectors to learn; this is
                the output dimension (typically the number of classes).
            num_features (int): Size of the feature bank (|Ω|), a key
                hyperparameter controlling the expressiveness of the feature
                space.
            alpha (float, optional): Initial Tversky weight for
                input-distinctive features (x \\ π). Higher values increase
                sensitivity to features present in the input but not in the
                prototypes. Defaults to 0.5.
            beta (float, optional): Initial Tversky weight for
                prototype-distinctive features (π \\ x). Higher values increase
                sensitivity to features present in the prototypes but not in
                the input. Defaults to 0.5.
            learnable_ab (bool, optional): If True, α and β are trainable
                parameters; if False they are fixed buffers. Defaults to True.
            theta (float, optional): Small constant for numerical stability in
                the similarity computation. Defaults to 1e-7.
            intersection_reduction (Union[IntersectionReduction, str], optional):
                Method for aggregating feature intersections. Options:
                "product", "min", "max", "mean", "gmean", "softmin".
                Defaults to "product".
            difference_reduction (Union[DifferenceReduction, str], optional):
                Method for computing feature differences. Options:
                "ignorematch", "substractmatch". Defaults to "substractmatch".
            normalize_features (bool, optional): Whether to L2-normalize the
                feature bank vectors during the forward pass. Defaults to False.
            normalize_prototypes (bool, optional): Whether to L2-normalize
                input and prototype vectors during the forward pass.
                Defaults to False.
            prototype_init (Literal, optional): Initialization scheme for the
                prototypes: "uniform", "normal", "xavier_uniform" or
                "xavier_normal". Defaults to "xavier_uniform".
            feature_init (Literal, optional): Initialization scheme for the
                feature bank; same options as ``prototype_init``.
                Defaults to "xavier_uniform".
            shared_feature_bank (Optional[nn.Parameter], optional): Existing
                feature bank to reuse. When given, it is used as-is and
                ``feature_init`` is ignored. Defaults to None.
            bias (bool, optional): Whether to add a learnable bias of shape
                [num_prototypes]. Defaults to False.

        Example:
            >>> # Create a projection layer as drop-in replacement for nn.Linear
            >>> layer = TverskyProjectionLayer(
            ...     in_features=128,
            ...     num_prototypes=10,  # like nn.Linear(128, 10)
            ...     num_features=64,    # internal feature space size
            ...     learnable_ab=True
            ... )
            >>> x = torch.randn(32, 128)  # batch of 32 samples
            >>> output = layer(x)         # shape: [32, 10]
        """
        super().__init__()

        # Plain (non-tensor) configuration.
        self.in_features, self.num_prototypes = in_features, num_prototypes
        self.num_features = num_features
        self.theta = theta
        self.intersection_reduction = intersection_reduction
        self.difference_reduction = difference_reduction
        self.normalize_features = normalize_features
        self.normalize_prototypes = normalize_prototypes
        self.prototype_init, self.feature_init = prototype_init, feature_init

        # Prototypes Π; allocated uninitialized, filled by reset_parameters().
        self.prototypes = nn.Parameter(torch.empty(num_prototypes, in_features))

        # Feature bank Ω: reuse the caller's bank when one is supplied,
        # otherwise allocate a fresh one owned by this layer.
        self.shared_features = shared_feature_bank is not None
        self.feature_bank = (
            shared_feature_bank
            if self.shared_features
            else nn.Parameter(torch.empty(num_features, in_features))
        )

        # α then β: trainable parameters or fixed buffers, as 0-dim tensors.
        for name, initial in (("alpha", alpha), ("beta", beta)):
            scalar = torch.tensor(float(initial))
            if learnable_ab:
                setattr(self, name, nn.Parameter(scalar))
            else:
                self.register_buffer(name, scalar)

        # Bias: a real parameter when requested, otherwise an explicit None
        # entry so that ``self.bias`` always resolves.
        if not bias:
            self.register_buffer("bias", None)
        else:
            self.bias = nn.Parameter(torch.zeros(num_prototypes))

        self.reset_parameters()



[docs]
    def reset_parameters(self):
        """Initialize prototypes, the feature bank (if owned) and the bias in place.

        Prototypes are initialized with ``self.prototype_init``. The feature
        bank is initialized with ``self.feature_init`` only when it is not
        shared (``self.shared_features`` is False); a shared bank is left
        untouched and ``feature_init`` is not consulted. The bias, when
        present, is reset to zeros.

        Supported schemes:
            - ``"uniform"``: uniform in [-1, 1].
            - ``"normal"``: zero-mean normal with std 0.02.
            - ``"xavier_uniform"`` / ``"xavier_normal"``: the corresponding
              ``torch.nn.init`` Xavier initializers.

        Raises:
            ValueError: If a scheme name that would be used is not supported.
                All names are validated before any tensor is modified, so a
                bad name never leaves the layer half-initialized or holding
                uninitialized memory.
        """
        initializers = {
            "uniform": lambda tensor: nn.init.uniform_(tensor, -1, 1),
            "normal": lambda tensor: nn.init.normal_(tensor, std=0.02),
            "xavier_uniform": nn.init.xavier_uniform_,
            "xavier_normal": nn.init.xavier_normal_,
        }

        # (option name, chosen scheme, tensor to fill). Prototypes come first
        # so random numbers are consumed in the same order as before.
        targets = [("prototype_init", self.prototype_init, self.prototypes)]
        if not self.shared_features:
            targets.append(("feature_init", self.feature_init, self.feature_bank))

        # Validate everything up front, then apply.
        for option, scheme, _ in targets:
            if scheme not in initializers:
                raise ValueError(
                    f"Unknown {option}={scheme!r}; "
                    f"expected one of {sorted(initializers)}"
                )
        for _, scheme, tensor in targets:
            initializers[scheme](tensor)

        # Initialize bias if present
        if self.bias is not None:
            nn.init.zeros_(self.bias)



[docs]
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Project ``x`` onto the prototype-similarity space.

        Calls ``tversky_similarity`` with the input, the learned prototypes,
        the feature bank and the layer's stored configuration (α, β, θ, the
        two reduction modes and the two normalization flags), then adds the
        bias when the layer has one.

        Args:
            x (torch.Tensor): Input tensor of shape [batch_size, in_features].

        Returns:
            torch.Tensor: Similarity scores of shape
                [batch_size, num_prototypes], one per input/prototype pair,
                offset by the bias if present.

        Note:
            This layer can serve as a drop-in replacement for nn.Linear, but
            produces similarity-based rather than linear projections.
        """
        scores = tversky_similarity(
            x,
            self.prototypes,
            self.feature_bank,
            self.alpha,
            self.beta,
            self.theta,
            self.intersection_reduction,
            self.difference_reduction,
            self.normalize_features,
            self.normalize_prototypes,
        )

        # No bias configured: the raw similarities are the output.
        if self.bias is None:
            return scores

        return scores + self.bias



[docs]
    def get_prototype(self, index: int) -> torch.Tensor:
        """Return a detached copy of the prototype vector at ``index``.

        The result shares neither autograd history nor storage with the
        layer's ``prototypes`` parameter.
        """
        selected = self.prototypes[index]
        snapshot = selected.detach()
        return snapshot.clone()



[docs]
    def set_prototype(self, index: int, value: torch.Tensor):
        """Overwrite the prototype vector at ``index`` with ``value``.

        The write happens in place under ``torch.no_grad()`` so it is not
        recorded by autograd.
        """
        prototype_table = self.prototypes
        with torch.no_grad():
            prototype_table[index] = value



[docs]
    def get_feature(self, index: int) -> torch.Tensor:
        """Return a detached copy of the feature-bank vector at ``index``.

        The result shares neither autograd history nor storage with the
        layer's ``feature_bank``.
        """
        selected = self.feature_bank[index]
        snapshot = selected.detach()
        return snapshot.clone()



[docs]
    def set_feature(self, index: int, value: torch.Tensor):
        """Overwrite the feature-bank vector at ``index`` with ``value``.

        The write happens in place under ``torch.no_grad()`` so it is not
        recorded by autograd. When the layer was built with a shared feature
        bank, ``feature_bank`` is that same object, so the change is made to
        the shared tensor.
        """
        bank = self.feature_bank
        with torch.no_grad():
            bank[index] = value


    @property
    def weight(self):
        """Alias for ``prototypes``, mirroring the ``weight`` attribute of nn.Linear.

        Provided for compatibility only: the returned parameter holds the
        prototype vectors and is not equivalent to linear-layer weights.
        """
        prototype_matrix = self.prototypes
        return prototype_matrix


[docs]
    def extra_repr(self) -> str:
        """Summarize the layer configuration as a comma-separated string.

        Always lists ``in_features``, ``num_prototypes`` and ``num_features``;
        appends ``bias=True`` when a bias is present and
        ``shared_features=True`` when the feature bank is shared.
        """
        fields = [
            f"in_features={self.in_features}",
            f"num_prototypes={self.num_prototypes}",
            f"num_features={self.num_features}",
        ]
        if self.bias is not None:
            fields.append("bias=True")
        if self.shared_features:
            fields.append("shared_features=True")
        return ", ".join(fields)