Skip to content

capturegraph.scheduling.distance.combination #

Distance Function Combination#

Combines multiple distance functions into a single composite metric using Euclidean distance (L2 norm) in the multi-dimensional distance space.

CombinedBatchDistance #

Combined distance function with batch computation support.

Source code in capturegraph-lib/capturegraph/scheduling/distance/combination.py
class CombinedBatchDistance:
    """Combined distance function with batch computation support."""

    def __init__(
        self,
        distance_fns: list[tuple[str, Callable[[Any, Any], float]]],
    ):
        """Initialize combined distance function."""
        self._fns = distance_fns

    def __call__(self, a: cg.Dict[Any], b: cg.Dict[Any]) -> float:
        """Compute distance between two sessions (single pair)."""
        distance_sq = 0.0
        found_keys = set()
        reason = None

        for path, distance_fn in self._fns:
            val_a = _access(a, path)
            val_b = _access(b, path)
            if not cg.is_missing(val_a) and not cg.is_missing(val_b):
                try:
                    distance_sq += np.square(distance_fn(val_a, val_b))
                    found_keys.add(path)
                except Exception as e:
                    if reason is None:
                        reason = e

        for path, _ in self._fns:
            if path not in found_keys:
                if reason is None:
                    raise ValueError(f"All keys missing for path '{path}'")
                else:
                    raise ValueError(f"All keys missing for path '{path}'") from reason

        return float(np.sqrt(distance_sq))

    def matrix(
        self,
        sessions_a: cg.List[cg.Dict[Any]],
        sessions_b: cg.List[cg.Dict[Any]] | None = None,
    ) -> np.ndarray:
        """Compute pairwise distance matrix efficiently."""
        if sessions_b is None:
            sessions_b = sessions_a

        n, m = len(sessions_a), len(sessions_b)
        distance_sq = np.zeros((n, m), dtype=np.float64)

        # Process batch-capable functions with vectorization
        for path, fn in self._fns:
            # Extract values for this path from all sessions
            vals_a = cg.List([_access(s, path) for s in sessions_a])
            vals_b = cg.List([_access(s, path) for s in sessions_b])

            _check_all_missing(vals_a, "distance matrix")
            _check_all_missing(vals_b, "distance matrix")

            full_distances = batch_matrix(fn, vals_a, vals_b)
            distance_sq += np.square(full_distances)

        return np.sqrt(distance_sq)

__init__(distance_fns) #

Initialize combined distance function.

Source code in capturegraph-lib/capturegraph/scheduling/distance/combination.py
def __init__(
    self,
    distance_fns: list[tuple[str, Callable[[Any, Any], float]]],
):
    """Store the ``(path, distance function)`` pairs to combine.

    Args:
        distance_fns: Pairs of attribute path and the distance function
            applied to the values found at that path. The list is kept by
            reference, not copied.
    """
    self._fns = distance_fns

__call__(a, b) #

Compute distance between two sessions (single pair).

Source code in capturegraph-lib/capturegraph/scheduling/distance/combination.py
def __call__(self, a: cg.Dict[Any], b: cg.Dict[Any]) -> float:
    """Return the combined distance between sessions ``a`` and ``b``.

    Args:
        a: First session.
        b: Second session.

    Returns:
        The square root of the summed squared component distances.

    Raises:
        ValueError: If any configured path contributed nothing, either
            because a value was missing on one side or because its distance
            function raised. The first exception raised by a distance
            function, if any, is attached as the cause.
    """
    total_sq = 0.0
    resolved = set()
    first_failure = None

    for path, distance_fn in self._fns:
        lhs = _access(a, path)
        rhs = _access(b, path)
        # A component only counts when both sessions provide a value.
        if cg.is_missing(lhs) or cg.is_missing(rhs):
            continue
        try:
            total_sq += np.square(distance_fn(lhs, rhs))
            resolved.add(path)
        except Exception as exc:
            # Keep only the earliest failure; it becomes the cause below.
            first_failure = exc if first_failure is None else first_failure

    # Every configured path must have contributed; report the first that did
    # not, in configuration order.
    for path, _ in self._fns:
        if path in resolved:
            continue
        error = ValueError(f"All keys missing for path '{path}'")
        if first_failure is None:
            raise error
        raise error from first_failure

    return float(np.sqrt(total_sq))

matrix(sessions_a, sessions_b=None) #

Compute pairwise distance matrix efficiently.

Source code in capturegraph-lib/capturegraph/scheduling/distance/combination.py
def matrix(
    self,
    sessions_a: cg.List[cg.Dict[Any]],
    sessions_b: cg.List[cg.Dict[Any]] | None = None,
) -> np.ndarray:
    """Compute pairwise distance matrix efficiently."""
    if sessions_b is None:
        sessions_b = sessions_a

    n, m = len(sessions_a), len(sessions_b)
    distance_sq = np.zeros((n, m), dtype=np.float64)

    # Process batch-capable functions with vectorization
    for path, fn in self._fns:
        # Extract values for this path from all sessions
        vals_a = cg.List([_access(s, path) for s in sessions_a])
        vals_b = cg.List([_access(s, path) for s in sessions_b])

        _check_all_missing(vals_a, "distance matrix")
        _check_all_missing(vals_b, "distance matrix")

        full_distances = batch_matrix(fn, vals_a, vals_b)
        distance_sq += np.square(full_distances)

    return np.sqrt(distance_sq)

combine(**kwargs) #

Combine multiple distance functions into a single composite metric.

Each distance function can be provided as a kwarg where the key is the attribute path to extract from each session.

The combined distance is the Euclidean (L2) norm: d_combined = sqrt(d1² + d2² + ... + dn²)

The returned function supports both single-pair computation via __call__ and efficient batch computation via matrix().

Parameters:

Name Type Description Default
**kwargs Callable[[Any, Any], float]

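Named distance functions. Each keyword is the attribute path to read from a session, and its value is the distance function applied to that attribute. For example, `solar_angle=fn` extracts `session.solar_angle` from both sessions and passes the two values to `fn`.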

{}

Returns:

Type Description
CombinedBatchDistance

A combined distance function with:

  • `__call__(a, b) -> float`: Single pair distance.
  • `matrix(sessions_a, sessions_b) -> np.ndarray`: Pairwise matrix.
Example
import capturegraph.scheduling as cgsh

# Build a composite metric: each keyword is the session attribute path and
# its value is the distance function applied to that attribute.
dist_fn = cgsh.distance.combine(
    solar_angle=cgsh.distance.solar(sigma_deg=2.0),
    location=cgsh.distance.location(sigma_m=100.0),
)

# Single pair: one float for two sessions (existing API)
d = dist_fn(session_a, session_b)

# Batch: the entire pairwise matrix as an np.ndarray (new optimized API)
matrix = dist_fn.matrix(potential_sessions, existing_sessions)
Source code in capturegraph-lib/capturegraph/scheduling/distance/combination.py
def combine(
    **kwargs: Callable[[Any, Any], float],
) -> CombinedBatchDistance:
    """Build one composite distance from several per-attribute distances.

    Every keyword argument pairs an attribute path (the keyword) with the
    distance function applied to the values found at that path.

    The components are merged with the Euclidean (L2) norm:
    `d_combined = sqrt(d1² + d2² + ... + dn²)`

    The result can be called on a single pair of sessions via `__call__`
    or evaluated over whole collections via `matrix()`.

    Args:
        **kwargs: Distance functions keyed by attribute path. For example,
            `solar_angle=fn` extracts `session.solar_angle` and passes it
            to `fn`.

    Returns:
        A combined distance function with:
        - `__call__(a, b) -> float`: Single pair distance.
        - `matrix(sessions_a, sessions_b) -> np.ndarray`: Pairwise matrix.

    Example:
        ```python
        import capturegraph.scheduling as cgsh

        # Using kwargs (cleanest for attribute-based distances)
        dist_fn = cgsh.distance.combine(
            solar_angle=cgsh.distance.solar(sigma_deg=2.0),
            location=cgsh.distance.location(sigma_m=100.0),
        )

        # Single pair distance (existing API)
        d = dist_fn(session_a, session_b)

        # Batch: compute entire pairwise matrix (new optimized API)
        matrix = dist_fn.matrix(potential_sessions, existing_sessions)
        ```
    """
    # Keyword order is preserved, so components keep their call-site order.
    return CombinedBatchDistance(list(kwargs.items()))