Skip to content

capturegraph.data.containers.list #

List - Vectorized Collection Operations#

List is a list subclass that broadcasts attribute access across all elements, similar to how NumPy broadcasts operations across arrays.

Example
from capturegraph.data import CaptureTarget
from pathlib import Path

target = CaptureTarget(Path("./MyCapture"))
sessions = target.surveys  # List of sessions

# Vectorized attribute access
ratings = sessions.tastiness_rating  # → List([5, 4, 3, ...])

# Projection to dict subset (similar to SQL SELECT)
subset = sessions["date", "rating"]  # → List of Dicts

# Apply function with map
names = sessions.date.map(lambda d: f"{d:%Y%m%d}.heic")

# Safe chaining - missing attributes return Missing
result = sessions.optional_field.nested  # → Missing (not AttributeError)
See Also
  • zip: Combine multiple Lists for row-wise processing.
  • Dict: Dictionary with attribute access.
  • Missing: Null object for safe chaining.

ListError #

Bases: Exception

Raised when a List operation fails on an element.

Source code in capturegraph-lib/capturegraph/data/containers/list.py
class ListError(Exception):
    """Signal that a List operation could not be applied to an element."""

List #

Bases: list[T]

A list that broadcasts attribute access to all elements.

This enables NumPy-style vectorized operations on collections of objects.

Class Type Parameters:

Name Bound or Constraints Description Default
T

The type of elements in the list.

required
Attribute Access

sessions.foo returns a List where each element is item.foo.

Item Access
  • sessions[0] → first element (standard indexing)
  • sessions[1:3] → List slice
  • sessions["date"] → List of item["date"] for each item
  • sessions["date", "name"] → List of Dicts with those keys
Function Application
  • sessions.map(fn) → apply fn to each element
  • sessions.map_leaves(fn) → apply fn to each leaf in nested structures
Example
sessions = List([s1, s2, s3])
sessions.rating      # Vectorized: [s1.rating, s2.rating, s3.rating]
sessions.map(len)    # Apply function: [len(s1), len(s2), len(s3)]
Source code in capturegraph-lib/capturegraph/data/containers/list.py
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
class List[T](list[T]):
    """A list that broadcasts attribute access to all elements.

    This enables NumPy-style vectorized operations on collections of objects.

    Type Parameters:
        T: The type of elements in the list.

    Attribute Access:
        `sessions.foo` returns a List where each element is `item.foo`.

    Item Access:
        - `sessions[0]` → first element (standard indexing)
        - `sessions[1:3]` → List slice
        - `sessions["date"]` → List of `item["date"]` for each item
        - `sessions["date", "name"]` → List of Dicts with those keys

    Function Application:
        - `sessions.map(fn)` → apply fn to each element
        - `sessions.map_leaves(fn)` → apply fn to each leaf in nested structures

    Example:
        ```python
        sessions = List([s1, s2, s3])
        sessions.rating      # Vectorized: [s1.rating, s2.rating, s3.rating]
        sessions.map(len)    # Apply function: [len(s1), len(s2), len(s3)]
        ```
    """

    # --------------------------------------------------
    # Broadcasted Attribute Access
    # --------------------------------------------------

    def __getattr__(self, name: str) -> Any:
        """Fan an attribute lookup out over every element.

        Python only invokes this hook after normal attribute lookup on the
        List itself has failed, so genuine ``list`` methods never reach it.

        Args:
            name: Attribute name to read from each element.

        Returns:
            A new List holding ``_access(item, name)`` for each item, in order.

        Raises:
            AttributeError: If ``name`` is a dunder name (as judged by
                ``_is_dunder``). Dunder names are never broadcast, so
                protocol probes on the List see the attribute as absent.
        """
        if not _is_dunder(name):
            broadcast = List([_access(element, name) for element in self])
            # Validation is delegated to a helper defined elsewhere in the module.
            _check_all_missing(broadcast, f"attribute '{name}'")
            return broadcast

        # Python/NumPy protocol detection relies on AttributeError here.
        raise AttributeError(
            f"'{type(self).__name__}' object has no attribute '{name}'"
        )

    def __setattr__(self, name: str, value: Any) -> None:
        """Broadcast attribute assignment to all elements.

        If the list is empty and value is a sequence, creates new Dict elements
        with the given attribute. If value is a sequence of the same length,
        assigns element-wise. Otherwise, broadcasts the scalar to all elements.

        Args:
            name: Attribute name to set on each element.
            value: Value or sequence of values to assign.

        Raises:
            ValueError: If value is a sequence of different length (and list is not empty).

        Example:
            ```python
            import capturegraph.data as cg
            import capturegraph.scheduling as cgsh
            from datetime import timedelta

            # Create sessions from scratch
            potential = cg.List()
            potential.date = cgsh.forecast.times(span=timedelta(hours=24))
            potential.location = my_location  # scalar broadcast
            potential.solar_angle = cgsh.forecast.solar_position(potential.date, potential.location)
            ```
        """
        # Check if value is a sequence (list, List, tuple) and same length
        self_len = len(self)
        if isinstance(value, (list, tuple)):
            value_len = len(value)
            if self_len == 0:
                for item in value:
                    self.append(Dict({name: item}))
            elif value_len == self_len:
                for item, val in zip(self, value):
                    if not is_missing(item):
                        setattr(item, name, val)
            elif value_len == 1:
                for item in self:
                    if not is_missing(item):
                        setattr(item, name, value[0])
            else:
                raise ValueError(
                    f"Cannot assign sequence of length {value_len} to List of length {self_len}.\n"
                    f"Lengths must match for element-wise assignment, or use a scalar for broadcast."
                )
        else:
            for item in self:
                if not is_missing(item):
                    setattr(item, name, value)

    # --------------------------------------------------
    # Indexing (slicing, key access, projection)
    # --------------------------------------------------

    @overload
    def __getitem__(self, key: int) -> T: ...
    @overload
    def __getitem__(self, key: slice) -> "List[T]": ...
    @overload
    def __getitem__(self, key: str) -> "List[Any]": ...
    @overload
    def __getitem__(self, key: tuple[str, ...]) -> "List[Dict[Any]]": ...

    def __getitem__(
        self, key: int | slice | str | tuple[str, ...]
    ) -> T | "List[T]" | "List[Any]" | "List[Dict[Any]]":
        """Access by index, slice, string key, or tuple projection.

        Args:
            key: Access mode:
                - ``int``: Return single element at index
                - ``slice``: Return List of sliced elements
                - ``str``: Broadcast key access to all elements
                - ``tuple[str, ...]``: Project to Dicts with those keys

        Returns:
            Single element for int, List for others.
            Returns ``Missing`` for out-of-range access.

        Example:
            ```python
            sessions[0]               # First session
            sessions[1:3]             # Slice
            sessions["date"]          # Each item's "date" key
            sessions["date", "name"]  # Dicts with date and name
            ```
        """
        if isinstance(key, (int, np.integer)):
            try:
                result = _vectorize(super().__getitem__(key))
                super().__setitem__(key, result)
                return result
            except IndexError as e:
                return MissingType(e)

        if isinstance(key, slice):
            try:
                return List([_vectorize(item) for item in super().__getitem__(key)])
            except Exception as e:
                return MissingType(e)

        if isinstance(key, tuple):
            result = List(
                [Dict({name: _access(item, name) for name in key}) for item in self]
            )
            for name in key:
                values = [row[name] for row in result]
                _check_all_missing(values, f"key '{name}'")
            return result

        if isinstance(key, str):
            result = List([_access(item, key) for item in self])
            _check_all_missing(result, f"key '{key}'")
            return result

        raise TypeError(
            f"List indices must be int, slice, str, or tuple[str, ...].\n"
            f"Got: {type(key).__name__} = {key!r}\n\n"
            f"Examples:\n"
            f"  sessions[0]              → first element\n"
            f"  sessions[1:3]            → slice\n"
            f"  sessions['date']         → broadcast key access\n"
            f"  sessions['date', 'name'] → projection to Dict"
        )

    # --------------------------------------------------
    # Broadcasting function application
    # --------------------------------------------------

    def __call__[U](self, *args: Any, **kwargs: Any) -> "List[U]":
        """Call each element in the List with the given arguments.

        This treats the List as a collection of callables and invokes
        each one with the same arguments.

        Args:
            *args: Positional arguments to pass to each callable.
            **kwargs: Keyword arguments to pass to each callable.

        Returns:
            List of results from calling each element.

        Example:
            ```python
            # Get a List of bound methods
            formatted = sessions.date.strftime("%Y-%m-%d")
            # → List(["2026-01-08", "2026-01-07", ...])
            ```
        """

        def call(fn: Callable[[T], U], *args: Any, **kwargs: Any) -> U | MissingType:
            try:
                return fn(*args, **kwargs)
            except Exception as e:
                return MissingType(e)

        result = List([call(fn, *args, **kwargs) for fn in self])
        _check_all_missing(result, "call")
        return result

    # --------------------------------------------------
    # Function application
    # --------------------------------------------------

    def map[U](self, function: Callable[[T], U]) -> "List[U]":
        """Apply a function to each element of this list.

        This is the primary way to map a function over elements::

            names = sessions.date.map(lambda d: f"{d:%Y%m%d}.heic")

        Args:
            function: A callable to apply to each element.

        Returns:
            List of function results.

        Raises:
            TypeError: If function is not callable.
            ListError: If the function fails on any element.

        Example:
            ```python
            sessions.date.map(lambda d: d.year)
            # → List([2026, 2026, 2025, ...])

            sessions.map(len)  # Apply built-in function
            # → List([5, 3, 8, ...])
            ```
        """
        if not callable(function):
            raise TypeError(
                f"List.map requires a callable (function/lambda).\n"
                f"Got: {type(function).__name__} = {function!r}\n\n"
                f"Usage: sessions.map(lambda s: s.date.strftime('%Y%m%d'))"
            )

        results = []
        for item in self:
            try:
                results.append(_vectorize(function(item)))
            except Exception as e:
                results.append(MissingType(e))

        _check_all_missing(
            results,
            f"function '{function.__name__ if hasattr(function, '__name__') else repr(function)}'",
        )

        return List(results)

    def pmap[U](
        self,
        function: Callable[[T], U],
        workers: int | None = None,
    ) -> "List[U]":
        """Apply a function to each element in parallel using threads.

        Like ``map()``, but uses a ThreadPoolExecutor for parallel execution.
        Ideal for I/O-bound operations like loading images, network requests,
        or file operations.

        Args:
            function: A callable to apply to each element.
            workers: Maximum number of worker threads. Defaults to min(32, len(self)).

        Returns:
            List of function results (in original order).

        Example:
            ```python
            # Sequential - slow for I/O-bound work
            sessions.photo.map(lambda p: p.tooltip())

            # Parallel - much faster for I/O-bound work
            sessions.photo.pmap(lambda p: p.tooltip(), workers=8)

            # With default workers
            sessions.photo.pmap(lambda p: p.tooltip())
            ```
        """
        from concurrent.futures import ThreadPoolExecutor

        if not callable(function):
            raise TypeError(
                f"List.pmap requires a callable (function/lambda).\n"
                f"Got: {type(function).__name__} = {function!r}"
            )

        if not self:
            return List([])

        # Default workers: min(32, len(self)) - don't spawn more threads than items
        max_workers = workers if workers is not None else min(32, len(self))

        def safe_apply(item: T) -> U | MissingType:
            try:
                return _vectorize(function(item))
            except Exception as e:
                return MissingType(e)

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            results = list(executor.map(safe_apply, self))

        _check_all_missing(
            results,
            f"function '{function.__name__ if hasattr(function, '__name__') else repr(function)}'",
        )

        return List(results)

    def map_leaves(self, function: Callable[[Any], Any]) -> "List[Any]":
        """Apply ``function`` to the leaves of a nested structure.

        Where ``map()`` works on top-level elements only, this method hands
        the whole List to the ``_map_leaves`` helper, which performs the
        recursive traversal. Its result is passed to ``_check_all_missing``
        and then returned.

        Args:
            function: A callable to apply to each leaf element.

        Returns:
            Whatever ``_map_leaves(self, function)`` produces; per the
            example below, a nested List with the same shape as the input.

        Example:
            ```python
            # Nested lists - map_leaves descends into inner lists
            nested = List([List([1, 2]), List([3, 4])])
            nested.map_leaves(lambda x: x * 10)
            # → List([List([10, 20]), List([30, 40])])

            # Compare with map which applies to top-level only
            nested.map(sum)
            # → List([3, 7])  # sum([1,2]), sum([3,4])
            ```
        """
        transformed = _map_leaves(self, function)

        # Callables without __name__ fall back to their repr in the label.
        label = function.__name__ if hasattr(function, "__name__") else repr(function)
        _check_all_missing(transformed, f"function '{label}'")

        return transformed

    def pmap_leaves(
        self,
        function: Callable[[Any], Any],
        workers: int | None = None,
    ) -> "List[Any]":
        """Apply a function to each leaf element recursively in parallel.

        Like ``map_leaves()``, but first collects all leaf values, processes
        them in parallel using ThreadPoolExecutor, then reconstructs the
        nested structure.

        Args:
            function: A callable to apply to each leaf element.
            workers: Maximum number of worker threads. Defaults to min(32, num_leaves).

        Returns:
            Nested List with same structure, function applied to leaves.

        Example:
            ```python
            # Parallel processing of nested image paths
            sessions.photos.pmap_leaves(lambda p: p.tooltip(), workers=8)
            ```
        """
        from concurrent.futures import ThreadPoolExecutor

        if not callable(function):
            raise TypeError(
                f"List.pmap_leaves requires a callable (function/lambda).\n"
                f"Got: {type(function).__name__} = {function!r}"
            )

        # Collect all leaf values with their paths
        leaves: list[tuple[list[int], Any]] = []

        def collect_leaves(data: Any, path: list[int]) -> None:
            if isinstance(data, (List, list)):
                for i, item in enumerate(data):
                    collect_leaves(item, path + [i])
            elif isinstance(data, (Dict, dict)):
                for key, value in data.items():
                    collect_leaves(value, path + [key])
            else:
                leaves.append((path, data))

        collect_leaves(self, [])

        if not leaves:
            return List([])

        # Process leaves in parallel
        max_workers = workers if workers is not None else min(32, len(leaves))

        def safe_apply(item: Any) -> Any:
            try:
                return function(item)
            except Exception as e:
                return MissingType(e)

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            results = list(executor.map(safe_apply, [leaf[1] for leaf in leaves]))

        # Reconstruct the nested structure
        def reconstruct(data: Any, path: list[int], result_iter: iter) -> Any:
            if isinstance(data, (List, list)):
                return List(
                    [
                        reconstruct(item, path + [i], result_iter)
                        for i, item in enumerate(data)
                    ]
                )
            elif isinstance(data, (Dict, dict)):
                return Dict(
                    {
                        key: reconstruct(value, path + [key], result_iter)
                        for key, value in data.items()
                    }
                )
            else:
                return next(result_iter)

        result_iter = iter(results)
        result = reconstruct(self, [], result_iter)

        _check_all_missing(
            result,
            f"function '{function.__name__ if hasattr(function, '__name__') else repr(function)}'",
        )

        return result

    def __add__(self, other: "List[T]") -> "List[T]":
        """Concatenate two Lists using the + operator.

        Args:
            other: Another List to concatenate with.

        Returns:
            List containing all elements from both lists.

        Example:
            ```python
            list1 = List([1, 2])
            list2 = List([3, 4])
            list1 + list2
            # → [1, 2, 3, 4]
            ```
        """
        return List(list(self) + list(other))

    def __or__(self, default: Any) -> "List[T]":
        """Substitute a default for missing values via the ``|`` operator.

        The work is delegated to the ``_fill_missing`` helper, called with
        this List and ``default``. As illustrated below, Missing entries are
        replaced by ``default`` while the List container is kept, so chaining
        can continue on the result.

        Args:
            default: Replacement value for missing entries.

        Returns:
            The result of ``_fill_missing(self, default)``.

        Example:
            ```python
            ratings = List([5, 4, Missing, 3])
            ratings | 0
            # → List([5, 4, 0, 3])

            # Works with tuples for structured data
            pairs = List([(1, 'a'), Missing, (3, 'c')])
            pairs | (None, None)
            # → List([(1, 'a'), (None, None), (3, 'c')])
            ```
        """
        filled = _fill_missing(self, default)
        return filled

    # --------------------------------------------------
    # Properties
    # --------------------------------------------------

    @property
    def dtype(self) -> type | None:
        """Return the type of the first non-Missing/None leaf element.

        Recursively descends into nested lists to find the first element
        that is not Missing or None, then returns its type.

        Returns:
            The type of the first valid leaf element, or None if all are Missing/None.
        """

        def _find_first_type(obj):
            if isinstance(obj, (list, tuple)):
                for item in obj:
                    result = _find_first_type(item)
                    if result is not None:
                        return result
                return None
            elif is_missing(obj):
                return None
            else:
                return type(obj)

        return _find_first_type(self)

    @property
    def dkeys(self) -> "List[str]":
        """Return the union of all keys from all leaf elements.

        Recursively descends into nested lists and collects keys from
        all dict-like leaf elements (dicts, objects with __dict__, or __slots__).

        Returns:
            List of unique keys found across all leaf elements.
        """

        def _collect_keys(obj, keys: set):
            if isinstance(obj, (list, tuple)):
                for item in obj:
                    _collect_keys(item, keys)
            elif is_missing(obj):
                pass
            else:
                keys.update(_vars(obj))

        all_keys: set[str] = set()
        _collect_keys(self, all_keys)
        return List(all_keys)

    # --------------------------------------------------
    # Exchanging Representations
    # --------------------------------------------------

    def __repr__(self) -> str:
        return list.__repr__(self)

    def __array__(self, dtype: np.dtype | None = None, copy: bool = True) -> np.ndarray:
        """NumPy array-protocol hook, so ``np.array(a_list)`` works directly.

        The List is first passed through ``_unwrap`` with ``np.nan`` as the
        replacement value, and the result is handed to ``np.array`` together
        with ``dtype`` and ``copy``.

        Args:
            dtype: NumPy dtype for the resulting array.
            copy: Forwarded unchanged to ``np.array``.

        Returns:
            NumPy array built from the unwrapped data; as shown below,
            Missing entries appear as ``np.nan``.

        Example:
            ```python
            ratings = List([5, 4, Missing, 3])
            np.array(ratings)
            # → array([ 5.,  4., nan,  3.])

            np.nanmean(np.array(ratings))
            # → 4.0
            ```
        """
        plain = _unwrap(self, np.nan)
        return np.array(plain, dtype=dtype, copy=copy)

    def to_numpy(self) -> np.ndarray:
        """Build a NumPy array from this List.

        The List is unwrapped with ``_unwrap(self, np.nan)`` and the result
        is passed to ``np.array`` with default arguments.

        Returns:
            NumPy array built from the unwrapped data.

        Example:
            ```python
            sessions.rating.to_numpy()
            # → array([5., 4., 3., ...])
            ```
        """
        plain = _unwrap(self, np.nan)
        return np.array(plain)

    def to_dict(
        self, keys: tuple[str, ...] | list[str] | None = None
    ) -> dict[str, list]:
        """Convert to a dictionary suitable for Pandas DataFrame construction.

        Each key becomes a column, with values from that attribute across all elements.

        Args:
            keys: Attribute names to include as columns. If None, attempts to
                auto-detect keys from the first element (works for Dicts).

        Returns:
            Dictionary mapping column names to lists of values.

        Raises:
            ValueError: If keys is None and cannot be auto-detected.

        Example:
            ```python
            # From a projected List
            subset = sessions["date", "rating", "store"]
            df = pd.DataFrame(subset.to_dict())

            # With explicit keys
            df = pd.DataFrame(sessions.to_dict(["date", "rating"]))

            # Direct attribute access also works
            data = {"date": list(sessions.date), "rating": list(sessions.rating)}
            df = pd.DataFrame(data)
            ```
        """
        if keys is None:
            if len(self) == 0:
                return {}

            keys = self.dkeys

        if len(self) == 0:
            return {key: [] for key in keys}

        return {
            key: _unwrap(
                getattr(self, key) if hasattr(self[0], key) else self[key],
                replace_missing=None,
            )
            for key in keys
        }

dtype property #

Return the type of the first non-Missing/None leaf element.

Recursively descends into nested lists to find the first element that is not Missing or None, then returns its type.

Returns:

Type Description
type | None

The type of the first valid leaf element, or None if all are Missing/None.

dkeys property #

Return the union of all keys from all leaf elements.

Recursively descends into nested lists and collects keys from all dict-like leaf elements (dicts, objects with `__dict__`, or `__slots__`).

Returns:

Type Description
List[str]

List of unique keys found across all leaf elements.

__getattr__(name) #

Broadcast attribute access to all elements.

Dunder methods (__foo__) raise AttributeError to maintain Python protocols. All other attribute access is broadcast to each element.

Returns:

Type Description
Any

List of item.attr for each item, or Missing on failure.

Source code in capturegraph-lib/capturegraph/data/containers/list.py
def __getattr__(self, name: str) -> Any:
    """Fan an attribute lookup out over every element.

    Python only invokes this hook after normal attribute lookup on the
    List itself has failed, so genuine ``list`` methods never reach it.

    Args:
        name: Attribute name to read from each element.

    Returns:
        A new List holding ``_access(item, name)`` for each item, in order.

    Raises:
        AttributeError: If ``name`` is a dunder name (as judged by
            ``_is_dunder``). Dunder names are never broadcast, so
            protocol probes on the List see the attribute as absent.
    """
    if not _is_dunder(name):
        broadcast = List([_access(element, name) for element in self])
        # Validation is delegated to a helper defined elsewhere in the module.
        _check_all_missing(broadcast, f"attribute '{name}'")
        return broadcast

    # Python/NumPy protocol detection relies on AttributeError here.
    raise AttributeError(
        f"'{type(self).__name__}' object has no attribute '{name}'"
    )

__setattr__(name, value) #

Broadcast attribute assignment to all elements.

If the list is empty and value is a sequence, creates new Dict elements with the given attribute. If value is a sequence of the same length, assigns element-wise. Otherwise, broadcasts the scalar to all elements.

Parameters:

Name Type Description Default
name str

Attribute name to set on each element.

required
value Any

Value or sequence of values to assign.

required

Raises:

Type Description
ValueError

If value is a sequence of different length (and list is not empty).

Example
import capturegraph.data as cg
import capturegraph.scheduling as cgsh
from datetime import timedelta

# Create sessions from scratch
potential = cg.List()
potential.date = cgsh.forecast.times(span=timedelta(hours=24))
potential.location = my_location  # scalar broadcast
potential.solar_angle = cgsh.forecast.solar_position(potential.date, potential.location)
Source code in capturegraph-lib/capturegraph/data/containers/list.py
def __setattr__(self, name: str, value: Any) -> None:
    """Assign an attribute on the elements instead of on the List itself.

    How ``value`` is distributed depends on its type and length:

    * Not a ``list``/``tuple``: the same object is set on every
      non-missing element (on an empty List this does nothing).
    * ``list``/``tuple`` and the List is empty: one ``Dict({name: v})``
      is appended per value, growing the List.
    * ``list``/``tuple`` of the same length: assigned element-wise.
    * ``list``/``tuple`` of length 1: its only entry is set on every
      non-missing element.

    Elements for which ``is_missing`` is true are skipped.

    Args:
        name: Attribute name to set on each element.
        value: Scalar, or list/tuple of per-element values.

    Raises:
        ValueError: If ``value`` is a list/tuple whose length is neither
            1 nor the length of this (non-empty) List.

    Example:
        ```python
        import capturegraph.data as cg
        import capturegraph.scheduling as cgsh
        from datetime import timedelta

        # Create sessions from scratch
        potential = cg.List()
        potential.date = cgsh.forecast.times(span=timedelta(hours=24))
        potential.location = my_location  # scalar broadcast
        potential.solar_angle = cgsh.forecast.solar_position(potential.date, potential.location)
        ```
    """
    current_len = len(self)

    # Scalar: set the very same object on every non-missing element.
    if not isinstance(value, (list, tuple)):
        for element in self:
            if not is_missing(element):
                setattr(element, name, value)
        return

    incoming_len = len(value)

    # Empty List: materialize one Dict row per incoming value.
    if current_len == 0:
        for entry in value:
            self.append(Dict({name: entry}))
        return

    if incoming_len != current_len and incoming_len != 1:
        raise ValueError(
            f"Cannot assign sequence of length {incoming_len} to List of length {current_len}.\n"
            f"Lengths must match for element-wise assignment, or use a scalar for broadcast."
        )

    if incoming_len == current_len:
        # Pair each element with its own value.
        for element, entry in zip(self, value):
            if not is_missing(element):
                setattr(element, name, entry)
    else:
        # Single-entry sequence: broadcast its only entry.
        for element in self:
            if not is_missing(element):
                setattr(element, name, value[0])

__getitem__(key) #

__getitem__(key: int) -> T
__getitem__(key: slice) -> List[T]
__getitem__(key: str) -> List[Any]
__getitem__(key: tuple[str, ...]) -> List[Dict[Any]]

Access by index, slice, string key, or tuple projection.

Parameters:

Name Type Description Default
key int | slice | str | tuple[str, ...]

Access mode: `int` returns the single element at that index; `slice` returns a List of the sliced elements; `str` broadcasts key access to all elements; `tuple[str, ...]` projects each element to a Dict with those keys.

required

Returns:

Type Description
T | List[T] | List[Any] | List[Dict[Any]]

Single element for int, List for others.

T | List[T] | List[Any] | List[Dict[Any]]

Returns Missing for out-of-range access.

Example
sessions[0]               # First session
sessions[1:3]             # Slice
sessions["date"]          # Each item's "date" key
sessions["date", "name"]  # Dicts with date and name
Source code in capturegraph-lib/capturegraph/data/containers/list.py
def __getitem__(
    self, key: int | slice | str | tuple[str, ...]
) -> T | "List[T]" | "List[Any]" | "List[Dict[Any]]":
    """Look up by position, slice, key name, or a tuple of key names.

    Args:
        key: The type of the key selects the access mode:
            - ``int`` (or NumPy integer): the single element at that index
            - ``slice``: a new List holding the sliced elements
            - ``str``: the value under that key taken from every element
            - ``tuple[str, ...]``: one Dict per element with just those keys

    Returns:
        The element itself for an integer key, a List for every other mode.
        An out-of-range integer index produces a ``MissingType`` wrapping
        the ``IndexError`` instead of raising.

    Raises:
        TypeError: If ``key`` is none of the supported types.

    Example:
        ```python
        sessions[0]               # First session
        sessions[1:3]             # Slice
        sessions["date"]          # Each item's "date" key
        sessions["date", "name"]  # Dicts with date and name
        ```
    """
    # --- positional access -------------------------------------------------
    if isinstance(key, (int, np.integer)):
        try:
            element = _vectorize(super().__getitem__(key))
            # Write the vectorized form back into the slot so the stored
            # element and the returned one are the same object.
            super().__setitem__(key, element)
        except IndexError as err:
            return MissingType(err)
        return element

    # --- slicing -----------------------------------------------------------
    if isinstance(key, slice):
        try:
            picked = super().__getitem__(key)
            return List([_vectorize(entry) for entry in picked])
        except Exception as err:
            return MissingType(err)

    # --- projection onto several keys --------------------------------------
    if isinstance(key, tuple):
        rows = [
            Dict({column: _access(entry, column) for column in key})
            for entry in self
        ]
        projected = List(rows)
        # Validate every projected column separately
        # (presumably fails when a whole column is Missing — confirm
        # against _check_all_missing).
        for column in key:
            _check_all_missing(
                [row[column] for row in projected], f"key '{column}'"
            )
        return projected

    # --- broadcast of a single key -----------------------------------------
    if isinstance(key, str):
        broadcast = List([_access(entry, key) for entry in self])
        _check_all_missing(broadcast, f"key '{key}'")
        return broadcast

    message = (
        f"List indices must be int, slice, str, or tuple[str, ...].\n"
        f"Got: {type(key).__name__} = {key!r}\n\n"
        f"Examples:\n"
        f"  sessions[0]              → first element\n"
        f"  sessions[1:3]            → slice\n"
        f"  sessions['date']         → broadcast key access\n"
        f"  sessions['date', 'name'] → projection to Dict"
    )
    raise TypeError(message)

__call__(*args, **kwargs) #

Call each element in the List with the given arguments.

This treats the List as a collection of callables and invokes each one with the same arguments.

Parameters:

Name Type Description Default
*args Any

Positional arguments to pass to each callable.

()
**kwargs Any

Keyword arguments to pass to each callable.

{}

Returns:

Type Description
List[U]

List of results from calling each element.

Example
# Get a List of bound methods
formatted = sessions.date.strftime("%Y-%m-%d")
# → List(["2026-01-08", "2026-01-07", ...])
Source code in capturegraph-lib/capturegraph/data/containers/list.py
def __call__[U](self, *args: Any, **kwargs: Any) -> "List[U]":
    """Call each element in the List with the given arguments.

    This treats the List as a collection of callables and invokes
    each one with the same arguments.

    Args:
        *args: Positional arguments to pass to each callable.
        **kwargs: Keyword arguments to pass to each callable.

    Returns:
        List of results from calling each element.

    Example:
        ```python
        # Get a List of bound methods
        formatted = sessions.date.strftime("%Y-%m-%d")
        # → List(["2026-01-08", "2026-01-07", ...])
        ```
    """

    def call(fn: Callable[[T], U], *args: Any, **kwargs: Any) -> U | MissingType:
        try:
            return fn(*args, **kwargs)
        except Exception as e:
            return MissingType(e)

    result = List([call(fn, *args, **kwargs) for fn in self])
    _check_all_missing(result, "call")
    return result

map(function) #

Apply a function to each element of this list.

This is the primary way to map a function over elements:

names = sessions.date.map(lambda d: f"{d:%Y%m%d}.heic")

Parameters:

Name Type Description Default
function Callable[[T], U]

A callable to apply to each element.

required

Returns:

Type Description
List[U]

List of function results.

Raises:

Type Description
TypeError

If function is not callable.

ListError

If the function fails on any element.

Example
sessions.date.map(lambda d: d.year)
# → List([2026, 2026, 2025, ...])

sessions.map(len)  # Apply built-in function
# → List([5, 3, 8, ...])
Source code in capturegraph-lib/capturegraph/data/containers/list.py
def map[U](self, function: Callable[[T], U]) -> "List[U]":
    """Apply a function to each element of this list.

    This is the primary way to map a function over elements::

        names = sessions.date.map(lambda d: f"{d:%Y%m%d}.heic")

    Args:
        function: A callable to apply to each element.

    Returns:
        List of function results.

    Raises:
        TypeError: If function is not callable.
        ListError: If the function fails on any element.

    Example:
        ```python
        sessions.date.map(lambda d: d.year)
        # → List([2026, 2026, 2025, ...])

        sessions.map(len)  # Apply built-in function
        # → List([5, 3, 8, ...])
        ```
    """
    if not callable(function):
        raise TypeError(
            f"List.map requires a callable (function/lambda).\n"
            f"Got: {type(function).__name__} = {function!r}\n\n"
            f"Usage: sessions.map(lambda s: s.date.strftime('%Y%m%d'))"
        )

    results = []
    for item in self:
        try:
            results.append(_vectorize(function(item)))
        except Exception as e:
            results.append(MissingType(e))

    _check_all_missing(
        results,
        f"function '{function.__name__ if hasattr(function, '__name__') else repr(function)}'",
    )

    return List(results)

pmap(function, workers=None) #

Apply a function to each element in parallel using threads.

Like map(), but uses a ThreadPoolExecutor for parallel execution. Ideal for I/O-bound operations like loading images, network requests, or file operations.

Parameters:

Name Type Description Default
function Callable[[T], U]

A callable to apply to each element.

required
workers int | None

Maximum number of worker threads. Defaults to min(32, len(self)).

None

Returns:

Type Description
List[U]

List of function results (in original order).

Example
# Sequential - slow for I/O-bound work
sessions.photo.map(lambda p: p.tooltip())

# Parallel - much faster for I/O-bound work
sessions.photo.pmap(lambda p: p.tooltip(), workers=8)

# With default workers
sessions.photo.pmap(lambda p: p.tooltip())
Source code in capturegraph-lib/capturegraph/data/containers/list.py
def pmap[U](
    self,
    function: Callable[[T], U],
    workers: int | None = None,
) -> "List[U]":
    """Apply a function to each element in parallel using threads.

    Like ``map()``, but uses a ThreadPoolExecutor for parallel execution.
    Ideal for I/O-bound operations like loading images, network requests,
    or file operations.

    Args:
        function: A callable to apply to each element.
        workers: Maximum number of worker threads. Defaults to min(32, len(self)).

    Returns:
        List of function results (in original order).

    Example:
        ```python
        # Sequential - slow for I/O-bound work
        sessions.photo.map(lambda p: p.tooltip())

        # Parallel - much faster for I/O-bound work
        sessions.photo.pmap(lambda p: p.tooltip(), workers=8)

        # With default workers
        sessions.photo.pmap(lambda p: p.tooltip())
        ```
    """
    from concurrent.futures import ThreadPoolExecutor

    if not callable(function):
        raise TypeError(
            f"List.pmap requires a callable (function/lambda).\n"
            f"Got: {type(function).__name__} = {function!r}"
        )

    if not self:
        return List([])

    # Default workers: min(32, len(self)) - don't spawn more threads than items
    max_workers = workers if workers is not None else min(32, len(self))

    def safe_apply(item: T) -> U | MissingType:
        try:
            return _vectorize(function(item))
        except Exception as e:
            return MissingType(e)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(executor.map(safe_apply, self))

    _check_all_missing(
        results,
        f"function '{function.__name__ if hasattr(function, '__name__') else repr(function)}'",
    )

    return List(results)

map_leaves(function) #

Apply a function to each leaf element recursively.

Unlike map() which applies to top-level elements, map_leaves() descends into nested List/Dict structures and applies the function to leaf values.

Parameters:

Name Type Description Default
function Callable[[Any], Any]

A callable to apply to each leaf element.

required

Returns:

Type Description
List[Any]

Nested List with same structure, function applied to leaves.

Example
# Nested lists - map_leaves descends into inner lists
nested = List([List([1, 2]), List([3, 4])])
nested.map_leaves(lambda x: x * 10)
# → List([List([10, 20]), List([30, 40])])

# Compare with map which applies to top-level only
nested.map(sum)
# → List([3, 7])  # sum([1,2]), sum([3,4])
Source code in capturegraph-lib/capturegraph/data/containers/list.py
def map_leaves(self, function: Callable[[Any], Any]) -> "List[Any]":
    """Apply a function to each leaf element recursively.

    Unlike ``map()`` which applies to top-level elements, ``map_leaves()``
    descends into nested List/Dict structures and applies the function to leaf values.
    The traversal itself is delegated to ``_map_leaves``.

    Args:
        function: A callable to apply to each leaf element.

    Returns:
        Nested List with same structure, function applied to leaves.

    Raises:
        TypeError: If function is not callable.

    Example:
        ```python
        # Nested lists - map_leaves descends into inner lists
        nested = List([List([1, 2]), List([3, 4])])
        nested.map_leaves(lambda x: x * 10)
        # → List([List([10, 20]), List([30, 40])])

        # Compare with map which applies to top-level only
        nested.map(sum)
        # → List([3, 7])  # sum([1,2]), sum([3,4])
        ```
    """
    # Same up-front validation as map(), pmap() and pmap_leaves(), so a
    # non-callable argument fails with one clear error instead of being
    # reported once per leaf.
    if not callable(function):
        raise TypeError(
            f"List.map_leaves requires a callable (function/lambda).\n"
            f"Got: {type(function).__name__} = {function!r}"
        )

    result = _map_leaves(self, function)

    _check_all_missing(
        result,
        f"function '{function.__name__ if hasattr(function, '__name__') else repr(function)}'",
    )

    return result

pmap_leaves(function, workers=None) #

Apply a function to each leaf element recursively in parallel.

Like map_leaves(), but first collects all leaf values, processes them in parallel using ThreadPoolExecutor, then reconstructs the nested structure.

Parameters:

Name Type Description Default
function Callable[[Any], Any]

A callable to apply to each leaf element.

required
workers int | None

Maximum number of worker threads. Defaults to min(32, num_leaves).

None

Returns:

Type Description
List[Any]

Nested List with same structure, function applied to leaves.

Example
# Parallel processing of nested image paths
sessions.photos.pmap_leaves(lambda p: p.tooltip(), workers=8)
Source code in capturegraph-lib/capturegraph/data/containers/list.py
def pmap_leaves(
    self,
    function: Callable[[Any], Any],
    workers: int | None = None,
) -> "List[Any]":
    """Apply a function to each leaf element recursively in parallel.

    Like ``map_leaves()``, but first collects all leaf values, processes
    them in parallel using ThreadPoolExecutor, then reconstructs the
    nested structure.

    Args:
        function: A callable to apply to each leaf element.
        workers: Maximum number of worker threads. Defaults to min(32, num_leaves).

    Returns:
        Nested List with same structure, function applied to leaves.

    Example:
        ```python
        # Parallel processing of nested image paths
        sessions.photos.pmap_leaves(lambda p: p.tooltip(), workers=8)
        ```
    """
    from concurrent.futures import ThreadPoolExecutor

    if not callable(function):
        raise TypeError(
            f"List.pmap_leaves requires a callable (function/lambda).\n"
            f"Got: {type(function).__name__} = {function!r}"
        )

    # Collect all leaf values with their paths
    leaves: list[tuple[list[int], Any]] = []

    def collect_leaves(data: Any, path: list[int]) -> None:
        if isinstance(data, (List, list)):
            for i, item in enumerate(data):
                collect_leaves(item, path + [i])
        elif isinstance(data, (Dict, dict)):
            for key, value in data.items():
                collect_leaves(value, path + [key])
        else:
            leaves.append((path, data))

    collect_leaves(self, [])

    if not leaves:
        return List([])

    # Process leaves in parallel
    max_workers = workers if workers is not None else min(32, len(leaves))

    def safe_apply(item: Any) -> Any:
        try:
            return function(item)
        except Exception as e:
            return MissingType(e)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(executor.map(safe_apply, [leaf[1] for leaf in leaves]))

    # Reconstruct the nested structure
    def reconstruct(data: Any, path: list[int], result_iter: iter) -> Any:
        if isinstance(data, (List, list)):
            return List(
                [
                    reconstruct(item, path + [i], result_iter)
                    for i, item in enumerate(data)
                ]
            )
        elif isinstance(data, (Dict, dict)):
            return Dict(
                {
                    key: reconstruct(value, path + [key], result_iter)
                    for key, value in data.items()
                }
            )
        else:
            return next(result_iter)

    result_iter = iter(results)
    result = reconstruct(self, [], result_iter)

    _check_all_missing(
        result,
        f"function '{function.__name__ if hasattr(function, '__name__') else repr(function)}'",
    )

    return result

__add__(other) #

Concatenate two Lists using the + operator.

Parameters:

Name Type Description Default
other List[T]

Another List to concatenate with.

required

Returns:

Type Description
List[T]

List containing all elements from both lists.

Example
list1 = List([1, 2])
list2 = List([3, 4])
list1 + list2
# → [1, 2, 3, 4]
Source code in capturegraph-lib/capturegraph/data/containers/list.py
def __add__(self, other: "List[T]") -> "List[T]":
    """Build a new List from this List's elements followed by ``other``'s.

    Implements the ``+`` operator. Neither operand is modified.

    Args:
        other: Another List to concatenate with.

    Returns:
        A new List with the elements of ``self`` first, then ``other``.

    Example:
        ```python
        list1 = List([1, 2])
        list2 = List([3, 4])
        list1 + list2
        # → [1, 2, 3, 4]
        ```
    """
    combined = [*self, *other]
    return List(combined)

__or__(default) #

Fill Missing values with a default using the | operator.

Replaces MissingType and None values throughout the structure. The container types (List/Dict) are preserved for continued chaining.

Parameters:

Name Type Description Default
default Any

Value to substitute for Missing/None.

required

Returns:

Type Description
List[T]

List with Missing values filled in.

Example
ratings = List([5, 4, Missing, 3])
ratings | 0
# → List([5, 4, 0, 3])

# Works with tuples for structured data
pairs = List([(1, 'a'), Missing, (3, 'c')])
pairs | (None, None)
# → List([(1, 'a'), (None, None), (3, 'c')])
Source code in capturegraph-lib/capturegraph/data/containers/list.py
def __or__(self, default: Any) -> "List[T]":
    """Substitute ``default`` for Missing values via the ``|`` operator.

    The work is delegated to ``_fill_missing``. MissingType and None
    values throughout the structure are replaced, and the container
    types (List/Dict) are kept so chaining can continue.

    Args:
        default: Value to substitute for Missing/None.

    Returns:
        List with Missing values filled in.

    Example:
        ```python
        ratings = List([5, 4, Missing, 3])
        ratings | 0
        # → List([5, 4, 0, 3])

        # Works with tuples for structured data
        pairs = List([(1, 'a'), Missing, (3, 'c')])
        pairs | (None, None)
        # → List([(1, 'a'), (None, None), (3, 'c')])
        ```
    """
    filled = _fill_missing(self, default)
    return filled

__array__(dtype=None, copy=True) #

Convert to NumPy array, replacing Missing with np.nan.

This enables np.array(vector_list) to work directly. Missing and None values are converted to np.nan for numeric compatibility.

Parameters:

Name Type Description Default
dtype dtype | None

NumPy dtype for the resulting array.

None
copy bool

If True (default), always copy the data.

True

Returns:

Type Description
ndarray

NumPy array with Missing/None replaced by np.nan.

Example
ratings = List([5, 4, Missing, 3])
np.array(ratings)
# → array([ 5.,  4., nan,  3.])

np.nanmean(np.array(ratings))
# → 4.0
Source code in capturegraph-lib/capturegraph/data/containers/list.py
def __array__(self, dtype: np.dtype | None = None, copy: bool = True) -> np.ndarray:
    """Produce a NumPy array, with ``np.nan`` standing in for Missing.

    Implementing this hook lets ``np.array(vector_list)`` work directly.
    The contents are first unwrapped with ``np.nan`` as the replacement
    for Missing and None values, then handed to ``np.array``.

    Args:
        dtype: NumPy dtype for the resulting array.
        copy: Forwarded to ``np.array``; True (default) always copies.

    Returns:
        NumPy array with Missing/None replaced by np.nan.

    Example:
        ```python
        ratings = List([5, 4, Missing, 3])
        np.array(ratings)
        # → array([ 5.,  4., nan,  3.])

        np.nanmean(np.array(ratings))
        # → 4.0
        ```
    """
    plain = _unwrap(self, np.nan)
    return np.array(plain, dtype=dtype, copy=copy)

to_numpy() #

Convert to NumPy array.

Convenience method equivalent to np.array(self). Missing values are replaced with np.nan.

Returns:

Type Description
ndarray

NumPy array representation.

Example
sessions.rating.to_numpy()
# → array([5., 4., 3., ...])
Source code in capturegraph-lib/capturegraph/data/containers/list.py
def to_numpy(self) -> np.ndarray:
    """Return the contents as a NumPy array.

    Shorthand for ``np.array(self)``: the data is unwrapped with
    ``np.nan`` substituted for Missing values and passed to ``np.array``
    with default dtype inference.

    Returns:
        NumPy array representation.

    Example:
        ```python
        sessions.rating.to_numpy()
        # → array([5., 4., 3., ...])
        ```
    """
    plain = _unwrap(self, np.nan)
    return np.array(plain)

to_dict(keys=None) #

Convert to a dictionary suitable for Pandas DataFrame construction.

Each key becomes a column, with values from that attribute across all elements.

Parameters:

Name Type Description Default
keys tuple[str, ...] | list[str] | None

Attribute names to include as columns. If None, attempts to auto-detect keys from the first element (works for Dicts).

None

Returns:

Type Description
dict[str, list]

Dictionary mapping column names to lists of values.

Raises:

Type Description
ValueError

If keys is None and cannot be auto-detected.

Example
# From a projected List
subset = sessions["date", "rating", "store"]
df = pd.DataFrame(subset.to_dict())

# With explicit keys
df = pd.DataFrame(sessions.to_dict(["date", "rating"]))

# Direct attribute access also works
data = {"date": list(sessions.date), "rating": list(sessions.rating)}
df = pd.DataFrame(data)
Source code in capturegraph-lib/capturegraph/data/containers/list.py
def to_dict(
    self, keys: tuple[str, ...] | list[str] | None = None
) -> dict[str, list]:
    """Convert to a dictionary suitable for Pandas DataFrame construction.

    Each key becomes a column, with values from that attribute across all elements.

    Args:
        keys: Attribute names to include as columns. If None, attempts to
            auto-detect keys from the first element (works for Dicts).

    Returns:
        Dictionary mapping column names to lists of values.

    Raises:
        ValueError: If keys is None and cannot be auto-detected.

    Example:
        ```python
        # From a projected List
        subset = sessions["date", "rating", "store"]
        df = pd.DataFrame(subset.to_dict())

        # With explicit keys
        df = pd.DataFrame(sessions.to_dict(["date", "rating"]))

        # Direct attribute access also works
        data = {"date": list(sessions.date), "rating": list(sessions.rating)}
        df = pd.DataFrame(data)
        ```
    """
    if keys is None:
        if len(self) == 0:
            return {}

        keys = self.dkeys

    if len(self) == 0:
        return {key: [] for key in keys}

    return {
        key: _unwrap(
            getattr(self, key) if hasattr(self[0], key) else self[key],
            replace_missing=None,
        )
        for key in keys
    }