Skip to content

capturegraph.data.load.traversal #

CaptureGraph Data Traversal#

Load CaptureGraph target directories as typed Python objects for convenient traversal. This module provides the primary API for accessing captured data.

Example
import capturegraph.data as cg
from pathlib import Path

target = cg.CaptureTarget(Path("./MyCapture"))

# Access files defined in the manifest
print(target.reference)  # Path to reference.heic

# Vectorized access across all sessions
ratings = target.surveys.tastiness_rating  # → List of ratings

Functions:

Name Description
load_directory

Recursively load a directory based on its manifest

Classes:

Name Description
CaptureTarget

Top-level entry point (loads capture_target_manifest.json)

CaptureTarget #

Bases: Dict

Top-level entry point for loading a CaptureGraph target.

A CaptureTarget represents a capture project directory that contains a capture_target_manifest.json file defining its structure.

Inherits from Dict, providing attribute-style access to all loaded data.

Example
import capturegraph.data as cg
from pathlib import Path

target = cg.CaptureTarget(Path("./MyCapture"))
print(target.reference)  # Path to reference.heic

# Vectorized access across all sessions
ratings = target.surveys.tastiness_rating
print(f"Average: {sum(r for r in ratings if r) / len([r for r in ratings if r])}")

Attributes:

Name Type Description
_path

The directory path on disk

_manifest

The raw ProcedureManifest (for introspection)

Source code in capturegraph-lib/capturegraph/data/load/traversal.py
class CaptureTarget(Dict):
    """Entry point for opening a CaptureGraph target directory.

    The target directory is expected to hold a
    `capture_target_manifest.json` file. That manifest is parsed, passed to
    `load_directory` together with the directory path, and the loaded data
    is used to initialize this object.

    Because this class derives from Dict, the loaded data is reachable with
    attribute-style access.

    Example:
        ```python
        import capturegraph.data as cg
        from pathlib import Path

        target = cg.CaptureTarget(Path("./MyCapture"))
        print(target.reference)  # Path to reference.heic

        # Vectorized access across all sessions
        ratings = target.surveys.tastiness_rating
        rated = [r for r in ratings if r]
        print(f"Average: {sum(rated) / len(rated)}")
        ```

    Attributes:
        _path: Directory on disk the target was loaded from
        _manifest: The parsed ProcedureManifest, kept for introspection
    """

    MANIFEST_FILENAME = "capture_target_manifest.json"

    def __init__(self, path: Union[str, Path]):
        """Open the target stored in the given directory.

        Args:
            path: Target directory; it must contain
                capture_target_manifest.json

        Raises:
            FileNotFoundError: If the manifest file is absent
        """
        root = Path(path)
        manifest_file = root / self.MANIFEST_FILENAME

        # Fail early with a clear message when there is no manifest.
        if not manifest_file.exists():
            raise FileNotFoundError(f"Manifest not found: {manifest_file}")

        parsed = ProcedureManifest.from_file(manifest_file)

        # The recursive loader produces the data that seeds this Dict.
        super().__init__(load_directory(root, parsed))

        # Metadata goes through object.__setattr__ to avoid dict assignment.
        for attr, value in (("_path", root), ("_manifest", parsed)):
            object.__setattr__(self, attr, value)

    @property
    def path(self) -> Path:
        """Directory on disk this target was loaded from."""
        return object.__getattribute__(self, "_path")

    @property
    def manifest(self) -> ProcedureManifest:
        """The parsed manifest, exposed for introspection."""
        return object.__getattribute__(self, "_manifest")

    def __repr__(self) -> str:
        return "CaptureTarget({})".format(self.path)

path property #

The directory path on disk.

manifest property #

Access the raw manifest for introspection.

__init__(path) #

Load a CaptureTarget from a directory.

Parameters:

Name Type Description Default
path Union[str, Path]

Path to the target directory (must contain capture_target_manifest.json)

required

Raises:

Type Description
FileNotFoundError

If the manifest file doesn't exist

Source code in capturegraph-lib/capturegraph/data/load/traversal.py
def __init__(self, path: Union[str, Path]):
    """Open the target stored in the given directory.

    Args:
        path: Target directory; it must contain
            capture_target_manifest.json

    Raises:
        FileNotFoundError: If the manifest file is absent
    """
    root = Path(path)
    manifest_file = root / self.MANIFEST_FILENAME

    # Fail early with a clear message when there is no manifest.
    if not manifest_file.exists():
        raise FileNotFoundError(f"Manifest not found: {manifest_file}")

    parsed = ProcedureManifest.from_file(manifest_file)

    # The recursive loader produces the data that seeds this Dict.
    super().__init__(load_directory(root, parsed))

    # Metadata goes through object.__setattr__ to avoid dict assignment.
    for attr, value in (("_path", root), ("_manifest", parsed)):
        object.__setattr__(self, attr, value)

load_directory(path, manifest) #

Recursively load a directory based on its manifest.

Parameters:

Name Type Description Default
path Path

Path to the directory

required
manifest ProcedureManifest

The manifest defining the directory's structure

required

Returns:

Type Description
Dict

Dict with attributes for all files, sequences, directories, and sessions defined in the manifest.

Example
import capturegraph.data as cg

manifest = cg.ProcedureManifest.from_file(path / "manifest.json")
data = cg.load_directory(path, manifest)
print(data.reference)  # Access loaded file
print(data.photos)     # Access file sequence
Source code in capturegraph-lib/capturegraph/data/load/traversal.py
def load_directory(path: Path, manifest: ProcedureManifest) -> Dict:
    """Build a Dict describing a directory, guided by its manifest.

    Entries are added in four passes over the manifest: single files, file
    sequences (wrapped in List), nested directories (loaded recursively),
    and sessions. Each entry is stored under the sanitized form of its
    manifest name.

    Args:
        path: Directory to load
        manifest: Manifest describing what the directory contains

    Returns:
        Dict holding one entry per file, sequence, directory, and session
        named in the manifest. A nested directory that does not exist on
        disk is represented by a MissingType wrapping a FileNotFoundError.

    Example:
        ```python
        import capturegraph.data as cg

        manifest = cg.ProcedureManifest.from_file(path / "manifest.json")
        data = cg.load_directory(path, manifest)
        print(data.reference)  # Access loaded file
        print(data.photos)     # Access file sequence
        ```
    """
    loaded = Dict()
    loaded._path = path
    loaded._manifest = manifest

    # Pass 1: individual files.
    for entry_name, entry_manifest in manifest.files.items():
        key = _sanitize_name(entry_name)
        loaded[key] = load_file(path, entry_name, entry_manifest)

    # Pass 2: file sequences, each wrapped in a List.
    for entry_name, entry_manifest in manifest.file_sequences.items():
        key = _sanitize_name(entry_name)
        loaded[key] = List(load_sequence(path, entry_name, entry_manifest))

    # Pass 3: nested directories; absent ones get a MissingType placeholder.
    for entry_name, sub_manifest in manifest.directories.items():
        key = _sanitize_name(entry_name)
        sub_path = path / entry_name
        if not sub_path.exists():
            missing = FileNotFoundError(f"Directory not found: {sub_path}")
            loaded[key] = MissingType(missing)
            continue
        loaded[key] = load_directory(sub_path, sub_manifest)

    # Pass 4: sessions, delegated to the session loader.
    for entry_name, sessions_manifest in manifest.sessions.items():
        key = _sanitize_name(entry_name)
        loaded[key] = _load_sessions(path / entry_name, sessions_manifest)

    return loaded