# Source code for vivarium.cluster_tools.dagger.config.validation

"""
========================
Workflow Step Validation
========================

Per-step-type Python validation for workflow step kwargs. Each
``validate_<type>_step`` function is the single entry point that validates
all kwargs accepted by the matching ``get_<type>_step_tasks`` API function.
Shared helpers handle validation that's common across multiple step types.

"""

from __future__ import annotations

import re
from pathlib import Path
from typing import Any

from vivarium.cluster_tools.dagger.config.config import (
    DEFAULT_BACKUP_FREQ_SECONDS,
    ResourceConfig,
)
from vivarium.cluster_tools.dagger.config.utilities import check_scalar, validate_scalar_dict

# ASCII-only identifier pattern: one letter or underscore, followed by any
# number of letters, digits, or underscores.
# NOTE(review): when used with ``re.match`` the trailing ``$`` also matches
# just before a final newline, so a key such as "x\n" is accepted; use
# ``fullmatch`` where a strict whole-string check is required.
_PYTHON_IDENTIFIER_RE = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$")
"""Notebook parameter keys must be valid Python identifiers because papermill
injects them as variable assignments in a notebook cell."""


def _validate_common(name: str, resources: ResourceConfig) -> None:
    """Run the checks every step type shares.

    A step must have a non-empty ``name``, and its resolved resources must
    carry string values for both ``queue`` and ``project``.

    Raises
    ------
    ValueError
        If ``name`` is empty, or if ``resources.queue`` or
        ``resources.project`` is not a string.
    """
    if not name:
        raise ValueError("Step 'name' is required.")

    # ``queue`` is inspected first; ``project`` is only read when ``queue``
    # already passed.
    has_queue_and_project = isinstance(resources.queue, str) and isinstance(
        resources.project, str
    )
    if has_queue_and_project:
        return

    raise ValueError(
        f"Step '{name}': resources 'queue' and 'project' must be "
        "configured. Set them at the step level or provide workflow-level defaults."
    )


def _validate_required_paths(name: str, paths: list[Path]) -> None:
    """Ensure every path in ``paths`` exists on disk.

    Paths are checked in order; the first one that is missing is reported.

    Raises
    ------
    FileNotFoundError
        If any path does not exist. The message names the step and the
        offending path.
    """
    first_missing = next(
        (candidate for candidate in paths if not candidate.exists()), None
    )
    if first_missing is not None:
        raise FileNotFoundError(
            f"Step '{name}': path does not exist: {first_missing}"
        )


def _validate_positional_args(step_name: str, positional_args: Any) -> None:
    """Check that ``positional_args`` is a list whose items are all scalars.

    Each item is handed to ``check_scalar`` with ``allow_none=False`` and a
    label identifying its index in the list.

    Raises
    ------
    ValueError
        If ``positional_args`` is not a list.
    """
    if isinstance(positional_args, list):
        for position, value in enumerate(positional_args):
            check_scalar(
                value,
                label=f"positional_args[{position}]",
                step_name=step_name,
                allow_none=False,
            )
        return

    received_type = type(positional_args).__name__
    raise ValueError(
        f"Step '{step_name}': 'positional_args' must be a list, "
        f"got {received_type}."
    )


def validate_bash_step(
    *,
    name: str,
    resources: ResourceConfig,
    command: str,
    environment: str | None = None,
) -> None:
    """Validate kwargs for :func:`~vivarium.cluster_tools.dagger.config.interface.get_bash_step_tasks`.

    Runs the shared name/resource checks, then requires a non-empty
    ``command``. ``environment`` is accepted but not inspected here.

    Raises
    ------
    ValueError
        If the shared checks fail or ``command`` is empty.
    """
    _validate_common(name, resources)
    if command:
        return
    raise ValueError(f"Step '{name}': 'command' is required.")
def validate_simulation_step(
    *,
    name: str,
    resources: ResourceConfig,
    model_specification: Path,
    branch_configuration: Path,
    environment: str | None = None,
    artifact_path: Path | None = None,
    backup_freq: float | None = DEFAULT_BACKUP_FREQ_SECONDS,
    sim_verbosity: int = 0,
) -> None:
    """Validate kwargs for :func:`~vivarium.cluster_tools.dagger.config.interface.get_simulation_step_tasks`.

    Runs the shared name/resource checks, requires both
    ``model_specification`` and ``branch_configuration`` to be provided, and
    then confirms that those files (plus ``artifact_path`` when given) exist.
    ``environment``, ``backup_freq`` and ``sim_verbosity`` are accepted but
    not inspected here.

    Raises
    ------
    ValueError
        If the shared checks fail or a required file argument is missing.
    FileNotFoundError
        If any of the supplied paths does not exist.
    """
    _validate_common(name, resources)

    if not model_specification:
        raise ValueError(f"Step '{name}': simulation type requires 'model_specification'.")
    if not branch_configuration:
        raise ValueError(f"Step '{name}': simulation type requires 'branch_configuration'.")

    # The artifact is optional; it is only checked when the caller supplied one.
    optional_paths = [] if artifact_path is None else [artifact_path]
    _validate_required_paths(
        name,
        [model_specification, branch_configuration, *optional_paths],
    )
def validate_pytest_step(
    *,
    name: str,
    resources: ResourceConfig,
    environment: str | None = None,
    path: str | list[str] | None = None,
    k: str | None = None,
    runslow: bool = False,
) -> None:
    """Validate kwargs for :func:`~vivarium.cluster_tools.dagger.config.interface.get_pytest_step_tasks`.

    Runs the shared name/resource checks, requires at least one of ``path``
    or ``k``, and confirms that every supplied test path exists. ``path`` may
    be a single string or a list of strings. ``environment`` and ``runslow``
    are accepted but not inspected here.

    Raises
    ------
    ValueError
        If the shared checks fail or neither ``path`` nor ``k`` is given.
    FileNotFoundError
        If any supplied test path does not exist.
    """
    _validate_common(name, resources)

    if not (path or k):
        raise ValueError(
            f"Step '{name}': pytest type requires at least one of 'path' or 'k'."
        )

    if path is None:
        return

    # Normalise the single-string form to a list before checking existence.
    candidates = path if isinstance(path, list) else [path]
    _validate_required_paths(name, list(map(Path, candidates)))
def validate_python_step(
    *,
    name: str,
    resources: ResourceConfig,
    path: str,
    environment: str | None = None,
    positional_args: list[Any] | None = None,
    keyword_args: dict[str, Any] | None = None,
) -> None:
    """Validate kwargs for :func:`~vivarium.cluster_tools.dagger.config.interface.get_python_step_tasks`.

    Runs the shared name/resource checks, requires ``path`` to be a string
    ending in ``.py``, validates ``positional_args`` and ``keyword_args``
    when they are supplied, and finally confirms the script exists.
    ``environment`` is accepted but not inspected here.

    Raises
    ------
    ValueError
        If the shared checks fail, ``path`` is missing or not a ``.py``
        string, or ``positional_args`` is not a list.
    FileNotFoundError
        If the script at ``path`` does not exist.
    """
    _validate_common(name, resources)

    if not path:
        raise ValueError(f"Step '{name}': python type requires 'path' in args.")

    is_python_script = isinstance(path, str) and path.endswith(".py")
    if not is_python_script:
        raise ValueError(
            f"Step '{name}': 'path' must be a string ending with .py, got {path!r}."
        )

    if positional_args is not None:
        _validate_positional_args(name, positional_args)

    if keyword_args is not None:
        validate_scalar_dict(
            keyword_args,
            field_name="keyword_args",
            step_name=name,
        )

    # Existence is checked last, after all argument-shape checks have passed.
    script_path = Path(path)
    _validate_required_paths(name, [script_path])
def validate_notebook_step(
    *,
    name: str,
    resources: ResourceConfig,
    path: Path,
    output_path: Path,
    environment: str | None = None,
    parameters: dict[str, Any] | None = None,
    cwd: Path | None = None,
) -> None:
    """Validate kwargs for :func:`~vivarium.cluster_tools.dagger.config.interface.get_notebook_step_tasks`.

    Runs the shared name/resource checks, requires both ``path`` and
    ``output_path`` to end in ``.ipynb``, validates ``parameters`` as a dict
    of scalars whose keys are identifier-shaped, and finally confirms the
    input notebook exists. ``environment`` and ``cwd`` are accepted but not
    inspected here.

    Raises
    ------
    ValueError
        If the shared checks fail, either notebook path lacks the ``.ipynb``
        suffix, or a parameter key does not match the identifier pattern.
    FileNotFoundError
        If the notebook at ``path`` does not exist.
    """
    _validate_common(name, resources)

    if not str(path).endswith(".ipynb"):
        raise ValueError(f"Step '{name}': 'path' must end with .ipynb, got {path!r}.")
    if not str(output_path).endswith(".ipynb"):
        raise ValueError(
            f"Step '{name}': 'output_path' must end with .ipynb, got {output_path!r}."
        )

    params = parameters if parameters is not None else {}
    validate_scalar_dict(params, field_name="parameters", step_name=name)
    for key in params:
        # ``fullmatch`` rather than ``match``: the pattern ends in ``$``, which
        # also matches just before a trailing newline, so ``match`` would let
        # a key such as "x\n" through even though it is not an identifier.
        if not _PYTHON_IDENTIFIER_RE.fullmatch(key):
            raise ValueError(
                f"Step '{name}': parameter key {key!r} is not a valid "
                "Python identifier. Notebooks require parameter names that "
                "are valid Python identifiers (letters, digits, underscores; "
                "cannot start with a digit)."
            )

    _validate_required_paths(name, [path])