"""
====================
File Path Management
====================
"""
from datetime import datetime
from fnmatch import fnmatch
from pathlib import Path
from typing import NamedTuple
from vivarium.engine.interface.utilities import get_output_model_name_string
from vivarium.cluster_tools import utilities as vct_utils
from vivarium.cluster_tools.psimulate import COMMANDS
# Default location for load test output (kept as a plain string, not a Path).
DEFAULT_LOAD_TESTS_DIR = "/mnt/team/simulation_science/priv/engineering/load_tests"
# Shared directory for performance logs; its consumers are outside this section.
CENTRAL_PERFORMANCE_LOGS_DIRECTORY = Path("/mnt/team/simulation_science/pub/performance_logs/")
[docs]
class OutputPaths(NamedTuple):
"""Container class for output filepaths.
In addition to the path attributes, this class provides a method to create
the required directories as well as a class method to create an instance
from the arguments passed to the entry point.
"""
# Directories
root: Path
"""The timestamped root directory for the simulation output."""
logging_root: Path
"""The parent directory for all logs."""
worker_logging_root: Path
"""The root directory for worker logs."""
metadata_dir: Path
"""The directory for task metadata JSON files."""
# Files
# Environment configuration
environment_file: Path
"""The path to the requirements.txt environment file."""
# Simulation configuration
model_specification: Path
"""The path to the model specification file."""
keyspace: Path
"""The path to the simulation keyspace file."""
branches: Path
"""The path to the simulation branches file."""
# outputs
results_dir: Path
"""The path to the results directory."""
backup_dir: Path
"""The path to the simulation backup directory."""
backup_metadata_path: Path
"""The path to the backup metadata file."""
# will not be reliable if we parallelized across artifacts
@property
def artifact_name(self) -> str:
"""Name of the artifact."""
return self.root.parent.stem
@property
def run_date(self) -> str:
"""Date of the simulation run."""
runtime_info = self.logging_root.stem
run_date = runtime_info[: runtime_info.rindex("_")]
return run_date
@property
def run_type(self) -> str:
"""Type of the simulation run."""
runtime_info = self.logging_root.stem
run_type = runtime_info[runtime_info.rindex("_") + 1 :]
return run_type
@property
def original_run_date(self) -> str:
"""Date of the original simulation run."""
return self.root.stem
@property
def project_name(self) -> str:
"""Name of the project."""
if self.logging_to_central_results_directory:
return self.root.parents[3].stem
else:
return self.root.parents[1].stem
@property
def root_path(self) -> str:
"""Path to the root directory."""
return str(self.root.parent)
@property
def logging_to_central_results_directory(self) -> bool:
"""Whether the logs are being written to the central results directory."""
return fnmatch(str(self.root), "/mnt/team/simulation_science/pub/models/*/results/*")
[docs]
@classmethod
def from_entry_point_args(
cls,
*, # No positional args allowed.
command: str,
input_artifact_path: Path | None,
result_directory: Path,
input_model_spec_path: Path | None,
) -> "OutputPaths":
"""Create an instance of OutputPaths from the arguments passed to the entry point.
Parameters
----------
command
The specific ``psimulate`` command being run.
input_artifact_path
The path to the data artifact.
result_directory
The path to the results directory.
input_model_spec_path
The path to the model specification file.
Returns
-------
An instance of OutputPaths.
"""
launch_time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
output_directory = result_directory
if command == COMMANDS.run:
if input_model_spec_path is None:
raise ValueError(
"Model specification path must be provided for 'run' command."
)
model_name = get_output_model_name_string(
input_artifact_path, input_model_spec_path
)
output_directory = output_directory / model_name / launch_time
elif command == COMMANDS.load_test:
output_directory = output_directory / "load_test" / launch_time
logging_directory = output_directory / "logs" / f"{launch_time}_{command}"
logging_dirs = {
"logging_root": logging_directory,
"worker_logging_root": logging_directory / "worker_logs",
}
output_paths = OutputPaths(
root=output_directory,
**logging_dirs,
metadata_dir=output_directory / "metadata",
environment_file=output_directory / "requirements.txt",
model_specification=output_directory / "model_specification.yaml",
keyspace=output_directory / "keyspace.yaml",
branches=output_directory / "branches.yaml",
results_dir=output_directory / "results",
backup_dir=output_directory / "sim_backups",
backup_metadata_path=output_directory / "sim_backups" / "backup_metadata.csv",
)
return output_paths
[docs]
def touch(self) -> None:
"""Create the required directories."""
for dir in [
self.root,
self.results_dir,
self.backup_dir,
self.metadata_dir,
]:
vct_utils.mkdir(dir, exists_ok=True, parents=True)
for dir in [self.logging_root, self.worker_logging_root]:
vct_utils.mkdir(dir, parents=True)