Project

Object to hold overarching project attributes, including its Docker image, snakemake configs, version, and other options for how the container should be run. A Project will also create one or more Batch objects, representing different sets of files created as part of the project. For example, different batches might have their IDs come from different text files, and maintain their success tracking independently.

Parameters:
  • name (str | None) –

    Project name

  • image (str) –

    Name or URL to docker image, already built

  • config_file (Path | str) –

    Path to snakemake config file

  • file_pattern (str) –

    String to be used with format() to create desired report filenames. Include "{id}" as a placeholder for report IDs (e.g. location names), as in "{id}_equity_{year}.pdf", where year is a keyed value in the config file.

  • outdir (Path | str) –

    Directory for final generated reports

  • rename (bool, default: False ) –

    Whether generated reports should be renamed with the version tag appended, by default False

  • version (str | None, default: None ) –

    Text of version tag, e.g. "0.1.0". If None, must supply version_file. If both are supplied, this takes precedence.

  • version_file (Path | str | None, default: None ) –

    File that can be parsed to get a version tag, presumably either pyproject.toml for Python or DESCRIPTION for R. If None, must supply version directly

  • version_patt (str | None, default: None ) –

    Regex pattern to use to extract version if version_file='DESCRIPTION'. If None, one is used by default that should get a properly formatted version.

  • clean (bool, default: False ) –

    Whether to remove all files from outdir before generating new ones, by default False

  • clean_glob (str | None, default: None ) –

    If clean=True, the glob to use to designate what files to remove. If None, all files in outdir are removed

  • print_quarto (bool, default: False ) –

    Whether to run Quarto in verbose mode, including its printout of each chunk rendered, by default False

  • print_snakemake (bool, default: False ) –

    Whether to run snakemake in verbose mode, by default False

  • repo (str | None, default: None ) –

    GitHub repo of the project (currently unused), by default None

Source code in src/eqcli/project.py
class Project:
    """Object to hold overarching project attributes.

    This includes the project's Docker image, snakemake configs, version, and other options for how the container should be run. A `Project` will also create one or more `Batch` objects, representing different sets of files created as part of the project. For example, different batches might have their IDs come from different text files, and maintain their success tracking independently.

    Parameters
    ----------
    name : str | None
        Project name
    image : str
        Name or URL to docker image, already built
    config_file : Path | str
        Path to snakemake config file
    file_pattern : str
        String to be used with `format()` to create desired report filenames. Include `"{id}"` as a placeholder for report IDs (e.g. location names), as in `"{id}_equity_{year}.pdf"`, where `year` is a keyed value in the config file.
    outdir : Path | str
        Directory for final generated reports
    rename : bool, optional
        Whether generated reports should be renamed with the version tag appended, by default False
    version : str | None, optional
        Text of version tag, e.g. "0.1.0". If None, must supply `version_file`. If both are supplied, this takes precedence.
    version_file : Path | str | None, optional
        File that can be parsed to get a version tag, presumably either pyproject.toml for Python or DESCRIPTION for R. If None, must supply `version` directly
    version_patt : str | None, optional
        Regex pattern to use to extract version if `version_file='DESCRIPTION'`. If None, one is used by default that should get a properly formatted version.
    clean : bool, optional
        Whether to remove all files from `outdir` before generating new ones, by default False
    clean_glob : str | None, optional
        If `clean=True`, the glob to use to designate what files to remove. If None, all files in `outdir` are removed
    print_quarto : bool, optional
        Whether to run Quarto in verbose mode, including its printout of each chunk rendered, by default False
    print_snakemake : bool, optional
        Whether to run snakemake in verbose mode, by default False
    repo : str | None, optional
        GitHub repo of the project (currently unused), by default None
    """

    def __init__(
        self,
        name: str | None,
        image: str,
        config_file: Path | str,
        file_pattern: str,
        outdir: Path | str,
        rename: bool = False,
        version: str | None = None,
        version_file: Path | str | None = None,
        version_patt: str | None = None,
        clean: bool = False,
        clean_glob: str | None = None,
        print_quarto: bool = False,
        print_snakemake: bool = False,
        repo: str | None = None,
    ):
        self.name = name
        self.repo = repo
        self.image = image
        self.file_pattern = file_pattern
        self.outdir = Path(outdir)
        # NOTE(review): `self.rename` is stored but not read anywhere in this
        # class; `add_batch` takes its own `rename` argument. Confirm whether
        # the project-level value is meant to be the default there.
        self.rename = rename
        self.config = self._read_config(config_file)
        self.version = check_version(version, version_file, version_patt)
        self.print_quarto = print_quarto
        self.print_snakemake = print_snakemake
        self.batches: list[Batch] = []

        # setup outdir
        self._setup_dir()
        if clean:
            # With no glob supplied, clean everything directly inside outdir,
            # as the `clean_glob` documentation promises.
            self._clean_dir("*" if clean_glob is None else clean_glob)

    ## SETUP PROJECT ----
    def _setup_dir(self) -> None:
        """Give a directory permissions necessary for docker & GitHub Actions runner to have write access, creating the directory if needed"""
        if not self.outdir.exists():
            click.echo(f"Creating directory {self.outdir}")
            self.outdir.mkdir(parents=True, exist_ok=True)
        # World-writable so that a container running as another user can write here.
        self.outdir.chmod(mode=0o777)

    def _clean_dir(self, glob: str) -> None:
        """Delete files in the output directory based on a glob

        Only files (and symlinks) are removed; directories matched by the
        glob are left alone, since `Path.unlink` cannot remove them.

        Parameters
        ----------
        glob : str
            Glob pattern, relative to `outdir`, to match files for deletion
        """
        files = [
            path
            for path in self.outdir.glob(glob)
            if path.is_file() or path.is_symlink()
        ]
        if not files:
            click.echo(f"No files in {self.outdir} to remove")
            return
        click.echo(f"Removing {len(files)} files from {self.outdir}")
        for file in files:
            file.unlink()

    @staticmethod
    def _with_batch_dir_default(config: dict | None) -> dict:
        """Ensure a parsed config has a `batch_dir` key

        Parameters
        ----------
        config : dict | None
            Parsed config mapping, or None for an empty config file

        Returns
        -------
        dict
            The same mapping (or a new one if `config` was None) with
            `batch_dir` present. An existing `batch_dir` is kept as is;
            otherwise the value of `batchdir` is used if that key exists,
            falling back to "batch".

        Raises
        ------
        TypeError
            If `config` is neither None nor a dict
        """
        if config is None:
            config = {}
        if not isinstance(config, dict):
            raise TypeError(
                f"Config must be a mapping at the top level, got {type(config).__name__}"
            )
        if "batch_dir" not in config:
            # `batchdir` is treated as an alternative spelling of the same
            # setting -- presumably what the original check intended.
            config["batch_dir"] = config.get("batchdir", "batch")
        return config

    def _read_config(self, config_file: Path | str) -> dict:
        """Read a snakemake-style yaml config file

        Parameters
        ----------
        config_file : Path | str
            Path to config file

        Returns
        -------
        dict
            A dict as represented in `config_file`, with a default value of "batch" set if no key `batch_dir` already exists
        """
        with open(config_file, "r", encoding="utf-8") as f:
            # safe_load returns None for an empty document; the helper below
            # turns that into an empty config rather than failing.
            config = yaml.safe_load(f)
        return self._with_batch_dir_default(config)

    ## MAKE & DEPLOY BATCHES ----
    def _create_batch(
        self, batch_name: str, ids: Path | str | list[str], rename: bool
    ) -> Batch:
        """Build a `Batch` that shares this project's settings

        Parameters
        ----------
        batch_name : str
            Name given to the batch
        ids : Path | str | list[str]
            IDs for the batch, passed through to `Batch` unchanged
        rename : bool
            If True the project version is passed to the batch; otherwise
            the batch gets `version=None`

        Returns
        -------
        Batch
            The newly created batch (not yet added to `self.batches`)
        """
        batch_version = self.version if rename else None
        # skipping default args for now
        return Batch(
            name=batch_name,
            ids=ids,
            file_pattern=self.file_pattern,
            image=self.image,
            config=self.config,
            outdir=self.outdir,
            batchdir=self.config["batch_dir"],
            version=batch_version,
            print_quarto=self.print_quarto,
            print_snakemake=self.print_snakemake,
        )

    def add_batch(
        self, ids: Path | str | list[str], rename: bool = False, append: bool = True
    ) -> None:
        """Create a batch and register it on the project

        The batch is named `"{name}-batch-{n}"`, where `n` is the number of
        batches registered before this call.

        Parameters
        ----------
        ids : Path | str | list[str]
            IDs for the batch, passed through to `Batch`
        rename : bool, optional
            Whether the batch should receive the project version, by default False
        append : bool, optional
            If True the batch is added to the existing ones; if False it
            replaces them, by default True
        """
        batch_id = len(self.batches)
        batch = self._create_batch(
            batch_name=f"{self.name}-batch-{batch_id}", ids=ids, rename=rename
        )
        if append:
            self.batches.append(batch)
        else:
            self.batches = [batch]

    def run_batches(self) -> None:
        """Call `run_docker()` on each registered batch, in order"""
        for batch in self.batches:
            batch.run_docker()

    def print_overview(self) -> None:
        """Print the project summary followed by each batch"""
        click.secho("\nPROJECT: -------------------------", fg="yellow", bold=True)
        print(self)
        click.secho("\nBATCHES: -------------------------", fg="yellow", bold=True)
        for batch in self.batches:
            print(batch)

    def print_docker_logs(self) -> None:
        """Echo every entry of each batch's `logs`"""
        for batch in self.batches:
            for log in batch.logs:
                click.echo(log)

    ## BASIC METHODS ----
    def __str__(self) -> str:
        """Return a short summary: name, version, image and number of batches"""
        return f"""
Project: `{self.name}` version {self.version}
Docker image: '{self.image}'
Batches: {len(self.batches)}"""

    def __iter__(self):
        """Iterate over the project's batches"""
        return iter(self.batches)