Utilities

Utility functions, mostly for working with filenames to parse information in snakemake workflows.

`check_version(version, version_file, version_patt, is_required=True)`

Read and/or verify project version

Parameters:

version (str | None) –

Version, such as "0.2.1"
version_file (Path | str | None) –

File containing a version, such as "pyproject.toml" or "DESCRIPTION"
version_patt (str | None) –

Regex pattern to use to find version tag
is_required (bool, default: True ) –

If true (default), must resolve to some version. Otherwise, not finding a version and returning None is allowed

Returns:	`str \| None` – The version, either as supplied or as read from a file. `version` argument takes precedence if both it and `version_file` are given. If `is_required` is false and no version is found, will return `None`.

Raises:	`ValueError` – Error if neither `version` nor `version_file` is given, and `is_required=True`.

Source code in src/eqcli/utils.py

def check_version(
    version: str | None,
    version_file: Path | str | None,
    version_patt: str | None,
    is_required: bool = True,
) -> str | None:
    """Read and/or verify project version

    Parameters
    ----------
    version : str | None
        Version, such as "0.2.1"
    version_file : Path | str | None
        File containing a version, such as "pyproject.toml" or "DESCRIPTION"
    version_patt : str | None
        Regex pattern to use to find version tag
    is_required : bool
        If true (default), must resolve to some version. Otherwise, not finding a version and returning None is allowed

    Returns
    -------
    str | None
        The version, either as supplied or as read from a file. `version` argument takes precedence if both it and `version_file` are given. If `is_required` is false and no version is found, will return `None`.

    Raises
    ------
    ValueError
        Error if neither `version` nor `version_file` is given, and `is_required=True`.
    """
    if version is None:
        if version_file is None:
            if is_required:
                raise ValueError("must supply a version or a version file")
            else:
                return None
        else:
            if is_required:
                return read_version(version_file, version_patt)
            else:
                try:
                    return read_version(version_file, version_patt)
                except ValueError:
                    return None
    else:
        return version

`create_file_names(template, path=None, ids=None, to_snakecase=True, comment='#', **kwargs)`

Batch create filenames based on IDs read from a text file and/or a list, formatted with a template literal and kwargs

Parameters:

template (str) –

Template that can be interpreted by format(). Should contain '{id}' to have that filled in by identifiers
path (str | Path | None, default: None ) –

Path to a text file of identifiers that can be passed to read_commented, by default None
ids (list[str] | None, default: None ) –

List of strings of identifiers, by default None
to_snakecase (bool, default: True ) –

Convert filenames to snakecase, by default True
comment (str | None, default: '#' ) –

Single string designating a comment for omitting lines to pass to read_commented, by default "#"
**kwargs –

Keyword args to fill into the template

Returns:	`list[str]` – A list of strings giving paths to output files with keywords filled in.

Raises:	`ValueError` – If both 'path' and 'ids' are None

Examples:

>>> create_file_names("{outdir}/{id}_report_{yr}.pdf", ids=["New Haven", "Hartford"], outdir="to_distro", yr=2026)
['to_distro/new_haven_report_2026.pdf', 'to_distro/hartford_report_2026.pdf']

Source code in src/eqcli/utils.py

def create_file_names(
    template: str,
    path: str | Path | None = None,
    ids: list[str] | None = None,
    to_snakecase: bool = True,
    comment: str | None = "#",
    **kwargs,
) -> list[str]:
    """Batch create filenames based on IDs read from a text file and/or a list, formatted with a template literal and kwargs

    Parameters
    ----------
    template : str
        Template that can be interpreted by `format()`. Should contain '{id}' to have that filled in by identifiers
    path : str | Path | None, optional
        Path to a text file of identifiers that can be passed to `read_commented`, by default None
    ids : list[str] | None, optional
        List of strings of identifiers, by default None
    to_snakecase : bool, optional
        Convert filenames to snakecase, by default True
    comment : str | None, optional
        Single string designating a comment for omitting lines to pass to `read_commented`, by default "#"
    **kwargs: dict
        Keyword args to fill into the template

    Returns
    -------
    list[str]
        A list of strings giving paths to output files with keywords filled in.

    Raises
    ------
    ValueError
        Errors if both 'path' and 'ids' are None

    Examples
    ------
    >>> create_file_names("{outdir}/{id}_report_{yr}.pdf", ids=["New Haven", "Hartford"], outdir="to_distro", yr=2026)
    ['to_distro/new_haven_report_2026.pdf', 'to_distro/hartford_report_2026.pdf']
    """
    if path is None and ids is None:
        raise ValueError("must supply 'path' and/or 'ids'")
    if ids is None:
        ids_out = []
    else:
        ids_out = ids
    if path is not None:
        ids_out = ids_out + read_commented(path, comment)
    if to_snakecase:
        ids_out = snakecase(ids_out)
    return [template.format(**kwargs, id=id) for id in ids_out]

`file_timestamp(path, fmt='%Y-%m-%d %H:%M:%S')`

Get file's modification time as a formatted timestamp

Parameters:	`path` (`Path \| str`) – Path to file. `fmt` (`str`, default: `'%Y-%m-%d %H:%M:%S'` ) – Datetime format compatible with `strftime`; defaults to "%Y-%m-%d %H:%M:%S".

Returns:	`str` – Formatted timestamp

Source code in src/eqcli/utils.py

def file_timestamp(path: Path | str, fmt: str = "%Y-%m-%d %H:%M:%S") -> str:
    """Get file's modification time as a formatted timestamp

    Parameters
    ----------
    path : Path | str
        Path to file
    fmt  : str
        Datetime format compatable with `strftime`, defaults "%Y-%m-%d %H:%M:%S"

    Returns
    -------
    str
        Formatted timestamp
    """
    path = Path(path)
    mod = path.stat().st_mtime
    timestamp = datetime.fromtimestamp(mod)
    fmttd = timestamp.strftime(fmt)
    return fmttd

`id_from_file(path, patt)`

Extract a report ID (location, etc) from its filename given a regex pattern

Parameters:	`path` (`Path \| str`) – Path to file `patt` (`str \| Pattern`) – Pattern or string that can be compiled to re pattern. Should probably contain a group to match.

Returns:	`str \| None` – If the pattern matches, the first match is returned; otherwise, None

Examples:

>>> id_from_file("to_distro/capitol_region_cog_equity_2026.pdf", "(\w+)_equity")
'capitol_region_cog'

Source code in src/eqcli/utils.py

def id_from_file(path: Path | str, patt: str | re.Pattern) -> str | None:
    """Extract a report ID (location, etc) from its filename given a regex pattern

    Parameters
    ----------
    path : Path | str
        Path to file
    patt : str | re.Pattern
        Pattern or string that can be compiled to re pattern. Should probably contain a group to match.

    Returns
    -------
    str | None
        If the pattern matches, the first match is returned; otherwise, None

    Examples
    --------
    >>> id_from_file("to_distro/capitol_region_cog_equity_2026.pdf", "(\\w+)_equity")
    'capitol_region_cog'
    """
    fn = Path(path).name
    if isinstance(patt, str):
        patt = re.compile(patt)
    id = patt.findall(fn)
    if id:
        return id[0]
    else:
        return None

`parse_file_pattern(filename, file_pattern)`

Convert format string used for generating file names into regex to extract IDs

Source code in src/eqcli/utils.py

def parse_file_pattern(filename: Path | str, file_pattern: str):
    """Convert format string used for generating file names into regex to extract IDs"""
    filename = str(filename)
    # replace {id} with regex for named matching group
    # e.g. "{id}_equity_{doc_yr}.{ext}" -> "(?P<id>.+)_equity_{doc_yr}.{ext}"
    patt1 = file_pattern.replace("{id}", "(?P<id>.+)")
    # replace brackets with wildcards
    patt2 = re.sub(r"\{.*?\}", ".+", patt1)
    id = re.compile(patt2).findall(filename)
    return id[0]

`read_commented(path, comment='#')`

Read lines in a file, optionally omitting commented lines

Parameters:	`path` (`str \| Path`) – Path to a text file `comment` (`str \| None`, default: `'#'` ) – Single character string designating a line to omit, by default '#'

Returns:	`list[str]` – Lines of the file as a list of strings, excluding commented lines

Raises:	`ValueError` – If 'comment' is not None and is not a string of length 1

Source code in src/eqcli/utils.py

def read_commented(path: str | Path, comment: str | None = "#") -> list[str]:
    """Read lines in a file, optionally omitting commented lines

    Parameters
    ----------
    path : str | Path
        Path to a text file
    comment : str | None, optional
        Single character string designating a line to omit, by default '#'

    Returns
    -------
    list[str]
        Lines of the file as a list of strings, excluding commented lines

    Raises
    ------
    ValueError
        'comment' must be a string of length 1
    """
    if comment is not None and len(comment) != 1:
        raise ValueError("'comment' should be a string of length 1")
    with open(path, "r") as f:
        lines = f.read().splitlines()
    if comment is None:
        return lines
    else:
        return [line for line in lines if line[0] != comment]

`read_version(file, patt=None)`

Extract project version from a file based on a pattern

Parameters:	`file` (`Path \| str`) – Path to a file containing project version `patt` (`str \| None`, default: `None` ) – Pattern to compile to regex in order to extract version. If None, will supply a pattern that matches either an R description file or a common pyproject.toml pattern.

Returns:	`str` – First match found, in "v$version" format.

Source code in src/eqcli/utils.py

def read_version(file: Path | str, patt: str | None = None) -> str:
    """Extract project version from a file based on a pattern

    Parameters
    ----------
    file : Path | str, optional
        Path to a file containing project version
    patt : str | None, optional
        Pattern to compile to regex in order to extract version. If None, will supply a pattern that matches either an R description file or a common pyproject.toml pattern.

    Returns
    -------
    str
        First match found, in "v$version" format.
    """
    # if no pattern supplied, use appropriate for file type
    file = Path(file)
    if file.stem == "DESCRIPTION":
        txt = file.read_text()
        if patt is None:
            patt = r"(?<=Version:\s)([0-9a-z\-\.]+)(?=\n)"
        version = re.compile(patt).findall(txt)
    else:
        version = value_from_toml(file, key="version")
    if version:
        return f"v{version[0]}"
    else:
        raise ValueError("pattern 'patt' not found for project version")

`snakecase(x)`

Convert single string or list of strings to snakecase.

Parameters:	`x` (`list[str] \| str`) – String or list of strings to be converted.

Returns:	`list[str] \| str` – String or list of strings in snakecase.

Source code in src/eqcli/utils.py

def snakecase(x: list[str] | str) -> list[str] | str:
    """Convert single string or list of strings to snakecase.

    Parameters
    ----------
    x : list[str] | str
        String or list of strings to be converted.

    Returns
    -------
    list[str] | str
        String or list of strings in snakecase.
    """
    if isinstance(x, list):
        return [_snakecase(i) for i in x]
    else:
        return _snakecase(x)

`titlecase(x, cap=['COG', 'HSA'])`

Convert single string or list of strings to titlecase, optionally writing some text in all caps (such as acronyms).

Parameters:	`x` (`list[str] \| str`) – String or list of strings to be converted. `cap` (`list[str] \| None`, default: `['COG', 'HSA']` ) – Optional list of strings to be all caps. If `None`, this step is skipped. By default, ["COG", "HSA"].

Returns:	`list[str] \| str` – String or list of strings in titlecase, with `cap` substrings in all caps.

Source code in src/eqcli/utils.py

def titlecase(
    x: list[str] | str, cap: list[str] | None = ["COG", "HSA"]
) -> list[str] | str:
    """Convert single string or list of strings to titlecase, optionally writing some text in all caps (such as acronyms).

    Parameters
    ----------
    x : list[str] | str
        String or list of strings to be converted.
    cap : list[str] | None, optional
        Optional list of strings to be all caps. If `None`, this step is skipped. By default, ["COG", "HSA"].

    Returns
    -------
    list[str] | str
        String or list of strings in titlecase, with `cap` substrings in all caps.
    """
    if isinstance(x, list):
        return [_titlecase(i, cap) for i in x]
    else:
        return _titlecase(x, cap)

`unabbrev_yrs(x, sep='_')`

Split a concatenated string of years in 2-digit format

Parameters:	`x` (`str`) – A string of two 2-digit years smooshed together, e.g. "1524" `sep` (`str`, default: `'_'` ) – Character to separate strings in output, by default "_"

Returns:	`str` – A string of years separated, e.g. "1524" -> "15_24"

Source code in src/eqcli/utils.py

def unabbrev_yrs(x: str, sep: str = "_") -> str:
    """Insert a separator between two concatenated 2-digit years

    Parameters
    ----------
    x : str
        Two 2-digit years written back to back, e.g. "1524". Only the first four characters are used.
    sep : str, optional
        Text placed between the two years in the output, by default "_"

    Returns
    -------
    str
        The two years with `sep` between them, e.g. "1524" -> "15_24"
    """
    first_yr, second_yr = x[:2], x[2:4]
    return f"{first_yr}{sep}{second_yr}"

`y2k(x, sep='_')`

Bring 2-digit years into the 21st century.

Parameters:	`x` (`str`) – String with one or more sets of 21st century years in 2-digit format, e.g. "15_24" `sep` (`str`, default: `'_'` ) – Separator on which to split years, by default '_'

Returns:	`str` – String where 2-digit years have been converted to 4-digit, e.g. "15_24" -> "2015_2024"

Source code in src/eqcli/utils.py

def y2k(x: str, sep: str = "_") -> str:
    """Expand each 2-digit year in a separated string to 4 digits.

    Parameters
    ----------
    x : str
        String with one or more sets of 21st century years in 2-digit format, e.g. "15_24"
    sep : str, optional
        Separator used both to split the input and to rejoin the output, by default '_'

    Returns
    -------
    str
        The input with every `sep`-delimited piece converted by the per-year helper, e.g. "15_24" -> "2015_2024"
    """
    # Each piece is converted independently, then reassembled with the same separator.
    return sep.join(_y2k(piece) for piece in x.split(sep))