Utilities

Utility functions, mostly for working with filenames to parse information in snakemake workflows.

check_version(version, version_file, version_patt, is_required=True)

Read and/or verify project version

Parameters:
  • version (str | None) –

    Version, such as "0.2.1"

  • version_file (Path | str | None) –

    File containing a version, such as "pyproject.toml" or "DESCRIPTION"

  • version_patt (str | None) –

    Regex pattern to use to find version tag

  • is_required (bool, default: True ) –

    If true (default), must resolve to some version. Otherwise, not finding a version and returning None is allowed

Returns:
  • str | None

    The version, either as supplied or as read from a file. version argument takes precedence if both it and version_file are given. If is_required is false and no version is found, will return None.

Raises:
  • ValueError

    Error if neither version nor version_file is given, and is_required=True.

Source code in src/eqcli/utils.py
def check_version(
    version: str | None,
    version_file: Path | str | None,
    version_patt: str | None,
    is_required: bool = True,
) -> str | None:
    """Read and/or verify project version

    Parameters
    ----------
    version : str | None
        Version, such as "0.2.1"
    version_file : Path | str | None
        File containing a version, such as "pyproject.toml" or "DESCRIPTION"
    version_patt : str | None
        Regex pattern to use to find version tag
    is_required : bool
        If true (default), must resolve to some version. Otherwise, not finding a version and returning None is allowed

    Returns
    -------
    str | None
        The version, either as supplied or as read from a file. `version` argument takes precedence if both it and `version_file` are given. If `is_required` is false and no version is found, will return `None`.

    Raises
    ------
    ValueError
        Error if neither `version` nor `version_file` is given, and `is_required=True`.
    """
    if version is None:
        if version_file is None:
            if is_required:
                raise ValueError("must supply a version or a version file")
            else:
                return None
        else:
            if is_required:
                return read_version(version_file, version_patt)
            else:
                try:
                    return read_version(version_file, version_patt)
                except ValueError:
                    return None
    else:
        return version

create_file_names(template, path=None, ids=None, to_snakecase=True, comment='#', **kwargs)

Batch create filenames based on IDs read from a text file and/or a list, formatted with a template literal and kwargs

Parameters:
  • template (str) –

    Template that can be interpreted by format(). Should contain '{id}' to have that filled in by identifiers

  • path (str | Path | None, default: None ) –

    Path to a text file of identifiers that can be passed to read_commented, by default None

  • ids (list[str] | None, default: None ) –

    List of strings of identifiers, by default None

  • to_snakecase (bool, default: True ) –

    Convert filenames to snakecase, by default True

  • comment (str | None, default: '#' ) –

    Single string designating a comment for omitting lines to pass to read_commented, by default "#"

  • **kwargs

    Keyword args to fill into the template

Returns:
  • list[str]

    A list of strings giving paths to output files with keywords filled in.

Raises:
  • ValueError

    Errors if both 'path' and 'ids' are None

Examples:

>>> create_file_names("{outdir}/{id}_report_{yr}.pdf", ids=["New Haven", "Hartford"], outdir="to_distro", yr=2026)
['to_distro/new_haven_report_2026.pdf', 'to_distro/hartford_report_2026.pdf']
Source code in src/eqcli/utils.py
def create_file_names(
    template: str,
    path: str | Path | None = None,
    ids: list[str] | None = None,
    to_snakecase: bool = True,
    comment: str | None = "#",
    **kwargs,
) -> list[str]:
    """Batch create filenames based on IDs read from a text file and/or a list, formatted with a template literal and kwargs

    Parameters
    ----------
    template : str
        Template that can be interpreted by `format()`. Should contain '{id}' to have that filled in by identifiers
    path : str | Path | None, optional
        Path to a text file of identifiers that can be passed to `read_commented`, by default None
    ids : list[str] | None, optional
        List of strings of identifiers, by default None
    to_snakecase : bool, optional
        Convert filenames to snakecase, by default True
    comment : str | None, optional
        Single string designating a comment for omitting lines to pass to `read_commented`, by default "#"
    **kwargs: dict
        Keyword args to fill into the template

    Returns
    -------
    list[str]
        A list of strings giving paths to output files with keywords filled in.

    Raises
    ------
    ValueError
        Errors if both 'path' and 'ids' are None

    Examples
    ------
    >>> create_file_names("{outdir}/{id}_report_{yr}.pdf", ids=["New Haven", "Hartford"], outdir="to_distro", yr=2026)
    ['to_distro/new_haven_report_2026.pdf', 'to_distro/hartford_report_2026.pdf']
    """
    if path is None and ids is None:
        raise ValueError("must supply 'path' and/or 'ids'")
    if ids is None:
        ids_out = []
    else:
        ids_out = ids
    if path is not None:
        ids_out = ids_out + read_commented(path, comment)
    if to_snakecase:
        ids_out = snakecase(ids_out)
    return [template.format(**kwargs, id=id) for id in ids_out]

file_timestamp(path, fmt='%Y-%m-%d %H:%M:%S')

Get file's modification time as a formatted timestamp

Parameters:
  • path (Path | str) –

    Path to file

  • fmt (str, default: '%Y-%m-%d %H:%M:%S' ) –

    Datetime format compatible with strftime, defaults to "%Y-%m-%d %H:%M:%S"

Returns:
  • str

    Formatted timestamp

Source code in src/eqcli/utils.py
def file_timestamp(path: Path | str, fmt: str = "%Y-%m-%d %H:%M:%S") -> str:
    """Get file's modification time as a formatted timestamp

    Parameters
    ----------
    path : Path | str
        Path to file
    fmt  : str
        Datetime format compatable with `strftime`, defaults "%Y-%m-%d %H:%M:%S"

    Returns
    -------
    str
        Formatted timestamp
    """
    path = Path(path)
    mod = path.stat().st_mtime
    timestamp = datetime.fromtimestamp(mod)
    fmttd = timestamp.strftime(fmt)
    return fmttd

id_from_file(path, patt)

Extract a report ID (location, etc) from its filename given a regex pattern

Parameters:
  • path (Path | str) –

    Path to file

  • patt (str | Pattern) –

    Pattern or string that can be compiled to re pattern. Should probably contain a group to match.

Returns:
  • str | None

    If the pattern matches, the first match is returned; otherwise, None

Examples:

>>> id_from_file("to_distro/capitol_region_cog_equity_2026.pdf", "(\w+)_equity")
'capitol_region_cog'
Source code in src/eqcli/utils.py
def id_from_file(path: Path | str, patt: str | re.Pattern) -> str | None:
    """Extract a report ID (location, etc) from its filename given a regex pattern

    Parameters
    ----------
    path : Path | str
        Path to file
    patt : str | re.Pattern
        Pattern or string that can be compiled to re pattern. Should probably contain a group to match.

    Returns
    -------
    str | None
        If the pattern matches, the first match is returned; otherwise, None

    Examples
    --------
    >>> id_from_file("to_distro/capitol_region_cog_equity_2026.pdf", "(\\w+)_equity")
    'capitol_region_cog'
    """
    fn = Path(path).name
    if isinstance(patt, str):
        patt = re.compile(patt)
    id = patt.findall(fn)
    if id:
        return id[0]
    else:
        return None

parse_file_pattern(filename, file_pattern)

Convert format string used for generating file names into regex to extract IDs

Source code in src/eqcli/utils.py
def parse_file_pattern(filename: Path | str, file_pattern: str):
    """Convert format string used for generating file names into regex to extract IDs"""
    filename = str(filename)
    # replace {id} with regex for named matching group
    # e.g. "{id}_equity_{doc_yr}.{ext}" -> "(?P<id>.+)_equity_{doc_yr}.{ext}"
    patt1 = file_pattern.replace("{id}", "(?P<id>.+)")
    # replace brackets with wildcards
    patt2 = re.sub(r"\{.*?\}", ".+", patt1)
    id = re.compile(patt2).findall(filename)
    return id[0]

read_commented(path, comment='#')

Read lines in a file, optionally omitting commented lines

Parameters:
  • path (str | Path) –

    Path to a text file

  • comment (str | None, default: '#' ) –

    Single character string designating a line to omit, by default '#'

Returns:
  • list[str]

    Lines of the file as a list of strings, excluding commented lines

Raises:
  • ValueError

    Raised if 'comment' is not None and is not a string of length 1

Source code in src/eqcli/utils.py
def read_commented(path: str | Path, comment: str | None = "#") -> list[str]:
    """Read lines in a file, optionally omitting commented lines

    Parameters
    ----------
    path : str | Path
        Path to a text file
    comment : str | None, optional
        Single character string designating a line to omit, by default '#'

    Returns
    -------
    list[str]
        Lines of the file as a list of strings, excluding commented lines

    Raises
    ------
    ValueError
        'comment' must be a string of length 1
    """
    if comment is not None and len(comment) != 1:
        raise ValueError("'comment' should be a string of length 1")
    with open(path, "r") as f:
        lines = f.read().splitlines()
    if comment is None:
        return lines
    else:
        return [line for line in lines if line[0] != comment]

read_version(file, patt=None)

Extract project version from a file based on a pattern

Parameters:
  • file (Path | str) –

    Path to a file containing project version

  • patt (str | None, default: None ) –

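    Pattern to compile to regex in order to extract the version from a DESCRIPTION file. If None, a default pattern matching the "Version:" line of an R DESCRIPTION file is used. Any other file (such as pyproject.toml) is read with value_from_toml using the "version" key, and this pattern is not applied.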

Returns:
  • str

    First match found, in "v$version" format.

Source code in src/eqcli/utils.py
def read_version(file: Path | str, patt: str | None = None) -> str:
    """Extract project version from a file based on a pattern

    Parameters
    ----------
    file : Path | str, optional
        Path to a file containing project version
    patt : str | None, optional
        Pattern to compile to regex in order to extract version. If None, will supply a pattern that matches either an R description file or a common pyproject.toml pattern.

    Returns
    -------
    str
        First match found, in "v$version" format.
    """
    # if no pattern supplied, use appropriate for file type
    file = Path(file)
    if file.stem == "DESCRIPTION":
        txt = file.read_text()
        if patt is None:
            patt = r"(?<=Version:\s)([0-9a-z\-\.]+)(?=\n)"
        version = re.compile(patt).findall(txt)
    else:
        version = value_from_toml(file, key="version")
    if version:
        return f"v{version[0]}"
    else:
        raise ValueError("pattern 'patt' not found for project version")

snakecase(x)

Convert single string or list of strings to snakecase.

Parameters:
  • x (list[str] | str) –

    String or list of strings to be converted.

Returns:
  • list[str] | str

    String or list of strings in snakecase.

Source code in src/eqcli/utils.py
def snakecase(x: list[str] | str) -> list[str] | str:
    """Convert single string or list of strings to snakecase.

    Parameters
    ----------
    x : list[str] | str
        String or list of strings to be converted.

    Returns
    -------
    list[str] | str
        String or list of strings in snakecase.
    """
    if isinstance(x, list):
        return [_snakecase(i) for i in x]
    else:
        return _snakecase(x)

titlecase(x, cap=['COG', 'HSA'])

Convert single string or list of strings to titlecase, optionally writing some text in all caps (such as acronyms).

Parameters:
  • x (list[str] | str) –

    String or list of strings to be converted.

  • cap (list[str] | None, default: ['COG', 'HSA'] ) –

    Optional list of strings to be all caps. If None, this step is skipped. By default, ["COG", "HSA"].

Returns:
  • list[str] | str

    String or list of strings in titlecase, with cap substrings in all caps.

Source code in src/eqcli/utils.py
def titlecase(
    x: list[str] | str, cap: list[str] | None = ["COG", "HSA"]
) -> list[str] | str:
    """Convert single string or list of strings to titlecase, optionally writing some text in all caps (such as acronyms).

    Parameters
    ----------
    x : list[str] | str
        String or list of strings to be converted.
    cap : list[str] | None, optional
        Optional list of strings to be all caps. If `None`, this step is skipped. By default, ["COG", "HSA"].

    Returns
    -------
    list[str] | str
        String or list of strings in titlecase, with `cap` substrings in all caps.
    """
    if isinstance(x, list):
        return [_titlecase(i, cap) for i in x]
    else:
        return _titlecase(x, cap)

unabbrev_yrs(x, sep='_')

Split a concatenated string of years in 2-digit format

Parameters:
  • x (str) –

    A string of two 2-digit years smooshed together, e.g. "1524"

  • sep (str, default: '_' ) –

    Character to separate strings in output, by default "_"

Returns:
  • str

    A string of years separated, e.g. "1524" -> "15_24"

Source code in src/eqcli/utils.py
def unabbrev_yrs(x: str, sep: str = "_") -> str:
    """Insert a separator between two concatenated 2-digit years.

    Parameters
    ----------
    x : str
        A string of two 2-digit years run together, e.g. "1524". Only the
        first four characters are used; no validation is performed.
    sep : str, optional
        Character to separate the years in the output, by default "_"

    Returns
    -------
    str
        Characters 0-1 and 2-3 of `x` joined by `sep`, e.g. "1524" -> "15_24"
    """
    first_yr, second_yr = x[:2], x[2:4]
    return f"{first_yr}{sep}{second_yr}"

y2k(x, sep='_')

Bring 2-digit years into the 21st century.

Parameters:
  • x (str) –

    String with one or more sets of 21st century years in 2-digit format, e.g. "15_24"

  • sep (str, default: '_' ) –

    Separator on which to split years, by default '_'

Returns:
  • str

    String where 2-digit years have been converted to 4-digit, e.g. "15_24" -> "2015_2024"

Source code in src/eqcli/utils.py
def y2k(x: str, sep: str = "_") -> str:
    """Bring 2-digit years into the 21st century.

    Parameters
    ----------
    x : str
        String with one or more sets of 21st century years in 2-digit format,
        e.g. "15_24"
    sep : str, optional
        Separator on which to split years, and with which the converted years
        are rejoined, by default '_'

    Returns
    -------
    str
        String where each `sep`-separated piece has been converted by the
        `_y2k` helper, e.g. "15_24" -> "2015_2024"
    """
    # Split before touching the helper so an invalid separator fails first.
    pieces = x.split(sep)
    return sep.join(_y2k(piece) for piece in pieces)