Skip to content

daft.functions.columns_avg#

columns_avg #

columns_avg(*exprs: Expression | str) -> Expression

Average values across columns. Akin to columns_mean.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `exprs` | `Expression \| str` | The columns to average across. | `()` |

Examples:

1
2
3
4
5
>>> import daft
>>> from daft.functions import columns_avg
>>> df = daft.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]})
>>> df = df.with_column("avg", columns_avg("a", "b"))
>>> df.show()
╭───────┬───────┬─────────╮
│ a     ┆ b     ┆ avg     │
│ ---   ┆ ---   ┆ ---     │
│ Int64 ┆ Int64 ┆ Float64 │
╞═══════╪═══════╪═════════╡
│ 1     ┆ 4     ┆ 2.5     │
├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
│ 2     ┆ 5     ┆ 3.5     │
├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
│ 3     ┆ 6     ┆ 4.5     │
╰───────┴───────┴─────────╯
(Showing first 3 of 3 rows)
Source code in daft/functions/columnar.py
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def columns_avg(*exprs: Expression | str) -> Expression:
    """Compute the mean of the given columns for each row. Akin to `columns_mean`.

    Each string argument is turned into a column reference with `col`; any
    other argument is passed through unchanged. The inputs are packed together
    with `to_list`, reduced with `list_mean`, and the resulting expression is
    aliased as ``"columns_avg"``.

    Args:
        exprs: The columns to average across.

    Returns:
        The aliased expression built from the list mean of the inputs.

    Raises:
        ValueError: If no expressions are supplied.

    Examples:
        >>> import daft
        >>> from daft.functions import columns_avg
        >>> df = daft.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]})
        >>> df = df.with_column("avg", columns_avg("a", "b"))
        >>> df.show()
        ╭───────┬───────┬─────────╮
        │ a     ┆ b     ┆ avg     │
        │ ---   ┆ ---   ┆ ---     │
        │ Int64 ┆ Int64 ┆ Float64 │
        ╞═══════╪═══════╪═════════╡
        │ 1     ┆ 4     ┆ 2.5     │
        ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
        │ 2     ┆ 5     ┆ 3.5     │
        ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
        │ 3     ┆ 6     ┆ 4.5     │
        ╰───────┴───────┴─────────╯
        <BLANKLINE>
        (Showing first 3 of 3 rows)
    """
    # Guard first: the function refuses to build an average over zero inputs.
    if len(exprs) == 0:
        raise ValueError("columns_avg requires at least one expression")

    # Normalise every argument: column names become column references.
    resolved = []
    for item in exprs:
        if isinstance(item, str):
            resolved.append(col(item))
        else:
            resolved.append(item)

    packed = to_list(*resolved)
    averaged = packed.list_mean()
    return averaged.alias("columns_avg")