Skip to content

daft.functions.lag#

lag #

lag(expr: Expression, offset: int = 1, default: Expression | None = None) -> Expression

Get the value from a previous row within a window partition.

Parameters:

Name Type Description Default
expr Expression

The expression to get the lagged value of.

required
offset int

The number of rows to shift backward. Must be >= 0.

1
default Expression | None

Value to use when no previous row exists (for example, on the first row of each partition). Can be a column reference. If omitted, the result for such rows is null.

None

Returns:

Name Type Description
Expression Expression

Value from the row `offset` positions before the current row.

Examples:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
>>> import daft
>>> from daft.functions import lag
>>>
>>> df = daft.from_pydict(
...     {
...         "category": ["A", "A", "A", "B", "B", "B"],
...         "value": [1, 2, 3, 4, 5, 6],
...         "default_val": [10, 20, 30, 40, 50, 60],
...     }
... )
>>>
>>> # Simple lag with null default
>>> window = daft.Window().partition_by("category").order_by("value")
>>> df = df.with_column("lagged", lag(df["value"], 1).over(window))
>>>
>>> # Lag with column reference as default
>>> df = df.with_column("lagged_with_default", lag(df["value"], 1, default=df["default_val"]).over(window))
>>> df.sort(["category", "value"]).show()
╭──────────┬───────┬─────────────┬────────┬─────────────────────╮
│ category ┆ value ┆ default_val ┆ lagged ┆ lagged_with_default │
│ ---      ┆ ---   ┆ ---         ┆ ---    ┆ ---                 │
│ String   ┆ Int64 ┆ Int64       ┆ Int64  ┆ Int64               │
╞══════════╪═══════╪═════════════╪════════╪═════════════════════╡
│ A        ┆ 1     ┆ 10          ┆ None   ┆ 10                  │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ A        ┆ 2     ┆ 20          ┆ 1      ┆ 1                   │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ A        ┆ 3     ┆ 30          ┆ 2      ┆ 2                   │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ B        ┆ 4     ┆ 40          ┆ None   ┆ 40                  │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ B        ┆ 5     ┆ 50          ┆ 4      ┆ 4                   │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ B        ┆ 6     ┆ 60          ┆ 5      ┆ 5                   │
╰──────────┴───────┴─────────────┴────────┴─────────────────────╯
(Showing first 6 of 6 rows)
Source code in daft/functions/window.py
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
def lag(expr: Expression, offset: int = 1, default: Expression | None = None) -> Expression:
    """Return the value of ``expr`` from an earlier row of the same window partition.

    The result is meant to be evaluated over a window (see the example below,
    which applies ``.over(window)`` to the returned expression).

    Args:
        expr: The expression to get the lagged value of.
        offset: The number of rows to shift backward. Must be >= 0.
        default: Value to use when no previous row exists. Can be a column reference.
            When provided, it is passed through ``Expression._to_expression`` before use.

    Returns:
        Expression: Value from the row `offset` positions before the current row.

    Examples:
        >>> import daft
        >>> from daft.functions import lag
        >>>
        >>> df = daft.from_pydict(
        ...     {
        ...         "category": ["A", "A", "A", "B", "B", "B"],
        ...         "value": [1, 2, 3, 4, 5, 6],
        ...         "default_val": [10, 20, 30, 40, 50, 60],
        ...     }
        ... )
        >>>
        >>> # Simple lag with null default
        >>> window = daft.Window().partition_by("category").order_by("value")
        >>> df = df.with_column("lagged", lag(df["value"], 1).over(window))
        >>>
        >>> # Lag with column reference as default
        >>> df = df.with_column("lagged_with_default", lag(df["value"], 1, default=df["default_val"]).over(window))
        >>> df.sort(["category", "value"]).show()
        ╭──────────┬───────┬─────────────┬────────┬─────────────────────╮
        │ category ┆ value ┆ default_val ┆ lagged ┆ lagged_with_default │
        │ ---      ┆ ---   ┆ ---         ┆ ---    ┆ ---                 │
        │ String   ┆ Int64 ┆ Int64       ┆ Int64  ┆ Int64               │
        ╞══════════╪═══════╪═════════════╪════════╪═════════════════════╡
        │ A        ┆ 1     ┆ 10          ┆ None   ┆ 10                  │
        ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
        │ A        ┆ 2     ┆ 20          ┆ 1      ┆ 1                   │
        ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
        │ A        ┆ 3     ┆ 30          ┆ 2      ┆ 2                   │
        ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
        │ B        ┆ 4     ┆ 40          ┆ None   ┆ 40                  │
        ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
        │ B        ┆ 5     ┆ 50          ┆ 4      ┆ 4                   │
        ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
        │ B        ┆ 6     ┆ 60          ┆ 5      ┆ 5                   │
        ╰──────────┴───────┴─────────────┴────────┴─────────────────────╯
        <BLANKLINE>
        (Showing first 6 of 6 rows)
    """
    # Resolve the optional fallback to its underlying handle up front; None is
    # forwarded unchanged when the caller supplied no default.
    fallback = None
    if default is not None:
        fallback = Expression._to_expression(default)._expr

    # A lag is an offset in the negative direction, hence the sign flip.
    shifted = expr._expr.offset(-offset, fallback)
    return Expression._from_pyexpr(shifted)