Skip to content

daft.functions.lead#

lead #

lead(expr: Expression, offset: int = 1, default: Expression | None = None) -> Expression

Get the value from a future row within a window partition.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `expr` | `Expression` | The expression to get the lead value of. | required |
| `offset` | `int` | The number of rows to shift forward. Must be >= 0. | `1` |
| `default` | `Expression \| None` | Value to use when no future row exists. Can be a column reference. | `None` |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `Expression` | `Expression` | Value from the row `offset` positions after the current row. |

Examples:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
>>> import daft
>>> from daft.functions import lead
>>>
>>> df = daft.from_pydict(
...     {
...         "category": ["A", "A", "A", "B", "B", "B"],
...         "value": [1, 2, 3, 4, 5, 6],
...         "default_val": [10, 20, 30, 40, 50, 60],
...     }
... )
>>>
>>> # Simple lead with null default
>>> window = daft.Window().partition_by("category").order_by("value")
>>> df = df.with_column("lead", lead(df["value"], 1).over(window))
>>>
>>> # Lead with column reference as default
>>> df = df.with_column("lead_with_default", lead(df["value"], 1, default=df["default_val"]).over(window))
>>> df.sort(["category", "value"]).show()
╭──────────┬───────┬─────────────┬───────┬───────────────────╮
│ category ┆ value ┆ default_val ┆ lead  ┆ lead_with_default │
│ ---      ┆ ---   ┆ ---         ┆ ---   ┆ ---               │
│ String   ┆ Int64 ┆ Int64       ┆ Int64 ┆ Int64             │
╞══════════╪═══════╪═════════════╪═══════╪═══════════════════╡
│ A        ┆ 1     ┆ 10          ┆ 2     ┆ 2                 │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ A        ┆ 2     ┆ 20          ┆ 3     ┆ 3                 │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ A        ┆ 3     ┆ 30          ┆ None  ┆ 30                │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ B        ┆ 4     ┆ 40          ┆ 5     ┆ 5                 │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ B        ┆ 5     ┆ 50          ┆ 6     ┆ 6                 │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ B        ┆ 6     ┆ 60          ┆ None  ┆ 60                │
╰──────────┴───────┴─────────────┴───────┴───────────────────╯
(Showing first 6 of 6 rows)
Source code in daft/functions/window.py
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
def lead(expr: Expression, offset: int = 1, default: Expression | None = None) -> Expression:
    """Look ahead within a window partition and fetch a later row's value.

    Args:
        expr: Expression whose value is read from the later row.
        offset: How many rows ahead to look. The documented contract is
            ``offset >= 0``; this function forwards the value unchanged and
            does not check it itself.
        default: Fallback to use for rows that have no later row at the
            requested distance. May be a column reference. When left as
            ``None`` no fallback is forwarded; in the example below such rows
            show ``None``.

    Returns:
        Expression: Value from the row `offset` positions after the current row.

    Examples:
        >>> import daft
        >>> from daft.functions import lead
        >>>
        >>> df = daft.from_pydict(
        ...     {
        ...         "category": ["A", "A", "A", "B", "B", "B"],
        ...         "value": [1, 2, 3, 4, 5, 6],
        ...         "default_val": [10, 20, 30, 40, 50, 60],
        ...     }
        ... )
        >>>
        >>> # Simple lead with null default
        >>> window = daft.Window().partition_by("category").order_by("value")
        >>> df = df.with_column("lead", lead(df["value"], 1).over(window))
        >>>
        >>> # Lead with column reference as default
        >>> df = df.with_column("lead_with_default", lead(df["value"], 1, default=df["default_val"]).over(window))
        >>> df.sort(["category", "value"]).show()
        ╭──────────┬───────┬─────────────┬───────┬───────────────────╮
        │ category ┆ value ┆ default_val ┆ lead  ┆ lead_with_default │
        │ ---      ┆ ---   ┆ ---         ┆ ---   ┆ ---               │
        │ String   ┆ Int64 ┆ Int64       ┆ Int64 ┆ Int64             │
        ╞══════════╪═══════╪═════════════╪═══════╪═══════════════════╡
        │ A        ┆ 1     ┆ 10          ┆ 2     ┆ 2                 │
        ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
        │ A        ┆ 2     ┆ 20          ┆ 3     ┆ 3                 │
        ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
        │ A        ┆ 3     ┆ 30          ┆ None  ┆ 30                │
        ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
        │ B        ┆ 4     ┆ 40          ┆ 5     ┆ 5                 │
        ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
        │ B        ┆ 5     ┆ 50          ┆ 6     ┆ 6                 │
        ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
        │ B        ┆ 6     ┆ 60          ┆ None  ┆ 60                │
        ╰──────────┴───────┴─────────────┴───────┴───────────────────╯
        <BLANKLINE>
        (Showing first 6 of 6 rows)
    """
    # Resolve the optional fallback to its underlying handle up front; it stays
    # None when the caller did not supply a default.
    fallback_pyexpr = None
    if default is not None:
        fallback_pyexpr = Expression._to_expression(default)._expr

    # Forward the offset as given and wrap the result back into an Expression.
    shifted = expr._expr.offset(offset, fallback_pyexpr)
    return Expression._from_pyexpr(shifted)