Skip to content

daft.functions.first_value#

first_value #

first_value(expr: Expression, ignore_nulls: bool = False) -> Expression

Returns the first value in the window frame.

Must be used with over() to specify the window partition, order, and frame. When ignore_nulls=True, skips null values and returns the first non-null value.

Parameters:

Name Type Description Default
expr Expression

The input expression.

required
ignore_nulls bool

Whether to skip null values when selecting the first value in the frame. Defaults to False.

False

Returns:

Name Type Description
Expression Expression

The first value in the window frame.

Examples:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
>>> import daft
>>> from daft.functions import first_value
>>>
>>> df = daft.from_pydict(
...     {
...         "category": ["A", "A", "A", "A", "B", "B", "B", "B"],
...         "time": [1, 2, 3, 4, 1, 2, 3, 4],
...         "value": [1, None, None, 4, 10, None, 30, None],
...     }
... )
>>>
>>> # Backward fill using first_value: look ahead for the earliest following non-null value
>>> window = (
...     daft.Window()
...     .partition_by("category")
...     .order_by("time")
...     .rows_between(daft.Window.current_row, daft.Window.unbounded_following)
... )
>>> df = df.with_column("bfill", first_value(df["value"], ignore_nulls=True).over(window))
>>> df.sort(["category", "time"]).show()
╭──────────┬───────┬───────┬───────╮
│ category ┆ time  ┆ value ┆ bfill │
│ ---      ┆ ---   ┆ ---   ┆ ---   │
│ String   ┆ Int64 ┆ Int64 ┆ Int64 │
╞══════════╪═══════╪═══════╪═══════╡
│ A        ┆ 1     ┆ 1     ┆ 1     │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ A        ┆ 2     ┆ None  ┆ 4     │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ A        ┆ 3     ┆ None  ┆ 4     │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ A        ┆ 4     ┆ 4     ┆ 4     │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ B        ┆ 1     ┆ 10    ┆ 10    │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ B        ┆ 2     ┆ None  ┆ 30    │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ B        ┆ 3     ┆ 30    ┆ 30    │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ B        ┆ 4     ┆ None  ┆ None  │
╰──────────┴───────┴───────┴───────╯
(Showing first 8 rows)
Source code in daft/functions/window.py
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
def first_value(expr: Expression, ignore_nulls: bool = False) -> Expression:
    """Pick the first value found in the current window frame.

    This is a window function: the resulting expression has to be combined
    with ``over()`` so that the partitioning, ordering, and frame bounds are
    defined. Passing ``ignore_nulls=True`` makes nulls be skipped, so the
    first non-null value of the frame is returned instead.

    Args:
        expr (Expression): The input expression.
        ignore_nulls (bool): Whether null values are skipped when looking for
            the first value. Defaults to False.

    Returns:
        Expression: The first value in the window frame.

    Examples:
        In the output below, the last row of category ``B`` stays ``None``
        because its frame contains no non-null value.

        >>> import daft
        >>> from daft.functions import first_value
        >>>
        >>> df = daft.from_pydict(
        ...     {
        ...         "category": ["A", "A", "A", "A", "B", "B", "B", "B"],
        ...         "time": [1, 2, 3, 4, 1, 2, 3, 4],
        ...         "value": [1, None, None, 4, 10, None, 30, None],
        ...     }
        ... )
        >>>
        >>> # Backward fill using first_value: look ahead for the earliest following non-null value
        >>> window = (
        ...     daft.Window()
        ...     .partition_by("category")
        ...     .order_by("time")
        ...     .rows_between(daft.Window.current_row, daft.Window.unbounded_following)
        ... )
        >>> df = df.with_column("bfill", first_value(df["value"], ignore_nulls=True).over(window))
        >>> df.sort(["category", "time"]).show()
        ╭──────────┬───────┬───────┬───────╮
        │ category ┆ time  ┆ value ┆ bfill │
        │ ---      ┆ ---   ┆ ---   ┆ ---   │
        │ String   ┆ Int64 ┆ Int64 ┆ Int64 │
        ╞══════════╪═══════╪═══════╪═══════╡
        │ A        ┆ 1     ┆ 1     ┆ 1     │
        ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
        │ A        ┆ 2     ┆ None  ┆ 4     │
        ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
        │ A        ┆ 3     ┆ None  ┆ 4     │
        ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
        │ A        ┆ 4     ┆ 4     ┆ 4     │
        ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
        │ B        ┆ 1     ┆ 10    ┆ 10    │
        ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
        │ B        ┆ 2     ┆ None  ┆ 30    │
        ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
        │ B        ┆ 3     ┆ 30    ┆ 30    │
        ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
        │ B        ┆ 4     ┆ None  ┆ None  │
        ╰──────────┴───────┴───────┴───────╯
        <BLANKLINE>
        (Showing first 8 rows)
    """
    # Build the first_value node on the wrapped native expression, then lift
    # the result back into the public Expression type.
    native_first_value = expr._expr.first_value(ignore_nulls)
    return Expression._from_pyexpr(native_first_value)