Aggregates the values in the expression into a list of distinct values (ignoring nulls).
Returns:
| Name | Type | Description |
| --- | --- | --- |
| Expression | Expression | A List expression containing the distinct values from the input |
Examples:
| >>> import daft
>>> from daft.functions import list_agg_distinct
>>>
>>> df = daft.from_pydict({"values": [1, 1, None, 2, 2, None]})
>>> df.agg(list_agg_distinct(df["values"]).alias("distinct_values")).show()
|
╭─────────────────╮
│ distinct_values │
│ --- │
│ List[Int64] │
╞═════════════════╡
│ [1, 2] │
╰─────────────────╯
(Showing first 1 of 1 rows)
Note that null values are ignored, so an input containing only nulls produces an empty list:
| >>> df = daft.from_pydict({"values": [None, None, None]})
>>> df.agg(list_agg_distinct(df["values"]).alias("distinct_values")).show()
|
╭─────────────────╮
│ distinct_values │
│ --- │
│ List[Null] │
╞═════════════════╡
│ [] │
╰─────────────────╯
(Showing first 1 of 1 rows)
Source code in daft/functions/agg.py
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
def list_agg_distinct(expr: Expression) -> Expression:
    """Collect the distinct, non-null values of an expression into a single list.

    Args:
        expr: The expression whose values are aggregated.

    Returns:
        Expression: A List expression containing the distinct values from the input

    Examples:
        >>> import daft
        >>> from daft.functions import list_agg_distinct
        >>>
        >>> df = daft.from_pydict({"values": [1, 1, None, 2, 2, None]})
        >>> df.agg(list_agg_distinct(df["values"]).alias("distinct_values")).show()
        ╭─────────────────╮
        │ distinct_values │
        │ ---             │
        │ List[Int64]     │
        ╞═════════════════╡
        │ [1, 2]          │
        ╰─────────────────╯
        <BLANKLINE>
        (Showing first 1 of 1 rows)

        Nulls are dropped, so a column holding only nulls aggregates to an empty list:

        >>> df = daft.from_pydict({"values": [None, None, None]})
        >>> df.agg(list_agg_distinct(df["values"]).alias("distinct_values")).show()
        ╭─────────────────╮
        │ distinct_values │
        │ ---             │
        │ List[Null]      │
        ╞═════════════════╡
        │ []              │
        ╰─────────────────╯
        <BLANKLINE>
        (Showing first 1 of 1 rows)
    """
    # Build the set aggregation on the underlying native expression, then
    # wrap it back into the public Expression type.
    distinct_agg = expr._expr.agg_set()
    return Expression._from_pyexpr(distinct_agg)
|