# Dplyr's across: Replicating within Polars

[dplyr](https://dplyr.tidyverse.org/index.html) has the [across](https://dplyr.tidyverse.org/reference/across.html) function, which is meant to make column wise processing easy. 
This article aims to replicate solutions in the dplyr [column-wise operations](https://dplyr.tidyverse.org/articles/colwise.html) vignette with Polars.

Let's load in the relevant libraries

In [2]:
import polars as pl
import polars.selectors as cs
import sys

print(" polars version :", pl.__version__, "\n", "python version :", sys.version)

 polars version : 0.20.31 
 python version : 3.12.4 | packaged by conda-forge | (main, Jun 17 2024, 10:13:44) [Clang 16.0.6 ]


In [3]:
# https://raw.githubusercontent.com/tidyverse/dplyr/main/data-raw/starwars.csv
starwars = pl.read_csv('Data_files/starwars.csv')
starwars.head()

name,height,mass,hair_color,skin_color,eye_color,birth_year,sex,gender,homeworld,species,films,vehicles,starships
str,f64,f64,str,str,str,f64,str,str,str,str,str,str,str
"""Luke Skywalker""",172.0,77.0,"""blond""","""fair""","""blue""",19.0,"""male""","""masculine""","""Tatooine""","""Human""","""A New Hope, The Empire Strikes…","""Snowspeeder, Imperial Speeder …","""X-wing, Imperial shuttle"""
"""C-3PO""",167.0,75.0,,"""gold""","""yellow""",112.0,"""none""","""masculine""","""Tatooine""","""Droid""","""A New Hope, The Empire Strikes…",,
"""R2-D2""",96.0,32.0,,"""white, blue""","""red""",33.0,"""none""","""masculine""","""Naboo""","""Droid""","""A New Hope, The Empire Strikes…",,
"""Darth Vader""",202.0,136.0,"""none""","""white""","""yellow""",41.9,"""male""","""masculine""","""Tatooine""","""Human""","""A New Hope, The Empire Strikes…",,"""TIE Advanced x1"""
"""Leia Organa""",150.0,49.0,"""brown""","""light""","""brown""",19.0,"""female""","""feminine""","""Alderaan""","""Human""","""A New Hope, The Empire Strikes…","""Imperial Speeder Bike""",


In [4]:
# dplyr :
# starwars %>%
#   summarise(across(where(is.character), n_distinct))
starwars.select(cs.string().n_unique())

name,hair_color,skin_color,eye_color,sex,gender,homeworld,species,films,vehicles,starships
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
87,12,31,15,5,3,49,38,24,11,16


In [5]:
# dplyr :
# starwars %>%
#   group_by(species) %>%
#   filter(n() > 1) %>%
#   summarise(across(c(sex, gender, homeworld), n_distinct))
(starwars
 .group_by("species")
 .agg(pl.n_unique("sex", "gender", "homeworld"), 
      pl.len().alias("n")
)
.filter(pl.col("n").gt(1))
.select(pl.exclude("n"))
)

species,sex,gender,homeworld
str,u32,u32,u32
"""Twi'lek""",2,2,1
"""Mirialan""",1,1,1
"""Droid""",1,2,3
"""Human""",2,2,15
,1,1,3
"""Zabrak""",1,1,2
"""Gungan""",1,1,1
"""Wookiee""",1,1,1
"""Kaminoan""",2,2,1


In [6]:
# dplyr :
# starwars %>%
#   group_by(homeworld) %>%
#   filter(n() > 1) %>%
#   summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)))
(starwars
 .group_by("homeworld")
 .agg(cs.numeric().mean(), pl.len().alias("n"))
 .filter(pl.col("n").gt(1))
.select(pl.exclude("n"))
)

homeworld,height,mass,birth_year
str,f64,f64,f64
"""Mirial""",168.0,53.1,49.0
"""Alderaan""",176.333333,64.0,43.0
,138.75,82.0,334.333333
"""Ryloth""",179.0,55.0,48.0
"""Tatooine""",169.8,85.375,54.644444
"""Kamino""",208.333333,83.1,31.5
"""Corellia""",175.0,78.5,25.0
"""Kashyyyk""",231.0,124.0,200.0
"""Coruscant""",173.666667,50.0,91.0
"""Naboo""",177.272727,64.166667,55.0


In [7]:
# dplyr:
# starwars %>% distinct(across(contains("color")))
starwars.select(cs.ends_with('color')).unique()

hair_color,skin_color,eye_color
str,str,str
"""none""","""grey""","""orange"""
,"""green-tan, brown""","""orange"""
"""none""","""metal""","""red"""
"""none""","""green""","""yellow"""
"""none""","""green, grey""","""unknown"""
…,…,…
"""brown""","""fair""","""blue"""
"""brown""","""unknown""","""blue"""
"""none""","""blue""","""hazel"""
"""brown""","""light""","""brown"""


In [8]:
# dplyr
# starwars %>% count(across(contains("color")), sort = TRUE)
(starwars
 .select(pl.struct(cs.ends_with('color')).value_counts())
 .unnest('hair_color')
 .unnest('hair_color')
 )

hair_color,skin_color,eye_color,count
str,str,str,u32
"""none""","""dark""","""blue""",1
"""none""","""blue, grey""","""yellow""",1
"""none""","""pale""","""pink""",1
,"""gold""","""yellow""",1
"""none""","""blue""","""hazel""",1
…,…,…,…
"""black""","""blue, grey""","""yellow""",1
"""none""","""none""","""unknown""",1
"""black""","""fair""","""brown""",2
"""brown""","""light""","""blue""",1


In [14]:
# starwars %>%
#   filter(if_any(everything(), ~ !is.na(.x)))
starwars.filter(pl.any_horizontal(pl.all().is_not_null()))

name,height,mass,hair_color,skin_color,eye_color,birth_year,sex,gender,homeworld,species,films,vehicles,starships
str,f64,f64,str,str,str,f64,str,str,str,str,str,str,str
"""Luke Skywalker""",172.0,77.0,"""blond""","""fair""","""blue""",19.0,"""male""","""masculine""","""Tatooine""","""Human""","""A New Hope, The Empire Strikes…","""Snowspeeder, Imperial Speeder …","""X-wing, Imperial shuttle"""
"""C-3PO""",167.0,75.0,,"""gold""","""yellow""",112.0,"""none""","""masculine""","""Tatooine""","""Droid""","""A New Hope, The Empire Strikes…",,
"""R2-D2""",96.0,32.0,,"""white, blue""","""red""",33.0,"""none""","""masculine""","""Naboo""","""Droid""","""A New Hope, The Empire Strikes…",,
"""Darth Vader""",202.0,136.0,"""none""","""white""","""yellow""",41.9,"""male""","""masculine""","""Tatooine""","""Human""","""A New Hope, The Empire Strikes…",,"""TIE Advanced x1"""
"""Leia Organa""",150.0,49.0,"""brown""","""light""","""brown""",19.0,"""female""","""feminine""","""Alderaan""","""Human""","""A New Hope, The Empire Strikes…","""Imperial Speeder Bike""",
…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Finn""",,,"""black""","""dark""","""dark""",,"""male""","""masculine""",,"""Human""","""The Force Awakens""",,
"""Rey""",,,"""brown""","""light""","""hazel""",,"""female""","""feminine""",,"""Human""","""The Force Awakens""",,
"""Poe Dameron""",,,"""brown""","""light""","""brown""",,"""male""","""masculine""",,"""Human""","""The Force Awakens""",,"""X-wing"""
"""BB8""",,,"""none""","""none""","""black""",,"""none""","""masculine""",,"""Droid""","""The Force Awakens""",,


Let's look at another solution, from [Stack Overflow](https://stackoverflow.com/questions/63200530/python-pandas-equivalent-to-dplyr-1-0-0-summarizeacross):

In [17]:
# "https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv"
cars = pl.read_csv('Data_files/cars.csv')
cars.head()

model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
str,f64,i64,f64,i64,f64,f64,f64,i64,i64,i64,i64
"""Mazda RX4""",21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
"""Mazda RX4 Wag""",21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
"""Datsun 710""",22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
"""Hornet 4 Drive""",21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
"""Hornet Sportabout""",18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


In [20]:
# dplyr:
# dat <- group_by(mtcars, cyl)
# summarize(dat, across(ends_with('p'), sum), across(ends_with('t'), mean))
cars.group_by('cyl').agg(cs.ends_with('p').mean(), cs.ends_with('t').sum())

cyl,disp,hp,drat,wt
i64,f64,f64,f64,f64
6,183.314286,122.285714,25.1,21.82
4,105.136364,82.636364,44.78,25.143
8,353.1,209.214286,45.21,55.989


## Comments
<script src="https://utteranc.es/client.js"
        repo="samukweku/data-wrangling-blog"
        issue-term="title"
        theme="github-light"
        crossorigin="anonymous"
        async>
</script>